Forecasting wind conditions.
Introduction
Selecting the nearest weather station isn’t just a box to check—it’s the difference between a rocket soaring smoothly and one spiraling into a field, leaving you to pick up the pieces later.
Wind conditions can shift dramatically within a few kilometers, and relying on distant stations is like trusting yesterday’s lottery numbers to predict tomorrow’s storm. For launch sites, hyperlocal data is everything. Machine learning steps in as the unsung hero here, crunching historical patterns, real-time feeds, and topographical quirks from the closest sensors to forecast wind behavior that generic models might miss. It’s not just about avoiding bad weather; it’s about rewriting the rules of when—and where—a smaller rocket can defy the breeze.
Selecting the nearest weather station to the launch site.
The data for this example can be obtained here.
Stations_id | von_datum | bis_datum | Stationshoehe | geoBreite | geoLaenge | Stationsname | Bundesland | Abgabe |
---|---|---|---|---|---|---|---|---|
12345 | 20100115 | 20190630 | 325 | 51.4872 | 9.7531 | Bergdorf | Sachsen | Frei |
67890 | 19950212 | 20210428 | 178 | 48.6291 | 11.3427 | Wiesenau | Bayern | Frei |
24680 | 20030917 | 20250305 | 86 | 53.8712 | 7.4893 | Nordhafen | Niedersachsen | Frei |
… | … | … | … | … | … | … | … | … |
Table showing first 3 rows of 1188 weather station records
The Haversine formula can be used to find the nearest station based on a target latitude/longitude.
import math
def haversine(lat1, lon1, lat2, lon2, radius_km=6371):
    """
    Calculate the great-circle distance between two points
    on the Earth (specified in decimal degrees).

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.
        radius_km: Sphere radius in kilometres; defaults to 6371.

    Returns:
        The distance along the sphere's surface, in the same unit as
        ``radius_km``.
    """
    lat1, lon1, lat2, lon2 = map(math.radians, (lat1, lon1, lat2, lon2))

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    )
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return radius_km * c
def find_nearest_location(file_path, target_lat, target_lon):
    """
    Scan a whitespace-separated station list and return the station
    closest to the target coordinates.

    Each line is split on whitespace; fields 4 and 5 are read as latitude
    and longitude, field 6 as the station name and field 7 as the state.
    Lines with fewer than 8 fields, or whose coordinate fields are not
    numeric (e.g. header and separator rows), are skipped.

    Args:
        file_path: Path to the station list text file.
        target_lat: Target latitude in decimal degrees.
        target_lon: Target longitude in decimal degrees.

    Returns:
        A dict with keys 'name', 'state', 'latitude', 'longitude' and
        'distance' for the nearest station, or None if no line could be
        parsed.
    """
    min_distance = float('inf')
    nearest_location = None

    with open(file_path, 'r') as file:
        for line in file:
            parts = line.strip().split()

            if len(parts) < 8:
                continue

            try:
                lat = float(parts[4])
                lon = float(parts[5])
            except ValueError:
                # Non-numeric coordinates: header, separator or malformed row.
                continue

            # NOTE(review): splitting on whitespace means a station name that
            # itself contains spaces would spill into the 'state' field --
            # confirm against the real station list.
            name = parts[6]
            state = parts[7]

            distance = haversine(lat, lon, target_lat, target_lon)

            if distance < min_distance:
                min_distance = distance
                nearest_location = {
                    'name': name,
                    'state': state,
                    'latitude': lat,
                    'longitude': lon,
                    'distance': distance
                }

    return nearest_location
# Replace both placeholders with the launch-site coordinates (decimal degrees)
# before running; left as None, the distance calculation raises a TypeError.
target_lat = None  # insert your target latitude
target_lon = None  # insert your target longitude

nearest = find_nearest_location("../data/weather_station.txt", target_lat, target_lon)

if nearest:
    print(f"Nearest location: {nearest['name']}, {nearest['state']}")
    print(f"Coordinates: ({nearest['latitude']:.6f}, {nearest['longitude']:.6f})")
    print(f"Distance: {nearest['distance']:.2f} km")
else:
    print("No locations found or data file is empty")
Producing the following:
Nearest location: Nordhafen, Niedersachsen
Coordinates: (53.871200, 7.489300)
Distance: 7.10 km
Stations_id von_datum bis_datum Stationshoehe geoBreite geoLaenge Stationsname Bundesland Abgabe
----------- --------- --------- ------------- --------- --------- ----------------------------------------- ---------- ------
24680 19520101 20250305 7 53.8712 7.4893 Nordhafen Niedersachsen Frei
Training the model (no GPU required!)
Data: https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/subdaily/wind/
Parameters that are critical for predicting wind speed are ambient temperature and pressure.
import pandas as pd
# Load the wind observations; DK_TER is read as text so it can be stripped
# of surrounding whitespace before the integer conversion below.
wind_data = pd.read_csv('../data/station_704_wind.txt', sep=';', dtype={'DK_TER': str})

wind_data.rename(columns={
    'STATIONS_ID': 'station_id',
    'MESS_DATUM': 'timestamp',
    'QN_4': 'quality_flag',
    'DK_TER': 'dk_ter',
    'FK_TER': 'fk_ter',
    'eor': 'end_of_record'
}, inplace=True)

wind_data['dk_ter'] = wind_data['dk_ter'].str.strip().astype(int)
# Drop rows whose dk_ter is the -999 marker. The explicit copy keeps the
# column assignments below from operating on a slice of the original frame.
wind_data = wind_data[wind_data['dk_ter'] != -999].copy()

# Append '00' as the minutes part so the value parses with the
# '%Y%m%d%H%M' format.
wind_data['timestamp'] = wind_data['timestamp'].astype(str) + '00'
wind_data['timestamp'] = pd.to_datetime(wind_data['timestamp'], format='%Y%m%d%H%M')

wind_data.drop(columns=['end_of_record', 'quality_flag'], inplace=True)
# Load the pressure observations.
pressure_data = pd.read_csv('../data/station_704_pressure.txt', sep=';')

pressure_data.rename(columns={
    'STATIONS_ID': 'station_id',
    'MESS_DATUM': 'timestamp',
    'QN_4': 'quality_flag',
    'PP_TER': 'pressure',
    'eor': 'end_of_record'
}, inplace=True)

# Values that cannot be parsed as numbers become NaN.
pressure_data['pressure'] = pd.to_numeric(pressure_data['pressure'], errors='coerce')

# Append '00' as the minutes part so the value parses with the
# '%Y%m%d%H%M' format.
pressure_data['timestamp'] = pressure_data['timestamp'].astype(str) + '00'
pressure_data['timestamp'] = pd.to_datetime(pressure_data['timestamp'], format='%Y%m%d%H%M')

pressure_data.drop(columns=['end_of_record', 'quality_flag'], inplace=True)
# Load the temperature and humidity observations.
temp_data = pd.read_csv('../data/station_704_temp.txt', sep=';')

temp_data.rename(columns={
    'STATIONS_ID': 'station_id',
    'MESS_DATUM': 'timestamp',
    'QN_4': 'quality_flag',
    'TT_TER': 'temperature',
    'RF_TER': 'humidity',
    'eor': 'end_of_record'
}, inplace=True)

# Values that cannot be parsed as numbers become NaN.
temp_data['temperature'] = pd.to_numeric(temp_data['temperature'], errors='coerce')
temp_data['humidity'] = pd.to_numeric(temp_data['humidity'], errors='coerce')

# Append '00' as the minutes part so the value parses with the
# '%Y%m%d%H%M' format.
temp_data['timestamp'] = temp_data['timestamp'].astype(str) + '00'
temp_data['timestamp'] = pd.to_datetime(temp_data['timestamp'], format='%Y%m%d%H%M')

temp_data.drop(columns=['end_of_record', 'quality_flag'], inplace=True)
# Inner joins keep only the (timestamp, station_id) pairs present in all
# three tables.
merged_data = wind_data.merge(pressure_data, on=['timestamp', 'station_id'], how='inner')
merged_data = merged_data.merge(temp_data, on=['timestamp', 'station_id'], how='inner')

# Fill gaps (NaN) in the predictor columns: interpolate first, then
# forward-fill whatever is still missing.
merged_data[['pressure', 'temperature', 'humidity']] = merged_data[['pressure', 'temperature', 'humidity']].interpolate().ffill()

print(merged_data.head())
Model accuracy
# pyplot is used here before the only import of it that is visible in this
# file, so import it where it is first needed.
import matplotlib.pyplot as plt

# Number of trailing test samples to display.
n_points = 100

plt.figure(figsize=(10, 5))
plt.plot(
    test_y.index[-n_points:],
    test_y[-n_points:],
    label='Actual', color='blue'
)
plt.plot(
    test_y.index[-n_points:],
    test_pred[-n_points:],
    label='Predicted', color='red', alpha=0.7
)
plt.title(f"Model Predictions vs. Actual Values (Last {n_points} points)")
plt.xlabel("Index / Time")
plt.ylabel("Target Variable")
plt.legend()
plt.show()
Forecast
import pandas as pd
from datetime import datetime
# Column-wise means of the base features; used later as fallback values
# for features that cannot be computed.
historical_avg = merged_data[['pressure', 'temperature', 'humidity', 'hour', 'day_of_year', 'day_of_week']].mean()

# The means do not depend on the lag, so compute them once.
fk_ter_mean = merged_data['fk_ter'].mean()
pressure_mean = merged_data['pressure'].mean()
temperature_mean = merged_data['temperature'].mean()
humidity_mean = merged_data['humidity'].mean()

for lag in range(1, 25):
    historical_avg[f'fk_ter_lag_{lag}'] = fk_ter_mean
    historical_avg[f'pressure_lag_{lag}'] = pressure_mean
    historical_avg[f'temperature_lag_{lag}'] = temperature_mean
    historical_avg[f'humidity_lag_{lag}'] = humidity_mean

historical_avg['fk_ter_rolling_mean'] = fk_ter_mean
historical_avg['fk_ter_rolling_std'] = merged_data['fk_ter'].std()

# Reorder (and restrict) the entries to the training feature columns.
historical_avg = historical_avg[train_X.columns]
# Hourly timestamps covering May 2025. The lowercase 'h' alias is used
# because the uppercase 'H' alias is deprecated in recent pandas releases.
future_dates = pd.date_range(start='2025-05-01', end='2025-05-31 23:00:00', freq='h')
future_df = pd.DataFrame({'timestamp': future_dates})

# Calendar features derived from the timestamp.
future_df['hour'] = future_df['timestamp'].dt.hour
future_df['day_of_year'] = future_df['timestamp'].dt.dayofyear
future_df['day_of_week'] = future_df['timestamp'].dt.dayofweek

# Last 72 rows of the merged history, used to seed the forecast window.
last_historical = merged_data.iloc[-24*3:]
def forecast_month(model, last_historical, future_dates,
                   feature_columns=None, fill_values=None):
    """
    Produce one prediction per timestamp by feeding each prediction back
    into the window used to build the next step's features.

    For every timestamp a single-row feature frame is built from the
    current window (calendar fields, the latest pressure / temperature /
    humidity, 24-row rolling mean and std of 'fk_ter', and lags 1..24 of
    all four series). Missing features are filled from ``fill_values``,
    the model is asked for one prediction, and a new row holding that
    prediction (with the last known pressure / temperature / humidity
    carried forward) is appended to the window, which is then trimmed to
    its last 24 rows.

    Args:
        model: Object exposing ``predict(DataFrame)`` whose first returned
            element is used as the prediction.
        last_historical: DataFrame with 'fk_ter', 'pressure',
            'temperature' and 'humidity' columns seeding the window.
        future_dates: Iterable of timestamps exposing ``hour``,
            ``dayofyear`` and ``dayofweek``.
        feature_columns: Column order expected by the model. Defaults to
            the module-level ``train_X.columns``.
        fill_values: Per-column fallback values for missing features.
            Defaults to the module-level ``historical_avg``.

    Returns:
        A list with one prediction per entry of ``future_dates``.
    """
    import numpy as np  # no numpy import is visible elsewhere in this file

    if feature_columns is None:
        feature_columns = train_X.columns
    if fill_values is None:
        fill_values = historical_avg

    series_names = ('fk_ter', 'pressure', 'temperature', 'humidity')

    forecast = []
    current_window = last_historical.copy()

    for ts in future_dates:
        window_len = len(current_window)
        has_rows = window_len > 0
        has_full_day = window_len >= 24

        features = {
            'hour': ts.hour,
            'day_of_year': ts.dayofyear,
            'day_of_week': ts.dayofweek,
            'pressure': current_window['pressure'].iloc[-1] if has_rows else np.nan,
            'temperature': current_window['temperature'].iloc[-1] if has_rows else np.nan,
            'humidity': current_window['humidity'].iloc[-1] if has_rows else np.nan,
            'fk_ter_rolling_mean': current_window['fk_ter'].rolling(window=24).mean().iloc[-1] if has_full_day else np.nan,
            'fk_ter_rolling_std': current_window['fk_ter'].rolling(window=24).std().iloc[-1] if has_full_day else np.nan,
        }

        for lag in range(1, 25):
            lag_available = window_len >= lag
            for series in series_names:
                features[f'{series}_lag_{lag}'] = (
                    current_window[series].iloc[-lag] if lag_available else np.nan
                )

        # Selecting by `feature_columns` fixes the column order the model sees.
        feature_df = pd.DataFrame([features], columns=feature_columns)
        feature_df = feature_df.fillna(fill_values)

        pred = model.predict(feature_df)[0]
        forecast.append(pred)

        # Feed the prediction back in; the other series are carried forward
        # unchanged because no future values exist for them.
        new_row = pd.DataFrame([{
            'timestamp': ts,
            'fk_ter': pred,
            'pressure': features['pressure'],
            'temperature': features['temperature'],
            'humidity': features['humidity'],
        }])
        current_window = pd.concat([current_window, new_row]).iloc[-24:]

    return forecast
# np is used below but no numpy import is visible elsewhere in this file.
import numpy as np

may_2025_predictions = forecast_month(model, last_historical, future_dates)

future_df['predicted_wind_speed'] = may_2025_predictions

# A single scalar -- the standard deviation of the last 24 predictions --
# is broadcast to every row as the uncertainty band half-width.
future_df['uncertainty'] = np.std(may_2025_predictions[-24:])
import matplotlib.pyplot as plt
plt.figure(figsize=(15, 6))
plt.plot(future_df['timestamp'], future_df['predicted_wind_speed'], label='Predicted')
# Shaded band: prediction plus/minus the uncertainty column.
plt.fill_between(
    future_df['timestamp'],
    future_df['predicted_wind_speed'] - future_df['uncertainty'],
    future_df['predicted_wind_speed'] + future_df['uncertainty'],
    alpha=0.2
)
plt.title('May 2025 Wind Speed Forecast')
plt.xlabel('Date')
plt.ylabel('Wind Speed (m/s)')
plt.legend()
plt.show()