Trajectory Forecasting with Gated Recurrent Unit Autoencoders
def qry_database(dbname, start_time, stop_time):
d_threshold = 200000 # max distance (in meters) between two messages before assuming separate tracks
s_threshold = 50 # max speed (in knots) between two AIS messages before splitting tracks
t_threshold = timedelta(hours=24) # max time (in hours) between messages of a track
try:
with aisdb.DBConn() as dbconn:
tracks = aisdb.TrackGen(
aisdb.DBQuery(
dbconn=dbconn, dbpath=os.path.join(ROOT, PATH, dbname),
callback=aisdb.database.sql_query_strings.in_timerange,
start=start_time, end=stop_time).gen_qry(),
decimate=False) # trajectory compression disabled
tracks = aisdb.split_timedelta(tracks, t_threshold) # split trajectories by time without AIS message transmission
tracks = aisdb.encode_greatcircledistance(tracks, distance_threshold=d_threshold, speed_threshold=s_threshold)
tracks = aisdb.interp_time(tracks, step=timedelta(minutes=5)) # interpolate every n-minutes
# tracks = vessel_info(tracks, dbconn=dbconn) # scrapes vessel metadata
return list(tracks) # list of segmented pre-processed tracks
except SyntaxError as e: return [] # no results for query

def get_tracks(dbname, start_ddmmyyyy, stop_ddmmyyyy):
stop_time = datetime.strptime(stop_ddmmyyyy, "%d%m%Y")
start_time = datetime.strptime(start_ddmmyyyy, "%d%m%Y")
# returns a list with all tracks from AISdb
return qry_database(dbname, start_time, stop_time)

def batch_tracks(dbname, start_ddmmyyyy, stop_ddmmyyyy, hours2batch):
stop_time = datetime.strptime(stop_ddmmyyyy, "%d%m%Y")
start_time = datetime.strptime(start_ddmmyyyy, "%d%m%Y")
# yields a list of results every delta_time iterations
delta_time = timedelta(hours=hours2batch)
anchor_time, next_time = start_time, start_time + delta_time
while next_time < stop_time:
yield qry_database(dbname, anchor_time, next_time)
anchor_time = next_time
next_time += delta_time
# yields a list of final results (if any)
yield qry_database(dbname, anchor_time, stop_time)
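
# Usage sketch (not part of the original pipeline; the database name and dates below are hypothetical):
# tracks = get_tracks("ais_eastcoast.db", "01062018", "30062018")  # one month of curated track segments
# for batch in batch_tracks("ais_eastcoast.db", "01062018", "30062018", hours2batch=24):
#     print(f"{len(batch)} track segments in this 24h batch")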

# Create the map with specific latitude and longitude limits and a Mercator projection
m = Basemap(llcrnrlat=42, urcrnrlat=52, llcrnrlon=-70, urcrnrlon=-50, projection="merc", resolution="h")
# Draw state, country, coastline borders, and counties
m.drawstates(0.5)
m.drawcountries(0.5)
m.drawcoastlines(0.5)
m.drawcounties(color="gray", linewidth=0.5)
# Fill continents and oceans
m.fillcontinents(color="tan", lake_color="#91A3B0")
m.drawmapboundary(fill_color="#91A3B0")
coordinates = [
(51.26912, -57.53759), (48.92733, -58.87786),
(47.49307, -59.41325), (42.54760, -62.17624),
(43.21702, -60.49943), (44.14955, -60.59600),
(45.42599, -59.76398), (46.99134, -60.02403)]
# Draw 100km-radius circles
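# One degree of longitude spans roughly 111.32 km * cos(latitude), so the 100 km radius is
# converted to an equivalent angular radius below (an approximation that worsens at higher latitudes)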
for lat, lon in coordinates:
radius_in_degrees = 100 / (111.32 * np.cos(np.deg2rad(lat)))
m.tissot(lon, lat, radius_in_degrees, 100, facecolor="r", edgecolor="k", alpha=0.5)
# Add text annotation with an arrow pointing to the circle
plt.annotate("AIS Coverage", xy=m(lon, lat), xytext=(40, -40),
textcoords="offset points", ha="left", va="bottom", fontweight="bold",
arrowprops=dict(arrowstyle="->", color="k", alpha=0.7, lw=2.5))
# Add labels
ocean_labels = {
"Atlantic Ocean": [(-59, 44), 16],
"Gulf of\nMaine": [(-67, 44.5), 12],
"Gulf of St. Lawrence": [(-64.5, 48.5), 11],
}
for label, (coords, fontsize) in ocean_labels.items():
plt.annotate(label, xy=m(*coords), xytext=(6, 6), textcoords="offset points",
fontsize=fontsize, color="#DBE2E9", fontweight="bold")
# Add a scale in kilometers
m.drawmapscale(-67.5, 42.7, -67.5, 42.7, 500, barstyle="fancy", fontsize=8, units="km", labelstyle="simple")
# Set the map title
_ = plt.title("100km-AIS radius-coverage on Atlantic Canada", fontweight="bold")
# The circle diameter is 200km, and it does not match the km scale exactly (approximation)

land_polygons = gpd.read_file(os.path.join(ROOT, SHAPES, "ne_50m_land.shp"))

def is_on_land(lat, lon, land_polygons):
return land_polygons.contains(Point(lon, lat)).any()

def is_track_on_land(track, land_polygons):
for lat, lon in zip(track["lat"], track["lon"]):
if is_on_land(lat, lon, land_polygons):
return True
return False

def process_mmsi(item, polygons):
mmsi, tracks = item
filtered_tracks = [t for t in tracks if not is_track_on_land(t, polygons)]
return mmsi, filtered_tracks, len(tracks)

def process_voyages(voyages, land_polygons):
# Tracking progress with TQDM
def process_mmsi_callback(result, progress_bar):
mmsi, filtered_tracks, _ = result
voyages[mmsi] = filtered_tracks
progress_bar.update(1)
# Initialize the progress bar with the total number of MMSIs
progress_bar = tqdm(total=len(voyages), desc="MMSIs processed")
with ThreadPoolExecutor(max_workers=multiprocessing.cpu_count()) as executor:
# Submit all MMSIs for processing
futures = {executor.submit(process_mmsi, item, land_polygons): item for item in voyages.items()}
# Retrieve the results as they become available and update the Voyages dictionary
for future in as_completed(futures):
result = future.result()
process_mmsi_callback(result, progress_bar)
# Close the progress bar after processing completes
progress_bar.close()
return voyages
file_name = "curated-ais.pkl"
full_path = os.path.join(ROOT, ESRF, file_name)
if not os.path.exists(full_path):
voyages = process_voyages(voyages, land_polygons)
pkl.dump(voyages, open(full_path, "wb"))
else: voyages = pkl.load(open(full_path, "rb"))

voyages_counts = {k: len(voyages[k]) for k in voyages.keys()}

def plot_voyage_segments_distribution(voyages_counts, bar_color="#ba1644"):
data = pd.DataFrame({"Segments": list(voyages_counts.values())})
return alt.Chart(data).mark_bar(color=bar_color).encode(
alt.X("Segments:Q", bin=alt.Bin(maxbins=90), title="Segments"),
alt.Y("count(Segments):Q", title="Count", scale=alt.Scale(type="log")))\
.properties(title="Distribution of Voyage Segments", width=600, height=400)\
.configure_axisX(titleFontSize=16).configure_axisY(titleFontSize=16)\
.configure_title(fontSize=18).configure_view(strokeOpacity=0)
alt.data_transformers.enable("default", max_rows=None)
plot_voyage_segments_distribution(voyages_counts).display()

long_term_voyages, short_term_voyages = [], []
# Separating voyages
for k in voyages_counts:
if voyages_counts[k] < 30:
short_term_voyages.append(k)
else: long_term_voyages.append(k)
# Shuffling for random distribution
random.shuffle(short_term_voyages)
random.shuffle(long_term_voyages)

train_voyage, test_voyage = {}, {}
# Iterate over short-term voyages:
for i, k in enumerate(short_term_voyages):
if i < int(0.8 * len(short_term_voyages)):
train_voyage[k] = voyages[k]
else: test_voyage[k] = voyages[k]
# Iterate over long-term voyages:
for i, k in enumerate(long_term_voyages):
if i < int(0.8 * len(long_term_voyages)):
train_voyage[k] = voyages[k]
else: test_voyage[k] = voyages[k]

def plot_voyage_length_distribution(data, title, bar_color, min_time=144, force_print=True):
total_time = []
for key in data.keys():
for track in data[key]:
if len(track["time"]) > min_time or force_print:
total_time.append(len(track["time"]))
plot_data = pd.DataFrame({'Length': total_time})
chart = alt.Chart(plot_data).mark_bar(color=bar_color).encode(
alt.Y("count(Length):Q", title="Count", scale=alt.Scale(type="symlog")),
alt.X("Length:Q", bin=alt.Bin(maxbins=90), title="Length")
).properties(title=title, width=600, height=400)\
.configure_axisX(titleFontSize=16).configure_axisY(titleFontSize=16)\
.configure_title(fontSize=18).configure_view(strokeOpacity=0)
print("\n\n")
return chart
display(plot_voyage_length_distribution(train_voyage, "TRAINING: Distribution of Voyage Length", "#287561"))
display(plot_voyage_length_distribution(test_voyage, "TEST: Distribution of Voyage Length", "#3e57ab"))

INPUT_TIMESTEPS = 48 # 4 hours * 12 AIS messages/h
INPUT_VARIABLES = 4 # Longitude, Latitude, COG, and SOG
OUTPUT_TIMESTEPS = 96 # 8 hours * 12 AIS messages/h
OUTPUT_VARIABLES = 2 # Longitude and Latitude
NUM_WORKERS = multiprocessing.cpu_count()

INPUT_VARIABLES *= 2 # Double the features with deltas

def filter_and_transform_voyages(voyages):
filtered_voyages = {}
for k, v in voyages.items():
voyages_track = []
for voyage in v:
if len(voyage["time"]) > (INPUT_TIMESTEPS + OUTPUT_TIMESTEPS):
mtx = np.vstack([voyage["lon"], voyage["lat"],
voyage["cog"], voyage["sog"]]).T
# Compute deltas
deltas = np.diff(mtx, axis=0)
# Add zeros at the first row for deltas
deltas = np.vstack([np.zeros(deltas.shape[1]), deltas])
# Concatenate the original matrix with the deltas matrix
mtx = np.hstack([mtx, deltas])
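# Resulting column order: [lon, lat, cog, sog, dlon, dlat, dcog, dsog] (8 input features)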
voyages_track.append(mtx)
if len(voyages_track) > 0:
filtered_voyages[k] = voyages_track
return filtered_voyages
# Checking how the data behaves for the previously set hyperparameters
display(plot_voyage_length_distribution(train_voyage, "TRAINING: Distribution of Voyage Length", "#287561",
min_time=INPUT_TIMESTEPS + OUTPUT_TIMESTEPS, force_print=False))
display(plot_voyage_length_distribution(test_voyage, "TEST: Distribution of Voyage Length", "#3e57ab",
min_time=INPUT_TIMESTEPS + OUTPUT_TIMESTEPS, force_print=False))
# Filter and transform train and test voyages and prepare for training
train_voyage = filter_and_transform_voyages(train_voyage)
test_voyage = filter_and_transform_voyages(test_voyage)

def print_voyage_statistics(header, voyage_dict):
total_time = 0
for mmsi, trajectories in voyage_dict.items():
for trajectory in trajectories:
total_time += trajectory.shape[0]
print(f"{header}")
print(f"Hours of sequential data: {total_time // 12}.")
print(f"Number of unique MMSIs: {len(voyage_dict)}.", end=" \n\n")
return total_time
time_test = print_voyage_statistics("[TEST DATA]", test_voyage)
time_train = print_voyage_statistics("[TRAINING DATA]", train_voyage)
# The resulting split still resembles the 80-20 train-test ratio
print(f"Training hourly-rate: {(time_train * 100) / (time_train + time_test)}%")
print(f"Test hourly-rate: {(time_test * 100) / (time_train + time_test)}%")def haversine_distance(lon_1, lat_1, lon_2, lat_2):
lon_1, lat_1, lon_2, lat_2 = map(np.radians, [lon_1, lat_1, lon_2, lat_2]) # convert latitude and longitude to radians
a = np.sin((lat_2 - lat_1) / 2) ** 2 + np.cos(lat_1) * np.cos(lat_2) * np.sin((lon_2 - lon_1) / 2) ** 2
return (2 * np.arcsin(np.sqrt(a))) * 6371000 # R: 6,371,000 meters
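# Sanity check (not in the original notebook): one degree of latitude is ~111.2 km,
# so haversine_distance(0, 0, 0, 1) should return roughly 111195 (meters)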

def trajectory_straightness(x):
start_point, end_point = x[0, :2], x[-1, :2]
x_coordinates, y_coordinates = x[:-1, 0], x[:-1, 1]
x_coordinates_next, y_coordinates_next = x[1:, 0], x[1:, 1]
consecutive_distances = np.array(haversine_distance(x_coordinates, y_coordinates, x_coordinates_next, y_coordinates_next))
straight_line_distance = np.array(haversine_distance(start_point[0], start_point[1], end_point[0], end_point[1]))
result = straight_line_distance / np.sum(consecutive_distances)
return result if not np.isnan(result) else 1
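# The straightness ratio lies roughly in (0, 1]: values near 1 indicate a nearly straight track,
# lower values indicate curvier trajectories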

def process_voyage(voyage, mmsi, max_size, overlap_size=1):
straightness_ratios, mmsis, x, y = [], [], [], []
for j in range(0, voyage.shape[0] - max_size, 1):
x_sample = voyage[(0 + j):(INPUT_TIMESTEPS + j)]
y_sample = voyage[(INPUT_TIMESTEPS + j - overlap_size):(max_size + j), 0:OUTPUT_VARIABLES]
straightness = trajectory_straightness(x_sample)
straightness_ratios.append(straightness)
x.append(x_sample.T)
y.append(y_sample.T)
mmsis.append(mmsi)
return straightness_ratios, mmsis, x, y

def process_data(voyages):
max_size = INPUT_TIMESTEPS + OUTPUT_TIMESTEPS
# Callback function to update tqdm progress bar
def process_voyage_callback(result, pbar):
pbar.update(1)
return result
with Pool(NUM_WORKERS) as pool, tqdm(total=sum(len(v) for v in voyages.values()), desc="Voyages") as pbar:
results = []
# Submit tasks to the pool and store the results
for mmsi in voyages:
for voyage in voyages[mmsi]:
callback = partial(process_voyage_callback, pbar=pbar)
results.append(pool.apply_async(process_voyage, (voyage, mmsi, max_size), callback=callback))
pool.close()
pool.join()
# Gather the results
straightness_ratios, mmsis, x, y = [], [], [], []
for result in results:
s_ratios, s_mmsis, s_x, s_y = result.get()
straightness_ratios.extend(s_ratios)
mmsis.extend(s_mmsis)
x.extend(s_x)
y.extend(s_y)
# Process the results
x, y = np.stack(x), np.stack(y)
x, y = np.transpose(x, (0, 2, 1)), np.transpose(y, (0, 2, 1))
straightness_ratios = np.array(straightness_ratios)
min_straightness, max_straightness = np.min(straightness_ratios), np.max(straightness_ratios)
scaled_straightness_ratios = (straightness_ratios - min_straightness) / (max_straightness - min_straightness)
scaled_straightness_ratios = 1. - scaled_straightness_ratios
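# Inverting the scaled ratios assigns weights closer to 1 to curvier (less straight) trajectories;
# these appear intended for the optional sample_weight argument commented out in the fit() calls below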
print(f"Final number of samples = {len(x)}", end="\n\n")
return mmsis, x, y, scaled_straightness_ratios
mmsi_train, x_train, y_train, straightness_ratios = process_data(train_voyage)
mmsi_test, x_test, y_test, _ = process_data(test_voyage)

def normalize_dataset(x_train, x_test, y_train,
lat_min=42, lat_max=52, lon_min=-70, lon_max=-50, max_sog=50):
def normalize(arr, min_val, max_val):
return (arr - min_val) / (max_val - min_val)
# Initial normalization
x_train[:, :, :2] = normalize(x_train[:, :, :2], np.array([lon_min, lat_min]), np.array([lon_max, lat_max]))
y_train[:, :, :2] = normalize(y_train[:, :, :2], np.array([lon_min, lat_min]), np.array([lon_max, lat_max]))
x_test[:, :, :2] = normalize(x_test[:, :, :2], np.array([lon_min, lat_min]), np.array([lon_max, lat_max]))
x_train[:, :, 2:4] = x_train[:, :, 2:4] / np.array([360, max_sog])
x_test[:, :, 2:4] = x_test[:, :, 2:4] / np.array([360, max_sog])
# Standardize X and Y
x_mean, x_std = np.mean(x_train, axis=(0, 1)), np.std(x_train, axis=(0, 1))
y_mean, y_std = np.mean(y_train, axis=(0, 1)), np.std(y_train, axis=(0, 1))
x_train = (x_train - x_mean) / x_std
y_train = (y_train - y_mean) / y_std
x_test = (x_test - x_mean) / x_std
# Final zero-one normalization
x_min, x_max = np.min(x_train, axis=(0, 1)), np.max(x_train, axis=(0, 1))
y_min, y_max = np.min(y_train, axis=(0, 1)), np.max(y_train, axis=(0, 1))
x_train = (x_train - x_min) / (x_max - x_min)
y_train = (y_train - y_min) / (y_max - y_min)
x_test = (x_test - x_min) / (x_max - x_min)
return x_train, x_test, y_train, y_mean, y_std, y_min, y_max, x_mean, x_std, x_min, x_max
x_train, x_test, y_train, y_mean, y_std, y_min, y_max, x_mean, x_std, x_min, x_max = normalize_dataset(x_train, x_test, y_train)

def denormalize_y(y_data, y_mean, y_std, y_min, y_max,
lat_min=42, lat_max=52, lon_min=-70, lon_max=-50):
y_data = y_data * (y_max - y_min) + y_min # reverse zero-one normalization
y_data = y_data * y_std + y_mean # reverse standardization
# Reverse initial normalization for longitude and latitude
y_data[:, :, 0] = y_data[:, :, 0] * (lon_max - lon_min) + lon_min
y_data[:, :, 1] = y_data[:, :, 1] * (lat_max - lat_min) + lat_min
return y_data

def denormalize_x(x_data, x_mean, x_std, x_min, x_max,
lat_min=42, lat_max=52, lon_min=-70, lon_max=-50):
x_data = x_data * (x_max - x_min) + x_min # reverse zero-one normalization
x_data = x_data * x_std + x_mean # reverse standardization
# Reverse initial normalization for longitude and latitude
x_data[:, :, 0] = x_data[:, :, 0] * (lon_max - lon_min) + lon_min
x_data[:, :, 1] = x_data[:, :, 1] * (lat_max - lat_min) + lat_min
return x_data
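
# Optional sanity check (not in the original notebook): a denormalization round trip should
# recover coordinates roughly within the map bounds used above (lon in [-70, -50], lat in [42, 52]), e.g.
# x_check = denormalize_x(x_train.copy(), x_mean, x_std, x_min, x_max)
# print(x_check[..., 0].min(), x_check[..., 0].max())  # approximate longitude extent of the training data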

tf.keras.backend.clear_session() # Clear the Keras session to prevent potential conflicts
_ = wandb.login(force=True) # Log in to Weights & Biases

class ProbabilisticTeacherForcing(Layer):
def __init__(self, **kwargs):
super(ProbabilisticTeacherForcing, self).__init__(**kwargs)
def call(self, inputs):
decoder_gt_input, decoder_output, mixing_prob = inputs
mixing_prob = tf.expand_dims(mixing_prob, axis=-1) # Add an extra dimension for broadcasting
mixing_prob = tf.broadcast_to(mixing_prob, tf.shape(decoder_gt_input)) # Broadcast to match the shape
return tf.where(tf.random.uniform(tf.shape(decoder_gt_input)) < mixing_prob, decoder_gt_input, decoder_output)
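# At each position the layer samples uniform noise and keeps the ground-truth value where the draw falls
# below mixing_prob, otherwise the model's own prediction: mixing_prob=1.0 gives classic teacher forcing,
# while mixing_prob=0.0 feeds back only the model's output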

def build_model(rnn_unit="GRU", hidden_size=64):
encoder_input = Input(shape=(INPUT_TIMESTEPS, INPUT_VARIABLES), name="Encoder_Input")
decoder_gt_input = Input(shape=((OUTPUT_TIMESTEPS - 1), OUTPUT_VARIABLES), name="Decoder-GT-Input")
mixing_prob_input = Input(shape=(1,), name="Mixing_Probability")
# Encoder
encoder_gru = eval(rnn_unit)(hidden_size, activation="relu", name="Encoder")(encoder_input)
repeat_vector = RepeatVector((OUTPUT_TIMESTEPS - 1), name="Repeater")(encoder_gru)
# Inference Decoder
decoder_gru = eval(rnn_unit)(hidden_size, activation="relu", return_sequences=True, name="Decoder")
decoder_output = decoder_gru(repeat_vector, initial_state=encoder_gru)
# Adjust decoder_output shape
dense_output_adjust = TimeDistributed(Dense(OUTPUT_VARIABLES), name="Output_Adjust")
adjusted_decoder_output = dense_output_adjust(decoder_output)
# Training Decoder
decoder_gru_tf = eval(rnn_unit)(hidden_size, activation="relu", return_sequences=True, name="Decoder-TF")
probabilistic_tf_layer = ProbabilisticTeacherForcing(name="Probabilistic_Teacher_Forcing")
mixed_input = probabilistic_tf_layer([decoder_gt_input, adjusted_decoder_output, mixing_prob_input])
tf_output = decoder_gru_tf(mixed_input, initial_state=encoder_gru)
tf_output = dense_output_adjust(tf_output) # Use dense_output_adjust layer for training output
training_model = Model(inputs=[encoder_input, decoder_gt_input, mixing_prob_input], outputs=tf_output, name="Training")
inference_model = Model(inputs=encoder_input, outputs=adjusted_decoder_output, name="Inference")
return training_model, inference_model
training_model, model = build_model()

def denormalize_y(y_data, y_mean, y_std, y_min, y_max, lat_min=42, lat_max=52, lon_min=-70, lon_max=-50):
scales = tf.constant([lon_max - lon_min, lat_max - lat_min], dtype=tf.float32)
biases = tf.constant([lon_min, lat_min], dtype=tf.float32)
# Reverse zero-one normalization and standardization
y_data = y_data * (y_max - y_min) + y_min
y_data = y_data * y_std + y_mean
# Reverse initial normalization for longitude and latitude
return y_data * scales + biases

def haversine_distance(lon1, lat1, lon2, lat2):
lon1, lat1, lon2, lat2 = [tf.math.multiply(x, tf.divide(tf.constant(np.pi), 180.)) for x in [lon1, lat1, lon2, lat2]] # lat and lon to radians
a = tf.math.square(tf.math.sin((lat2 - lat1) / 2.)) + tf.math.cos(lat1) * tf.math.cos(lat2) * tf.math.square(tf.math.sin((lon2 - lon1) / 2.))
return 2 * 6371000 * tf.math.asin(tf.math.sqrt(a)) # Earth radius: 6,371,000 meters

def custom_loss(y_true, y_pred):
tf.debugging.check_numerics(y_true, "y_true contains NaNs")
tf.debugging.check_numerics(y_pred, "y_pred contains NaNs")
# Denormalize true and predicted y
y_true_denorm = denormalize_y(y_true, y_mean, y_std, y_min, y_max)
y_pred_denorm = denormalize_y(y_pred, y_mean, y_std, y_min, y_max)
# Compute haversine distance for true and predicted y from the second time-step
true_dist = haversine_distance(y_true_denorm[:, 1:, 0], y_true_denorm[:, 1:, 1], y_true_denorm[:, :-1, 0], y_true_denorm[:, :-1, 1])
pred_dist = haversine_distance(y_pred_denorm[:, 1:, 0], y_pred_denorm[:, 1:, 1], y_pred_denorm[:, :-1, 0], y_pred_denorm[:, :-1, 1])
# Convert maximum speed from knots to meters per 5 minutes
max_speed_m_per_5min = 50 * 1.852 * 1000 * 5 / 60
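# 50 kn * 1.852 km/h per knot = 92.6 km/h, i.e. about 7,716.7 m per 5-minute step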
# Compute the difference in distances
dist_diff = tf.abs(true_dist - pred_dist)
# Apply penalty if the predicted distance is greater than the maximum possible distance
dist_diff = tf.where(pred_dist > max_speed_m_per_5min, pred_dist - max_speed_m_per_5min, dist_diff)
# Penalty for the first output coordinate not being the same as the last input
input_output_diff = haversine_distance(y_true_denorm[:, 0, 0], y_true_denorm[:, 0, 1], y_pred_denorm[:, 0, 0], y_pred_denorm[:, 0, 1])
# Compute RMSE excluding the first element
rmse = K.sqrt(K.mean(K.square(y_true_denorm[:, 1:, :] - y_pred_denorm[:, 1:, :]), axis=1))
tf.debugging.check_numerics(y_true_denorm, "y_true_denorm contains NaNs")
tf.debugging.check_numerics(y_pred_denorm, "y_pred_denorm contains NaNs")
tf.debugging.check_numerics(true_dist, "true_dist contains NaNs")
tf.debugging.check_numerics(pred_dist, "pred_dist contains NaNs")
tf.debugging.check_numerics(dist_diff, "dist_diff contains NaNs")
tf.debugging.check_numerics(input_output_diff, "input_output_diff contains NaNs")
tf.debugging.check_numerics(rmse, "rmse contains NaNs")
# Final loss with weights
# return 0.25 * K.mean(input_output_diff) + 0.35 * K.mean(dist_diff) + 0.40 * K.mean(rmse)
return K.mean(rmse)

def compile_model(model, learning_rate, clipnorm, jit_compile, skip_summary=False):
optimizer = AdamW(learning_rate=learning_rate, clipnorm=clipnorm, jit_compile=jit_compile)
model.compile(optimizer=optimizer, loss=custom_loss, metrics=["mae", "mape"], weighted_metrics=[], jit_compile=jit_compile)
if not skip_summary: model.summary() # print a summary of the model architecture
compile_model(training_model, learning_rate=0.001, clipnorm=1, jit_compile=True)
compile_model(model, learning_rate=0.001, clipnorm=1, jit_compile=True)

Model: "Training"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
Encoder_Input (InputLayer) [(None, 48, 8)] 0 []
Encoder (GRU) (None, 64) 14208 ['Encoder_Input[0][0]']
Repeater (RepeatVector) (None, 95, 64) 0 ['Encoder[0][0]']
Decoder (GRU) (None, 95, 64) 24960 ['Repeater[0][0]',
'Encoder[0][0]']
Output_Adjust (TimeDistributed (None, 95, 2) 130 ['Decoder[0][0]',
) 'Decoder-TF[0][0]']
Decoder-GT-Input (InputLayer) [(None, 95, 2)] 0 []
Mixing_Probability (InputLayer [(None, 1)] 0 []
)
Probabilistic_Teacher_Forcing (None, 95, 2) 0 ['Decoder-GT-Input[0][0]',
(ProbabilisticTeacherForcing) 'Output_Adjust[0][0]',
'Mixing_Probability[0][0]']
Decoder-TF (GRU) (None, 95, 64) 13056 ['Probabilistic_Teacher_Forcing[0
][0]',
'Encoder[0][0]']
==================================================================================================
Total params: 52,354
Trainable params: 52,354
Non-trainable params: 0
__________________________________________________________________________________________________
Model: "Inference"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
Encoder_Input (InputLayer) [(None, 48, 8)] 0 []
Encoder (GRU) (None, 64) 14208 ['Encoder_Input[0][0]']
Repeater (RepeatVector) (None, 95, 64) 0 ['Encoder[0][0]']
Decoder (GRU) (None, 95, 64) 24960 ['Repeater[0][0]',
'Encoder[0][0]']
Output_Adjust (TimeDistributed (None, 95, 2) 130 ['Decoder[0][0]']
)
==================================================================================================
Total params: 39,298
Trainable params: 39,298
Non-trainable params: 0
__________________________________________________________________________________________________

def create_callbacks(model_name, monitor="val_loss", factor=0.2, lr_patience=3, ep_patience=12, min_lr=0, verbose=0, restore_best_weights=True, skip_wandb=False):
return ([wandb.keras.WandbMetricsLogger()] if not skip_wandb else []) + [#tf.keras.callbacks.TerminateOnNaN(),
ReduceLROnPlateau(monitor=monitor, factor=factor, patience=lr_patience, min_lr=min_lr, verbose=verbose),
EarlyStopping(monitor=monitor, patience=ep_patience, verbose=verbose, restore_best_weights=restore_best_weights),
tf.keras.callbacks.ModelCheckpoint(os.path.join(ROOT, MODELS, model_name), monitor="val_loss", mode="min", save_best_only=True, verbose=verbose)]

def train_model(model, x_train, y_train, batch_size, epochs, validation_split, model_name):
run = wandb.init(project="kAISdb", anonymous="allow") # start the wandb run
# Set the initial mixing probability
mixing_prob = 0.5
# Update y_train to have the same dimensions as the output
y_train = y_train[:, :(OUTPUT_TIMESTEPS - 1), :]
# Create the ground truth input for the decoder by appending a padding at the beginning of the sequence
decoder_ground_truth_input_data = (np.zeros((y_train.shape[0], 1, y_train.shape[2])), y_train[:, :-1, :])
decoder_ground_truth_input_data = np.concatenate(decoder_ground_truth_input_data, axis=1)
try:
# Train the model with Teacher Forcing
with tf.device(tf.test.gpu_device_name()):
training_model.fit([x_train, decoder_ground_truth_input_data, np.full((x_train.shape[0], 1), mixing_prob)], y_train, batch_size=batch_size, epochs=epochs,
verbose=2, validation_split=validation_split, callbacks=create_callbacks(model_name))
# , sample_weight=straightness_ratios)
except KeyboardInterrupt as e:
print("\nRestoring best weights [...]")
# Load the weights of the teacher-forcing model
training_model.load_weights(os.path.join(ROOT, MODELS, model_name)) # checkpoints are saved under ROOT/MODELS
# Transfering the weights to the inference model
for layer in model.layers:
if layer.name in [l.name for l in training_model.layers]:
layer.set_weights(training_model.get_layer(layer.name).get_weights())
run.finish() # finish the wandb run
model_name = "TF-GRU-AE.h5"
full_path = os.path.join(ROOT, MODELS, model_name)
if True: # not os.path.exists(full_path):
train_model(model, x_train, y_train, batch_size=1024,
epochs=250, validation_split=0.2,
model_name=model_name)
else:
training_model.load_weights(full_path)
for layer in model.layers: # inference model initialization
if layer.name in [l.name for l in training_model.layers]:
layer.set_weights(training_model.get_layer(layer.name).get_weights())

def evaluate_model(model, x_test, y_test, y_mean, y_std, y_min, y_max, y_pred=None):
def single_trajectory_error(y_test, y_pred, index):
distances = haversine_distance(y_test[index, :, 0], y_test[index, :, 1], y_pred[index, :, 0], y_pred[index, :, 1])
return np.min(distances), np.max(distances), np.mean(distances), np.median(distances)
# Modify this function to handle teacher-forced models with 95 output variables instead of 96
def all_trajectory_error(y_test, y_pred):
errors = [single_trajectory_error(y_test[:, 1:], y_pred, i) for i in range(y_test.shape[0])]
min_errors, max_errors, mean_errors, median_errors = zip(*errors)
return min(min_errors), max(max_errors), np.mean(mean_errors), np.median(median_errors)
def plot_trajectory(x_test, y_test, y_pred, sample_index):
min_error, max_error, mean_error, median_error = single_trajectory_error(y_test, y_pred, sample_index)
fig = go.Figure()
fig.add_trace(go.Scatter(x=x_test[sample_index, :, 0], y=x_test[sample_index, :, 1], mode="lines", name="Input Data", line=dict(color="green")))
fig.add_trace(go.Scatter(x=y_test[sample_index, :, 0], y=y_test[sample_index, :, 1], mode="lines", name="Ground Truth", line=dict(color="blue")))
fig.add_trace(go.Scatter(x=y_pred[sample_index, :, 0], y=y_pred[sample_index, :, 1], mode="lines", name="Forecasted Trajectory", line=dict(color="red")))
fig.update_layout(title=f"Sample Index: {sample_index} | Distance Errors (in meteres):<br>Min: {min_error:.2f}m, Max: {max_error:.2f}m, "
f"Mean: {mean_error:.2f}m, Median: {median_error:.2f}m", xaxis_title="Longitude", yaxis_title="Latitude",
plot_bgcolor="#e4eaf0", paper_bgcolor="#fcfcfc", width=700, height=600)
max_lon, max_lat = -58.705587131108196, 47.89066160591873
min_lon, min_lat = -61.34247286889181, 46.09201839408127
fig.update_xaxes(range=[min_lon, max_lon])
fig.update_yaxes(range=[min_lat, max_lat])
return fig
if y_pred is None:
with tf.device(tf.test.gpu_device_name()):
y_pred = model.predict(x_test, verbose=0)
y_pred_o = y_pred # preserve the result
x_test = denormalize_x(x_test, x_mean, x_std, x_min, x_max)
y_pred = denormalize_y(y_pred_o, y_mean, y_std, y_min, y_max)
# Modify this line to handle teacher-forced models with 95 output variables instead of 96
for sample_index in [1000, 2500, 5000, 7500]:
display(plot_trajectory(x_test, y_test[:, 1:], y_pred, sample_index))
# The metrics require a lower dimension (no impact on the results)
y_test_reshaped = np.reshape(y_test[:, 1:], (-1, y_test.shape[2]))
y_pred_reshaped = np.reshape(y_pred, (-1, y_pred.shape[2]))
# Physical Distance Error given in meters
all_min_error, all_max_error, all_mean_error, all_median_error = all_trajectory_error(y_test, y_pred)
print("\nAll Trajectories Min DE: {:.4f}m".format(all_min_error))
print("All Trajectories Max DE: {:.4f}m".format(all_max_error))
print("All Trajectories Mean DE: {:.4f}m".format(all_mean_error))
print("All Trajectories Median DE: {:.4f}m".format(all_median_error))
# Calculate evaluation metrics on the test data
r2 = r2_score(y_test_reshaped, y_pred_reshaped)
mse = mean_squared_error(y_test_reshaped, y_pred_reshaped)
mae = mean_absolute_error(y_test_reshaped, y_pred_reshaped)
evs = explained_variance_score(y_test_reshaped, y_pred_reshaped)
mape = mean_absolute_percentage_error(y_test_reshaped, y_pred_reshaped)
rmse = np.sqrt(mse)
print(f"\nTest R^2: {r2:.4f}")
print(f"Test MAE: {mae:.4f}")
print(f"Test MSE: {mse:.4f}")
print(f"Test RMSE: {rmse:.4f}")
print(f"Test MAPE: {mape:.4f}")
print(f"Test Explained Variance Score: {evs:.4f}")
return y_pred_o
_ = evaluate_model(model, x_test, y_test, y_mean, y_std, y_min, y_max)

def save_history(history, model_name):
history_name = model_name.replace('.h5', '.pkl')
history_name = os.path.join(ROOT, MODELS, history_name)
with open(history_name, 'wb') as f:
pkl.dump(history, f)

def load_history(model_name):
history_name = model_name.replace('.h5', '.pkl')
history_name = os.path.join(ROOT, MODELS, history_name)
with open(history_name, 'rb') as f:
history = pkl.load(f)
return history

def build_model(rnn_unit="GRU", enc_units_1=64, dec_units_1=64):
encoder_input = Input(shape=(INPUT_TIMESTEPS, INPUT_VARIABLES), name="Encoder_Input")
decoder_gt_input = Input(shape=((OUTPUT_TIMESTEPS - 1), OUTPUT_VARIABLES), name="Decoder-GT-Input")
mixing_prob_input = Input(shape=(1,), name="Mixing_Probability")
# Encoder
encoder_gru = eval(rnn_unit)(enc_units_1, activation="relu", name="Encoder")(encoder_input)
repeat_vector = RepeatVector((OUTPUT_TIMESTEPS - 1), name="Repeater")(encoder_gru)
# Inference Decoder
decoder_gru = eval(rnn_unit)(dec_units_1, activation="relu", return_sequences=True, name="Decoder")
decoder_output = decoder_gru(repeat_vector, initial_state=encoder_gru)
# Adjust decoder_output shape
dense_output_adjust = TimeDistributed(Dense(OUTPUT_VARIABLES), name="Output_Adjust")
adjusted_decoder_output = dense_output_adjust(decoder_output)
# Training Decoder
decoder_gru_tf = eval(rnn_unit)(dec_units_1, activation="relu", return_sequences=True, name="Decoder-TF")
probabilistic_tf_layer = ProbabilisticTeacherForcing(name="Probabilistic_Teacher_Forcing")
mixed_input = probabilistic_tf_layer([decoder_gt_input, adjusted_decoder_output, mixing_prob_input])
tf_output = decoder_gru_tf(mixed_input, initial_state=encoder_gru)
tf_output = dense_output_adjust(tf_output) # Use dense_output_adjust layer for training output
training_model = Model(inputs=[encoder_input, decoder_gt_input, mixing_prob_input], outputs=tf_output, name="Training")
inference_model = Model(inputs=encoder_input, outputs=adjusted_decoder_output, name="Inference")
return training_model, inference_model

def objective(hyperparams, x_train, y_train, straightness_ratios, model_prefix):
# Get the best hyperparameters from the optimization results
enc_units_1 = hyperparams["enc_units_1"]
dec_units_1 = hyperparams["dec_units_1"]
mixing_prob = hyperparams["mixing_prob"]
lr = hyperparams["learning_rate"]
# Create the model name using the best hyperparameters
model_name = f"{model_prefix}-{enc_units_1}-{dec_units_1}-{mixing_prob}-{lr}.h5"
full_path = os.path.join(ROOT, MODELS, model_name) # best model full path
# Check if the model results file with this name already exists
if not os.path.exists(full_path.replace(".h5", ".pkl")):
print(f"Saving under {model_name}.")
# Define the model architecture
training_model, _ = build_model(enc_units_1=enc_units_1, dec_units_1=dec_units_1)
compile_model(training_model, learning_rate=lr, clipnorm=1, jit_compile=True, skip_summary=True)
# Update y_train to have the same dimensions as the output
y_train = y_train[:, :(OUTPUT_TIMESTEPS - 1), :]
# Create the ground truth input for the decoder by appending a padding at the beginning of the sequence
decoder_ground_truth_input_data = (np.zeros((y_train.shape[0], 1, y_train.shape[2])), y_train[:, :-1, :])
decoder_ground_truth_input_data = np.concatenate(decoder_ground_truth_input_data, axis=1)
# Train the model on the data, using GPU if available
with tf.device(tf.test.gpu_device_name()):
history = training_model.fit([x_train, decoder_ground_truth_input_data, np.full((x_train.shape[0], 1), mixing_prob)], y_train,
batch_size=10240, epochs=250, validation_split=.2, verbose=0,
workers=multiprocessing.cpu_count(), use_multiprocessing=True,
callbacks=create_callbacks(model_name, skip_wandb=True))
#, sample_weight=straightness_ratios)
# Save the training history
save_history(history.history, model_name)
# Clear the session to release resources
del training_model; tf.keras.backend.clear_session()
else:
print("Loading pre-trained weights.")
history = load_history(model_name)
if type(history) == dict: # validation loss of the model
return {"loss": history["val_loss"][-1], "status": STATUS_OK}
else: return {"loss": history.history["val_loss"][-1], "status": STATUS_OK}def optimize_hyperparameters(max_evals, model_prefix, x_train, y_train, sample_size=5000):
def build_space(n_min=2, n_steps=9):
# Defining a custom 2^N range function
n_range = lambda n_min, n_steps: np.array(
[2**n for n in range(n_min, n_steps) if 2**n >= n_min])
# Defining the unconstrained search space
encoder_1_range = n_range(n_min, n_steps)
decoder_1_range = n_range(n_min, n_steps)
learning_rate_range = [.01, .001, .0001]
mixing_prob_range = [.25, .5, .75]
# Enforcing constraints on the search space
enc_units_1 = np.random.choice(encoder_1_range)
dec_units_1 = np.random.choice(decoder_1_range[np.where(decoder_1_range == enc_units_1)])
learning_rate = np.random.choice(learning_rate_range)
mixing_prob = np.random.choice(mixing_prob_range)
# Returns a single element of the search space
return dict(enc_units_1=enc_units_1, dec_units_1=dec_units_1, learning_rate=learning_rate, mixing_prob=mixing_prob)
# Select the search space based on a pre-set sampled random space
search_space = hp.choice("hyperparams", [build_space() for _ in range(sample_size)])
trials = Trials() # initialize Hyperopt trials
# Define the objective function for Hyperopt
fn = lambda hyperparams: objective(hyperparams, x_train, y_train, straightness_ratios, model_prefix)
# Perform Hyperopt optimization and find the best hyperparameters
best = fmin(fn=fn, space=search_space, algo=tpe.suggest, max_evals=max_evals, trials=trials)
best_hyperparams = space_eval(search_space, best)
# Get the best hyperparameters from the optimization results
enc_units_1 = best_hyperparams["enc_units_1"]
dec_units_1 = best_hyperparams["dec_units_1"]
mixing_prob = best_hyperparams["mixing_prob"]
lr = best_hyperparams["learning_rate"]
# Create the model name using the best hyperparameters
model_name = f"{model_prefix}-{enc_units_1}-{dec_units_1}-{mixing_prob}-{lr}.h5"
full_path = os.path.join(ROOT, MODELS, model_name) # best model full path
t_model, i_model = build_model(enc_units_1=enc_units_1, dec_units_1=dec_units_1)
t_model = tf.keras.models.load_model(full_path, custom_objects={"ProbabilisticTeacherForcing": ProbabilisticTeacherForcing})
for layer in i_model.layers: # inference model initialization
if layer.name in [l.name for l in t_model.layers]:
layer.set_weights(t_model.get_layer(layer.name).get_weights())
print(f"Best hyperparameters:")
print(f" Encoder units 1: {enc_units_1}")
print(f" Decoder units 1: {dec_units_1}")
print(f" Mixing proba.: {mixing_prob}")
print(f" Learning rate: {lr}")
return i_model
max_evals, model_prefix = 100, "TF-GRU"
# best_model = optimize_hyperparameters(max_evals, model_prefix, x_train, y_train)
# [NOTE] YOU CAN SKIP THIS STEP BY LOADING THE PRE-TRAINED WEIGHTS ON THE NEXT CELL.

def find_best_model(root_folder, model_prefix):
best_model_name, best_val_loss = None, float('inf')
for f in os.listdir(root_folder):
if (f.endswith(".h5") and f.startswith(model_prefix)):
try:
history = load_history(f)
# Get the validation loss
if type(history) == dict:
val_loss = history["val_loss"][-1]
else: val_loss = history.history["val_loss"][-1]
# Storing the best model
if val_loss < best_val_loss:
best_val_loss = val_loss
best_model_name = f
except: pass
# Load the best model
full_path = os.path.join(ROOT, MODELS, best_model_name)
t_model, i_model = build_model(enc_units_1=int(best_model_name.split("-")[2]), dec_units_1=int(best_model_name.split("-")[3]))
t_model = tf.keras.models.load_model(full_path, custom_objects={"ProbabilisticTeacherForcing": ProbabilisticTeacherForcing})
for layer in i_model.layers: # inference model initialization
if layer.name in [l.name for l in t_model.layers]:
layer.set_weights(t_model.get_layer(layer.name).get_weights())
# Print summary of the best model
print(f"Loss: {best_val_loss}")
i_model.summary()
return i_model
best_model = find_best_model(os.path.join(ROOT, MODELS), model_prefix)

_ = evaluate_model(best_model, x_test, y_test, y_mean, y_std, y_min, y_max)

def permutation_feature_importance(model, x_test, y_test, metric):
# Function to calculate permutation feature importance
def PFI(model, x, y_true, metric):
# Reshape the true values for easier comparison with predictions
y_true = np.reshape(y_true, (-1, y_true.shape[2]))
# Predict using the model and reshape the predicted values
with tf.device(tf.test.gpu_device_name()):
y_pred = model.predict(x, verbose=0)
y_pred = np.reshape(y_pred, (-1, y_pred.shape[2]))
# Calculate the baseline score using the given metric
baseline_score = metric(y_true, y_pred)
# Initialize an array for feature importances
feature_importances = np.zeros(x.shape[2])
# Calculate the importance for each feature
for feature_idx in range(x.shape[2]):
x_permuted = x.copy()
x_permuted[:, :, feature_idx] = np.random.permutation(x[:, :, feature_idx])
# Predict using the permuted input and reshape the predicted values
with tf.device(tf.test.gpu_device_name()):
y_pred_permuted = model.predict(x_permuted, verbose=0)
y_pred_permuted = np.reshape(y_pred_permuted, (-1, y_pred_permuted.shape[2]))
# Calculate the score with permuted input
permuted_score = metric(y_true, y_pred_permuted)
# Compute the feature importance as the difference between permuted and baseline scores
feature_importances[feature_idx] = permuted_score - baseline_score
return feature_importances
feature_importances = PFI(model, x_test, y_test, metric)
# Prepare the data for plotting (Altair requires a dataframe)
feature_names = ["Longitude", "Latitude", "COG", "SOG", "Delta Longitude", "Delta Latitude", "Delta COG", "Delta SOG"] # 8 input features: raw values plus their deltas
feature_importance_df = pd.DataFrame({"features": feature_names, "importance": feature_importances})
# Create the bar plot with Altair
bar_plot = alt.Chart(feature_importance_df).mark_bar(size=40, color="mediumblue", opacity=0.8).encode(
x=alt.X("features:N", title="Features", axis=alt.Axis(labelFontSize=12, titleFontSize=14)),
y=alt.Y("importance:Q", title="Permutation Importance", axis=alt.Axis(labelFontSize=12, titleFontSize=14)),
).properties(title=alt.TitleParams(text="Feature Importance", fontSize=16, fontWeight="bold"), width=400, height=300)
return bar_plot, feature_importances
permutation_feature_importance(best_model, x_test, y_test, mean_absolute_error)[0].display()

def sensitivity_analysis(model, x_sample, perturbation_range=(-0.1, 0.1), num_steps=10, plot_nrows=4):
# Get the number of features and outputs
num_features = x_sample.shape[1]
num_outputs = model.output_shape[-1] * model.output_shape[-2]
# Create an array of perturbations
perturbations = np.linspace(perturbation_range[0], perturbation_range[1], num_steps)
# Initialize sensitivity array
sensitivity = np.zeros((num_features, num_outputs, num_steps))
# Get the original prediction for the input sample
original_prediction = model.predict(x_sample.reshape(1, -1, x_sample.shape[-1]), verbose=0).reshape(-1)
# Iterate over input features and perturbations
for feature_idx in range(num_features):
for i, perturbation in enumerate(perturbations):
# Create a perturbed version of the input sample
perturbed_sample = x_sample.copy()
perturbed_sample[:, feature_idx] += perturbation
# Get the prediction for the perturbed input sample
perturbed_prediction = model.predict(perturbed_sample.reshape(1, -1, x_sample.shape[-1]), verbose=0).reshape(-1)
# Calculate the absolute prediction change and store it in the sensitivity array
sensitivity[feature_idx, :, i] = np.abs(perturbed_prediction - original_prediction)
# Determine the number of rows and columns in the plot
ncols = 6
nrows = max(min(plot_nrows, math.ceil(num_outputs / ncols)), 1)
# Define feature names (the inputs carry 8 features: raw values plus their deltas)
feature_names = ["Longitude", "Latitude", "COG", "SOG", "Delta Longitude", "Delta Latitude", "Delta COG", "Delta SOG"]
# Create the sensitivity plot
fig, axs = plt.subplots(nrows, ncols, figsize=(18, 3 * nrows), sharex=True, sharey=True)
axs = axs.ravel()
output_idx = 0
for row in range(nrows):
for col in range(ncols):
if output_idx < num_outputs:
# Plot sensitivity curves for each feature
for feature_idx in range(num_features):
axs[output_idx].plot(perturbations, sensitivity[feature_idx, output_idx], label=f'{feature_names[feature_idx]}')
# Set the title for each subplot
axs[output_idx].set_title(f'Output {output_idx // 2 + 1}, {"Longitude" if output_idx % 2 == 0 else "Latitude"}')
output_idx += 1
# Set common labels and legend
fig.text(0.5, 0.04, 'Perturbation', ha='center', va='center')
fig.text(0.06, 0.5, 'Absolute Prediction Change', ha='center', va='center', rotation='vertical')
handles, labels = axs[0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper center', ncol=num_features, bbox_to_anchor=(.5, .87))
plt.tight_layout()
plt.subplots_adjust(top=0.8, bottom=0.1, left=0.1, right=0.9)
plt.show()
return sensitivity
x_sample = x_test[100] # Select a sample from the test set
sensitivity = sensitivity_analysis(best_model, x_sample)

def visualize_intermediate_representations(model, x_test_subset, y_test_subset, n_neighbors=15, min_dist=0.1, n_components=2):
# Extract intermediate representations from your model
intermediate_layer_model = keras.Model(inputs=model.input, outputs=model.layers[-2].output)
intermediate_output = intermediate_layer_model.predict(x_test_subset, verbose=0)
# Flatten the last two dimensions of the intermediate_output
flat_intermediate_output = intermediate_output.reshape(intermediate_output.shape[0], -1)
# UMAP
reducer = umap.UMAP(n_neighbors=n_neighbors, min_dist=min_dist, n_components=n_components, random_state=seed_value)
umap_output = reducer.fit_transform(flat_intermediate_output)
# Convert y_test_subset to strings
y_test_str = np.array([str(label) for label in y_test_subset])
# Map string labels to colors
unique_labels = np.unique(y_test_str)
colormap = plt.cm.get_cmap('viridis', len(unique_labels))
label_to_color = {label: colormap(i) for i, label in enumerate(unique_labels)}
colors = np.array([label_to_color[label] for label in y_test_str])
# Create plot with Matplotlib
fig, ax = plt.subplots(figsize=(10, 8))
sc = ax.scatter(umap_output[:, 0], umap_output[:, 1], c=colors, s=5)
ax.set_title("UMAP Visualization", fontsize=14, fontweight="bold")
ax.set_xlabel("X Dimension", fontsize=12)
ax.set_ylabel("Y Dimension", fontsize=12)
ax.grid(True, linestyle='--', alpha=0.5)
# Add a colorbar to the plot
sm = plt.cm.ScalarMappable(cmap=colormap, norm=plt.Normalize(vmin=0, vmax=len(unique_labels)-1))
sm.set_array([])
cbar = plt.colorbar(sm, ticks=range(len(unique_labels)), ax=ax)
cbar.ax.set_yticklabels(unique_labels)
cbar.set_label("MMSIs")
plt.show()
visualize_intermediate_representations(best_model, x_test[:10000], mmsi_test[:10000], n_neighbors=10, min_dist=0.5)