Download one day of AIS data from NOAA and extract the CSV:

curl -o ./data/AIS_2020_01_01.zip https://coast.noaa.gov/htdata/CMSP/AISDataHandler/2020/AIS_2020_01_01.zip
unzip ./data/AIS_2020_01_01.zip -d ./data/
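If you prefer to do the download from Python, here is a minimal sketch using only the standard library; the URL and paths mirror the curl/unzip commands above:

import os
import urllib.request
import zipfile

os.makedirs("./data", exist_ok=True)
url = "https://coast.noaa.gov/htdata/CMSP/AISDataHandler/2020/AIS_2020_01_01.zip"
zip_path = "./data/AIS_2020_01_01.zip"

urllib.request.urlretrieve(url, zip_path)   # download the archive
with zipfile.ZipFile(zip_path) as zf:
    zf.extractall("./data/")                # extracts AIS_2020_01_01.csv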
import pandas as pd

# Read the CSV file into a pandas dataframe
df_ = pd.read_csv("./data/AIS_2020_01_01.csv", parse_dates=["BaseDateTime"])
print(df_.columns)

Index(['MMSI', 'BaseDateTime', 'LAT', 'LON', 'SOG', 'COG', 'Heading',
       'VesselName', 'IMO', 'CallSign', 'VesselType', 'Status',
       'Length', 'Width', 'Draft', 'Cargo', 'TransceiverClass'],
      dtype='object')

# Column headers for the AISdb-compatible CSV layout
list_of_headers_ = [
    "MMSI","Message_ID","Repeat_indicator","Time","Millisecond","Region","Country",
    "Base_station","Online_data","Group_code","Sequence_ID","Channel","Data_length",
    "Vessel_Name","Call_sign","IMO","Ship_Type","Dimension_to_Bow","Dimension_to_stern",
    "Dimension_to_port","Dimension_to_starboard","Draught","Destination","AIS_version",
    "Navigational_status","ROT","SOG","Accuracy","Longitude","Latitude","COG","Heading",
    "Regional","Maneuver","RAIM_flag","Communication_flag","Communication_state",
    "UTC_year","UTC_month","UTC_day","UTC_hour","UTC_minute","UTC_second",
    "Fixing_device","Transmission_control","ETA_month","ETA_day","ETA_hour","ETA_minute",
    "Sequence","Destination_ID","Retransmit_flag","Country_code","Functional_ID","Data",
    "Destination_ID_1","Sequence_1","Destination_ID_2","Sequence_2","Destination_ID_3",
    "Sequence_3","Destination_ID_4","Sequence_4","Altitude","Altitude_sensor",
    "Data_terminal","Mode","Safety_text","Non-standard_bits","Name_extension",
    "Name_extension_padding","Message_ID_1_1","Offset_1_1","Message_ID_1_2","Offset_1_2",
    "Message_ID_2_1","Offset_2_1","Destination_ID_A","Offset_A","Increment_A",
    "Destination_ID_B","offsetB","incrementB","data_msg_type","station_ID","Z_count",
    "num_data_words","health","unit_flag","display","DSC","band","msg22","offset1",
    "num_slots1","timeout1","Increment_1","Offset_2","Number_slots_2","Timeout_2",
    "Increment_2","Offset_3","Number_slots_3","Timeout_3","Increment_3","Offset_4",
    "Number_slots_4","Timeout_4","Increment_4","ATON_type","ATON_name","off_position",
    "ATON_status","Virtual_ATON","Channel_A","Channel_B","Tx_Rx_mode","Power",
    "Message_indicator","Channel_A_bandwidth","Channel_B_bandwidth","Transzone_size",
    "Longitude_1","Latitude_1","Longitude_2","Latitude_2","Station_Type",
    "Report_Interval","Quiet_Time","Part_Number","Vendor_ID","Mother_ship_MMSI",
    "Destination_indicator","Binary_flag","GNSS_status","spare","spare2","spare3","spare4"
]

# Take the first 40,000 records from the original dataframe
df = df_.iloc[0:40000]
# Create a new dataframe with the specified headers
df_new = pd.DataFrame(columns=list_of_headers_)
# Populate the new dataframe with formatted data from the original dataframe
df_new['Time'] = pd.to_datetime(df['BaseDateTime']).dt.strftime('%Y%m%d_%H%M%S')
df_new['Latitude'] = df['LAT']
df_new['Longitude'] = df['LON']
df_new['Vessel_Name'] = df['VesselName']
df_new['Call_sign'] = df['CallSign']
df_new['Ship_Type'] = df['VesselType'].fillna(0).astype(int)
df_new['Navigational_status'] = df['Status']
df_new['Draught'] = df['Draft']
df_new['Message_ID'] = 1 # Mark all rows as dynamic position reports (AIS message type 1) by default
df_new['Millisecond'] = 0
# Transfer additional columns from the original dataframe, if they exist
for col_n in df_new:
    if col_n in df.columns:  # e.g. MMSI, IMO, SOG, COG, Heading match directly
        df_new[col_n] = df[col_n]
# Extract static messages for each unique vessel
filtered_df = df_new[df_new['Ship_Type'].notnull() & (df_new['Ship_Type'] != 0)]
filtered_df = filtered_df.drop_duplicates(subset='MMSI', keep='first')
filtered_df = filtered_df.reset_index(drop=True)
filtered_df['Message_ID'] = 5 # Mark these as static voyage reports (AIS message type 5)
# Merge dynamic and static messages into a single dataframe
df_new = pd.concat([filtered_df, df_new])
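As a quick sanity check (not part of the original pipeline), the merged frame should contain one extra Message_ID 5 row per unique vessel with a known ship type:

# Counts per message type; static rows (5) come from the deduplicated vessels
print((df_new['Message_ID'] == 5).sum(), "static rows;",
      (df_new['Message_ID'] == 1).sum(), "dynamic rows;",
      len(df_new), "total")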
# Save the final dataframe to a CSV file
# The quoting parameter is needed because the CSV reader treats each column value
# as a string; quoting=1 writes every field wrapped in quotes
df_new.to_csv("./data/AIS_2020_01_01_aisdb.csv", index=False, quoting=1)
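Note that quoting=1 is simply the numeric value of csv.QUOTE_ALL; passing the named constant is equivalent and makes the intent explicit:

import csv

# Equivalent call, but self-documenting: quote every field in the output
df_new.to_csv("./data/AIS_2020_01_01_aisdb.csv", index=False, quoting=csv.QUOTE_ALL)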
import aisdb

# Establish a connection to the SQLite database and decode messages from the CSV file
with aisdb.SQLiteDBConn('./data/test_decode_msgs.db') as dbconn:
    aisdb.decode_msgs(filepaths=["./data/AIS_2020_01_01_aisdb.csv"],
                      dbconn=dbconn, source='Testing', verbose=True)

generating file checksums...
checking file dates...
creating tables and dropping table indexes...
Memory: 20.65GB remaining. CPUs: 12. Average file size: 49.12MB Spawning 4 workers
saving checksums...
processing ./data/AIS_2020_01_01_aisdb.csv
AIS_2020_01_01_aisdb.csv count: 49323 elapsed: 0.27s rate: 183129 msgs/s
cleaning temporary data...
aggregating static reports into static_202001_aggregate...

Inspect the resulting database with the sqlite3 shell:

sqlite3 ./data/test_decode_msgs.db
sqlite> .tables
ais_202001_dynamic       coarsetype_ref           static_202001_aggregate
ais_202001_static        hashmap
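Equivalently, you can run a quick count from Python (a sketch assuming the table name shown by .tables above):

import sqlite3

with sqlite3.connect("./data/test_decode_msgs.db") as conn:
    n = conn.execute("SELECT COUNT(*) FROM ais_202001_dynamic").fetchone()[0]
    print(n, "rows in ais_202001_dynamic")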
For bulk imports into PostgreSQL, run the pipeline scripts in order:

python 0-download-ais.py
python 1-zip2csv.py
python 2-merge.py
python 3-deduplicate.py
python 4-postgresql-database.py -dbname DBNAME -user USERNAME -password PASSWORD [-host HOST] [-port PORT]

Then connect with psql and list the resulting tables:

psql -U USERNAME -d DBNAME -h localhost -p 5432

ais_pgdb=# \dt
             List of relations
 Schema |    Name     | Type  |  Owner
--------+-------------+-------+----------
 public | ais_2023_01 | table | postgres
 public | ais_2023_02 | table | postgres
 public | ais_2023_03 | table | postgres
 public | ais_2023_04 | table | postgres
 public | ais_2023_05 | table | postgres
 public | ais_2023_06 | table | postgres
 public | ais_2023_07 | table | postgres
 public | ais_2023_08 | table | postgres
 public | ais_2023_09 | table | postgres
 public | ais_2023_10 | table | postgres
 public | ais_2023_11 | table | postgres
 public | ais_2023_12 | table | postgres
(12 rows)
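From Python, the monthly tables can be queried with psycopg2 (a sketch; the credentials are the placeholders used above, and the row-count query assumes nothing about column names):

import psycopg2

# Placeholders match the arguments passed to 4-postgresql-database.py
conn = psycopg2.connect(dbname="DBNAME", user="USERNAME",
                        password="PASSWORD", host="localhost", port=5432)
with conn.cursor() as cur:
    cur.execute("SELECT COUNT(*) FROM ais_2023_01")
    print(cur.fetchone()[0], "rows in ais_2023_01")
conn.close()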