aboutsummaryrefslogtreecommitdiff
path: root/trip_geo.py
diff options
context:
space:
mode:
author: Ariel Costas Guerrero <ariel@costas.dev> 2026-04-05 22:30:15 +0200
committer: Ariel Costas Guerrero <ariel@costas.dev> 2026-04-05 22:30:27 +0200
commit: 95f8e03affb17b3b4dd8cff202523f5b131972df (patch)
tree: 23e31512167f1295defc9cc4639ff6f411c04a54 /trip_geo.py
parent: b2631a82a394af8c38224ae0722bcf728d651cfd (diff)
renfe: generate shapes properly and consistently
- Update OSRM container to use ALL SPAIN (sorry, Trencelta)
- Generate a shape per trip (no trying to reuse, since trains that change stop sequence got wrong shapes)
- Add more position corrections for FEVE
- Run separate generators for FEVE and Renfe, since sometimes OSRM would pick the one that shouldn't and generate a wrong shape
- Add a debug script to generate a trip's visualisation from GTFS, since I was about to lose my mind debugging this pile of crap
- Update README (before starting anything else)

Time spent: ca. 6 hours

Closes #1
Diffstat (limited to 'trip_geo.py')
-rw-r--r--trip_geo.py92
1 files changed, 92 insertions, 0 deletions
diff --git a/trip_geo.py b/trip_geo.py
new file mode 100644
index 0000000..c6d88db
--- /dev/null
+++ b/trip_geo.py
@@ -0,0 +1,92 @@
+# /// script
+# requires-python = ">=3.13"
+# dependencies = [
+# "pandas",
+# ]
+# ///
+
+import argparse
+import json
+import os
+
+import pandas as pd
+
+if __name__ != "__main__":
+ raise RuntimeError("This script is meant to be run as a standalone program, not imported as a module.")
+
+parser = argparse.ArgumentParser(description="Extract GeoJSON from GTFS feed and save to file")
+parser.add_argument(
+ "gtfs_path",
+ type=str,
+ help="Path to the GTFS feed directory"
+)
+
+parser.add_argument(
+ "trip_id",
+ type=str,
+ help="ID of the trip to extract from the GTFS feed"
+)
+
+args = parser.parse_args()
+
+gtfs_path = args.gtfs_path
+trip_id = args.trip_id
+
+# Load trips.txt, stop_times.txt, stops.txt and shapes.txt
+
+trips_df = pd.read_csv(os.path.join(gtfs_path, "trips.txt"))
+stop_times_df = pd.read_csv(os.path.join(gtfs_path, "stop_times.txt"))
+stops_df = pd.read_csv(os.path.join(gtfs_path, "stops.txt"))
+shapes_df = pd.read_csv(os.path.join(gtfs_path, "shapes.txt"))
+
+# Find the shape_id for the given trip_id
+trip_row = trips_df[trips_df["trip_id"] == trip_id]
+if trip_row.empty:
+ raise ValueError(f"Trip ID {trip_id} not found in trips.txt")
+
+shape_id = trip_row.iloc[0]["shape_id"]
+if pd.isna(shape_id):
+ raise ValueError(f"No shape_id found for Trip ID {trip_id}")
+
+# Extract the shape points for the shape_id
+shape_points = shapes_df[shapes_df["shape_id"] == shape_id].sort_values(by="shape_pt_sequence")
+
+# Find the stop sequence for the trip_id and get the stop coordinates
+stop_times = stop_times_df[stop_times_df["trip_id"] == trip_id].sort_values(by="stop_sequence")
+stop_ids = stop_times["stop_id"].tolist()
+stops = stops_df[stops_df["stop_id"].isin(stop_ids)]
+
+# Convert shape points to GeoJSON LineString format
+geojson = {
+ "type": "FeatureCollection",
+ "features": [
+ {
+ "type": "Feature",
+ "geometry": {
+ "type": "LineString",
+ "coordinates": shape_points.apply(lambda row: [row["shape_pt_lon"], row["shape_pt_lat"]], axis=1).tolist()
+ },
+ "properties": {
+ "headsign": trip_row.iloc[0]["trip_headsign"],
+ "shape_id": shape_id
+ }
+ },
+ *[{
+ "type": "Feature",
+ "geometry": {
+ "type": "Point",
+ "coordinates": [x.stop_lon, x.stop_lat]
+ },
+ "properties": {
+ "name": x.stop_name,
+ "stop_id": x.stop_id,
+ "code": x.stop_code
+ }
+ } for _, x in stops.iterrows()]
+ ]
+}
+
+# Save GeoJSON to file
+output_file = f"{trip_id}_shape.geojson"
+
+with open(output_file, "w") as f:
+ import json
+ json.dump(geojson, f, indent=2)