From b2631a82a394af8c38224ae0722bcf728d651cfd Mon Sep 17 00:00:00 2001 From: Ariel Costas Guerrero Date: Sun, 5 Apr 2026 16:22:40 +0200 Subject: Replace FEVE with Cercanías, make them generate a single combined feed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build_renfe/build_static_feed.py | 91 ++++++++++++++++++++++++++++++++++------ 1 file changed, 79 insertions(+), 12 deletions(-) (limited to 'build_renfe/build_static_feed.py') diff --git a/build_renfe/build_static_feed.py b/build_renfe/build_static_feed.py index a60360f..eb247a9 100644 --- a/build_renfe/build_static_feed.py +++ b/build_renfe/build_static_feed.py @@ -1,6 +1,7 @@ # /// script # requires-python = ">=3.12" # dependencies = [ +# "pandas", # "requests", # "tqdm", # ] @@ -8,6 +9,7 @@ from argparse import ArgumentParser import csv +import io import json import logging import os @@ -15,6 +17,8 @@ import shutil import tempfile import zipfile +import pandas as pd + import requests from tqdm import tqdm @@ -24,8 +28,7 @@ BOUNDS = {"SOUTH": 41.820455, "NORTH": 43.937462, "WEST": -9.437256, "EAST": -6. 
FEEDS = { "general": "1098", - "cercanias": "1130", - "feve": "1131" + "cercanias": "1130" } @@ -189,6 +192,11 @@ if __name__ == "__main__": help="Enable debug logging", action="store_true" ) + parser.add_argument( + "--merge", + help="Merge the generated feeds into a single GTFS ZIP file instead of separate ones for each feed", + action="store_true" + ) args = parser.parse_args() @@ -310,21 +318,21 @@ if __name__ == "__main__": os.path.join(INPUT_GTFS_PATH, "routes.txt"), "route_id", route_ids ) - if feed == "feve": - feve_c1_route_ids = ["46T0001C1", "46T0002C1"] - new_route_id = "FEVE_C1" + if feed == "cercanias": + cercanias_c1_route_ids = ["46T0001C1", "46T0002C1"] + new_route_id = "FERROL_C1" # Find agency_id and a template route template_route = routes_in_trips[0] if routes_in_trips else {} agency_id = "1" for r in routes_in_trips: - if r["route_id"].strip() in feve_c1_route_ids: + if r["route_id"].strip() in cercanias_c1_route_ids: agency_id = r.get("agency_id", "1") template_route = r break # Filter out old routes - routes_in_trips = [r for r in routes_in_trips if r["route_id"].strip() not in feve_c1_route_ids] + routes_in_trips = [r for r in routes_in_trips if r["route_id"].strip() not in cercanias_c1_route_ids] # Add new route new_route = template_route.copy() @@ -377,13 +385,13 @@ if __name__ == "__main__": trips_in_galicia = get_rows_by_ids(TRIPS_FILE, "trip_id", trip_ids) - if feed == "feve": - feve_c1_route_ids = ["46T0001C1", "46T0002C1"] - new_route_id = "FEVE_C1" + if feed == "cercanias": + cercanias_c1_route_ids = ["46T0001C1", "46T0002C1"] + new_route_id = "FERROL_C1" for tig in trips_in_galicia: - if tig["route_id"].strip() in feve_c1_route_ids: - tig["route_id"] = new_route_id + if tig["route_id"].strip() in cercanias_c1_route_ids: tig["direction_id"] = "1" if tig["route_id"].strip()[6] == "2" else "0" + tig["route_id"] = new_route_id stops_by_id = {stop["stop_id"]: stop for stop in stops_in_trips} @@ -562,3 +570,62 @@ if __name__ == 
"__main__": os.remove(INPUT_GTFS_ZIP) shutil.rmtree(INPUT_GTFS_PATH) shutil.rmtree(OUTPUT_GTFS_PATH) + + if args.merge: + # Columns to keep for each GTFS file when merging. + # Files not listed here keep all columns present in the data. + MERGE_KEEP_COLS: dict[str, list[str]] = { + "agency.txt": ["agency_id", "agency_name", "agency_url", "agency_timezone", "agency_lang"], + "stops.txt": ["stop_id", "stop_code", "stop_name", "stop_lat", "stop_lon", "wheelchair_boarding"], + "routes.txt": ["route_id", "agency_id", "route_short_name", "route_long_name", "route_type", "route_color", "route_text_color"], + "trips.txt": ["route_id", "service_id", "trip_id", "trip_headsign", "direction_id", "shape_id", "wheelchair_accessible"], + "stop_times.txt": ["trip_id", "arrival_time", "departure_time", "stop_id", "stop_sequence", "pickup_type", "drop_off_type"], + } + # Default values to fill for columns that are missing or NaN after concat. + MERGE_FILL_DEFAULTS: dict[str, dict[str, str]] = { + "routes.txt": {"agency_id": "1071VC"}, + "trips.txt": {"direction_id": "0", "shape_id": "", "wheelchair_accessible": ""}, + "stop_times.txt": {"pickup_type": "0", "drop_off_type": "0"}, + } + # Deduplicate rows by this column, keeping the first occurrence. 
+ MERGE_DEDUP_KEY: dict[str, str] = { + "stops.txt": "stop_id", + } + + merged_zip_path = os.path.join(os.path.dirname(__file__), "gtfs_renfe_galicia_merged.zip") + feed_zip_paths = [os.path.join(os.path.dirname(__file__), f"gtfs_renfe_galicia_{feed}.zip") for feed in FEEDS.keys()] + + frames: dict[str, list[pd.DataFrame]] = {} + for feed_zip_path in feed_zip_paths: + with zipfile.ZipFile(feed_zip_path, "r") as feed_zip: + for filename in feed_zip.namelist(): + with feed_zip.open(filename) as f: + df = pd.read_csv(f, dtype=str, encoding="utf-8") + df.columns = df.columns.str.strip() + df = df.apply(lambda col: col.str.strip() if col.dtype == object else col) + frames.setdefault(filename, []).append(df) + + with zipfile.ZipFile(merged_zip_path, "w", zipfile.ZIP_DEFLATED) as merged_zip: + for filename, dfs in frames.items(): + merged = pd.concat(dfs, ignore_index=True) + + keep = MERGE_KEEP_COLS.get(filename) + defaults = MERGE_FILL_DEFAULTS.get(filename, {}) + if keep is not None: + for col in keep: + if col not in merged.columns: + merged[col] = defaults.get(col, "") + for col, val in defaults.items(): + if col in merged.columns: + merged[col] = merged[col].fillna(val) + merged = merged[keep] + + dedup_key = MERGE_DEDUP_KEY.get(filename) + if dedup_key: + merged = merged.drop_duplicates(subset=[dedup_key], keep="first") + + buf = io.StringIO() + merged.to_csv(buf, index=False) + merged_zip.writestr(filename, buf.getvalue()) + + logging.info(f"Feeds merged successfully into {merged_zip_path}.") \ No newline at end of file -- cgit v1.3