From 84db1ca075dc63ccb02da825948d95ad09f94e4d Mon Sep 17 00:00:00 2001 From: Ariel Costas Guerrero Date: Thu, 26 Mar 2026 09:53:39 +0100 Subject: Convert submodules to regular repo files, add custom feeds --- build_renfe | 1 - build_renfe/.gitignore | 2 + build_renfe/Dockerfile | 25 ++ build_renfe/LICENCE | 287 ++++++++++++++++++++ build_renfe/README.md | 52 ++++ build_renfe/build_static_feed.py | 564 +++++++++++++++++++++++++++++++++++++++ build_renfe/compose.yml | 7 + build_renfe/stop_overrides.json | 101 +++++++ 8 files changed, 1038 insertions(+), 1 deletion(-) delete mode 160000 build_renfe create mode 100644 build_renfe/.gitignore create mode 100644 build_renfe/Dockerfile create mode 100644 build_renfe/LICENCE create mode 100644 build_renfe/README.md create mode 100644 build_renfe/build_static_feed.py create mode 100644 build_renfe/compose.yml create mode 100644 build_renfe/stop_overrides.json (limited to 'build_renfe') diff --git a/build_renfe b/build_renfe deleted file mode 160000 index 43130f9..0000000 --- a/build_renfe +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 43130f953f86942b349eb1e5cdf59110c02b81cd diff --git a/build_renfe/.gitignore b/build_renfe/.gitignore new file mode 100644 index 0000000..e70de83 --- /dev/null +++ b/build_renfe/.gitignore @@ -0,0 +1,2 @@ +.venv/ +*.zip \ No newline at end of file diff --git a/build_renfe/Dockerfile b/build_renfe/Dockerfile new file mode 100644 index 0000000..f565320 --- /dev/null +++ b/build_renfe/Dockerfile @@ -0,0 +1,25 @@ +# Use a multi-stage build to download necessary files +FROM alpine/curl AS downloader + +RUN curl -L https://download.geofabrik.de/europe/spain/galicia-latest.osm.pbf -o /galicia-latest.osm.pbf +RUN curl -L https://raw.githubusercontent.com/railnova/osrm-train-profile/refs/heads/master/basic.lua -o /opt/train.lua + +FROM osrm/osrm-backend + +# Copy the downloaded OSM file from the downloader stage +COPY --from=downloader /galicia-latest.osm.pbf /data/galicia-latest.osm.pbf +COPY 
--from=downloader /opt/train.lua /opt/train.lua + +# Extract the map data using osrm-train-profile (by Railnova) +RUN osrm-extract -p /opt/train.lua /data/galicia-latest.osm.pbf + +# Prepare the map data for routing +RUN osrm-partition /data/galicia-latest.osrm +RUN osrm-customize /data/galicia-latest.osrm + +# Expose the OSRM server port +EXPOSE 5000 + +# Start the OSRM server +CMD ["osrm-routed", "--algorithm", "mld", "/data/galicia-latest.osrm"] + diff --git a/build_renfe/LICENCE b/build_renfe/LICENCE new file mode 100644 index 0000000..4153cd3 --- /dev/null +++ b/build_renfe/LICENCE @@ -0,0 +1,287 @@ + EUROPEAN UNION PUBLIC LICENCE v. 1.2 + EUPL © the European Union 2007, 2016 + +This European Union Public Licence (the ‘EUPL’) applies to the Work (as defined +below) which is provided under the terms of this Licence. Any use of the Work, +other than as authorised under this Licence is prohibited (to the extent such +use is covered by a right of the copyright holder of the Work). + +The Work is provided under the terms of this Licence when the Licensor (as +defined below) has placed the following notice immediately following the +copyright notice for the Work: + + Licensed under the EUPL + +or has expressed by any other means his willingness to license under the EUPL. + +1. Definitions + +In this Licence, the following terms have the following meaning: + +- ‘The Licence’: this Licence. + +- ‘The Original Work’: the work or software distributed or communicated by the + Licensor under this Licence, available as Source Code and also as Executable + Code as the case may be. + +- ‘Derivative Works’: the works or software that could be created by the + Licensee, based upon the Original Work or modifications thereof. This Licence + does not define the extent of modification or dependence on the Original Work + required in order to classify a work as a Derivative Work; this extent is + determined by copyright law applicable in the country mentioned in Article 15. 
+ +- ‘The Work’: the Original Work or its Derivative Works. + +- ‘The Source Code’: the human-readable form of the Work which is the most + convenient for people to study and modify. + +- ‘The Executable Code’: any code which has generally been compiled and which is + meant to be interpreted by a computer as a program. + +- ‘The Licensor’: the natural or legal person that distributes or communicates + the Work under the Licence. + +- ‘Contributor(s)’: any natural or legal person who modifies the Work under the + Licence, or otherwise contributes to the creation of a Derivative Work. + +- ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of + the Work under the terms of the Licence. + +- ‘Distribution’ or ‘Communication’: any act of selling, giving, lending, + renting, distributing, communicating, transmitting, or otherwise making + available, online or offline, copies of the Work or providing access to its + essential functionalities at the disposal of any other natural or legal + person. + +2. Scope of the rights granted by the Licence + +The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, +sublicensable licence to do the following, for the duration of copyright vested +in the Original Work: + +- use the Work in any circumstance and for all usage, +- reproduce the Work, +- modify the Work, and make Derivative Works based upon the Work, +- communicate to the public, including the right to make available or display + the Work or copies thereof to the public and perform publicly, as the case may + be, the Work, +- distribute the Work or copies thereof, +- lend and rent the Work or copies thereof, +- sublicense rights in the Work or copies thereof. + +Those rights can be exercised on any media, supports and formats, whether now +known or later invented, as far as the applicable law permits so. 
+ +In the countries where moral rights apply, the Licensor waives his right to +exercise his moral right to the extent allowed by law in order to make effective +the licence of the economic rights here above listed. + +The Licensor grants to the Licensee royalty-free, non-exclusive usage rights to +any patents held by the Licensor, to the extent necessary to make use of the +rights granted on the Work under this Licence. + +3. Communication of the Source Code + +The Licensor may provide the Work either in its Source Code form, or as +Executable Code. If the Work is provided as Executable Code, the Licensor +provides in addition a machine-readable copy of the Source Code of the Work +along with each copy of the Work that the Licensor distributes or indicates, in +a notice following the copyright notice attached to the Work, a repository where +the Source Code is easily and freely accessible for as long as the Licensor +continues to distribute or communicate the Work. + +4. Limitations on copyright + +Nothing in this Licence is intended to deprive the Licensee of the benefits from +any exception or limitation to the exclusive rights of the rights owners in the +Work, of the exhaustion of those rights or of other applicable limitations +thereto. + +5. Obligations of the Licensee + +The grant of the rights mentioned above is subject to some restrictions and +obligations imposed on the Licensee. Those obligations are the following: + +Attribution right: The Licensee shall keep intact all copyright, patent or +trademarks notices and all notices that refer to the Licence and to the +disclaimer of warranties. The Licensee must include a copy of such notices and a +copy of the Licence with every copy of the Work he/she distributes or +communicates. The Licensee must cause any Derivative Work to carry prominent +notices stating that the Work has been modified and the date of modification. 
+ +Copyleft clause: If the Licensee distributes or communicates copies of the +Original Works or Derivative Works, this Distribution or Communication will be +done under the terms of this Licence or of a later version of this Licence +unless the Original Work is expressly distributed only under this version of the +Licence — for example by communicating ‘EUPL v. 1.2 only’. The Licensee +(becoming Licensor) cannot offer or impose any additional terms or conditions on +the Work or Derivative Work that alter or restrict the terms of the Licence. + +Compatibility clause: If the Licensee Distributes or Communicates Derivative +Works or copies thereof based upon both the Work and another work licensed under +a Compatible Licence, this Distribution or Communication can be done under the +terms of this Compatible Licence. For the sake of this clause, ‘Compatible +Licence’ refers to the licences listed in the appendix attached to this Licence. +Should the Licensee's obligations under the Compatible Licence conflict with +his/her obligations under this Licence, the obligations of the Compatible +Licence shall prevail. + +Provision of Source Code: When distributing or communicating copies of the Work, +the Licensee will provide a machine-readable copy of the Source Code or indicate +a repository where this Source will be easily and freely available for as long +as the Licensee continues to distribute or communicate the Work. + +Legal Protection: This Licence does not grant permission to use the trade names, +trademarks, service marks, or names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the copyright notice. + +6. Chain of Authorship + +The original Licensor warrants that the copyright in the Original Work granted +hereunder is owned by him/her or licensed to him/her and that he/she has the +power and authority to grant the Licence. 
+ +Each Contributor warrants that the copyright in the modifications he/she brings +to the Work are owned by him/her or licensed to him/her and that he/she has the +power and authority to grant the Licence. + +Each time You accept the Licence, the original Licensor and subsequent +Contributors grant You a licence to their contributions to the Work, under the +terms of this Licence. + +7. Disclaimer of Warranty + +The Work is a work in progress, which is continuously improved by numerous +Contributors. It is not a finished work and may therefore contain defects or +‘bugs’ inherent to this type of development. + +For the above reason, the Work is provided under the Licence on an ‘as is’ basis +and without warranties of any kind concerning the Work, including without +limitation merchantability, fitness for a particular purpose, absence of defects +or errors, accuracy, non-infringement of intellectual property rights other than +copyright as stated in Article 6 of this Licence. + +This disclaimer of warranty is an essential part of the Licence and a condition +for the grant of any rights to the Work. + +8. Disclaimer of Liability + +Except in the cases of wilful misconduct or damages directly caused to natural +persons, the Licensor will in no event be liable for any direct or indirect, +material or moral, damages of any kind, arising out of the Licence or of the use +of the Work, including without limitation, damages for loss of goodwill, work +stoppage, computer failure or malfunction, loss of data or any commercial +damage, even if the Licensor has been advised of the possibility of such damage. +However, the Licensor will be liable under statutory product liability laws as +far such laws apply to the Work. + +9. Additional agreements + +While distributing the Work, You may choose to conclude an additional agreement, +defining obligations or services consistent with this Licence. 
However, if +accepting obligations, You may act only on your own behalf and on your sole +responsibility, not on behalf of the original Licensor or any other Contributor, +and only if You agree to indemnify, defend, and hold each Contributor harmless +for any liability incurred by, or claims asserted against such Contributor by +the fact You have accepted any warranty or additional liability. + +10. Acceptance of the Licence + +The provisions of this Licence can be accepted by clicking on an icon ‘I agree’ +placed under the bottom of a window displaying the text of this Licence or by +affirming consent in any other similar way, in accordance with the rules of +applicable law. Clicking on that icon indicates your clear and irrevocable +acceptance of this Licence and all of its terms and conditions. + +Similarly, you irrevocably accept this Licence and all of its terms and +conditions by exercising any rights granted to You by Article 2 of this Licence, +such as the use of the Work, the creation by You of a Derivative Work or the +Distribution or Communication by You of the Work or copies thereof. + +11. Information to the public + +In case of any Distribution or Communication of the Work by means of electronic +communication by You (for example, by offering to download the Work from a +remote location) the distribution channel or media (for example, a website) must +at least provide to the public the information requested by the applicable law +regarding the Licensor, the Licence and the way it may be accessible, concluded, +stored and reproduced by the Licensee. + +12. Termination of the Licence + +The Licence and the rights granted hereunder will terminate automatically upon +any breach by the Licensee of the terms of the Licence. + +Such a termination will not terminate the licences of any person who has +received the Work from the Licensee under the Licence, provided such persons +remain in full compliance with the Licence. + +13. 
Miscellaneous + +Without prejudice of Article 9 above, the Licence represents the complete +agreement between the Parties as to the Work. + +If any provision of the Licence is invalid or unenforceable under applicable +law, this will not affect the validity or enforceability of the Licence as a +whole. Such provision will be construed or reformed so as necessary to make it +valid and enforceable. + +The European Commission may publish other linguistic versions or new versions of +this Licence or updated versions of the Appendix, so far this is required and +reasonable, without reducing the scope of the rights granted by the Licence. New +versions of the Licence will be published with a unique version number. + +All linguistic versions of this Licence, approved by the European Commission, +have identical value. Parties can take advantage of the linguistic version of +their choice. + +14. Jurisdiction + +Without prejudice to specific agreement between parties, + +- any litigation resulting from the interpretation of this License, arising + between the European Union institutions, bodies, offices or agencies, as a + Licensor, and any Licensee, will be subject to the jurisdiction of the Court + of Justice of the European Union, as laid down in article 272 of the Treaty on + the Functioning of the European Union, + +- any litigation arising between other parties and resulting from the + interpretation of this License, will be subject to the exclusive jurisdiction + of the competent court where the Licensor resides or conducts its primary + business. + +15. Applicable Law + +Without prejudice to specific agreement between parties, + +- this Licence shall be governed by the law of the European Union Member State + where the Licensor has his seat, resides or has his registered office, + +- this licence shall be governed by Belgian law if the Licensor has no seat, + residence or registered office inside a European Union Member State. 
+ +Appendix + +‘Compatible Licences’ according to Article 5 EUPL are: + +- GNU General Public License (GPL) v. 2, v. 3 +- GNU Affero General Public License (AGPL) v. 3 +- Open Software License (OSL) v. 2.1, v. 3.0 +- Eclipse Public License (EPL) v. 1.0 +- CeCILL v. 2.0, v. 2.1 +- Mozilla Public Licence (MPL) v. 2 +- GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3 +- Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for + works other than software +- European Union Public Licence (EUPL) v. 1.1, v. 1.2 +- Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or Strong + Reciprocity (LiLiQ-R+). + +The European Commission may update this Appendix to later versions of the above +licences without producing a new version of the EUPL, as long as they provide +the rights granted in Article 2 of this Licence and protect the covered Source +Code from exclusive appropriation. + +All other changes or additions to this Appendix require the production of a new +EUPL version. diff --git a/build_renfe/README.md b/build_renfe/README.md new file mode 100644 index 0000000..d2dfce7 --- /dev/null +++ b/build_renfe/README.md @@ -0,0 +1,52 @@ +# Generador de GTFS Renfe Galicia + +Este repositorio contiene un script para extraer feeds GTFS estáticos para los servicios de Renfe en Galicia, España, usando los tres feeds disponibles (general, cercanías y FEVE). El script descarga los datos oficiales del Punto de Acceso Nacional (NAP) de España, extrae los viajes con paradas en Galicia y genera nuevos feeds GTFS con esta información. Adicionalmente, genera las formas de los viajes utilizando un servidor OSRM local con datos de OpenStreetMap (Geofabrik). + +## Cambios que se realizan + +1. Recortar los viajes para incluir solo aquellos con al menos una parada en Galicia. +2. Añadir headsigns a los viajes usando la última parada (como aparece en los letreros de los trenes). +3.
Generar las formas de los viajes utilizando un servidor OSRM local con datos de OpenStreetMap para Galicia y un perfil específico para trenes. +4. Corregir algunos nombres y posiciones de estaciones para que sean fieles a la realidad. +5. Añadir colores a las rutas basándose en colores oficiales actuales y pasados de Renfe: naranja para Media Distancia, rojo para Cercanías, verde para Trencelta, morado para regionales y AVE, azulado para Avlo. + +## Requisitos + +- Python 3.12 o superior, `requests` y `tqdm`. Con [uv](https://docs.astral.sh/uv) no es necesario instalar dependencias manualmente. +- Clave API gratuita del Punto de Acceso Nacional (NAP) de España. Se puede obtener en [su portal](https://nap.transportes.gob.es/) registrándose como consumidor de datos. +- Docker y Docker Compose. Alternativamente, Rancher, Podman u otros gestores compatibles con Dockerfile y archivos Compose (`compose.yml`). + +## Uso + +1. Clona este repositorio: + + ```bash + git clone https://github.com/tpgalicia/gtfs-renfe-galicia.git + cd gtfs-renfe-galicia + ``` + +2. Inicia el servidor OSRM local con datos de OpenStreetMap para Galicia y perfil específico para trenes. La primera vez puede tardar varios minutos en arrancar ya que tiene que preprocesar los datos: + + ```bash + docker-compose up -d + ``` + +3. Ejecuta el script para generar los feeds GTFS estáticos, pasando tu clave API del NAP como argumento: + + ```bash + uv run build_static_feed.py <NAP_API_KEY> + ``` + +Los feeds GTFS generados se guardarán en `gtfs_renfe_galicia_{feed}.zip` donde `feed` puede ser `general`, `cercanias` o `feve`. + +## Notas + +- Asegúrate de que el servidor OSRM esté en funcionamiento en el puerto 5050 antes de ejecutar el script (o indica otra dirección con `--osrm-url`); si el servidor no es accesible, el script omite la generación de formas. +- El script filtra los viajes para incluir solo aquellos con paradas en Galicia, basándose en las coordenadas geográficas de las estaciones. +- Las formas de los viajes se generan utilizando el servidor OSRM local para obtener rutas entre las paradas. + +## Licencia + +Este proyecto está cedido como software libre bajo licencia EUPL v1.2 o superior.
Más información en el archivo [`LICENCE`](LICENCE) o en [Interoperable Europe](https://interoperable-europe.ec.europa.eu/collection/eupl). + +Los datos GTFS originales son propiedad de Renfe Operadora, cedidos bajo la [licencia de uso libre del NAP](https://nap.transportes.gob.es/licencia-datos). diff --git a/build_renfe/build_static_feed.py b/build_renfe/build_static_feed.py new file mode 100644 index 0000000..a60360f --- /dev/null +++ b/build_renfe/build_static_feed.py @@ -0,0 +1,564 @@ +# /// script +# requires-python = ">=3.12" +# dependencies = [ +# "requests", +# "tqdm", +# ] +# /// + +from argparse import ArgumentParser +import csv +import json +import logging +import os +import shutil +import tempfile +import zipfile + +import requests +from tqdm import tqdm + + +# Approximate bounding box for Galicia +BOUNDS = {"SOUTH": 41.820455, "NORTH": 43.937462, "WEST": -9.437256, "EAST": -6.767578} + +FEEDS = { + "general": "1098", + "cercanias": "1130", + "feve": "1131" +} + + +def is_in_bounds(lat: float, lon: float) -> bool: + return ( + BOUNDS["SOUTH"] <= lat <= BOUNDS["NORTH"] + and BOUNDS["WEST"] <= lon <= BOUNDS["EAST"] + ) + + +def get_stops_in_bounds(stops_file: str): + with open(stops_file, "r", encoding="utf-8") as f: + stops = csv.DictReader(f) + + for stop in stops: + lat = float(stop["stop_lat"]) + lon = float(stop["stop_lon"]) + if is_in_bounds(lat, lon): + yield stop + + +def get_trip_ids_for_stops(stoptimes_file: str, stop_ids: list[str]) -> list[str]: + trip_ids: set[str] = set() + + with open(stoptimes_file, "r", encoding="utf-8") as f: + stop_times = csv.DictReader(f) + + for stop_time in stop_times: + if stop_time["stop_id"] in stop_ids: + trip_ids.add(stop_time["trip_id"]) + + return list(trip_ids) + + +def get_routes_for_trips(trips_file: str, trip_ids: list[str]) -> list[str]: + route_ids: set[str] = set() + + with open(trips_file, "r", encoding="utf-8") as f: + trips = csv.DictReader(f) + + for trip in trips: + if trip["trip_id"] in trip_ids: 
+ route_ids.add(trip["route_id"]) + + return list(route_ids) + + +def get_distinct_stops_from_stop_times( + stoptimes_file: str, trip_ids: list[str] +) -> list[str]: + stop_ids: set[str] = set() + + with open(stoptimes_file, "r", encoding="utf-8") as f: + stop_times = csv.DictReader(f) + + for stop_time in stop_times: + if stop_time["trip_id"] in trip_ids: + stop_ids.add(stop_time["stop_id"]) + + return list(stop_ids) + + +def get_last_stop_for_trips( + stoptimes_file: str, trip_ids: list[str] +) -> dict[str, str]: + trip_last: dict[str, str] = {} + trip_last_seq: dict[str, int] = {} + + with open(stoptimes_file, "r", encoding="utf-8") as f: + reader = csv.DictReader(f) + if reader.fieldnames is None: + raise Exception("Fuck you, screw you, fieldnames is None and you just get rekt") + reader.fieldnames = [name.strip() for name in reader.fieldnames] + + for stop_time in reader: + if stop_time["trip_id"] in trip_ids: + trip_id = stop_time["trip_id"] + if trip_last.get(trip_id, None) is None: + trip_last[trip_id] = "" + trip_last_seq[trip_id] = -1 + + this_stop_seq = int(stop_time["stop_sequence"]) + if this_stop_seq > trip_last_seq[trip_id]: + trip_last_seq[trip_id] = this_stop_seq + trip_last[trip_id] = stop_time["stop_id"] + + return trip_last + +def get_rows_by_ids(input_file: str, id_field: str, ids: list[str]) -> list[dict]: + rows: list[dict] = [] + + with open(input_file, "r", encoding="utf-8") as f: + reader = csv.DictReader(f) + if reader.fieldnames is None: + raise Exception("Fuck you, screw you, fieldnames is None and you just get rekt") + reader.fieldnames = [name.strip() for name in reader.fieldnames] + + for row in reader: + if row[id_field].strip() in ids: + rows.append(row) + + return rows + +# First colour is background, second is text +SERVICE_COLOURS = { + "REGIONAL": ("9A0060", "FFFFFF"), + "REG.EXP.": ("9A0060", "FFFFFF"), + + "MD": ("F85B0B", "000000"), + "AVANT": ("F85B0B", "000000"), + + "AVLO": ("05CEC6", "000000"), + "AVE": ("FFFFFF", 
"9A0060"), + "ALVIA": ("FFFFFF", "9A0060"), + + "INTERCITY": ("606060", "FFFFFF"), + + "TRENCELTA": ("00824A", "FFFFFF"), + + # Cercanías Ferrol-Ortigueira + "C1": ("F5333F", "FFFFFF") +} + + +def colour_route(route_short_name: str) -> tuple[str, str]: + """ + Returns the colours to be used for a route from its short name. + + :param route_short_name: The routes.txt's route_short_name + :return: A tuple containing the "route_color" (background) first and "route_text_color" (text) second + :rtype: tuple[str, str] + """ + + route_name_searched = route_short_name.strip().upper() + + if route_name_searched in SERVICE_COLOURS: + return SERVICE_COLOURS[route_name_searched] + + print("Unknown route short name:", route_short_name) + return ("000000", "FFFFFF") + + +if __name__ == "__main__": + parser = ArgumentParser( + description="Extract GTFS data for Galicia from Renfe GTFS feed." + ) + parser.add_argument( + "nap_apikey", + type=str, + help="NAP API Key (https://nap.transportes.gob.es/)" + ) + parser.add_argument( + "--osrm-url", + type=str, + help="OSRM server URL", + default="http://localhost:5050", + required=False, + ) + parser.add_argument( + "--debug", + help="Enable debug logging", + action="store_true" + ) + + args = parser.parse_args() + + try: + osrm_check = requests.head(args.osrm_url, timeout=5) + GENERATE_SHAPES = osrm_check.status_code < 500 + except requests.RequestException: + GENERATE_SHAPES = False + logging.warning("OSRM server is not reachable. 
Shape generation will be skipped.") + + + logging.basicConfig( + level=logging.DEBUG if args.debug else logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + ) + + for feed in FEEDS.keys(): + INPUT_GTFS_FD, INPUT_GTFS_ZIP = tempfile.mkstemp(suffix=".zip", prefix=f"renfe_galicia_in_{feed}_") + INPUT_GTFS_PATH = tempfile.mkdtemp(prefix=f"renfe_galicia_in_{feed}_") + OUTPUT_GTFS_PATH = tempfile.mkdtemp(prefix=f"renfe_galicia_out_{feed}_") + OUTPUT_GTFS_ZIP = os.path.join(os.path.dirname(__file__), f"gtfs_renfe_galicia_{feed}.zip") + + FEED_URL = f"https://nap.transportes.gob.es/api/Fichero/download/{FEEDS[feed]}" + + logging.info(f"Downloading GTFS feed '{feed}'...") + response = requests.get(FEED_URL, headers={"ApiKey": args.nap_apikey}) + with open(INPUT_GTFS_ZIP, "wb") as f: + f.write(response.content) + + # Unzip the GTFS feed + with zipfile.ZipFile(INPUT_GTFS_ZIP, "r") as zip_ref: + zip_ref.extractall(INPUT_GTFS_PATH) + + STOPS_FILE = os.path.join(INPUT_GTFS_PATH, "stops.txt") + STOP_TIMES_FILE = os.path.join(INPUT_GTFS_PATH, "stop_times.txt") + TRIPS_FILE = os.path.join(INPUT_GTFS_PATH, "trips.txt") + + all_stops_applicable = [stop for stop in get_stops_in_bounds(STOPS_FILE)] + logging.info(f"Total stops in Galicia: {len(all_stops_applicable)}") + + stop_ids = [stop["stop_id"] for stop in all_stops_applicable] + trip_ids = get_trip_ids_for_stops(STOP_TIMES_FILE, stop_ids) + + route_ids = get_routes_for_trips(TRIPS_FILE, trip_ids) + + logging.info(f"Feed parsed successfully. Stops: {len(stop_ids)}, trips: {len(trip_ids)}, routes: {len(route_ids)}") + if len(trip_ids) == 0 or len(route_ids) == 0: + logging.warning(f"No trips or routes found for feed '{feed}'. 
Skipping...") + shutil.rmtree(INPUT_GTFS_PATH) + shutil.rmtree(OUTPUT_GTFS_PATH) + continue + + # Copy agency.txt, calendar.txt, calendar_dates.txt as is + for filename in ["agency.txt", "calendar.txt", "calendar_dates.txt"]: + src_path = os.path.join(INPUT_GTFS_PATH, filename) + dest_path = os.path.join(OUTPUT_GTFS_PATH, filename) + if os.path.exists(src_path): + shutil.copy(src_path, dest_path) + else: + logging.debug(f"File {filename} does not exist in the input GTFS feed.") + + # Write new stops.txt with the stops in any trip that passes through Galicia + with open( + os.path.join(os.path.dirname(__file__), "stop_overrides.json"), + "r", + encoding="utf-8", + ) as f: + stop_overrides_raw: list = json.load(f) + stop_overrides = { + item["stop_id"]: item + for item in stop_overrides_raw + } + logging.debug(f"Loaded stop overrides for {len(stop_overrides)} stops.") + + deleted_stop_ids: set[str] = set() + for stop_id, override_item in stop_overrides.items(): + if override_item.get("_delete", False): + if override_item.get("feed_id", None) is None or override_item["feed_id"] == feed: + deleted_stop_ids.add(stop_id) + logging.debug(f"Stops marked for deletion in feed '{feed}': {len(deleted_stop_ids)}") + + distinct_stop_ids = get_distinct_stops_from_stop_times( + STOP_TIMES_FILE, trip_ids + ) + stops_in_trips = get_rows_by_ids(STOPS_FILE, "stop_id", distinct_stop_ids) + for stop in stops_in_trips: + stop["stop_code"] = stop["stop_id"] + if stop_overrides.get(stop["stop_id"], None) is not None: + override_item = stop_overrides[stop["stop_id"]] + + if override_item.get("feed_id", None) is not None and override_item["feed_id"] != feed: + continue + + for key, value in override_item.items(): + if key in ("stop_id", "feed_id", "_delete"): + continue + stop[key] = value + + if stop["stop_name"].startswith("Estación de tren "): + stop["stop_name"] = stop["stop_name"][17:].strip() + stop["stop_name"] = " ".join([ + word.capitalize() for word in stop["stop_name"].split(" ") 
if word != "de" + ]) + + stops_in_trips = [stop for stop in stops_in_trips if stop["stop_id"] not in deleted_stop_ids] + + with open( + os.path.join(OUTPUT_GTFS_PATH, "stops.txt"), + "w", + encoding="utf-8", + newline="", + ) as f: + writer = csv.DictWriter(f, fieldnames=stops_in_trips[0].keys()) + writer.writeheader() + writer.writerows(stops_in_trips) + + # Write new routes.txt with the routes that have trips in Galicia + routes_in_trips = get_rows_by_ids( + os.path.join(INPUT_GTFS_PATH, "routes.txt"), "route_id", route_ids + ) + + if feed == "feve": + feve_c1_route_ids = ["46T0001C1", "46T0002C1"] + new_route_id = "FEVE_C1" + + # Find agency_id and a template route + template_route = routes_in_trips[0] if routes_in_trips else {} + agency_id = "1" + for r in routes_in_trips: + if r["route_id"].strip() in feve_c1_route_ids: + agency_id = r.get("agency_id", "1") + template_route = r + break + + # Filter out old routes + routes_in_trips = [r for r in routes_in_trips if r["route_id"].strip() not in feve_c1_route_ids] + + # Add new route + new_route = template_route.copy() + new_route.update({ + "route_id": new_route_id, + "route_short_name": "C1", + "route_long_name": "Ferrol - Xuvia - San Sadurniño - Ortigueira", + "route_type": "2", + }) + if "agency_id" in template_route: + new_route["agency_id"] = agency_id + + routes_in_trips.append(new_route) + + for route in routes_in_trips: + route["route_color"], route["route_text_color"] = colour_route( + route["route_short_name"] + ) + with open( + os.path.join(OUTPUT_GTFS_PATH, "routes.txt"), + "w", + encoding="utf-8", + newline="", + ) as f: + writer = csv.DictWriter(f, fieldnames=routes_in_trips[0].keys()) + writer.writeheader() + writer.writerows(routes_in_trips) + + # Write new trips.txt with the trips that pass through Galicia + # Load stop_times early so we can filter deleted stops and renumber sequences + stop_times_in_galicia = get_rows_by_ids(STOP_TIMES_FILE, "trip_id", trip_ids) + stop_times_in_galicia = [st for 
st in stop_times_in_galicia if st["stop_id"].strip() not in deleted_stop_ids] + stop_times_in_galicia.sort(key=lambda x: (x["trip_id"], int(x["stop_sequence"].strip()))) + trip_seq_counter: dict[str, int] = {} + for st in stop_times_in_galicia: + tid = st["trip_id"] + if tid not in trip_seq_counter: + trip_seq_counter[tid] = 0 + st["stop_sequence"] = str(trip_seq_counter[tid]) + trip_seq_counter[tid] += 1 + + last_stop_in_trips: dict[str, str] = {} + trip_last_seq: dict[str, int] = {} + for st in stop_times_in_galicia: + tid = st["trip_id"] + seq = int(st["stop_sequence"]) + if seq > trip_last_seq.get(tid, -1): + trip_last_seq[tid] = seq + last_stop_in_trips[tid] = st["stop_id"].strip() + + trips_in_galicia = get_rows_by_ids(TRIPS_FILE, "trip_id", trip_ids) + + if feed == "feve": + feve_c1_route_ids = ["46T0001C1", "46T0002C1"] + new_route_id = "FEVE_C1" + for tig in trips_in_galicia: + if tig["route_id"].strip() in feve_c1_route_ids: + tig["route_id"] = new_route_id + tig["direction_id"] = "1" if tig["route_id"].strip()[6] == "2" else "0" + + stops_by_id = {stop["stop_id"]: stop for stop in stops_in_trips} + + for tig in trips_in_galicia: + if GENERATE_SHAPES: + tig["shape_id"] = f"Shape_{tig['trip_id'][0:5]}" + tig["trip_headsign"] = stops_by_id[last_stop_in_trips[tig["trip_id"]]]["stop_name"] + with open( + os.path.join(OUTPUT_GTFS_PATH, "trips.txt"), + "w", + encoding="utf-8", + newline="", + ) as f: + writer = csv.DictWriter(f, fieldnames=trips_in_galicia[0].keys()) + writer.writeheader() + writer.writerows(trips_in_galicia) + + # Write new stop_times.txt with the stop times for any trip that passes through Galicia + with open( + os.path.join(OUTPUT_GTFS_PATH, "stop_times.txt"), + "w", + encoding="utf-8", + newline="", + ) as f: + writer = csv.DictWriter(f, fieldnames=stop_times_in_galicia[0].keys()) + writer.writeheader() + writer.writerows(stop_times_in_galicia) + + logging.info("GTFS data for Galicia has been extracted successfully. 
Generate shapes for the trips...") + + if GENERATE_SHAPES: + shape_ids_total = len(set(f"Shape_{trip_id[0:5]}" for trip_id in trip_ids)) + shape_ids_generated: set[str] = set() + + # Pre-load stops for quick lookup + stops_dict = {stop["stop_id"]: stop for stop in stops_in_trips} + + # Group stop times by trip_id to avoid repeated file reads + stop_times_by_trip: dict[str, list[dict]] = {} + for st in stop_times_in_galicia: + tid = st["trip_id"] + if tid not in stop_times_by_trip: + stop_times_by_trip[tid] = [] + stop_times_by_trip[tid].append(st) + + OSRM_BASE_URL = f"{args.osrm_url}/route/v1/driving/" + for trip_id in tqdm(trip_ids, total=shape_ids_total, desc="Generating shapes"): + shape_id = f"Shape_{trip_id[0:5]}" + if shape_id in shape_ids_generated: + continue + + stop_seq = stop_times_by_trip.get(trip_id, []) + stop_seq.sort(key=lambda x: int(x["stop_sequence"].strip())) + + if not stop_seq: + continue + + final_shape_points = [] + i = 0 + while i < len(stop_seq) - 1: + stop_a = stops_dict[stop_seq[i]["stop_id"]] + lat_a, lon_a = float(stop_a["stop_lat"]), float(stop_a["stop_lon"]) + + if not is_in_bounds(lat_a, lon_a): + # S_i is out of bounds. Segment S_i -> S_{i+1} is straight line. + stop_b = stops_dict[stop_seq[i+1]["stop_id"]] + lat_b, lon_b = float(stop_b["stop_lat"]), float(stop_b["stop_lon"]) + + segment_points = [[lon_a, lat_a], [lon_b, lat_b]] + if not final_shape_points: + final_shape_points.extend(segment_points) + else: + final_shape_points.extend(segment_points[1:]) + i += 1 + else: + # S_i is in bounds. Find how many subsequent stops are also in bounds. + j = i + 1 + while j < len(stop_seq): + stop_j = stops_dict[stop_seq[j]["stop_id"]] + if is_in_bounds(float(stop_j["stop_lat"]), float(stop_j["stop_lon"])): + j += 1 + else: + break + + # Stops from i to j-1 are in bounds. + if j > i + 1: + # We have at least two consecutive stops in bounds. 
+ in_bounds_stops = stop_seq[i:j] + coordinates = [] + for st in in_bounds_stops: + s = stops_dict[st["stop_id"]] + coordinates.append(f"{s['stop_lon']},{s['stop_lat']}") + + coords_str = ";".join(coordinates) + osrm_url = f"{OSRM_BASE_URL}{coords_str}?overview=full&geometries=geojson" + + segment_points = [] + try: + response = requests.get(osrm_url, timeout=10) + if response.status_code == 200: + data = response.json() + if data.get("code") == "Ok": + segment_points = data["routes"][0]["geometry"]["coordinates"] + except Exception: + pass + + if not segment_points: + # Fallback to straight lines for this whole sub-sequence + segment_points = [] + for k in range(i, j): + s = stops_dict[stop_seq[k]["stop_id"]] + segment_points.append([float(s["stop_lon"]), float(s["stop_lat"])]) + + if not final_shape_points: + final_shape_points.extend(segment_points) + else: + final_shape_points.extend(segment_points[1:]) + + i = j - 1 # Next iteration starts from S_{j-1} + else: + # Only S_i is in bounds, S_{i+1} is out. + # Segment S_i -> S_{i+1} is straight line. 
+ stop_b = stops_dict[stop_seq[i+1]["stop_id"]] + lat_b, lon_b = float(stop_b["stop_lat"]), float(stop_b["stop_lon"]) + + segment_points = [[lon_a, lat_a], [lon_b, lat_b]] + if not final_shape_points: + final_shape_points.extend(segment_points) + else: + final_shape_points.extend(segment_points[1:]) + i += 1 + + shape_ids_generated.add(shape_id) + + with open( + os.path.join(OUTPUT_GTFS_PATH, "shapes.txt"), + "a", + encoding="utf-8", + newline="", + ) as f: + fieldnames = [ + "shape_id", + "shape_pt_lat", + "shape_pt_lon", + "shape_pt_sequence", + ] + writer = csv.DictWriter(f, fieldnames=fieldnames) + + if f.tell() == 0: + writer.writeheader() + + for seq, point in enumerate(final_shape_points): + writer.writerow( + { + "shape_id": shape_id, + "shape_pt_lat": point[1], + "shape_pt_lon": point[0], + "shape_pt_sequence": seq, + } + ) + else: + logging.info("Shape generation skipped as per user request.") + + # Create a ZIP archive of the output GTFS + with zipfile.ZipFile(OUTPUT_GTFS_ZIP, "w", zipfile.ZIP_DEFLATED) as zipf: + for root, _, files in os.walk(OUTPUT_GTFS_PATH): + for file in files: + file_path = os.path.join(root, file) + arcname = os.path.relpath(file_path, OUTPUT_GTFS_PATH) + zipf.write(file_path, arcname) + + logging.info( + f"GTFS data from feed {feed} has been zipped successfully at {OUTPUT_GTFS_ZIP}." + ) + os.close(INPUT_GTFS_FD) + os.remove(INPUT_GTFS_ZIP) + shutil.rmtree(INPUT_GTFS_PATH) + shutil.rmtree(OUTPUT_GTFS_PATH) diff --git a/build_renfe/compose.yml b/build_renfe/compose.yml new file mode 100644 index 0000000..ebe29cf --- /dev/null +++ b/build_renfe/compose.yml @@ -0,0 +1,7 @@ +services: + osrm: + build: + context: . 
+ restart: unless-stopped + ports: + - "5050:5000" diff --git a/build_renfe/stop_overrides.json b/build_renfe/stop_overrides.json new file mode 100644 index 0000000..c2298fa --- /dev/null +++ b/build_renfe/stop_overrides.json @@ -0,0 +1,101 @@ +[ + { + "stop_id": "31412", + "stop_lat": 43.3504, + "stop_lon": -8.412142, + "stop_name": "A Coruña" + }, + { + "stop_id": "23021", + "stop_lat": 42.78122, + "stop_lon": -8.656493, + "stop_name": "Padrón-Barbanza" + }, + { + "stop_id": "08224", + "stop_lat": 42.28455, + "stop_lon": -8.603739, + "stop_name": "Redondela AV" + }, + { + "stop_id": "22201", + "stop_name": "O Porriño" + }, + { + "stop_id": "22006", + "stop_name": "Barra de Miño" + }, + { + "stop_id": "20208", + "stop_name": "Quereño" + }, + { + "stop_id": "22109", + "stop_name": "Salvaterra de Miño" + }, + { + "stop_id": "20410", + "stop_name": "Elviña-Universidade" + }, + { + "stop_id": "20318", + "stop_name": "Piñoi" + }, + { + "stop_id": "21002", + "stop_name": "Miño" + }, + { + "stop_id": "31304", + "stop_name": "O Carballiño" + }, + { + "stop_id": "96122", + "stop_name": "Barcelos" + }, + { + "stop_id": "94033", + "stop_name": "Viana do Castelo" + }, + { + "stop_id": "22308", + "stop_lat": 42.23930, + "stop_lon": -8.71226 + }, + { + "stop_id": "22402", + "stop_name": "Valença do Minho" + }, + { + "stop_id": "21010", + "feed_id": "general", + "stop_lat": 43.4880356421007, + "stop_lon": -8.230795701069612 + }, + { + "stop_id": "21010", + "feed_id": "feve", + "stop_lat": 43.48826050175589, + "stop_lon": -8.231122670037813 + }, + { + "stop_id": "99117", + "stop_name": "Ourense Turístico", + "_delete": true + }, + { + "stop_id": "99143", + "stop_name": "A Coruña - Turístico", + "_delete": true + }, + { + "stop_id": "99159", + "stop_name": "Santiago Turístico", + "_delete": true + }, + { + "stop_id": "99161", + "stop_name": "Pontevedra Turístico", + "_delete": true + } +] -- cgit v1.3