From 52f342f9135264216cdbf3012ea115981d0bc294 Mon Sep 17 00:00:00 2001 From: Ariel Costas Guerrero Date: Fri, 14 Nov 2025 15:57:34 +0100 Subject: Add Shape extraction from GTFS to new protobufs --- src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.py | 25 +++--- .../src/proto/stop_schedule_pb2.pyi | 8 ++ src/gtfs_vigo_stops/src/shapes.py | 88 ++++++++++++++++++++++ src/gtfs_vigo_stops/stop_report.py | 8 ++ 4 files changed, 118 insertions(+), 11 deletions(-) create mode 100644 src/gtfs_vigo_stops/src/shapes.py (limited to 'src/gtfs_vigo_stops') diff --git a/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.py b/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.py index a4cabd8..d9f8e52 100644 --- a/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.py +++ b/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # NO CHECKED-IN PROTOBUF GENCODE -# source: src/common/stop_schedule.proto +# source: stop_schedule.proto # Protobuf Python Version: 6.33.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor @@ -15,7 +15,7 @@ _runtime_version.ValidateProtobufRuntimeVersion( 33, 0, '', - 'src/common/stop_schedule.proto' + 'stop_schedule.proto' ) # @@protoc_insertion_point(imports) @@ -24,17 +24,20 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1esrc/common/stop_schedule.proto\x12\nsrc.common\"!\n\tEpsg25829\x12\t\n\x01x\x18\x01 \x01(\x01\x12\t\n\x01y\x18\x02 \x01(\x01\"\xed\x03\n\x0cStopArrivals\x12\x0f\n\x07stop_id\x18\x01 \x01(\t\x12\'\n\x08location\x18\x03 \x01(\x0b\x32\x15.src.common.Epsg25829\x12;\n\x08\x61rrivals\x18\x05 \x03(\x0b\x32).src.common.StopArrivals.ScheduledArrival\x1a\xe5\x02\n\x10ScheduledArrival\x12\x12\n\nservice_id\x18\x01 \x01(\t\x12\x0f\n\x07trip_id\x18\x02 \x01(\t\x12\x0c\n\x04line\x18\x03 \x01(\t\x12\r\n\x05route\x18\x04 \x01(\t\x12\x10\n\x08shape_id\x18\x05 
\x01(\t\x12\x1b\n\x13shape_dist_traveled\x18\x06 \x01(\x01\x12\x15\n\rstop_sequence\x18\x0b \x01(\r\x12\x14\n\x0cnext_streets\x18\x0c \x03(\t\x12\x15\n\rstarting_code\x18\x15 \x01(\t\x12\x15\n\rstarting_name\x18\x16 \x01(\t\x12\x15\n\rstarting_time\x18\x17 \x01(\t\x12\x14\n\x0c\x63\x61lling_time\x18! \x01(\t\x12\x13\n\x0b\x63\x61lling_ssm\x18\" \x01(\r\x12\x15\n\rterminus_code\x18) \x01(\t\x12\x15\n\rterminus_name\x18* \x01(\t\x12\x15\n\rterminus_time\x18+ \x01(\tb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13stop_schedule.proto\x12\x05proto\"!\n\tEpsg25829\x12\t\n\x01x\x18\x01 \x01(\x01\x12\t\n\x01y\x18\x02 \x01(\x01\"\xe3\x03\n\x0cStopArrivals\x12\x0f\n\x07stop_id\x18\x01 \x01(\t\x12\"\n\x08location\x18\x03 \x01(\x0b\x32\x10.proto.Epsg25829\x12\x36\n\x08\x61rrivals\x18\x05 \x03(\x0b\x32$.proto.StopArrivals.ScheduledArrival\x1a\xe5\x02\n\x10ScheduledArrival\x12\x12\n\nservice_id\x18\x01 \x01(\t\x12\x0f\n\x07trip_id\x18\x02 \x01(\t\x12\x0c\n\x04line\x18\x03 \x01(\t\x12\r\n\x05route\x18\x04 \x01(\t\x12\x10\n\x08shape_id\x18\x05 \x01(\t\x12\x1b\n\x13shape_dist_traveled\x18\x06 \x01(\x01\x12\x15\n\rstop_sequence\x18\x0b \x01(\r\x12\x14\n\x0cnext_streets\x18\x0c \x03(\t\x12\x15\n\rstarting_code\x18\x15 \x01(\t\x12\x15\n\rstarting_name\x18\x16 \x01(\t\x12\x15\n\rstarting_time\x18\x17 \x01(\t\x12\x14\n\x0c\x63\x61lling_time\x18! 
\x01(\t\x12\x13\n\x0b\x63\x61lling_ssm\x18\" \x01(\r\x12\x15\n\rterminus_code\x18) \x01(\t\x12\x15\n\rterminus_name\x18* \x01(\t\x12\x15\n\rterminus_time\x18+ \x01(\t\";\n\x05Shape\x12\x10\n\x08shape_id\x18\x01 \x01(\t\x12 \n\x06points\x18\x03 \x03(\x0b\x32\x10.proto.Epsg25829B$\xaa\x02!Costasdev.Busurbano.Backend.Typesb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'src.common.stop_schedule_pb2', _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'stop_schedule_pb2', _globals) if not _descriptor._USE_C_DESCRIPTORS: - DESCRIPTOR._loaded_options = None - _globals['_EPSG25829']._serialized_start=46 - _globals['_EPSG25829']._serialized_end=79 - _globals['_STOPARRIVALS']._serialized_start=82 - _globals['_STOPARRIVALS']._serialized_end=575 - _globals['_STOPARRIVALS_SCHEDULEDARRIVAL']._serialized_start=218 - _globals['_STOPARRIVALS_SCHEDULEDARRIVAL']._serialized_end=575 + _globals['DESCRIPTOR']._loaded_options = None + _globals['DESCRIPTOR']._serialized_options = b'\252\002!Costasdev.Busurbano.Backend.Types' + _globals['_EPSG25829']._serialized_start=30 + _globals['_EPSG25829']._serialized_end=63 + _globals['_STOPARRIVALS']._serialized_start=66 + _globals['_STOPARRIVALS']._serialized_end=549 + _globals['_STOPARRIVALS_SCHEDULEDARRIVAL']._serialized_start=192 + _globals['_STOPARRIVALS_SCHEDULEDARRIVAL']._serialized_end=549 + _globals['_SHAPE']._serialized_start=551 + _globals['_SHAPE']._serialized_end=610 # @@protoc_insertion_point(module_scope) diff --git a/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.pyi b/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.pyi index aa42cdb..615999f 100644 --- a/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.pyi +++ b/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.pyi @@ -58,3 +58,11 @@ class StopArrivals(_message.Message): location: Epsg25829 arrivals: 
@dataclass
class Shape:
    """One point of a GTFS shape, as read from a single ``shapes.txt`` row.

    The first five fields mirror the CSV columns; a value is ``None`` when the
    column is missing or empty. ``shape_pt_25829_x`` / ``shape_pt_25829_y`` are
    not read from the file: ``process_shapes`` fills them in with the
    EPSG:25829 projection of the point once both latitude and longitude are
    known.
    """

    shape_id: str
    shape_pt_lat: Optional[float]
    shape_pt_lon: Optional[float]
    # Ordering index of the point inside its shape (GTFS ``shape_pt_sequence``).
    shape_pt_position: Optional[int]
    shape_dist_traveled: Optional[float]

    shape_pt_25829_x: Optional[float] = None
    shape_pt_25829_y: Optional[float] = None

    @classmethod
    def from_row(cls, row: Dict[str, Optional[str]]) -> "Shape":
        """Build a ``Shape`` from one ``csv.DictReader`` row of ``shapes.txt``.

        Args:
            row: Mapping of column name to raw text value.

        Returns:
            The parsed point, with the projected coordinates still unset.

        Raises:
            KeyError: If the row has no ``shape_id`` column.
            ValueError: If a non-empty numeric column cannot be converted.
        """

        def _number(convert, *columns):
            # Return the first non-empty column converted, or None if all are
            # absent/empty. Raw strings are tested for emptiness, so "0" is
            # still parsed as a value.
            for column in columns:
                raw = row.get(column)
                if raw:
                    return convert(raw)
            return None

        return cls(
            shape_id=row["shape_id"],
            shape_pt_lat=_number(float, "shape_pt_lat"),
            shape_pt_lon=_number(float, "shape_pt_lon"),
            # GTFS names this column ``shape_pt_sequence``; the older
            # ``shape_pt_position`` spelling is kept as a fallback so feeds
            # that happened to work before keep working.
            shape_pt_position=_number(int, "shape_pt_sequence", "shape_pt_position"),
            shape_dist_traveled=_number(float, "shape_dist_traveled"),
        )


def process_shapes(feed_dir: str, out_dir: str) -> None:
    """Convert every shape in a GTFS feed to a Protobuf file.

    Reads ``<feed_dir>/shapes.txt``, projects each point from EPSG:4326 to
    EPSG:25829, groups the points by ``shape_id``, orders each group by its
    sequence number and writes one serialized ``Shape`` message per group to
    ``<out_dir>/shapes/<shape_id>.pb``.

    The function is best-effort: a malformed row is logged and skipped, a
    missing or unreadable ``shapes.txt`` is logged, and a failure to write one
    shape is logged without stopping the remaining shapes.

    Args:
        feed_dir: Directory containing the extracted GTFS feed.
        out_dir: Directory under which the ``shapes`` folder is created.
    """
    file_path = os.path.join(feed_dir, "shapes.txt")
    shapes: Dict[str, list[Shape]] = {}

    # always_xy=True makes the transformer take (lon, lat) and return (x, y).
    transformer = Transformer.from_crs(4326, 25829, always_xy=True)

    try:
        # "utf-8-sig" reads plain UTF-8 and also strips a leading BOM, which
        # would otherwise corrupt the first header name ("\ufeffshape_id").
        with open(file_path, "r", encoding="utf-8-sig", newline="") as f:
            reader = csv.DictReader(f, quotechar='"', delimiter=",")
            # start=2: line 1 of the file is the header row.
            for row_num, row in enumerate(reader, start=2):
                try:
                    shape = Shape.from_row(row)

                    if shape.shape_pt_lat is not None and shape.shape_pt_lon is not None:
                        shape.shape_pt_25829_x, shape.shape_pt_25829_y = transformer.transform(
                            shape.shape_pt_lon, shape.shape_pt_lat
                        )

                    shapes.setdefault(shape.shape_id, []).append(shape)
                except Exception as e:
                    # Deliberately broad: one bad row must not abort the feed.
                    logger.warning(
                        f"Error parsing shapes.txt line {row_num}: {e} - line data: {row}"
                    )
    except FileNotFoundError:
        logger.error(f"File not found: {file_path}")
        return
    except Exception as e:
        # Keep going: shapes parsed before the failure are still written.
        logger.error(f"Error reading shapes.txt: {e}")

    # Write shapes to Protobuf files
    from src.proto.stop_schedule_pb2 import Epsg25829, Shape as PbShape

    for shape_id, shape_points in shapes.items():
        # Points without a sequence number sort as 0; sorted() is stable, so
        # such points keep their relative file order.
        points = sorted(
            shape_points,
            key=lambda sp: sp.shape_pt_position if sp.shape_pt_position is not None else 0,
        )

        pb_shape = PbShape(
            shape_id=shape_id,
            points=[
                Epsg25829(x=pt.shape_pt_25829_x, y=pt.shape_pt_25829_y)
                for pt in points
                if pt.shape_pt_25829_x is not None and pt.shape_pt_25829_y is not None
            ],
        )

        shape_file_path = os.path.join(out_dir, "shapes", f"{shape_id}.pb")

        try:
            # Inside the try so that an unusable output path is logged for
            # this shape instead of aborting the whole export.
            os.makedirs(os.path.dirname(shape_file_path), exist_ok=True)
            with open(shape_file_path, "wb") as f:
                f.write(pb_shape.SerializeToString())
            logger.debug(f"Shape Protobuf written to: {shape_file_path}")
        except Exception as e:
            logger.error(f"Error writing shape Protobuf to {shape_file_path}: {e}")
download_feed_from_url from src.logger import get_logger @@ -339,6 +340,13 @@ def main(): _, stop_summary = process_date(feed_dir, date, output_dir) all_stops_summary[date] = stop_summary + logger.info("Finished processing all dates. Beginning with shape transformation.") + + # Process shapes, converting each coordinate to EPSG:25829 and saving as Protobuf + process_shapes(feed_dir, output_dir) + + logger.info("Finished processing shapes.") + if feed_url: if os.path.exists(feed_dir): shutil.rmtree(feed_dir) -- cgit v1.3