aboutsummaryrefslogtreecommitdiff
path: root/src/gtfs_vigo_stops
diff options
context:
space:
mode:
author Ariel Costas Guerrero <ariel@costas.dev> 2025-11-14 15:57:34 +0100
committer Ariel Costas Guerrero <ariel@costas.dev> 2025-11-14 15:57:34 +0100
commit 52f342f9135264216cdbf3012ea115981d0bc294 (patch)
tree bbb2e13e3ce277be5407a1dd1b169ee7d12256d1 /src/gtfs_vigo_stops
parent 799091e285bf918c4028ade435d9c974e27bb206 (diff)
Add Shape extraction from GTFS to new protobufs
Diffstat (limited to 'src/gtfs_vigo_stops')
-rw-r--r-- src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.py 25
-rw-r--r-- src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.pyi 8
-rw-r--r-- src/gtfs_vigo_stops/src/shapes.py 88
-rw-r--r-- src/gtfs_vigo_stops/stop_report.py 8
4 files changed, 118 insertions, 11 deletions
diff --git a/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.py b/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.py
index a4cabd8..d9f8e52 100644
--- a/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.py
+++ b/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# NO CHECKED-IN PROTOBUF GENCODE
-# source: src/common/stop_schedule.proto
+# source: stop_schedule.proto
# Protobuf Python Version: 6.33.0
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
@@ -15,7 +15,7 @@ _runtime_version.ValidateProtobufRuntimeVersion(
33,
0,
'',
- 'src/common/stop_schedule.proto'
+ 'stop_schedule.proto'
)
# @@protoc_insertion_point(imports)
@@ -24,17 +24,20 @@ _sym_db = _symbol_database.Default()
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1esrc/common/stop_schedule.proto\x12\nsrc.common\"!\n\tEpsg25829\x12\t\n\x01x\x18\x01 \x01(\x01\x12\t\n\x01y\x18\x02 \x01(\x01\"\xed\x03\n\x0cStopArrivals\x12\x0f\n\x07stop_id\x18\x01 \x01(\t\x12\'\n\x08location\x18\x03 \x01(\x0b\x32\x15.src.common.Epsg25829\x12;\n\x08\x61rrivals\x18\x05 \x03(\x0b\x32).src.common.StopArrivals.ScheduledArrival\x1a\xe5\x02\n\x10ScheduledArrival\x12\x12\n\nservice_id\x18\x01 \x01(\t\x12\x0f\n\x07trip_id\x18\x02 \x01(\t\x12\x0c\n\x04line\x18\x03 \x01(\t\x12\r\n\x05route\x18\x04 \x01(\t\x12\x10\n\x08shape_id\x18\x05 \x01(\t\x12\x1b\n\x13shape_dist_traveled\x18\x06 \x01(\x01\x12\x15\n\rstop_sequence\x18\x0b \x01(\r\x12\x14\n\x0cnext_streets\x18\x0c \x03(\t\x12\x15\n\rstarting_code\x18\x15 \x01(\t\x12\x15\n\rstarting_name\x18\x16 \x01(\t\x12\x15\n\rstarting_time\x18\x17 \x01(\t\x12\x14\n\x0c\x63\x61lling_time\x18! \x01(\t\x12\x13\n\x0b\x63\x61lling_ssm\x18\" \x01(\r\x12\x15\n\rterminus_code\x18) \x01(\t\x12\x15\n\rterminus_name\x18* \x01(\t\x12\x15\n\rterminus_time\x18+ \x01(\tb\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13stop_schedule.proto\x12\x05proto\"!\n\tEpsg25829\x12\t\n\x01x\x18\x01 \x01(\x01\x12\t\n\x01y\x18\x02 \x01(\x01\"\xe3\x03\n\x0cStopArrivals\x12\x0f\n\x07stop_id\x18\x01 \x01(\t\x12\"\n\x08location\x18\x03 \x01(\x0b\x32\x10.proto.Epsg25829\x12\x36\n\x08\x61rrivals\x18\x05 \x03(\x0b\x32$.proto.StopArrivals.ScheduledArrival\x1a\xe5\x02\n\x10ScheduledArrival\x12\x12\n\nservice_id\x18\x01 \x01(\t\x12\x0f\n\x07trip_id\x18\x02 \x01(\t\x12\x0c\n\x04line\x18\x03 \x01(\t\x12\r\n\x05route\x18\x04 \x01(\t\x12\x10\n\x08shape_id\x18\x05 \x01(\t\x12\x1b\n\x13shape_dist_traveled\x18\x06 \x01(\x01\x12\x15\n\rstop_sequence\x18\x0b \x01(\r\x12\x14\n\x0cnext_streets\x18\x0c \x03(\t\x12\x15\n\rstarting_code\x18\x15 \x01(\t\x12\x15\n\rstarting_name\x18\x16 \x01(\t\x12\x15\n\rstarting_time\x18\x17 \x01(\t\x12\x14\n\x0c\x63\x61lling_time\x18! \x01(\t\x12\x13\n\x0b\x63\x61lling_ssm\x18\" \x01(\r\x12\x15\n\rterminus_code\x18) \x01(\t\x12\x15\n\rterminus_name\x18* \x01(\t\x12\x15\n\rterminus_time\x18+ \x01(\t\";\n\x05Shape\x12\x10\n\x08shape_id\x18\x01 \x01(\t\x12 \n\x06points\x18\x03 \x03(\x0b\x32\x10.proto.Epsg25829B$\xaa\x02!Costasdev.Busurbano.Backend.Typesb\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'src.common.stop_schedule_pb2', _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'stop_schedule_pb2', _globals)
if not _descriptor._USE_C_DESCRIPTORS:
- DESCRIPTOR._loaded_options = None
- _globals['_EPSG25829']._serialized_start=46
- _globals['_EPSG25829']._serialized_end=79
- _globals['_STOPARRIVALS']._serialized_start=82
- _globals['_STOPARRIVALS']._serialized_end=575
- _globals['_STOPARRIVALS_SCHEDULEDARRIVAL']._serialized_start=218
- _globals['_STOPARRIVALS_SCHEDULEDARRIVAL']._serialized_end=575
+ _globals['DESCRIPTOR']._loaded_options = None
+ _globals['DESCRIPTOR']._serialized_options = b'\252\002!Costasdev.Busurbano.Backend.Types'
+ _globals['_EPSG25829']._serialized_start=30
+ _globals['_EPSG25829']._serialized_end=63
+ _globals['_STOPARRIVALS']._serialized_start=66
+ _globals['_STOPARRIVALS']._serialized_end=549
+ _globals['_STOPARRIVALS_SCHEDULEDARRIVAL']._serialized_start=192
+ _globals['_STOPARRIVALS_SCHEDULEDARRIVAL']._serialized_end=549
+ _globals['_SHAPE']._serialized_start=551
+ _globals['_SHAPE']._serialized_end=610
# @@protoc_insertion_point(module_scope)
diff --git a/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.pyi b/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.pyi
index aa42cdb..615999f 100644
--- a/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.pyi
+++ b/src/gtfs_vigo_stops/src/proto/stop_schedule_pb2.pyi
@@ -58,3 +58,11 @@ class StopArrivals(_message.Message):
location: Epsg25829
arrivals: _containers.RepeatedCompositeFieldContainer[StopArrivals.ScheduledArrival]
def __init__(self, stop_id: _Optional[str] = ..., location: _Optional[_Union[Epsg25829, _Mapping]] = ..., arrivals: _Optional[_Iterable[_Union[StopArrivals.ScheduledArrival, _Mapping]]] = ...) -> None: ...
+
+class Shape(_message.Message):  # stub for the Shape message: a shape_id string plus repeated Epsg25829 points
+    __slots__ = ()
+    SHAPE_ID_FIELD_NUMBER: _ClassVar[int]
+    POINTS_FIELD_NUMBER: _ClassVar[int]
+    shape_id: str
+    points: _containers.RepeatedCompositeFieldContainer[Epsg25829]
+    def __init__(self, shape_id: _Optional[str] = ..., points: _Optional[_Iterable[_Union[Epsg25829, _Mapping]]] = ...) -> None: ...
diff --git a/src/gtfs_vigo_stops/src/shapes.py b/src/gtfs_vigo_stops/src/shapes.py
new file mode 100644
index 0000000..f49832a
--- /dev/null
+++ b/src/gtfs_vigo_stops/src/shapes.py
@@ -0,0 +1,88 @@
+import csv
+from dataclasses import dataclass
+import os
+from typing import Dict, Optional
+
+from pyproj import Transformer
+
+from src.logger import get_logger
+
+
+logger = get_logger("shapes")
+
+
+@dataclass
+class Shape:
+    """One row of shapes.txt, plus its projected EPSG:25829 coordinates."""
+    shape_id: str
+    shape_pt_lat: Optional[float]
+    shape_pt_lon: Optional[float]
+    shape_pt_position: Optional[int]  # sort key when ordering a shape's points
+    shape_dist_traveled: Optional[float]
+    shape_pt_25829_x: Optional[float] = None  # stays None until lat/lon are projected
+    shape_pt_25829_y: Optional[float] = None  # stays None until lat/lon are projected
+
+
+def process_shapes(feed_dir: str, out_dir: str) -> None:
+    """Convert the feed's shapes.txt into one Protobuf file per shape.
+
+    Points are reprojected from EPSG:4326 to EPSG:25829, grouped by shape_id,
+    ordered by sequence number (a missing number sorts as 0) and written to
+    <out_dir>/shapes/<shape_id>.pb. Unparseable rows and failed file
+    reads/writes are logged rather than raised.
+    """
+    file_path = os.path.join(feed_dir, "shapes.txt")
+    shapes: Dict[str, list[Shape]] = {}
+
+    # always_xy=True: transform() takes (lon, lat) and returns (x, y).
+    transformer = Transformer.from_crs(4326, 25829, always_xy=True)
+
+    try:
+        with open(file_path, "r", encoding="utf-8", newline="") as f:
+            reader = csv.DictReader(f, quotechar='"', delimiter=",")
+            # start=2 because line 1 of the file is the CSV header.
+            for row_num, row in enumerate(reader, start=2):
+                try:
+                    # GTFS calls the ordering column shape_pt_sequence; the old
+                    # shape_pt_position name is still accepted as a fallback.
+                    seq = row.get("shape_pt_sequence") or row.get("shape_pt_position")
+                    shape = Shape(
+                        shape_id=row["shape_id"],
+                        shape_pt_lat=float(row["shape_pt_lat"]) if row.get("shape_pt_lat") else None,
+                        shape_pt_lon=float(row["shape_pt_lon"]) if row.get("shape_pt_lon") else None,
+                        shape_pt_position=int(seq) if seq else None,
+                        shape_dist_traveled=float(row["shape_dist_traveled"]) if row.get("shape_dist_traveled") else None,
+                    )
+                    if shape.shape_pt_lat is not None and shape.shape_pt_lon is not None:
+                        x, y = transformer.transform(shape.shape_pt_lon, shape.shape_pt_lat)
+                        shape.shape_pt_25829_x, shape.shape_pt_25829_y = x, y
+                    shapes.setdefault(shape.shape_id, []).append(shape)
+                except Exception as e:
+                    logger.warning(
+                        f"Error parsing shapes.txt line {row_num}: {e} - line data: {row}"
+                    )
+    except FileNotFoundError:
+        logger.error(f"File not found: {file_path}")
+    except Exception as e:
+        logger.error(f"Error reading shapes.txt: {e}")
+
+    # Write shapes to Protobuf files
+    from src.proto.stop_schedule_pb2 import Epsg25829, Shape as PbShape
+
+    for shape_id, shape_points in shapes.items():
+        points = sorted(shape_points, key=lambda sp: sp.shape_pt_position if sp.shape_pt_position is not None else 0)
+        pb_shape = PbShape(
+            shape_id=shape_id,
+            points=[
+                Epsg25829(x=pt.shape_pt_25829_x, y=pt.shape_pt_25829_y)
+                for pt in points
+                if pt.shape_pt_25829_x is not None and pt.shape_pt_25829_y is not None
+            ],
+        )
+        shape_file_path = os.path.join(out_dir, "shapes", f"{shape_id}.pb")
+        os.makedirs(os.path.dirname(shape_file_path), exist_ok=True)
+        try:
+            with open(shape_file_path, "wb") as f:
+                f.write(pb_shape.SerializeToString())
+            logger.debug(f"Shape Protobuf written to: {shape_file_path}")
+        except Exception as e:
+            logger.error(f"Error writing shape Protobuf to {shape_file_path}: {e}")
diff --git a/src/gtfs_vigo_stops/stop_report.py b/src/gtfs_vigo_stops/stop_report.py
index ab6bac8..8a36e60 100644
--- a/src/gtfs_vigo_stops/stop_report.py
+++ b/src/gtfs_vigo_stops/stop_report.py
@@ -6,6 +6,7 @@ import time
import traceback
from typing import Any, Dict, List
+from src.shapes import process_shapes
from src.common import get_all_feed_dates
from src.download import download_feed_from_url
from src.logger import get_logger
@@ -339,6 +340,13 @@ def main():
_, stop_summary = process_date(feed_dir, date, output_dir)
all_stops_summary[date] = stop_summary
+ logger.info("Finished processing all dates. Beginning with shape transformation.")
+
+ # Process shapes, converting each coordinate to EPSG:25829 and saving as Protobuf
+ process_shapes(feed_dir, output_dir)
+
+ logger.info("Finished processing shapes.")
+
if feed_url:
if os.path.exists(feed_dir):
shutil.rmtree(feed_dir)