From 53a592c5e116cbf6844f6b16739d00ad8fd61447 Mon Sep 17 00:00:00 2001 From: Ariel Costas Guerrero Date: Thu, 18 Dec 2025 12:53:23 +0100 Subject: Backport date rolling for stop report --- src/gtfs_perstop_report/rolling_dates_example.json | 8 + src/gtfs_perstop_report/src/rolling_dates.py | 168 +++++++++++++++++++++ src/gtfs_perstop_report/stop_report.py | 34 ++++- 3 files changed, 202 insertions(+), 8 deletions(-) create mode 100644 src/gtfs_perstop_report/rolling_dates_example.json create mode 100644 src/gtfs_perstop_report/src/rolling_dates.py (limited to 'src') diff --git a/src/gtfs_perstop_report/rolling_dates_example.json b/src/gtfs_perstop_report/rolling_dates_example.json new file mode 100644 index 0000000..66525c3 --- /dev/null +++ b/src/gtfs_perstop_report/rolling_dates_example.json @@ -0,0 +1,8 @@ +{ + "2025-12-18": "2025-12-11", + "2025-12-19": "2025-12-12", + "2025-12-20": "2025-12-13", + "2025-12-21": "2025-12-14", + "2025-12-22": "2025-12-15", + "2025-12-23": "2025-12-16" +} diff --git a/src/gtfs_perstop_report/src/rolling_dates.py b/src/gtfs_perstop_report/src/rolling_dates.py new file mode 100644 index 0000000..3c6b166 --- /dev/null +++ b/src/gtfs_perstop_report/src/rolling_dates.py @@ -0,0 +1,168 @@ +""" + +Rolling dates module. + +Handles mapping of future dates not in a GTFS feed to equivalent dates that exist in the feed. +This allows extending feed coverage by reusing data from past dates. +""" +import json +import os +from typing import Optional, Dict, Tuple +from datetime import datetime +from src.logger import get_logger + +logger = get_logger("rolling_dates") + + +class RollingDateConfig: + """ + Manages rolling date mappings from a configuration file. + + The configuration file should be a JSON file with the following format: + { + "2025-09-30": "2025-09-24", + "2025-10-01": "2025-09-25" + } + + Where keys are the target dates (not in feed) and values are the source dates (in feed). + """ + + def __init__(self, config_path: Optional[str] = None): + """ + Initialize the rolling date configuration. + + Args: + config_path: Path to the JSON configuration file. If None, no mapping is active. + """ + self.mappings: Dict[str, str] = {} + self.config_path = config_path + + if config_path: + self._load_config(config_path) + + def _load_config(self, config_path: str): + """ + Load rolling date mappings from a JSON file. + + Args: + config_path: Path to the JSON configuration file. + + Raises: + FileNotFoundError: If the config file doesn't exist. + json.JSONDecodeError: If the config file is not valid JSON. + ValueError: If the config file has invalid date formats. + """ + if not os.path.exists(config_path): + raise FileNotFoundError(f"Rolling dates config file not found: {config_path}") + + try: + with open(config_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + # Validate that data is a dictionary + if not isinstance(data, dict): + raise ValueError("Rolling dates config must be a JSON object (dictionary)") + + # Validate date formats + for target_date, source_date in data.items(): + self._validate_date_format(target_date, "target") + self._validate_date_format(source_date, "source") + + self.mappings = data + logger.info(f"Loaded {len(self.mappings)} rolling date mappings from {config_path}") + + except json.JSONDecodeError as e: + logger.error(f"Failed to parse rolling dates config: {e}") + raise + except Exception as e: + logger.error(f"Error loading rolling dates config: {e}") + raise + + def _validate_date_format(self, date_str: str, date_type: str): + """ + Validate that a date string is in YYYY-MM-DD format. + + Args: + date_str: The date string to validate. + date_type: Type of date (for error messages). + + Raises: + ValueError: If the date format is invalid. + """ + try: + datetime.strptime(date_str, '%Y-%m-%d') + except ValueError: + raise ValueError( + f"Invalid {date_type} date format '{date_str}'. " + f"Expected YYYY-MM-DD format." + ) + + def get_source_date(self, target_date: str) -> Optional[str]: + """ + Get the source date for a given target date. + + Args: + target_date: The date to look up (YYYY-MM-DD format). + + Returns: + The source date if a mapping exists, None otherwise. + """ + return self.mappings.get(target_date) + + def is_rolling_date(self, date: str) -> bool: + """ + Check if a date is a rolling date (has a mapping). + + Args: + date: The date to check (YYYY-MM-DD format). + + Returns: + True if the date has a rolling mapping, False otherwise. + """ + return date in self.mappings + + def get_mapping_info(self, target_date: str) -> Optional[Tuple[str, str]]: + """ + Get complete mapping information for a target date. + + Args: + target_date: The date to look up (YYYY-MM-DD format). + + Returns: + Tuple of (source_date, target_date) if mapping exists, None otherwise. + """ + source_date = self.get_source_date(target_date) + if source_date: + return (source_date, target_date) + return None + + def has_mappings(self) -> bool: + """ + Check if any rolling date mappings are configured. + + Returns: + True if at least one mapping exists, False otherwise. + """ + return len(self.mappings) > 0 + + def get_all_mappings(self) -> Dict[str, str]: + """ + Get all configured rolling date mappings. + + Returns: + Dictionary of target_date -> source_date mappings. + """ + return self.mappings.copy() + + +def create_rolling_date_config(config_path: Optional[str] = None) -> RollingDateConfig: + """ + Factory function to create a RollingDateConfig instance. + + Args: + config_path: Path to the JSON configuration file. If None, returns an empty config. + + Returns: + RollingDateConfig instance. + """ + return RollingDateConfig(config_path) diff --git a/src/gtfs_perstop_report/stop_report.py b/src/gtfs_perstop_report/stop_report.py index 3bbdf11..ef40417 100644 --- a/src/gtfs_perstop_report/stop_report.py +++ b/src/gtfs_perstop_report/stop_report.py @@ -13,6 +13,7 @@ from src.logger import get_logger from src.report_writer import write_stop_json, write_stop_protobuf from src.routes import load_routes from src.services import get_active_services +from src.rolling_dates import create_rolling_date_config from src.stop_times import get_stops_for_trips, StopTime from src.stops import get_all_stops, get_all_stops_by_code, get_numeric_code from src.street_name import normalise_stop_name @@ -49,6 +50,8 @@ def parse_args(): default="default", help="Feed provider type (vitrasa, renfe, default). Default: default", ) + parser.add_argument('--rolling-dates', type=str, + help="Path to rolling dates configuration file (JSON)") args = parser.parse_args() if args.feed_dir and args.feed_url: @@ -270,7 +273,7 @@ def build_trip_previous_shape_map( def get_stop_arrivals( - feed_dir: str, date: str, provider + feed_dir: str, date: str, provider, rolling_config=None ) -> Dict[str, List[Dict[str, Any]]]: """ Process trips for the given date and organize stop arrivals. @@ -280,6 +283,7 @@ def get_stop_arrivals( feed_dir: Path to the GTFS feed directory date: Date in YYYY-MM-DD format provider: Provider class with feed-specific formatting methods + rolling_config: Optional RollingDateConfig for date mapping Returns: Dictionary mapping stop_code to lists of arrival information. @@ -289,14 +293,19 @@ def get_stop_arrivals( stops = get_all_stops(feed_dir) logger.info(f"Found {len(stops)} stops in the feed.") - active_services = get_active_services(feed_dir, date) + effective_date = date + if rolling_config and rolling_config.is_rolling_date(date): + effective_date = rolling_config.get_source_date(date) + logger.info(f"Using source date {effective_date} for rolling date {date}") + + active_services = get_active_services(feed_dir, effective_date) if not active_services: - logger.info("No active services found for the given date.") + logger.info(f"No active services found for the given date {effective_date}.") - logger.info(f"Found {len(active_services)} active services for date {date}.") + logger.info(f"Found {len(active_services)} active services for date {effective_date}.") # Also get services from the previous day to include night services (times >= 24:00) - prev_date = (datetime.strptime(date, "%Y-%m-%d") - timedelta(days=1)).strftime( + prev_date = (datetime.strptime(effective_date, "%Y-%m-%d") - timedelta(days=1)).strftime( "%Y-%m-%d" ) prev_services = get_active_services(feed_dir, prev_date) @@ -527,7 +536,7 @@ def get_stop_arrivals( def process_date( - feed_dir: str, date: str, output_dir: str, provider + feed_dir: str, date: str, output_dir: str, provider, rolling_config=None ) -> tuple[str, Dict[str, int]]: """ Process a single date and write its stop JSON files. @@ -540,7 +549,7 @@ def process_date( stops_by_code = get_all_stops_by_code(feed_dir) # Get all stop arrivals for the current date - stop_arrivals = get_stop_arrivals(feed_dir, date, provider) + stop_arrivals = get_stop_arrivals(feed_dir, date, provider, rolling_config) if not stop_arrivals: logger.warning(f"No stop arrivals found for date {date}") @@ -622,6 +631,15 @@ def main(): return date_list = all_dates + # Handle rolling dates + rolling_config = create_rolling_date_config(args.rolling_dates) + if rolling_config.has_mappings(): + for target_date in rolling_config.get_all_mappings().keys(): + if target_date not in date_list: + date_list.append(target_date) + # Sort dates to ensure they are processed in order + date_list.sort() + # Ensure date_list is not empty before processing if not date_list: logger.error("No valid dates to process.") @@ -633,7 +651,7 @@ def main(): all_stops_summary = {} for date in date_list: - _, stop_summary = process_date(feed_dir, date, output_dir, provider) + _, stop_summary = process_date(feed_dir, date, output_dir, provider, rolling_config) all_stops_summary[date] = stop_summary logger.info("Finished processing all dates. Beginning with shape transformation.") -- cgit v1.3