From 1fd17d4d07d25a810816e4e38ddc31ae72b8c91a Mon Sep 17 00:00:00 2001 From: Ariel Costas Guerrero Date: Sun, 28 Dec 2025 00:17:11 +0100 Subject: Basic fare calculations for Galicia (Xunta) --- src/xunta_fares/main.py | 95 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 src/xunta_fares/main.py (limited to 'src/xunta_fares/main.py') diff --git a/src/xunta_fares/main.py b/src/xunta_fares/main.py new file mode 100644 index 0000000..4e97085 --- /dev/null +++ b/src/xunta_fares/main.py @@ -0,0 +1,95 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "openpyxl", +# "pandas", +# "requests", +# ] +# /// + +from io import BytesIO +import logging +import pandas as pd +import requests + +def download_municipality_list() -> None: + resp = requests.get('https://www.ine.es/daco/daco42/codmun/diccionario25.xlsx') + if resp.status_code != 200: + raise Exception(f"Failed to download file: {resp.status_code}") + content_bytes = BytesIO(resp.content) + return pd.read_excel(content_bytes, skiprows=1) + + +def download_fare_data() -> None: + resp = requests.get('https://www.bus.gal/sites/w_tpgal/files/faq/2025/06/202507-12._calculadora_tarifas_ptpg_2025_descuentos_cas.xlsx') + if resp.status_code != 200: + raise Exception(f"Failed to download file: {resp.status_code}") + content_bytes = BytesIO(resp.content) + return pd.read_excel(content_bytes, sheet_name='PTPG_Tarifas_2025') + + +GALICIA_CCAA = '12' + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') + logging.info("Downloading municipality list...") + municipalities_df = download_municipality_list() + municipalities_df.columns = ['ccaa', 'prov', 'muni', 'dc', 'nombre'] + + municipalities_df = municipalities_df[municipalities_df['ccaa'].astype(str) == GALICIA_CCAA] + municipalities_df['code'] = municipalities_df['prov'].astype(str).str.zfill(2) + municipalities_df['muni'].astype(str).str.zfill(3) + municipalities_df = municipalities_df[['nombre', 'code']].reset_index(drop=True) + + fare_data_df = download_fare_data() + fare_data_df.columns = [ + 'conc_inicio', 'conc_fin', 'rel', 'bonificacion', + 'efectivo', + 'tmg_ignorar', 'tmgrecurr_ignorar', + 'tpg', 'tpgrecurr'] + + fare_data_df = fare_data_df[['conc_inicio', 'conc_fin', 'bonificacion', 'efectivo', 'tpg']] + + # Rename municipalities in fare file, where leading article should be at the end + # 'A Arnoia' should be 'Arnoia, A' and 'O Pino' should be 'Pino, O' + fare_data_df['conc_inicio'] = fare_data_df['conc_inicio'].apply(lambda x: f"{x[2:]}, {x[0:1]}" if x.startswith('A ') or x.startswith('O ') else x) + fare_data_df['conc_inicio'] = fare_data_df['conc_inicio'].apply(lambda x: f"{x[3:]}, {x[0:2]}" if x.startswith('As ') or x.startswith('Os ') else x) + fare_data_df['conc_fin'] = fare_data_df['conc_fin'].apply(lambda x: f"{x[2:]}, {x[0:1]}" if x.startswith('A ') or x.startswith('O ') else x) + fare_data_df['conc_fin'] = fare_data_df['conc_fin'].apply(lambda x: f"{x[3:]}, {x[0:2]}" if x.startswith('As ') or x.startswith('Os ') else x) + + # conc_inicio and conc_fin are municipality names, we need to map them to codes + fare_data_df = fare_data_df.merge( + municipalities_df, left_on='conc_inicio', right_on='nombre', how='left') + fare_data_df = fare_data_df.rename(columns={'code': 'conc_inicio_code'}) + fare_data_df = fare_data_df.drop(columns=['nombre']) + + fare_data_df = fare_data_df.merge( + municipalities_df, left_on='conc_fin', right_on='nombre', how='left') + fare_data_df = fare_data_df.rename(columns={'code': 'conc_fin_code'}) + fare_data_df = fare_data_df.drop(columns=['nombre']) + + # Check for missing municipality codes + missing_inicio = fare_data_df[fare_data_df['conc_inicio_code'].isna()]['conc_inicio'].unique() + missing_fin = fare_data_df[fare_data_df['conc_fin_code'].isna()]['conc_fin'].unique() + + if len(missing_inicio) > 0 or len(missing_fin) > 0: + all_missing = set(missing_inicio) | set(missing_fin) + logging.warning(f"Could not find codes for the following municipalities: {all_missing}") + + # Bonificación '-' -> null + fare_data_df['bonificacion'] = fare_data_df['bonificacion'].replace('-', pd.NA) + + # Drop conc_inicio and conc_fin columns + fare_data_df = fare_data_df.drop(columns=['conc_inicio', 'conc_fin']) + + # Move conc_inicio_code and conc_fin_code to the front and rename for clarity + fare_data_df = fare_data_df[['conc_inicio_code', 'conc_fin_code', 'bonificacion', 'efectivo', 'tpg']] + fare_data_df = fare_data_df.rename(columns={ + 'conc_inicio_code': 'conc_inicio', + 'conc_fin_code': 'conc_fin' + }) + + # Make decimals have two decimal places (since it's currency, and to avoid floating point issues) + fare_data_df['efectivo'] = fare_data_df['efectivo'].round(2) + fare_data_df['tpg'] = fare_data_df['tpg'].round(2) + + fare_data_df.to_csv('xunta_fares_galicia_2025.csv', index=False) -- cgit v1.3