1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "openpyxl",
# "pandas",
# "requests",
# ]
# ///
from io import BytesIO
import logging
import pandas as pd
import requests
def download_municipality_list() -> None:
resp = requests.get('https://www.ine.es/daco/daco42/codmun/diccionario25.xlsx')
if resp.status_code != 200:
raise Exception(f"Failed to download file: {resp.status_code}")
content_bytes = BytesIO(resp.content)
return pd.read_excel(content_bytes, skiprows=1)
def download_fare_data() -> None:
resp = requests.get('https://www.bus.gal/sites/w_tpgal/files/faq/2025/06/202507-12._calculadora_tarifas_ptpg_2025_descuentos_cas.xlsx')
if resp.status_code != 200:
raise Exception(f"Failed to download file: {resp.status_code}")
content_bytes = BytesIO(resp.content)
return pd.read_excel(content_bytes, sheet_name='PTPG_Tarifas_2025')
GALICIA_CCAA = '12'
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.info("Downloading municipality list...")
municipalities_df = download_municipality_list()
municipalities_df.columns = ['ccaa', 'prov', 'muni', 'dc', 'nombre']
municipalities_df = municipalities_df[municipalities_df['ccaa'].astype(str) == GALICIA_CCAA]
municipalities_df['code'] = municipalities_df['prov'].astype(str).str.zfill(2) + municipalities_df['muni'].astype(str).str.zfill(3)
municipalities_df = municipalities_df[['nombre', 'code']].reset_index(drop=True)
fare_data_df = download_fare_data()
fare_data_df.columns = [
'conc_inicio', 'conc_fin', 'rel', 'bonificacion',
'efectivo',
'tmg_ignorar', 'tmgrecurr_ignorar',
'tpg', 'tpgrecurr']
fare_data_df = fare_data_df[['conc_inicio', 'conc_fin', 'bonificacion', 'efectivo', 'tpg']]
# Rename municipalities in fare file, where leading article should be at the end
# 'A Arnoia' should be 'Arnoia, A' and 'O Pino' should be 'Pino, O'
fare_data_df['conc_inicio'] = fare_data_df['conc_inicio'].apply(lambda x: f"{x[2:]}, {x[0:1]}" if x.startswith('A ') or x.startswith('O ') else x)
fare_data_df['conc_inicio'] = fare_data_df['conc_inicio'].apply(lambda x: f"{x[3:]}, {x[0:2]}" if x.startswith('As ') or x.startswith('Os ') else x)
fare_data_df['conc_fin'] = fare_data_df['conc_fin'].apply(lambda x: f"{x[2:]}, {x[0:1]}" if x.startswith('A ') or x.startswith('O ') else x)
fare_data_df['conc_fin'] = fare_data_df['conc_fin'].apply(lambda x: f"{x[3:]}, {x[0:2]}" if x.startswith('As ') or x.startswith('Os ') else x)
# conc_inicio and conc_fin are municipality names, we need to map them to codes
fare_data_df = fare_data_df.merge(
municipalities_df, left_on='conc_inicio', right_on='nombre', how='left')
fare_data_df = fare_data_df.rename(columns={'code': 'conc_inicio_code'})
fare_data_df = fare_data_df.drop(columns=['nombre'])
fare_data_df = fare_data_df.merge(
municipalities_df, left_on='conc_fin', right_on='nombre', how='left')
fare_data_df = fare_data_df.rename(columns={'code': 'conc_fin_code'})
fare_data_df = fare_data_df.drop(columns=['nombre'])
# Check for missing municipality codes
missing_inicio = fare_data_df[fare_data_df['conc_inicio_code'].isna()]['conc_inicio'].unique()
missing_fin = fare_data_df[fare_data_df['conc_fin_code'].isna()]['conc_fin'].unique()
if len(missing_inicio) > 0 or len(missing_fin) > 0:
all_missing = set(missing_inicio) | set(missing_fin)
logging.warning(f"Could not find codes for the following municipalities: {all_missing}")
# Bonificación '-' -> null
fare_data_df['bonificacion'] = fare_data_df['bonificacion'].replace('-', pd.NA)
# Drop conc_inicio and conc_fin columns
fare_data_df = fare_data_df.drop(columns=['conc_inicio', 'conc_fin'])
# Move conc_inicio_code and conc_fin_code to the front and rename for clarity
fare_data_df = fare_data_df[['conc_inicio_code', 'conc_fin_code', 'bonificacion', 'efectivo', 'tpg']]
fare_data_df = fare_data_df.rename(columns={
'conc_inicio_code': 'conc_inicio',
'conc_fin_code': 'conc_fin'
})
# Make decimals have two decimal places (since it's currency, and to avoid floating point issues)
fare_data_df['efectivo'] = fare_data_df['efectivo'].round(2)
fare_data_df['tpg'] = fare_data_df['tpg'].round(2)
fare_data_df.to_csv('xunta_fares_galicia_2025.csv', index=False)
|