From ef86a09ee8d8fc287b382cf9092af8726b44ceae Mon Sep 17 00:00:00 2001 From: Ariel Costas Guerrero Date: Mon, 17 Nov 2025 16:39:48 +0100 Subject: Drop support for Santiago de Compostela, add collection script --- src/delay_collector/schema.sql | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 src/delay_collector/schema.sql (limited to 'src/delay_collector/schema.sql') diff --git a/src/delay_collector/schema.sql b/src/delay_collector/schema.sql new file mode 100644 index 0000000..c16ecd3 --- /dev/null +++ b/src/delay_collector/schema.sql @@ -0,0 +1,41 @@ +-- Database schema for bus delay observations (PostgreSQL) +-- This stores real-time vs scheduled arrival data for analyzing delays +-- Partitioned by month for optimal performance with large datasets + +CREATE TABLE IF NOT EXISTS delay_observations ( + id BIGSERIAL NOT NULL, + observed_at TIMESTAMPTZ NOT NULL, + stop_code INTEGER NOT NULL, + line VARCHAR(10) NOT NULL, + route VARCHAR(100) NOT NULL, + service_id VARCHAR(50) NOT NULL, + trip_id VARCHAR(50) NOT NULL, + running BOOLEAN NOT NULL, + scheduled_minutes SMALLINT NOT NULL, + real_time_minutes SMALLINT NOT NULL, + delay_minutes SMALLINT GENERATED ALWAYS AS (real_time_minutes - scheduled_minutes) STORED, + PRIMARY KEY (id, observed_at) +) PARTITION BY RANGE (observed_at); + +-- Create initial partitions (monthly) +-- You should create new partitions before each month starts +CREATE TABLE IF NOT EXISTS delay_observations_2025_11 PARTITION OF delay_observations + FOR VALUES FROM ('2025-11-01') TO ('2025-12-01'); + +CREATE TABLE IF NOT EXISTS delay_observations_2025_12 PARTITION OF delay_observations + FOR VALUES FROM ('2025-12-01') TO ('2026-01-01'); + +CREATE TABLE IF NOT EXISTS delay_observations_2026_01 PARTITION OF delay_observations + FOR VALUES FROM ('2026-01-01') TO ('2026-02-01'); + +-- Indexes for efficient querying (created on parent table, applied to all partitions) +CREATE INDEX IF NOT EXISTS idx_observed_at ON delay_observations(observed_at DESC); +CREATE INDEX IF NOT EXISTS idx_stop_code ON delay_observations(stop_code); +CREATE INDEX IF NOT EXISTS idx_line ON delay_observations(line); +CREATE INDEX IF NOT EXISTS idx_trip_id ON delay_observations(trip_id, observed_at DESC); +CREATE INDEX IF NOT EXISTS idx_stop_line ON delay_observations(stop_code, line); +CREATE INDEX IF NOT EXISTS idx_running ON delay_observations(running) WHERE running = true; + +-- Partitioning by date for better performance (optional, uncomment if needed) +-- This requires converting to a partitioned table +-- ALTER TABLE delay_observations PARTITION BY RANGE (observation_date); -- cgit v1.3