aboutsummaryrefslogtreecommitdiff
path: root/src/delay_collector/schema.sql
diff options
context:
space:
mode:
Diffstat (limited to 'src/delay_collector/schema.sql')
-rw-r--r--src/delay_collector/schema.sql41
1 files changed, 0 insertions, 41 deletions
diff --git a/src/delay_collector/schema.sql b/src/delay_collector/schema.sql
deleted file mode 100644
index c16ecd3..0000000
--- a/src/delay_collector/schema.sql
+++ /dev/null
@@ -1,41 +0,0 @@
--- Database schema for bus delay observations (PostgreSQL)
--- This stores real-time vs scheduled arrival data for analyzing delays
--- Partitioned by month for optimal performance with large datasets
-
-CREATE TABLE IF NOT EXISTS delay_observations (
- id BIGSERIAL NOT NULL,
- observed_at TIMESTAMPTZ NOT NULL,
- stop_code INTEGER NOT NULL,
- line VARCHAR(10) NOT NULL,
- route VARCHAR(100) NOT NULL,
- service_id VARCHAR(50) NOT NULL,
- trip_id VARCHAR(50) NOT NULL,
- running BOOLEAN NOT NULL,
- scheduled_minutes SMALLINT NOT NULL,
- real_time_minutes SMALLINT NOT NULL,
- delay_minutes SMALLINT GENERATED ALWAYS AS (real_time_minutes - scheduled_minutes) STORED,
- PRIMARY KEY (id, observed_at)
-) PARTITION BY RANGE (observed_at);
-
--- Create initial partitions (monthly)
--- You should create new partitions before each month starts
-CREATE TABLE IF NOT EXISTS delay_observations_2025_11 PARTITION OF delay_observations
- FOR VALUES FROM ('2025-11-01') TO ('2025-12-01');
-
-CREATE TABLE IF NOT EXISTS delay_observations_2025_12 PARTITION OF delay_observations
- FOR VALUES FROM ('2025-12-01') TO ('2026-01-01');
-
-CREATE TABLE IF NOT EXISTS delay_observations_2026_01 PARTITION OF delay_observations
- FOR VALUES FROM ('2026-01-01') TO ('2026-02-01');
-
--- Indexes for efficient querying (created on parent table, applied to all partitions)
-CREATE INDEX IF NOT EXISTS idx_observed_at ON delay_observations(observed_at DESC);
-CREATE INDEX IF NOT EXISTS idx_stop_code ON delay_observations(stop_code);
-CREATE INDEX IF NOT EXISTS idx_line ON delay_observations(line);
-CREATE INDEX IF NOT EXISTS idx_trip_id ON delay_observations(trip_id, observed_at DESC);
-CREATE INDEX IF NOT EXISTS idx_stop_line ON delay_observations(stop_code, line);
-CREATE INDEX IF NOT EXISTS idx_running ON delay_observations(running) WHERE running = true;
-
--- Partitioning by date for better performance (optional, uncomment if needed)
--- This requires converting to a partitioned table
--- ALTER TABLE delay_observations PARTITION BY RANGE (observation_date);