#!/usr/bin/env bash set -euo pipefail # EveryTab Postgres Setup — for dedicated i3 database instance # Run on the i3.large EC2 instance (475GB local NVMe). # Configures Postgres to use the NVMe for data storage. # Optimized for write-heavy bulk loads — fsync disabled, large buffers. # Data is ephemeral — back up with pg_dump before terminating the instance. echo "=== EveryTab Postgres Setup (i3 NVMe) ===" # --- Format and mount the NVMe drive --- echo "--- Setting up NVMe storage ---" NVME_DEV="/dev/nvme1n1" NVME_MOUNT="/data" if [ ! -d "$NVME_MOUNT" ]; then # Find the NVMe instance store (not the root EBS) # i3.large has one 475GB NVMe at /dev/nvme1n1 or similar if [ ! -b "$NVME_DEV" ]; then # Try finding it NVME_DEV=$(lsblk -dpno NAME,SIZE | grep -v "$(lsblk -dpno NAME /)" | head -1 | awk '{print $1}') if [ -z "$NVME_DEV" ]; then echo "ERROR: Could not find NVMe instance store device" echo "Run 'lsblk' and set NVME_DEV manually" exit 1 fi fi echo "Using NVMe device: $NVME_DEV" sudo mkfs.xfs -f "$NVME_DEV" sudo mkdir -p "$NVME_MOUNT" sudo mount "$NVME_DEV" "$NVME_MOUNT" sudo chown ec2-user:ec2-user "$NVME_MOUNT" echo "Mounted $NVME_DEV at $NVME_MOUNT" else echo "NVMe already mounted at $NVME_MOUNT" fi # --- Install Postgres --- echo "--- Installing PostgreSQL 16 ---" if ! command -v pg_isready &>/dev/null; then sudo dnf install -y postgresql16-server fi # --- Init database on NVMe --- echo "--- Initializing database on NVMe ---" PG_DATA="$NVME_MOUNT/pgdata" if [ ! -d "$PG_DATA" ]; then sudo mkdir -p "$PG_DATA" sudo chown postgres:postgres "$PG_DATA" sudo -u postgres /usr/bin/initdb -D "$PG_DATA" fi # --- Configure for pipeline workload --- echo "--- Configuring for bulk load performance ---" PRIVATE_IP=$(hostname -I | awk '{print $1}') sudo tee "$PG_DATA/postgresql.conf" > /dev/null < /dev/null <<'EOF' # Local connections local all all trust host all all 127.0.0.1/32 trust host all all ::1/128 trust # VPC connections (from compute instance) host all all 10.0.0.0/8 trust host all all 172.16.0.0/12 trust EOF # --- Start with custom data directory --- echo "--- Starting PostgreSQL ---" sudo tee /etc/systemd/system/postgresql-everytab.service > /dev/null </dev/null || true sudo -u postgres psql -p 5432 -c "CREATE DATABASE everytab OWNER everytab;" 2>/dev/null || true # --- Apply schema --- SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" SCHEMA="$SCRIPT_DIR/../pipeline/01_cc_index/schema.sql" if [ -f "$SCHEMA" ]; then echo "--- Applying schema ---" psql -U everytab -h localhost -d everytab -f "$SCHEMA" else echo "Warning: schema.sql not found at $SCHEMA" echo "Copy it over and run: psql -U everytab -h localhost -d everytab -f schema.sql" fi # --- Validate --- echo "" echo "=== Validation ===" pg_isready -h localhost psql -U everytab -h localhost -d everytab -c "SELECT 'Postgres OK';" -t -A echo "NVMe disk usage:" df -h "$NVME_MOUNT" echo "" echo "=== Setup Complete ===" echo "" echo "Private IP: $PRIVATE_IP" echo "" echo "Connection string (from compute instance):" echo " export DATABASE_URL='postgres://everytab@${PRIVATE_IP}:5432/everytab'" echo "" echo "Connection string (local):" echo " export DATABASE_URL='postgres://everytab@localhost:5432/everytab'" echo "" echo "IMPORTANT: Ensure the compute instance's security group allows" echo "outbound traffic to this instance on port 5432, and this instance's" echo "security group allows inbound on 5432 from the compute instance."