#!/usr/bin/env bash set -euo pipefail # EveryTab Postgres Setup — for dedicated i3 database instance # Run on the i3.large EC2 instance (475GB local NVMe). # Configures Postgres to use the NVMe for data storage. # Optimized for write-heavy bulk loads — fsync disabled, large buffers. # Data is ephemeral — back up with pg_dump before terminating the instance. echo "=== EveryTab Postgres Setup (i3 NVMe) ===" # --- Format and mount the NVMe drive --- echo "--- Setting up NVMe storage ---" NVME_DEV="/dev/nvme1n1" NVME_MOUNT="/data" if [ ! -d "$NVME_MOUNT" ]; then # Find the NVMe instance store — look for unmounted nvme devices if [ ! -b "$NVME_DEV" ]; then NVME_DEV=$(lsblk -dpno NAME | grep nvme | head -1) if [ -z "$NVME_DEV" ]; then echo "ERROR: Could not find NVMe instance store device" echo "Available devices:" lsblk exit 1 fi fi echo "Using NVMe device: $NVME_DEV" sudo mkfs.xfs -f "$NVME_DEV" sudo mkdir -p "$NVME_MOUNT" sudo mount "$NVME_DEV" "$NVME_MOUNT" sudo chown ec2-user:ec2-user "$NVME_MOUNT" echo "Mounted $NVME_DEV at $NVME_MOUNT" else echo "NVMe already mounted at $NVME_MOUNT" fi # --- Install Postgres --- echo "--- Installing PostgreSQL 16 ---" if ! command -v pg_isready &>/dev/null; then sudo dnf install -y postgresql16-server tmux htop fi # --- Init database on NVMe --- echo "--- Initializing database on NVMe ---" PG_DATA="$NVME_MOUNT/pgdata" if [ ! -d "$PG_DATA" ]; then sudo mkdir -p "$PG_DATA" sudo chown postgres:postgres "$PG_DATA" sudo -u postgres /usr/bin/initdb -D "$PG_DATA" fi # --- Configure for pipeline workload --- echo "--- Configuring for bulk load performance ---" PRIVATE_IP=$(hostname -I | awk '{print $1}') sudo tee "$PG_DATA/postgresql.conf" > /dev/null < /dev/null <<'EOF' # Local connections local all all trust host all all 127.0.0.1/32 trust host all all ::1/128 trust # VPC connections (from compute instance) host all all 10.0.0.0/8 trust host all all 172.16.0.0/12 trust EOF # --- Start with custom data directory --- echo "--- Starting PostgreSQL ---" sudo tee /etc/systemd/system/postgresql-everytab.service > /dev/null </dev/null || true sudo -u postgres psql -p 5432 -c "CREATE DATABASE everytab OWNER everytab;" 2>/dev/null || true # --- Schema --- # Schema is applied from the compute instance over the network: # psql $DATABASE_URL -f pipeline/01_cc_index/schema.sql echo "--- Schema ---" echo "Apply schema from compute instance: psql \$DATABASE_URL -f pipeline/01_cc_index/schema.sql" # --- Validate --- echo "" echo "=== Validation ===" pg_isready -h localhost psql -U everytab -h localhost -d everytab -c "SELECT 'Postgres OK';" -t -A echo "NVMe disk usage:" df -h "$NVME_MOUNT" echo "" echo "=== Setup Complete ===" echo "" echo "Private IP: $PRIVATE_IP" echo "" echo "Connection string (from compute instance):" echo " export DATABASE_URL='postgres://everytab@${PRIVATE_IP}:5432/everytab'" echo "" echo "Connection string (local):" echo " export DATABASE_URL='postgres://everytab@localhost:5432/everytab'" echo "" echo "IMPORTANT: Ensure the compute instance's security group allows" echo "outbound traffic to this instance on port 5432, and this instance's" echo "security group allows inbound on 5432 from the compute instance."