everytab/infra/db-setup.sh

157 lines
4.9 KiB
Bash
Executable file

#!/usr/bin/env bash
# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
# EveryTab Postgres Setup for the dedicated i3 database instance.
#
# Where to run: on the i3.large EC2 instance (475GB local NVMe).
# What it does: points Postgres at the NVMe for data storage and tunes it
#               for write-heavy bulk loads (fsync disabled, large buffers).
# Caveat:       the data is ephemeral; take a pg_dump backup before
#               terminating the instance.
printf '%s\n' "=== EveryTab Postgres Setup (i3 NVMe) ==="
# --- Format and mount the NVMe drive ---
# Ensures the instance-store NVMe is mounted at $NVME_MOUNT:
#   * already mounted               -> nothing to do
#   * device already has a filesystem -> mount it as-is (e.g. a re-run after
#                                        a reboot); never reformat existing data
#   * device is blank               -> format as XFS, then mount
# NVME_DEV and NVME_MOUNT remain set for the rest of the script.
echo "--- Setting up NVMe storage ---"
NVME_DEV="/dev/nvme1n1"
NVME_MOUNT="/data"
# Test for a real mount, not just the directory. After a reboot the directory
# still exists on the root volume while the NVMe is no longer mounted, and
# Postgres would otherwise end up on the root EBS volume.
if mountpoint -q "$NVME_MOUNT"; then
  echo "NVMe already mounted at $NVME_MOUNT"
else
  # Find the NVMe instance store (not the root EBS).
  # i3.large has one 475GB NVMe at /dev/nvme1n1 or similar.
  if [[ ! -b "$NVME_DEV" ]]; then
    # Work out which whole disk backs "/", so it can be excluded.
    root_source=$(findmnt -n -o SOURCE /)
    # PKNAME is the parent disk of a partition; it is empty when "/" sits
    # directly on a whole disk, in which case the source itself is the disk.
    root_disk=$(lsblk -n -d -o PKNAME "$root_source" 2>/dev/null || true)
    if [[ -n "$root_disk" ]]; then
      root_disk="/dev/$root_disk"
    else
      root_disk="$root_source"
    fi
    # First whole disk that is not the root disk. awk exits 0 with no output
    # when nothing matches, so the explicit error below is reachable.
    NVME_DEV=$(lsblk -dpno NAME,TYPE \
      | awk -v root="$root_disk" \
          '$2 == "disk" && $1 != root && !seen { print $1; seen = 1 }')
    if [[ -z "$NVME_DEV" ]]; then
      echo "ERROR: Could not find NVMe instance store device" >&2
      echo "Run 'lsblk' and set NVME_DEV manually" >&2
      exit 1
    fi
  fi
  echo "Using NVMe device: $NVME_DEV"
  # Only format a device that carries no filesystem yet; an existing one
  # holds the database from a previous run and must not be wiped.
  existing_fs=$(sudo blkid -p -o value -s TYPE "$NVME_DEV" 2>/dev/null || true)
  if [[ -n "$existing_fs" ]]; then
    echo "Existing $existing_fs filesystem on $NVME_DEV; mounting without formatting"
  else
    sudo mkfs.xfs -f "$NVME_DEV"
  fi
  sudo mkdir -p "$NVME_MOUNT"
  sudo mount "$NVME_DEV" "$NVME_MOUNT"
  sudo chown ec2-user:ec2-user "$NVME_MOUNT"
  echo "Mounted $NVME_DEV at $NVME_MOUNT"
fi
# --- Install Postgres ---
echo "--- Installing PostgreSQL 16 ---"
# Probe for the server binaries this script actually invokes (initdb, pg_ctl).
# A client-only install also provides pg_isready, so testing for that tool
# would wrongly skip the server package.
if [[ ! -x /usr/bin/initdb || ! -x /usr/bin/pg_ctl ]]; then
  sudo dnf install -y postgresql16-server
fi
# --- Init database on NVMe ---
# Creates the cluster under $NVME_MOUNT/pgdata unless one is already there.
# PG_DATA remains set for the rest of the script.
echo "--- Initializing database on NVMe ---"
PG_DATA="$NVME_MOUNT/pgdata"
# PG_VERSION is written by initdb, so its presence (not merely the
# directory's) marks an initialised cluster; a run that died between mkdir
# and initdb is therefore retried. The check goes through sudo because
# initdb restricts the data directory to the postgres user.
if ! sudo test -f "$PG_DATA/PG_VERSION"; then
  sudo mkdir -p "$PG_DATA"
  sudo chown postgres:postgres "$PG_DATA"
  sudo -u postgres /usr/bin/initdb -D "$PG_DATA"
fi
# --- Configure for pipeline workload ---
# Overwrites $PG_DATA/postgresql.conf with bulk-load settings.
# PRIVATE_IP remains set for the rest of the script.
echo "--- Configuring for bulk load performance ---"
# First address reported by the host; used as the listen address.
PRIVATE_IP=$(hostname -I | awk '{print $1}')
# Without an address the config below would contain "localhost," (a dangling
# list entry), so stop here with a clear message instead.
if [[ -z "$PRIVATE_IP" ]]; then
  echo "ERROR: Could not determine private IP from 'hostname -I'" >&2
  exit 1
fi
# Unquoted heredoc delimiter: $PRIVATE_IP is expanded into the file.
# NOTE(review): work_mem applies per sort/hash operation, so 512MB combined
# with max_connections = 100 can exceed RAM under concurrent load; confirm
# the pipeline's actual concurrency.
sudo tee "$PG_DATA/postgresql.conf" > /dev/null <<EOF
# Connection — listen on private IP for compute instance
listen_addresses = 'localhost,$PRIVATE_IP'
port = 5432
max_connections = 100
# Memory — i3.large has 15.25GB RAM, Postgres gets most of it
shared_buffers = 8GB
work_mem = 512MB
maintenance_work_mem = 2GB
effective_cache_size = 12GB
# Write performance — data is reproducible, maximize speed over durability
fsync = off
synchronous_commit = off
full_page_writes = off
# WAL — minimal logging since no replication needed
wal_level = minimal
max_wal_senders = 0
max_wal_size = 8GB
checkpoint_timeout = 30min
checkpoint_completion_target = 0.9
# Autovacuum
autovacuum = on
autovacuum_naptime = 60s
EOF
# Allow connections from VPC (10.0.0.0/8 and 172.16.0.0/12 cover most VPC CIDRs)
# Local connections stay fully trusted (the script itself uses
# "sudo -u postgres psql" and password-less "psql -U everytab -h localhost").
# Remote trust is limited to database "everytab" as role "everytab": with
# "all all", any host in those ranges could log in as the postgres superuser
# without a password.
# Quoted heredoc delimiter: the content is written literally.
sudo tee "$PG_DATA/pg_hba.conf" > /dev/null <<'EOF'
# Local connections
local all all trust
host all all 127.0.0.1/32 trust
host all all ::1/128 trust
# VPC connections (from compute instance) — everytab database/role only
host everytab everytab 10.0.0.0/8 trust
host everytab everytab 172.16.0.0/12 trust
EOF
# --- Start with custom data directory ---
# Installs a dedicated systemd unit that runs Postgres from $PG_DATA, enables
# it at boot and (re)starts it so the freshly written config takes effect.
echo "--- Starting PostgreSQL ---"
# The unit orders itself after network-online.target (and pulls it in via
# Wants=): listen_addresses names the private IP, which network.target alone
# does not guarantee is assigned when the service starts at boot.
# Unquoted heredoc delimiter: $PG_DATA is expanded into the unit file.
sudo tee /etc/systemd/system/postgresql-everytab.service > /dev/null <<EOF
[Unit]
Description=PostgreSQL for EveryTab (NVMe)
Wants=network-online.target
After=network.target network-online.target
[Service]
Type=forking
User=postgres
ExecStart=/usr/bin/pg_ctl start -D $PG_DATA -l $PG_DATA/pg.log
ExecStop=/usr/bin/pg_ctl stop -D $PG_DATA
ExecReload=/usr/bin/pg_ctl reload -D $PG_DATA
[Install]
WantedBy=multi-user.target
EOF
sudo systemctl daemon-reload
sudo systemctl enable postgresql-everytab
sudo systemctl restart postgresql-everytab
# --- Create database ---
# Creates the everytab role and database if they do not exist yet.
# Existence is checked explicitly rather than discarding all errors, so a
# genuine failure (server down, permission problem) aborts the script with
# psql's own message instead of being silently ignored.
echo "--- Creating everytab database ---"
# -tA prints the bare value: "1" when the row exists, nothing otherwise.
role_exists=$(sudo -u postgres psql -p 5432 -tAc \
  "SELECT 1 FROM pg_roles WHERE rolname = 'everytab'")
if [[ "$role_exists" != "1" ]]; then
  sudo -u postgres psql -p 5432 -c "CREATE USER everytab;"
fi
db_exists=$(sudo -u postgres psql -p 5432 -tAc \
  "SELECT 1 FROM pg_database WHERE datname = 'everytab'")
if [[ "$db_exists" != "1" ]]; then
  sudo -u postgres psql -p 5432 -c "CREATE DATABASE everytab OWNER everytab;"
fi
# --- Apply schema ---
# Loads schema.sql (located relative to this script) into the everytab
# database when the file is present; otherwise prints how to do it by hand.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
SCHEMA="${SCRIPT_DIR}/../pipeline/01_cc_index/schema.sql"
if [[ ! -f "${SCHEMA}" ]]; then
  printf '%s\n' \
    "Warning: schema.sql not found at ${SCHEMA}" \
    "Copy it over and run: psql -U everytab -h localhost -d everytab -f schema.sql"
else
  printf '%s\n' "--- Applying schema ---"
  psql -U everytab -h localhost -d everytab -f "${SCHEMA}"
fi
# --- Validate ---
# Smoke-tests the server (readiness probe plus a trivial query as the
# everytab role), shows NVMe usage, then prints connection instructions.
printf '\n%s\n' "=== Validation ==="
pg_isready -h localhost
psql -U everytab -h localhost -d everytab -t -A -c "SELECT 'Postgres OK';"
printf '%s\n' "NVMe disk usage:"
df -h "$NVME_MOUNT"
# Unquoted heredoc delimiter: ${PRIVATE_IP} is expanded, quotes stay literal.
cat <<EOF

=== Setup Complete ===

Private IP: $PRIVATE_IP

Connection string (from compute instance):
 export DATABASE_URL='postgres://everytab@${PRIVATE_IP}:5432/everytab'

Connection string (local):
 export DATABASE_URL='postgres://everytab@localhost:5432/everytab'

IMPORTANT: Ensure the compute instance's security group allows
outbound traffic to this instance on port 5432, and this instance's
security group allows inbound on 5432 from the compute instance.
EOF