152 lines
4.7 KiB
Bash
Executable file
152 lines
4.7 KiB
Bash
Executable file
#!/usr/bin/env bash
# Strict mode: abort on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# EveryTab Postgres Setup, for the dedicated i3 database instance.
#
# Where to run: on the i3.large EC2 instance (475GB local NVMe).
# What it does: configures Postgres to keep its data directory on the NVMe,
#               tuned for write-heavy bulk loads (fsync disabled, large buffers).
# Caveat:       the data is ephemeral. Back it up with pg_dump before
#               terminating the instance.

printf '%s\n' "=== EveryTab Postgres Setup (i3 NVMe) ==="
# --- Format and mount the NVMe drive ---
# Ensures the instance-store NVMe is mounted at $NVME_MOUNT.
#   * Decides on "is a mount point", not "directory exists": a leftover empty
#     directory on the root volume must not be mistaken for the NVMe.
#   * If the default device is absent, picks the first whole disk that does
#     not back the root filesystem. Refuses to guess when the root disk cannot
#     be identified, because the next step may format the device.
#   * Formats only when the device carries no filesystem, so re-running the
#     script after a reboot remounts the existing data instead of wiping it.
# NOTE: no /etc/fstab entry is written; the mount does not survive a reboot
# until this script is run again.
echo "--- Setting up NVMe storage ---"
NVME_DEV="/dev/nvme1n1"
NVME_MOUNT="/data"

if ! mountpoint -q "$NVME_MOUNT"; then
  # i3.large has one 475GB NVMe at /dev/nvme1n1 or similar.
  if [[ ! -b "$NVME_DEV" ]]; then
    # Identify the disk behind "/" so it can be excluded from the candidates.
    root_src=$(findmnt -no SOURCE / || true)
    root_disk=""
    if [[ -n "$root_src" && -b "$root_src" ]]; then
      # PKNAME is the parent disk of a partition; it is empty when the root
      # filesystem sits directly on a whole disk.
      root_disk=$(lsblk -pno PKNAME "$root_src" | head -n 1)
      root_disk=${root_disk:-$root_src}
    fi

    NVME_DEV=""
    if [[ -n "$root_disk" ]]; then
      # First whole disk that is not the root disk. awk reads all input
      # (no early exit) so lsblk never sees SIGPIPE under pipefail.
      NVME_DEV=$(lsblk -dpno NAME,TYPE \
        | awk -v root="$root_disk" \
            '!found && $2 == "disk" && $1 != root { print $1; found = 1 }')
    fi

    if [[ -z "$NVME_DEV" ]]; then
      echo "ERROR: Could not find NVMe instance store device" >&2
      echo "Run 'lsblk' and set NVME_DEV manually" >&2
      exit 1
    fi
  fi
  echo "Using NVMe device: $NVME_DEV"

  # blkid exits non-zero when it finds no filesystem signature; that is the
  # expected state of a fresh instance-store volume, hence the "|| true".
  existing_fs=$(sudo blkid -s TYPE -o value "$NVME_DEV" || true)
  if [[ -z "$existing_fs" ]]; then
    sudo mkfs.xfs -f "$NVME_DEV"
  else
    echo "Found existing $existing_fs filesystem on $NVME_DEV; mounting without formatting"
  fi

  sudo mkdir -p "$NVME_MOUNT"
  sudo mount "$NVME_DEV" "$NVME_MOUNT"
  sudo chown ec2-user:ec2-user "$NVME_MOUNT"
  echo "Mounted $NVME_DEV at $NVME_MOUNT"
else
  echo "NVMe already mounted at $NVME_MOUNT"
fi
# --- Install Postgres ---
# Installs the PostgreSQL 16 server package unless the server-side tools this
# script relies on (initdb, pg_ctl) are already available. Checking a
# client-only tool such as pg_isready is not enough: it can be present
# without the server package.
echo "--- Installing PostgreSQL 16 ---"
if ! command -v initdb &>/dev/null || ! command -v pg_ctl &>/dev/null; then
  sudo dnf install -y postgresql16-server
fi
# --- Init database on NVMe ---
# Creates the cluster under $NVME_MOUNT/pgdata if it has not been initialised.
# "Initialised" means PG_VERSION exists, not merely that the directory does:
# a directory left behind by a failed initdb must not be treated as a cluster.
# The check runs through sudo because initdb restricts the data directory to
# the postgres user.
echo "--- Initializing database on NVMe ---"
PG_DATA="$NVME_MOUNT/pgdata"
if ! sudo test -f "$PG_DATA/PG_VERSION"; then
  sudo mkdir -p "$PG_DATA"
  sudo chown postgres:postgres "$PG_DATA"
  # initdb accepts an existing empty directory and fails loudly on a
  # non-empty one, which is the desired outcome for a half-written directory.
  sudo -u postgres /usr/bin/initdb -D "$PG_DATA"
fi
# --- Configure for pipeline workload ---
# Replaces $PG_DATA/postgresql.conf with bulk-load settings. The heredoc is
# unquoted on purpose so that $PRIVATE_IP is expanded into listen_addresses.
echo "--- Configuring for bulk load performance ---"

# First address reported by the host. An empty value would produce
# listen_addresses = 'localhost,' with a dangling comma, so stop here with a
# clear message instead of writing a config the server may refuse to load.
PRIVATE_IP=$(hostname -I | awk '{print $1}')
if [[ -z "$PRIVATE_IP" ]]; then
  echo "ERROR: Could not determine the private IP from 'hostname -I'" >&2
  exit 1
fi

sudo tee "$PG_DATA/postgresql.conf" > /dev/null <<EOF
# Connection — listen on private IP for compute instance
listen_addresses = 'localhost,$PRIVATE_IP'
port = 5432
max_connections = 100

# Memory — i3.large has 15.25GB RAM, Postgres gets most of it
shared_buffers = 8GB
work_mem = 512MB
maintenance_work_mem = 2GB
effective_cache_size = 12GB

# Write performance — data is reproducible, maximize speed over durability
fsync = off
synchronous_commit = off
full_page_writes = off

# WAL — minimal logging since no replication needed
wal_level = minimal
max_wal_senders = 0
max_wal_size = 8GB
checkpoint_timeout = 30min
checkpoint_completion_target = 0.9

# Autovacuum
autovacuum = on
autovacuum_naptime = 60s
EOF
# Allow connections from VPC (10.0.0.0/8 and 172.16.0.0/12 cover most VPC CIDRs).
# Remote password-less ("trust") access is limited to the everytab role on the
# everytab database, which is the only remote connection string this script
# hands out. Local connections stay fully trusted, so administration as the
# postgres superuser is still possible from this host, but not from the network.
# The heredoc delimiter is quoted: the file is written verbatim.
sudo tee "$PG_DATA/pg_hba.conf" > /dev/null <<'EOF'
# Local connections
local   all       all                       trust
host    all       all       127.0.0.1/32    trust
host    all       all       ::1/128         trust
# VPC connections (from compute instance): application role/database only
host    everytab  everytab  10.0.0.0/8      trust
host    everytab  everytab  172.16.0.0/12   trust
EOF
# --- Start with custom data directory ---
# Installs a dedicated systemd unit that runs Postgres against $PG_DATA, then
# enables it and (re)starts it so the freshly written configuration is loaded.
# The unit is ordered after network-online.target rather than network.target:
# the server binds the instance's private IP, which may not be configured yet
# when network.target is reached during boot.
# The heredoc is unquoted so that $PG_DATA is expanded into the unit file.
echo "--- Starting PostgreSQL ---"
sudo tee /etc/systemd/system/postgresql-everytab.service > /dev/null <<EOF
[Unit]
Description=PostgreSQL for EveryTab (NVMe)
After=network-online.target
Wants=network-online.target

[Service]
Type=forking
User=postgres
ExecStart=/usr/bin/pg_ctl start -D $PG_DATA -l $PG_DATA/pg.log
ExecStop=/usr/bin/pg_ctl stop -D $PG_DATA
ExecReload=/usr/bin/pg_ctl reload -D $PG_DATA

[Install]
WantedBy=multi-user.target
EOF

sudo systemctl daemon-reload
sudo systemctl enable postgresql-everytab
sudo systemctl restart postgresql-everytab
# --- Create database ---
# Creates the everytab role and database if they do not exist yet.
# Existence is checked explicitly rather than ignoring every error from
# CREATE, so a genuine failure (server down, permission problem) stops the
# script instead of surfacing later as a confusing validation error.
echo "--- Creating everytab database ---"

# -tA prints just the bare value: "1" when the row exists, nothing otherwise.
role_exists=$(sudo -u postgres psql -p 5432 -tAc \
  "SELECT 1 FROM pg_roles WHERE rolname = 'everytab'")
if [[ "$role_exists" != "1" ]]; then
  sudo -u postgres psql -p 5432 -c "CREATE USER everytab;"
fi

db_exists=$(sudo -u postgres psql -p 5432 -tAc \
  "SELECT 1 FROM pg_database WHERE datname = 'everytab'")
if [[ "$db_exists" != "1" ]]; then
  sudo -u postgres psql -p 5432 -c "CREATE DATABASE everytab OWNER everytab;"
fi
# --- Schema ---
# Nothing is applied here. The schema is loaded from the compute instance
# over the network:
#   psql $DATABASE_URL -f pipeline/01_cc_index/schema.sql
# This section only prints that reminder.
printf '%s\n' \
  "--- Schema ---" \
  'Apply schema from compute instance: psql $DATABASE_URL -f pipeline/01_cc_index/schema.sql'
# --- Validate ---
# Smoke test: the server accepts connections on localhost, the everytab role
# can run a query against the everytab database, and the usage of the
# filesystem holding $NVME_MOUNT is reported.
printf '\n'
printf '%s\n' "=== Validation ==="
pg_isready --host=localhost
psql --username=everytab --host=localhost --dbname=everytab \
  --tuples-only --no-align --command="SELECT 'Postgres OK';"
printf '%s\n' "NVMe disk usage:"
df -h "$NVME_MOUNT"
# Final summary: private IP, ready-to-paste connection strings, and a reminder
# about the security-group rules. The heredoc is unquoted so that PRIVATE_IP
# is substituted; the single quotes inside it are printed literally.
cat <<EOF

=== Setup Complete ===

Private IP: $PRIVATE_IP

Connection string (from compute instance):
 export DATABASE_URL='postgres://everytab@${PRIVATE_IP}:5432/everytab'

Connection string (local):
 export DATABASE_URL='postgres://everytab@localhost:5432/everytab'

IMPORTANT: Ensure the compute instance's security group allows
outbound traffic to this instance on port 5432, and this instance's
security group allows inbound on 5432 from the compute instance.
EOF