switched from rds to i3 ec2 for nvme disk read/write speeds
This commit is contained in:
parent
c93d1736fe
commit
bf8b932cdc
4 changed files with 233 additions and 48 deletions
157
infra/db-setup.sh
Executable file
157
infra/db-setup.sh
Executable file
|
|
@ -0,0 +1,157 @@
|
|||
#!/usr/bin/env bash
#
# EveryTab Postgres Setup — for dedicated i3 database instance.
#
# Meant to be run on the i3.large EC2 instance (475GB local NVMe).
# Postgres is configured to keep its data on the NVMe drive and is tuned
# for write-heavy bulk loads: fsync is disabled and buffers are large.
#
# The data is ephemeral — back it up with pg_dump before terminating the
# instance.

# Strict mode: abort on command failure, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

printf '%s\n' "=== EveryTab Postgres Setup (i3 NVMe) ==="
|
||||
|
||||
# --- Format and mount the NVMe drive ---
# Makes sure the instance-store NVMe drive is mounted at $NVME_MOUNT.
#   NVME_DEV   - device to use; /dev/nvme1n1 is tried first, then auto-detected
#   NVME_MOUNT - mount point; later steps place the Postgres data below it
# Exits 1 when no usable device can be identified.
echo "--- Setting up NVMe storage ---"
NVME_DEV="/dev/nvme1n1"
NVME_MOUNT="/data"

# Test for a live mount, not merely for the directory: this script adds no
# /etc/fstab entry, so after a reboot the directory still exists while the
# NVMe is unmounted, and Postgres must not silently land on the root volume.
if mountpoint -q "$NVME_MOUNT"; then
  echo "NVMe already mounted at $NVME_MOUNT"
else
  # Identify the whole disk backing "/" so it is never selected or formatted.
  ROOT_SRC=$(findmnt -no SOURCE /) || {
    echo "ERROR: Could not determine the root filesystem device" >&2
    exit 1
  }
  ROOT_PARENT=$(lsblk -no PKNAME "$ROOT_SRC" | awk 'NF && !seen { print $1; seen = 1 }')
  if [ -n "$ROOT_PARENT" ]; then
    ROOT_DISK="/dev/$ROOT_PARENT"
  else
    # No parent device reported: the root filesystem sits directly on a disk.
    ROOT_DISK="$ROOT_SRC"
  fi

  # i3.large has one 475GB NVMe at /dev/nvme1n1 or similar. If the default
  # path is missing (or turns out to be the root disk), fall back to the
  # first whole disk that is not the root disk.
  if [ ! -b "$NVME_DEV" ] || [ "$NVME_DEV" = "$ROOT_DISK" ]; then
    NVME_DEV=$(lsblk -dpno NAME,TYPE \
      | awk -v root="$ROOT_DISK" '$2 == "disk" && $1 != root && !hit { print $1; hit = 1 }')
    if [ -z "$NVME_DEV" ]; then
      echo "ERROR: Could not find NVMe instance store device"
      echo "Run 'lsblk' and set NVME_DEV manually"
      exit 1
    fi
  fi
  echo "Using NVMe device: $NVME_DEV"

  # Format only a device that carries no filesystem yet, so re-running the
  # script after a reboot re-mounts the existing data instead of wiping it.
  # blkid exits non-zero when it finds nothing, hence the "|| true".
  if [ -z "$(sudo blkid -o value -s TYPE "$NVME_DEV" || true)" ]; then
    sudo mkfs.xfs -f "$NVME_DEV"
  fi

  sudo mkdir -p "$NVME_MOUNT"
  sudo mount "$NVME_DEV" "$NVME_MOUNT"
  sudo chown ec2-user:ec2-user "$NVME_MOUNT"
  echo "Mounted $NVME_DEV at $NVME_MOUNT"
fi
|
||||
|
||||
# --- Install Postgres ---
# Installs the PostgreSQL 16 server package unless the server binaries this
# script calls by absolute path (initdb, pg_ctl) are already present.
# NOTE(review): the previous probe was pg_isready, which presumably ships with
# the client package; its presence would not prove the server is installed.
echo "--- Installing PostgreSQL 16 ---"
if [ ! -x /usr/bin/initdb ] || [ ! -x /usr/bin/pg_ctl ]; then
  sudo dnf install -y postgresql16-server
fi
|
||||
|
||||
# --- Init database on NVMe ---
# Creates the cluster under $NVME_MOUNT/pgdata unless one already exists.
#   PG_DATA - data directory used by every later step
echo "--- Initializing database on NVMe ---"
PG_DATA="$NVME_MOUNT/pgdata"

# Key off PG_VERSION (written by a completed initdb) rather than the bare
# directory, so a run that died between mkdir and initdb is retried instead
# of being skipped forever. The test runs through sudo because the data
# directory is owned by postgres and may not be readable by the calling user.
if ! sudo test -f "$PG_DATA/PG_VERSION"; then
  sudo mkdir -p "$PG_DATA"
  sudo chown postgres:postgres "$PG_DATA"
  sudo -u postgres /usr/bin/initdb -D "$PG_DATA"
fi
|
||||
|
||||
# --- Configure for pipeline workload ---
# Overwrites $PG_DATA/postgresql.conf on every run with the bulk-load tuning
# below. The here-document is unquoted so that $PRIVATE_IP is expanded into
# listen_addresses.
echo "--- Configuring for bulk load performance ---"

# First address reported by `hostname -I`; also used in the connection hints
# printed at the end of the script.
PRIVATE_IP=$(hostname -I | awk '{print $1}')
if [ -z "$PRIVATE_IP" ]; then
  # An empty value would produce listen_addresses = 'localhost,' below.
  echo "ERROR: Could not determine this host's IP address from 'hostname -I'" >&2
  exit 1
fi

sudo tee "$PG_DATA/postgresql.conf" > /dev/null <<EOF
# Connection — listen on private IP for compute instance
listen_addresses = 'localhost,$PRIVATE_IP'
port = 5432
max_connections = 100

# Memory — i3.large has 15.25GB RAM, Postgres gets most of it
shared_buffers = 8GB
work_mem = 512MB
maintenance_work_mem = 2GB
effective_cache_size = 12GB

# Write performance — data is reproducible, maximize speed over durability
fsync = off
synchronous_commit = off
full_page_writes = off

# WAL — minimal logging since no replication needed
wal_level = minimal
max_wal_senders = 0
max_wal_size = 8GB
checkpoint_timeout = 30min
checkpoint_completion_target = 0.9

# Autovacuum
autovacuum = on
autovacuum_naptime = 60s
EOF
|
||||
|
||||
# Allow connections from VPC (10.0.0.0/8 and 172.16.0.0/12 cover most VPC CIDRs).
# Overwrites $PG_DATA/pg_hba.conf on every run. The delimiter is quoted, so
# nothing inside the here-document is expanded by the shell.
#
# Password-less (trust) access from the VPC ranges is limited to the everytab
# role on the everytab database, which is what the DATABASE_URL printed at the
# end of this script uses. Superuser and other-database access stays local, so
# a host elsewhere in the VPC cannot log in as postgres without a password.
sudo tee "$PG_DATA/pg_hba.conf" > /dev/null <<'EOF'
# Local connections
local   all       all                      trust
host    all       all       127.0.0.1/32   trust
host    all       all       ::1/128        trust
# VPC connections (from compute instance) — everytab role/database only
host    everytab  everytab  10.0.0.0/8     trust
host    everytab  everytab  172.16.0.0/12  trust
EOF
|
||||
|
||||
# --- Start with custom data directory ---
# Installs a dedicated systemd unit that runs Postgres from $PG_DATA, then
# enables and (re)starts it. The here-document is unquoted so that $PG_DATA is
# expanded into the unit file when it is written.
echo "--- Starting PostgreSQL ---"

# The unit orders itself after network-online.target (not just network.target):
# postgresql.conf binds the instance's private IP explicitly, and that address
# has to be configured before the server starts at boot or it will only be
# reachable on localhost.
sudo tee /etc/systemd/system/postgresql-everytab.service > /dev/null <<EOF
[Unit]
Description=PostgreSQL for EveryTab (NVMe)
After=network-online.target
Wants=network-online.target

[Service]
Type=forking
User=postgres
ExecStart=/usr/bin/pg_ctl start -D $PG_DATA -l $PG_DATA/pg.log
ExecStop=/usr/bin/pg_ctl stop -D $PG_DATA
ExecReload=/usr/bin/pg_ctl reload -D $PG_DATA

[Install]
WantedBy=multi-user.target
EOF

# Pick up the new unit file, start it on boot, and restart so that the
# configuration written by the earlier steps takes effect.
sudo systemctl daemon-reload
sudo systemctl enable postgresql-everytab
sudo systemctl restart postgresql-everytab
|
||||
|
||||
# --- Create database ---
# Creates the everytab role and database if they do not exist yet.
# Existence is checked explicitly instead of discarding every error from
# CREATE, so a real failure (server not running, permission problem) stops
# the script with psql's message visible rather than being silently ignored.
echo "--- Creating everytab database ---"

# -tA prints just the bare value: "1" when the row exists, nothing otherwise.
ROLE_EXISTS=$(sudo -u postgres psql -p 5432 -tAc \
  "SELECT 1 FROM pg_roles WHERE rolname = 'everytab';")
if [ "$ROLE_EXISTS" != "1" ]; then
  sudo -u postgres psql -p 5432 -c "CREATE USER everytab;"
fi

DB_EXISTS=$(sudo -u postgres psql -p 5432 -tAc \
  "SELECT 1 FROM pg_database WHERE datname = 'everytab';")
if [ "$DB_EXISTS" != "1" ]; then
  sudo -u postgres psql -p 5432 -c "CREATE DATABASE everytab OWNER everytab;"
fi
|
||||
|
||||
# --- Apply schema ---
# Loads pipeline/01_cc_index/schema.sql, resolved relative to this script's
# own directory, into the everytab database. When the file is missing, a hint
# is printed and the script carries on.
# NOTE(review): psql -f reports success even if individual statements fail
# unless ON_ERROR_STOP is set; left as-is because it is not visible from here
# whether schema.sql can safely be re-applied.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
SCHEMA="$SCRIPT_DIR/../pipeline/01_cc_index/schema.sql"

if [ ! -f "$SCHEMA" ]; then
  echo "Warning: schema.sql not found at $SCHEMA"
  echo "Copy it over and run: psql -U everytab -h localhost -d everytab -f schema.sql"
else
  echo "--- Applying schema ---"
  psql -U everytab -h localhost -d everytab -f "$SCHEMA"
fi
|
||||
|
||||
# --- Validate ---
# Smoke-tests the running server (readiness probe plus a trivial query as the
# everytab role), shows NVMe disk usage, then prints connection details.
printf '%s\n' \
  "" \
  "=== Validation ==="
pg_isready -h localhost
psql -U everytab -h localhost -d everytab -c "SELECT 'Postgres OK';" -t -A
printf '%s\n' "NVMe disk usage:"
df -h "$NVME_MOUNT"

# Summary: one argument per output line.
printf '%s\n' \
  "" \
  "=== Setup Complete ===" \
  "" \
  "Private IP: $PRIVATE_IP" \
  "" \
  "Connection string (from compute instance):" \
  " export DATABASE_URL='postgres://everytab@${PRIVATE_IP}:5432/everytab'" \
  "" \
  "Connection string (local):" \
  " export DATABASE_URL='postgres://everytab@localhost:5432/everytab'" \
  "" \
  "IMPORTANT: Ensure the compute instance's security group allows" \
  "outbound traffic to this instance on port 5432, and this instance's" \
  "security group allows inbound on 5432 from the compute instance."
|
||||
Loading…
Add table
Add a link
Reference in a new issue