switched from rds to i3 ec2 for nvme disk read/write speeds

This commit is contained in:
Joe Lothan 2026-05-25 18:17:07 -04:00
parent c93d1736fe
commit bf8b932cdc
4 changed files with 233 additions and 48 deletions

View file

@ -30,22 +30,31 @@ git clone <your-repo-url> ~/everytab
cd ~/everytab
```
## 5. Database Setup
## 5. Database Instance (i3.large)
On the EC2 instance:
Spin up an i3.large in the same AZ as the compute instance. This provides 475GB local NVMe with 100K+ IOPS for Postgres — eliminates the EBS/RDS IOPS bottleneck.
```bash
# Add to .bashrc (get the URL from: terraform output -raw database_url)
echo "export DATABASE_URL='postgres://everytab:PASS@ENDPOINT:5432/everytab'" >> ~/.bashrc
# Launch i3.large (same subnet/AZ, same key pair, allow port 5432 from compute SG)
# Then SSH in and run:
bash ~/everytab/infra/db-setup.sh
```
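This formats and mounts the instance-store NVMe drive, installs Postgres with its data directory on that drive, tunes it for bulk writes (`fsync=off` — a crash can corrupt the cluster, so the data must be reproducible), creates the `everytab` role and database, and applies the schema.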
On the **compute instance** (c5.2xlarge):
```bash
# Use the private IP printed by db-setup.sh
echo "export DATABASE_URL='postgres://everytab@<i3-private-ip>:5432/everytab'" >> ~/.bashrc
source ~/.bashrc
# Test connection
# Test connectivity
psql $DATABASE_URL -c 'SELECT 1;'
# Create schema
psql $DATABASE_URL -f ~/everytab/pipeline/01_cc_index/schema.sql
```
Note: the i3's local NVMe is ephemeral — data is lost on stop/terminate. Always `pg_dump` before teardown.
## Pinning the EC2 AMI
The `data.aws_ami` lookup fetches the latest Amazon Linux 2023 AMI. If Amazon publishes a new one between applies, Terraform will want to replace your EC2 instance.
@ -67,7 +76,15 @@ Remove the `ec2_ami` line from tfvars when you want a fresh instance with the la
## Teardown (after backup)
Switch to serving-only mode (destroys EC2, RDS, icons bucket):
```bash
# Back up the database first
pg_dump -U everytab -Fc everytab > ~/everytab_dump.pgfc
# Back up icons
rsync -avP ~/icons/ homelab:/backups/everytab/icons/
```
Switch to serving-only mode (destroys EC2, icons bucket):
```bash
terraform apply -var="scanning=false"

157
infra/db-setup.sh Executable file
View file

@ -0,0 +1,157 @@
#!/usr/bin/env bash
# Strict mode: abort on a failed command, an unset variable, or a failed
# pipeline stage.
set -euo pipefail
# EveryTab Postgres Setup — for dedicated i3 database instance
#
# Usage: bash infra/db-setup.sh   (privileged steps are run through sudo)
#
# Run on the i3.large EC2 instance (475GB local NVMe).
# Configures Postgres to use the NVMe for data storage.
# Optimized for write-heavy bulk loads — fsync disabled, large buffers.
# Data is ephemeral — back up with pg_dump before terminating the instance.
#
# Steps, in order: format/mount the NVMe at /data, install PostgreSQL 16,
# initdb under /data/pgdata, write postgresql.conf and pg_hba.conf, install and
# start the postgresql-everytab systemd unit, create the everytab role and
# database, apply schema.sql, then print connection strings.
echo "=== EveryTab Postgres Setup (i3 NVMe) ==="
# --- Format and mount the NVMe drive ---
# Ensures the instance-store NVMe is mounted at $NVME_MOUNT.
#   * A blank device is formatted as XFS and mounted.
#   * A device that already carries an XFS filesystem (this script ran before
#     and the host was rebooted) is mounted as-is, so an existing cluster is
#     never wiped by a re-run.
# The mount is not persisted in /etc/fstab; re-run this script after a reboot.
echo "--- Setting up NVMe storage ---"
NVME_DEV="/dev/nvme1n1"
NVME_MOUNT="/data"

# Test for a real mount, not just the directory: after a reboot the directory
# still exists on the root volume while the NVMe is unmounted, and Postgres
# must not be initialised on the root disk by mistake.
if mountpoint -q "$NVME_MOUNT"; then
  echo "NVMe already mounted at $NVME_MOUNT"
else
  if [[ ! -b "$NVME_DEV" ]]; then
    # Device naming differs between instance families, so discover the disk.
    # First choice: a disk whose model string identifies it as instance store.
    # (awk keeps reading instead of exiting early so lsblk never sees SIGPIPE
    # under pipefail.)
    NVME_DEV=$(lsblk -dpno NAME,MODEL \
      | awk '/Instance[ _]Storage/ && !seen { print $1; seen = 1 }')

    if [[ -z "$NVME_DEV" ]]; then
      # Second choice: the first whole disk that does not back the root fs.
      root_src=$(findmnt -no SOURCE / || true)
      root_disk=""
      if [[ -n "$root_src" ]]; then
        root_disk=$(lsblk -dpno PKNAME "$root_src" 2>/dev/null || true)
        # Root directly on a whole disk has no parent; use the source itself.
        root_disk=${root_disk:-$root_src}
      fi
      # Only guess when the root disk is known, so it can be excluded.
      if [[ -n "$root_disk" ]]; then
        NVME_DEV=$(lsblk -dpno NAME,TYPE \
          | awk -v root="$root_disk" \
              '$2 == "disk" && $1 != root && !seen { print $1; seen = 1 }')
      fi
    fi

    if [[ -z "$NVME_DEV" ]]; then
      echo "ERROR: Could not find NVMe instance store device" >&2
      echo "Run 'lsblk' and set NVME_DEV manually" >&2
      exit 1
    fi
  fi

  echo "Using NVMe device: $NVME_DEV"

  # Instance-store contents survive a reboot, so only format a device that does
  # not already hold an XFS filesystem.
  existing_fs=$(sudo blkid -o value -s TYPE "$NVME_DEV" || true)
  if [[ "$existing_fs" == "xfs" ]]; then
    echo "Existing XFS filesystem on $NVME_DEV — mounting without formatting"
  else
    sudo mkfs.xfs -f "$NVME_DEV"
  fi

  sudo mkdir -p "$NVME_MOUNT"
  sudo mount "$NVME_DEV" "$NVME_MOUNT"
  sudo chown ec2-user:ec2-user "$NVME_MOUNT"
  echo "Mounted $NVME_DEV at $NVME_MOUNT"
fi
# --- Install Postgres ---
# Installs the PostgreSQL 16 server package unless the server binaries this
# script calls by absolute path are already present. Probing a client tool such
# as pg_isready is not enough: it can exist without the server being installed.
echo "--- Installing PostgreSQL 16 ---"
if [[ ! -x /usr/bin/initdb || ! -x /usr/bin/pg_ctl ]]; then
  sudo dnf install -y postgresql16-server
fi
# --- Init database on NVMe ---
# Creates the cluster under $NVME_MOUNT/pgdata unless one is already there.
# "Already there" means PG_VERSION exists, not merely that the directory
# exists, so an interrupted earlier run is retried instead of being skipped.
echo "--- Initializing database on NVMe ---"
PG_DATA="$NVME_MOUNT/pgdata"
# The data directory is private to the postgres user, so probe it via sudo.
if sudo test -f "$PG_DATA/PG_VERSION"; then
  echo "Existing cluster found at $PG_DATA — skipping initdb"
else
  sudo mkdir -p "$PG_DATA"
  sudo chown postgres:postgres "$PG_DATA"
  sudo -u postgres /usr/bin/initdb -D "$PG_DATA"
fi
# --- Configure for pipeline workload ---
# Overwrites $PG_DATA/postgresql.conf in full with the settings below; nothing
# from the initdb-generated file is kept.
echo "--- Configuring for bulk load performance ---"
# First address printed by `hostname -I`; also reused in the summary output.
PRIVATE_IP=$(hostname -I | awk '{print $1}')
# Unquoted EOF on purpose: $PRIVATE_IP must expand inside the heredoc.
# NOTE(review): work_mem is allocated per sort/hash operation, so 512MB with up
# to 100 connections can exceed the instance's RAM — confirm the pipeline's
# real concurrency.
sudo tee "$PG_DATA/postgresql.conf" > /dev/null <<EOF
# Connection — listen on private IP for compute instance
listen_addresses = 'localhost,$PRIVATE_IP'
port = 5432
max_connections = 100
# Memory — i3.large has 15.25GB RAM, Postgres gets most of it
shared_buffers = 8GB
work_mem = 512MB
maintenance_work_mem = 2GB
effective_cache_size = 12GB
# Write performance — data is reproducible, maximize speed over durability
fsync = off
synchronous_commit = off
full_page_writes = off
# WAL — minimal logging since no replication needed
wal_level = minimal
max_wal_senders = 0
max_wal_size = 8GB
checkpoint_timeout = 30min
checkpoint_completion_target = 0.9
# Autovacuum
autovacuum = on
autovacuum_naptime = 60s
EOF
# Allow connections from VPC (10.0.0.0/8 and 172.16.0.0/12 cover most VPC CIDRs)
# Overwrites $PG_DATA/pg_hba.conf. Local (socket/loopback) access stays fully
# trusted so this script can administer the server as `postgres`. Remote trust
# is limited to the everytab role on the everytab database, so a host inside
# the VPC cannot log in as the postgres superuser without a password.
# Quoted 'EOF': the file is written literally, with no shell expansion.
sudo tee "$PG_DATA/pg_hba.conf" > /dev/null <<'EOF'
# Local connections
local all all trust
host all all 127.0.0.1/32 trust
host all all ::1/128 trust
# VPC connections (from compute instance) — everytab role and database only
host everytab everytab 10.0.0.0/8 trust
host everytab everytab 172.16.0.0/12 trust
EOF
# --- Start with custom data directory ---
# Installs a dedicated systemd unit that runs Postgres against $PG_DATA, then
# enables it and (re)starts it so freshly written config takes effect.
echo "--- Starting PostgreSQL ---"
# Unquoted EOF on purpose: $PG_DATA is expanded into the unit file.
# The unit waits for network-online.target because listen_addresses names the
# private IP, which may not be configured yet when plain network.target is
# reached at boot.
sudo tee /etc/systemd/system/postgresql-everytab.service > /dev/null <<EOF
[Unit]
Description=PostgreSQL for EveryTab (NVMe)
After=network-online.target
Wants=network-online.target
[Service]
Type=forking
User=postgres
ExecStart=/usr/bin/pg_ctl start -D $PG_DATA -l $PG_DATA/pg.log
ExecStop=/usr/bin/pg_ctl stop -D $PG_DATA
ExecReload=/usr/bin/pg_ctl reload -D $PG_DATA
[Install]
WantedBy=multi-user.target
EOF
sudo systemctl daemon-reload
sudo systemctl enable postgresql-everytab
sudo systemctl restart postgresql-everytab
# --- Create database ---
# Creates the everytab role and database if they do not exist yet. Existence is
# checked explicitly, so "already exists" stays a no-op while real failures
# (server down, permission problems) abort the script with a visible error.
echo "--- Creating everytab database ---"
role_exists=$(sudo -u postgres psql -p 5432 -tAc \
  "SELECT 1 FROM pg_roles WHERE rolname = 'everytab';")
if [[ "$role_exists" != "1" ]]; then
  sudo -u postgres psql -p 5432 -c "CREATE USER everytab;"
fi

db_exists=$(sudo -u postgres psql -p 5432 -tAc \
  "SELECT 1 FROM pg_database WHERE datname = 'everytab';")
if [[ "$db_exists" != "1" ]]; then
  sudo -u postgres psql -p 5432 -c "CREATE DATABASE everytab OWNER everytab;"
fi
# --- Apply schema ---
# Loads pipeline/01_cc_index/schema.sql, located relative to this script's own
# directory, into the everytab database. A missing file only prints a hint.
# NOTE(review): psql exits 0 on SQL errors unless ON_ERROR_STOP is set —
# consider enabling it once schema.sql is confirmed to be re-runnable.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
SCHEMA="$SCRIPT_DIR/../pipeline/01_cc_index/schema.sql"
if [[ ! -f "$SCHEMA" ]]; then
  echo "Warning: schema.sql not found at $SCHEMA"
  echo "Copy it over and run: psql -U everytab -h localhost -d everytab -f schema.sql"
else
  echo "--- Applying schema ---"
  psql --username=everytab --host=localhost --dbname=everytab --file="$SCHEMA"
fi
# --- Validate ---
# Smoke-tests the server over TCP on localhost, reports NVMe usage, then prints
# the connection strings and a security-group reminder for the operator.
printf '\n%s\n' "=== Validation ==="
pg_isready -h localhost
psql -U everytab -h localhost -d everytab -t -A -c "SELECT 'Postgres OK';"
printf '%s\n' "NVMe disk usage:"
df -h "$NVME_MOUNT"

# Unquoted EOF: $PRIVATE_IP is expanded into the summary text.
cat <<EOF

=== Setup Complete ===

Private IP: $PRIVATE_IP

Connection string (from compute instance):
 export DATABASE_URL='postgres://everytab@${PRIVATE_IP}:5432/everytab'

Connection string (local):
 export DATABASE_URL='postgres://everytab@localhost:5432/everytab'

IMPORTANT: Ensure the compute instance's security group allows
outbound traffic to this instance on port 5432, and this instance's
security group allows inbound on 5432 from the compute instance.
EOF

View file

@ -28,13 +28,14 @@ variable "vpc_id" {
}
variable "subnet_ids" {
description = "At least 2 subnet IDs in different AZs (required for RDS subnet group)"
description = "Subnet IDs — both EC2 instances are placed in subnet_ids[0] (same AZ for low latency)"
type = list(string)
}
variable "db_password" {
description = "Postgres master password"
description = "Unused — kept for tfvars compatibility. Local Postgres uses trust auth."
type = string
default = ""
sensitive = true
}
@ -54,7 +55,7 @@ variable "ec2_ami" {
}
variable "scanning" {
description = "Set to true during scanning phase, false for serving-only (tears down EC2, RDS, icons bucket)"
description = "Set to true during scanning phase, false for serving-only (tears down EC2 instances)"
type = bool
default = true
}
@ -116,18 +117,32 @@ resource "aws_security_group" "ec2" {
}
}
resource "aws_security_group" "rds" {
resource "aws_security_group" "db" {
count = var.scanning ? 1 : 0
name = "everytab-rds"
description = "EveryTab RDS instance"
name = "everytab-db"
description = "EveryTab DB instance (Postgres on NVMe)"
vpc_id = var.vpc_id
ingress {
from_port = 22
to_port = 22
protocol = "tcp"
cidr_blocks = [var.ssh_cidr]
}
ingress {
from_port = 5432
to_port = 5432
protocol = "tcp"
security_groups = [aws_security_group.ec2[0].id]
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
}
# --- IAM ---
@ -310,31 +325,23 @@ resource "aws_s3_bucket_policy" "site" {
})
}
# --- RDS ---
# --- DB Instance (i3.large with local NVMe for Postgres) ---
resource "aws_db_subnet_group" "main" {
count = var.scanning ? 1 : 0
name = "everytab"
subnet_ids = var.subnet_ids
# Instance type for the dedicated Postgres host. db-setup.sh expects a local
# NVMe instance-store disk, so pick a family that provides one.
variable "db_instance_type" {
  description = "EC2 instance type for the Postgres DB instance (must provide local NVMe instance store)"
  type        = string
  default     = "i3.large"
}
resource "aws_db_instance" "main" {
count = var.scanning ? 1 : 0
identifier = "everytab"
engine = "postgres"
engine_version = "16"
instance_class = "db.t3.medium"
allocated_storage = 20
storage_type = "gp3"
db_name = "everytab"
username = "everytab"
password = var.db_password
db_subnet_group_name = aws_db_subnet_group.main[0].name
vpc_security_group_ids = [aws_security_group.rds[0].id]
publicly_accessible = false
multi_az = false
backup_retention_period = 0
skip_final_snapshot = true
# Dedicated Postgres host. Exists only while var.scanning is true.
resource "aws_instance" "db" {
count = var.scanning ? 1 : 0
# Pinned AMI when var.ec2_ami is set, otherwise the al2023 data-source lookup.
ami = var.ec2_ami != "" ? var.ec2_ami : data.aws_ami.al2023.id
instance_type = var.db_instance_type
# Uses the aws_key_pair.ec2 key pair.
key_name = aws_key_pair.ec2[0].key_name
vpc_security_group_ids = [aws_security_group.db[0].id]
# Always placed in the first subnet of var.subnet_ids.
subnet_id = var.subnet_ids[0]
tags = {
Name = "everytab-db"
}
}
# --- EC2 ---
@ -364,13 +371,16 @@ output "ec2_public_ip" {
value = var.scanning ? aws_instance.main[0].public_ip : null
}
output "rds_endpoint" {
value = var.scanning ? aws_db_instance.main[0].endpoint : null
# Private IP of the DB instance; null when var.scanning is false.
output "db_private_ip" {
value = var.scanning ? aws_instance.db[0].private_ip : null
}
# Public IP of the DB instance; null when var.scanning is false.
output "db_public_ip" {
value = var.scanning ? aws_instance.db[0].public_ip : null
}
output "database_url" {
value = var.scanning ? "postgres://everytab:${var.db_password}@${aws_db_instance.main[0].endpoint}/everytab" : null
sensitive = true
value = var.scanning ? "postgres://everytab@${aws_instance.db[0].private_ip}:5432/everytab" : null
}
output "ssh_private_key" {
@ -382,6 +392,10 @@ output "ssh_command" {
value = var.scanning ? "ssh -i everytab-key ec2-user@${aws_instance.main[0].public_ip}" : null
}
# Ready-to-paste SSH command for the DB instance (ec2-user at its public IP,
# using the everytab-key file); null when var.scanning is false.
output "ssh_command_db" {
value = var.scanning ? "ssh -i everytab-key ec2-user@${aws_instance.db[0].public_ip}" : null
}
output "cloudfront_domain" {
value = aws_cloudfront_distribution.site.domain_name
}