diff --git a/infra/README.md b/infra/README.md
index cd4e711..ca759b0 100644
--- a/infra/README.md
+++ b/infra/README.md
@@ -6,65 +6,79 @@ Two EC2 instances during scanning:
 - **c5.2xlarge** (`everytab`) — compute: runs pipeline, stores icons on 1TB EBS
 - **i3.large** (`everytab-db`) — database: runs Postgres on 475GB local NVMe (100K+ IOPS)
 
-Both provisioned by Terraform with `user_data` scripts that auto-run on first boot:
-- Compute: `ec2-userdata.sh` — installs Go, DuckDB, Unbound, swap; clones repo; builds binaries; applies DB schema
-- Database: `db-setup.sh` — formats NVMe, installs Postgres, creates database + schema
+Both provisioned by Terraform with `user_data` scripts that run on first boot:
+- Compute: `ec2-userdata.sh` (Go, DuckDB, Unbound, swap)
+- Database: `db-setup.sh` (NVMe format, Postgres install + config)
 
-## Quick Start
-
-Everything runs from your local machine unless noted.
+## 1. Terraform
 
 ```bash
-# 1. Create infrastructure
 cd infra
-cp terraform.tfvars.example terraform.tfvars  # fill in your values (including repo_url)
+cp terraform.tfvars.example terraform.tfvars  # fill in your values
 terraform init
 terraform apply
-
-# 2. Save SSH key
-terraform output -raw ssh_private_key > everytab-key && chmod 600 everytab-key
-
-# 3. Wait ~3-5 minutes for both instances to auto-provision, then verify
-ssh -i everytab-key ec2-user@$(terraform output -raw ec2_public_ip) \
-  'pg_isready -h $(grep DATABASE_URL ~/.bashrc | cut -d@ -f2 | cut -d: -f1)'
 ```
 
-If `repo_url` is set in tfvars, the compute instance automatically:
-- Clones the repo
-- Builds all Go binaries
-- Waits for the DB to be ready
-- Applies the schema
+This creates both instances. They auto-provision via user_data (~3 minutes).
 
-## Running the Pipeline
+## 2. SSH Key
 
-SSH to the compute instance — everything is ready:
+```bash
+terraform output -raw ssh_private_key > everytab-key && chmod 600 everytab-key
+terraform output ssh_command     # SSH to compute instance
+terraform output ssh_command_db  # SSH to database instance
+```
+
+## 3. Verify Database is Ready
+
+```bash
+# Run on the compute instance: the DB's private IP is only reachable from inside the VPC.
+pg_isready -h <db_private_ip>  # value from `terraform output -raw db_private_ip` (run locally)
+```
+
+If not ready yet, SSH to the DB instance and check `cloud-init` logs:
+```bash
+tail -f /var/log/cloud-init-output.log
+```
+
+## 4. Clone Repo + Build on Compute Instance
 
 ```bash
 ssh -i everytab-key ec2-user@$(terraform output -raw ec2_public_ip)
 
-# DATABASE_URL is already in .bashrc, binaries already built
-# Start the pipeline (see pipeline/README.md for full guide)
-./pipeline/01_cc_index/query.sh --db-url "$DATABASE_URL" --limit 0
+git clone <your-repo-url> ~/everytab
+cd ~/everytab
+go build -o ~/warc_parse ./pipeline/02_warc_parse/
+go build -o ~/icon_download ./pipeline/03_icon_download/
+go build -o ~/bundle_gen ./pipeline/05_bundle_gen/
 ```
 
-## Debugging (if auto-provision fails)
+## 5. Connect to Database + Apply Schema
 
-Check cloud-init logs on either instance:
 ```bash
-# Compute instance
-ssh -i everytab-key ec2-user@$(terraform output -raw ec2_public_ip) \
-  'tail -30 /var/log/cloud-init-output.log'
+# On the compute instance; get the value locally with `terraform output -raw database_url`
+export DATABASE_URL='<database_url>'
+echo "export DATABASE_URL='$DATABASE_URL'" >> ~/.bashrc
 
-# DB instance
-ssh -i everytab-key ec2-user@$(terraform output -raw db_public_ip) \
-  'tail -30 /var/log/cloud-init-output.log'
+# Test connectivity
+psql "$DATABASE_URL" -c 'SELECT 1;'
+
+# Apply schema
+psql "$DATABASE_URL" -f ~/everytab/pipeline/01_cc_index/schema.sql
 ```
 
+## 6. Run Pipeline
+
+See `pipeline/README.md` for the full stage-by-stage guide.
+
 ## Pinning the EC2 AMI
 
-The `data.aws_ami` lookup fetches the latest Amazon Linux 2023 AMI. Pin it to prevent instance replacement on unrelated changes:
+The `data.aws_ami` lookup fetches the latest Amazon Linux 2023 AMI. If Amazon publishes a new one between applies, Terraform will want to replace your instances.
+
+To prevent this, pin the AMI after initial creation:
 
 ```bash
+# Get the current AMI
 aws ec2 describe-instances --filters "Name=tag:Name,Values=everytab" \
   --query "Reservations[0].Instances[0].ImageId" --output text
 
@@ -72,27 +86,27 @@ aws ec2 describe-instances --filters "Name=tag:Name,Values=everytab" \
 echo 'ec2_ami = "ami-XXXXXXXXXXXX"' >> terraform.tfvars
 ```
 
-Remove the line when you want fresh instances with the latest AMI.
+Remove the `ec2_ami` line from tfvars when you want fresh instances with the latest AMI.
 
-## Teardown
-
-From the compute instance, back up before tearing down:
+## Teardown (after backup)
 
 ```bash
-# Back up database
+# Back up the database (run from compute instance)
 pg_dump $DATABASE_URL -Fc > ~/everytab_dump.pgfc
 
 # Back up icons to homelab
 rsync -avP ~/icons/ homelab:/backups/everytab/icons/
 ```
 
-From your local machine:
+Switch to serving-only mode from your local machine (destroys both EC2 instances; keeps CloudFront + site bucket):
 
 ```bash
-# Destroy scanning infrastructure (keeps CloudFront + site bucket)
 terraform apply -var="scanning=false"
+```
 
-# Or full destroy (including the live site)
+Full destroy (including the live site):
+
+```bash
 terraform destroy
 ```
 
diff --git a/infra/db-setup.sh b/infra/db-setup.sh
index 954c6cf..5341024 100755
--- a/infra/db-setup.sh
+++ b/infra/db-setup.sh
@@ -15,13 +15,14 @@ NVME_DEV="/dev/nvme1n1"
 NVME_MOUNT="/data"
 
 if [ ! -d "$NVME_MOUNT" ]; then
-    # Find the NVMe instance store — look for unmounted nvme devices
+    # Find the NVMe instance store (not the root EBS)
+    # i3.large has one 475GB NVMe at /dev/nvme1n1 or similar
     if [ ! -b "$NVME_DEV" ]; then
-        NVME_DEV=$(lsblk -dpno NAME | grep nvme | head -1)
+        # Instance-store disks report model "Amazon EC2 NVMe Instance Storage"; EBS (incl. root) does not
+        NVME_DEV=$(lsblk -dpno NAME,MODEL | awk '/Instance Storage/ && !seen++ { print $1 }')
         if [ -z "$NVME_DEV" ]; then
             echo "ERROR: Could not find NVMe instance store device"
-            echo "Available devices:"
-            lsblk
+            echo "Run 'lsblk' and set NVME_DEV manually"
             exit 1
         fi
     fi
diff --git a/infra/ec2-userdata.sh b/infra/ec2-userdata.sh
index 078c29b..56bce25 100755
--- a/infra/ec2-userdata.sh
+++ b/infra/ec2-userdata.sh
@@ -2,11 +2,9 @@
 set -euo pipefail
 
 # EveryTab EC2 Bootstrap
-# Runs automatically via cloud-init user_data on first boot.
+# Runs via cloud-init user_data on first boot (see infra/README.md); can also be run by hand over SSH.
 # Installs: Go, DuckDB, Unbound, psql, pg_dump
 
-export HOME=/root
-
 echo "=== EveryTab EC2 Bootstrap ==="
 
 # --- File descriptor limits ---
@@ -46,7 +44,7 @@ sudo dnf install -y \
 echo "--- Installing Go ---"
 GO_VERSION="1.22.4"
 if ! command -v go &>/dev/null; then
-    curl -fsSL "https://go.dev/dl/go$${GO_VERSION}.linux-amd64.tar.gz" | sudo tar -C /usr/local -xz
+    curl -fsSL "https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz" | sudo tar -C /usr/local -xz
     echo 'export PATH=$PATH:/usr/local/go/bin:$HOME/go/bin' >> ~/.bashrc
     export PATH=$PATH:/usr/local/go/bin:$HOME/go/bin
 fi
@@ -56,7 +54,7 @@ go version
 echo "--- Installing DuckDB ---"
 DUCKDB_VERSION="1.5.2"
 if ! command -v duckdb &>/dev/null; then
-    curl -fsSL "https://github.com/duckdb/duckdb/releases/download/v$${DUCKDB_VERSION}/duckdb_cli-linux-amd64.zip" -o /tmp/duckdb.zip
+    curl -fsSL "https://github.com/duckdb/duckdb/releases/download/v${DUCKDB_VERSION}/duckdb_cli-linux-amd64.zip" -o /tmp/duckdb.zip
     cd /tmp && unzip -o duckdb.zip && sudo mv duckdb /usr/local/bin/ && cd -
 fi
 duckdb -c "SELECT 'DuckDB OK';"
@@ -153,7 +151,7 @@ if [ -n "$REPO_URL" ]; then
     cd /home/ec2-user/everytab
 
     echo "--- Building Go binaries ---"
-    sudo -u ec2-user bash -c 'export PATH=/usr/local/go/bin:$PATH && cd /home/ec2-user/everytab && go build -o /home/ec2-user/warc_parse ./pipeline/02_warc_parse/ && go build -o /home/ec2-user/icon_download ./pipeline/03_icon_download/ && go build -o /home/ec2-user/bundle_gen ./pipeline/05_bundle_gen/'
+    sudo -u ec2-user bash -c 'export PATH=$PATH:/usr/local/go/bin && cd ~/everytab && go build -o ~/warc_parse ./pipeline/02_warc_parse/ && go build -o ~/icon_download ./pipeline/03_icon_download/ && go build -o ~/bundle_gen ./pipeline/05_bundle_gen/'
 
     # Wait for DB to be ready, then apply schema
     echo "--- Waiting for database ---"
diff --git a/infra/main.tf b/infra/main.tf
index 47ea942..bb5a110 100644
--- a/infra/main.tf
+++ b/infra/main.tf
@@ -218,8 +218,6 @@ resource "aws_s3_bucket_lifecycle_configuration" "logs" {
     id     = "expire-old-logs"
     status = "Enabled"
 
-    filter {}
-
     expiration {
       days = 365
     }