178 lines
5.1 KiB
Bash
Executable file
178 lines
5.1 KiB
Bash
Executable file
#!/usr/bin/env bash
# EveryTab EC2 bootstrap.
#
# Executed by cloud-init (user_data) on the instance's first boot.
# Sets up: Go, DuckDB, Unbound, psql, pg_dump.
#
# NOTE(review): the doubled-dollar escapes and bare placeholders used later in
# this file suggest it is rendered as a template before it runs — confirm.

# Strict mode: abort on failed commands, unset variables and failed pipeline
# stages.
set -o errexit -o nounset -o pipefail

# Pin HOME to root's home directory for everything that follows.
HOME=/root
export HOME

echo "=== EveryTab EC2 Bootstrap ==="
|
||
|
||
# --- File descriptor limits ---
# Persist a 65536 open-file limit (soft and hard) for all users and raise the
# limit of the current shell as well.
# Each entry is appended only when it is not already present, so re-running
# the script does not pile up duplicate lines in limits.conf (the swap, Go and
# DuckDB sections are guarded against re-runs in the same spirit).
echo "--- Raising file descriptor limits ---"
LIMITS_CONF=/etc/security/limits.conf
for limit_line in '* soft nofile 65536' '* hard nofile 65536'; do
  # -x: whole-line match, -F: literal string (the leading '*' is not a regex).
  if ! sudo grep -qxF -- "$limit_line" "$LIMITS_CONF" 2>/dev/null; then
    echo "$limit_line" | sudo tee -a "$LIMITS_CONF"
  fi
done
# Also set for current session
ulimit -n 65536
echo "File descriptor limit: $(ulimit -n)"
|
||
|
||
# --- Swap ---
# Create, enable and persist a swap file. Skipped entirely when /swapfile
# already exists, so the fstab entry is only ever added once.
echo "--- Creating swap file ---"
# Size of the swap file in MiB (dd writes this many 1 MiB blocks).
SWAP_SIZE_MB=8192
if [ ! -f /swapfile ]; then
  sudo dd if=/dev/zero of=/swapfile bs=1M count="$SWAP_SIZE_MB"
  # Restrict access before formatting the file as swap.
  sudo chmod 600 /swapfile
  sudo mkswap /swapfile
  sudo swapon /swapfile
  # Re-enable the swap file on subsequent boots.
  echo '/swapfile swap swap defaults 0 0' | sudo tee -a /etc/fstab
  # Report the size that was actually written; the message used to be a
  # hard-coded "4GB" although 8192 MiB are allocated.
  echo "Created $((SWAP_SIZE_MB / 1024))GB swap"
else
  echo "Swap already exists"
fi
|
||
|
||
# --- System packages ---
# Update the base system, then install the build tools, database client,
# resolver and utilities used by the rest of this script.
echo "--- Installing system packages ---"
sudo dnf update -y
# unzip and bind-utils are listed explicitly because later sections call
# `unzip` (DuckDB install) and `dig` (validation); with strict mode on, a
# missing binary would abort the bootstrap.
# NOTE(review): both may already ship in the base image — installing them
# again is a no-op.
sudo dnf install -y \
  gcc \
  git \
  postgresql16 \
  unbound \
  jq \
  htop \
  tmux \
  unzip \
  bind-utils
|
||
|
||
# --- Go ---
# Install the pinned Go toolchain under /usr/local/go unless a `go` binary is
# already reachable, then print its version as a smoke test.
echo "--- Installing Go ---"
GO_VERSION="1.22.4"
# Extend PATH before probing for `go`. Previously this happened only inside
# the install branch, so a re-run in a fresh non-interactive shell could not
# see /usr/local/go/bin, downloaded the toolchain again and appended a
# duplicate PATH line to ~/.bashrc.
export PATH=$PATH:/usr/local/go/bin:$HOME/go/bin
if ! command -v go &>/dev/null; then
  # Stream the release tarball straight into /usr/local (creates /usr/local/go).
  curl -fsSL "https://go.dev/dl/go$${GO_VERSION}.linux-amd64.tar.gz" | sudo tar -C /usr/local -xz
  # Persist PATH for future interactive shells of the current user.
  # Single quotes keep the variables unexpanded in the written line.
  echo 'export PATH=$PATH:/usr/local/go/bin:$HOME/go/bin' >> ~/.bashrc
fi
go version
|
||
|
||
# --- DuckDB ---
# Install the pinned DuckDB CLI into /usr/local/bin unless `duckdb` is already
# on PATH, run a trivial query as a smoke test, then install the extensions.
echo "--- Installing DuckDB ---"
DUCKDB_VERSION="1.5.2"
if ! command -v duckdb &>/dev/null; then
  # Download and unpack in a private temp directory instead of a predictable
  # /tmp/duckdb.zip; `unzip -d` also removes the need to cd back and forth.
  duckdb_tmp=$(mktemp -d)
  curl -fsSL "https://github.com/duckdb/duckdb/releases/download/v$${DUCKDB_VERSION}/duckdb_cli-linux-amd64.zip" -o "$duckdb_tmp/duckdb.zip"
  unzip -o "$duckdb_tmp/duckdb.zip" -d "$duckdb_tmp"
  sudo mv "$duckdb_tmp/duckdb" /usr/local/bin/
  rm -rf -- "$duckdb_tmp"
fi
duckdb -c "SELECT 'DuckDB OK';"

# Install DuckDB extensions
# NOTE(review): this installs the extensions for the user running this script;
# confirm whether the account that later runs the pipeline needs its own
# install.
duckdb -c "INSTALL httpfs; INSTALL postgres;"
echo "DuckDB extensions installed"
|
||
|
||
# --- Unbound ---
# Configure Unbound as a local caching resolver listening on 127.0.0.1:53 and
# make it the system resolver.
#
# Step order is deliberate:
#   1. write the config and fetch root hints (still using the original DNS),
#   2. generate the remote-control keys before the first start, because the
#      config enables remote-control,
#   3. stop systemd-resolved if it is running, then start Unbound,
#   4. only then point /etc/resolv.conf at Unbound, so a failed start does not
#      leave resolv.conf pointing at a dead listener.
echo "--- Configuring Unbound ---"
sudo tee /etc/unbound/unbound.conf > /dev/null <<'UNBOUNDCONF'
server:
  interface: 127.0.0.1
  port: 53
  access-control: 127.0.0.0/8 allow

  # Performance
  num-threads: 4
  msg-cache-slabs: 4
  rrset-cache-slabs: 4
  infra-cache-slabs: 4
  key-cache-slabs: 4

  # Cache sizing — each value is the total size of that cache; slabs only
  # partition it to reduce lock contention between threads.
  # NOTE(review): these values were chosen assuming per-slab sizing (~500MB
  # total); the effective total is ~112MB. Raise them if more cache is wanted.
  # TLD/NS records stay hot via LRU, individual A records evict naturally.
  msg-cache-size: 32m
  rrset-cache-size: 64m
  key-cache-size: 16m

  # Aggressive caching
  cache-min-ttl: 3600
  cache-max-ttl: 86400
  prefetch: yes
  prefetch-key: yes

  # Hardening
  hide-identity: yes
  hide-version: yes
  harden-glue: yes
  harden-dnssec-stripped: yes

  # Logging (minimal)
  verbosity: 1
  log-queries: no

  # Root hints
  root-hints: "/etc/unbound/root.hints"

remote-control:
  control-enable: yes
  control-interface: 127.0.0.1
UNBOUNDCONF

# Download root hints
sudo curl -fsSL https://www.internic.net/domain/named.root -o /etc/unbound/root.hints

# Generate control keys for unbound-control stats.
# Best effort on purpose: a failure here must not abort the bootstrap.
sudo unbound-control-setup 2>/dev/null || true

# Disable systemd-resolved if present (it manages resolv.conf on AL2023)
if systemctl is-active --quiet systemd-resolved 2>/dev/null; then
  sudo systemctl disable --now systemd-resolved
fi

# Start and enable Unbound
sudo systemctl enable unbound
sudo systemctl restart unbound

# Set system resolver to use Unbound (only reached if the restart succeeded).
# Remove first so that a symlinked resolv.conf is replaced, not written through.
sudo rm -f /etc/resolv.conf
echo "nameserver 127.0.0.1" | sudo tee /etc/resolv.conf > /dev/null

echo ""
|
||
|
||
# --- Validation ---
# Print one line per installed tool. Strict mode is active, so any tool that
# is missing or fails aborts the bootstrap here.
echo "=== Validation ==="
printf 'Go: '; go version
# Output-format flags must precede -c: the DuckDB CLI processes arguments in
# order, so flags placed after the command do not apply to it.
printf 'DuckDB: '; duckdb -noheader -csv -c "SELECT version();"
# Resolve a public name through the local Unbound instance.
printf 'Unbound: '; dig +short example.com @127.0.0.1 | head -1
printf 'psql: '; psql --version

echo ""
|
||
|
||
# --- Database Connection ---
# Build the Postgres connection URL once, export it for the remainder of this
# script, and persist the same export line for ec2-user's future shells.
# NOTE(review): db_private_ip looks like a placeholder filled in before the
# script runs — confirm against whatever renders this file.
DB_IP="${db_private_ip}"
DATABASE_URL="postgres://everytab@$DB_IP:5432/everytab"
export DATABASE_URL
printf "export DATABASE_URL='%s'\n" "$DATABASE_URL" >> /home/ec2-user/.bashrc
|
||
|
||
# --- Clone Repo + Build ---
# When a repository URL is configured: clone it as ec2-user, build the three
# pipeline binaries into /home/ec2-user, then wait for the database and apply
# the schema. Relies on DB_IP and DATABASE_URL set earlier in this script.
# NOTE(review): repo_url looks like a placeholder filled in before the script
# runs — confirm against whatever renders this file.
REPO_URL="${repo_url}"
if [ -n "$REPO_URL" ]; then
  echo "--- Cloning repo ---"
  sudo -u ec2-user git clone "$REPO_URL" /home/ec2-user/everytab
  cd /home/ec2-user/everytab

  echo "--- Building Go binaries ---"
  # PATH is set explicitly because the sudo'd shell does not inherit the Go
  # location exported earlier in this script.
  sudo -u ec2-user bash -c 'export PATH=/usr/local/go/bin:$PATH &&
    cd /home/ec2-user/everytab &&
    go build -o /home/ec2-user/warc_parse ./pipeline/02_warc_parse/ &&
    go build -o /home/ec2-user/icon_download ./pipeline/03_icon_download/ &&
    go build -o /home/ec2-user/bundle_gen ./pipeline/05_bundle_gen/'

  # Wait for DB to be ready (up to 60 probes, 5 seconds apart), then apply
  # the schema.
  echo "--- Waiting for database ---"
  db_ready=0
  for ((attempt = 1; attempt <= 60; attempt++)); do
    if pg_isready -h "$DB_IP" -q 2>/dev/null; then
      db_ready=1
      break
    fi
    sleep 5
  done

  if [ "$db_ready" -eq 1 ]; then
    echo "Database ready"
    # NOTE(review): without ON_ERROR_STOP psql keeps going after SQL errors,
    # so "Schema applied" does not prove every statement succeeded.
    sudo -u ec2-user psql "$DATABASE_URL" -f /home/ec2-user/everytab/pipeline/01_cc_index/schema.sql
    echo "Schema applied"
  else
    # Previously the loop simply ran out and the script carried on silently;
    # make the skipped schema visible in the boot log.
    echo "WARNING: database at $DB_IP not ready after 60 attempts — schema NOT applied" >&2
  fi
else
  echo "No repo_url set — clone manually"
fi
|
||
|
||
# Closing banner: confirm completion and show the connection string that was
# exported earlier. The unquoted heredoc delimiter lets the shell expand the
# variable.
cat <<BANNER

=== Bootstrap Complete ===

DATABASE_URL=$DATABASE_URL

Ready to run the pipeline. See pipeline/README.md for usage.
BANNER
|