diff --git a/frontend/index.html b/frontend/index.html index 9cfe985..f399367 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -57,6 +57,7 @@ image-rendering: auto; } + /* ── Base tab styles ── */ .tab { display: inline-flex; align-items: center; @@ -68,68 +69,71 @@ min-width: 100px; height: 36px; transition: background 0.15s; - /* Default: Firefox inactive tab style */ - border-radius: 4px; border: none; - margin: 0 1px; background: transparent; } - .tab:hover { - background: var(--tab-hover); - border-radius: 4px; + /* ── Linux Firefox (default) ── */ + /* Inactive: transparent, small margin, rounded */ + .tab { + border-radius: 6px; + margin: 0 1px; } - /* Firefox selected tab — solid bg, rounded top, connects to content below */ - .browser-firefox .tab-active, - .browser-firefox .tab-active:hover { + /* Hover: pill-shaped background */ + .tab:hover { + background: var(--tab-hover); + border-radius: 8px; + } + + /* Selected: white bg, rounded top, connects to viewer below */ + .tab-active, + .tab-active:hover { background: var(--tab-bg); border-radius: 8px 8px 0 0; box-shadow: 0 1px 0 0 var(--tab-bg); } - /* Chrome / Chromium — inactive tabs: no bg, thin separators between */ + /* ── Linux Chrome ── */ + /* Inactive: always inset dimensions, separator via ::before */ .browser-chrome .tab { - border-radius: 0; - margin: 0; - padding: 6px 14px; - border-right: 1px solid var(--tab-border); + border-radius: 8px; + margin: 3px 2px; + padding: 3px 12px; + height: 30px; + position: relative; } + .browser-chrome .tab::before { + content: ""; + position: absolute; + left: -2px; + top: 25%; + height: 50%; + width: 1px; + background: var(--tab-border); + } + + /* Hide separator on hover/active tab AND on the tab after hover/active */ + .browser-chrome .tab:hover::before, + .browser-chrome .tab-active::before, + .browser-chrome .tab:hover + .tab::before, + .browser-chrome .tab-active + .tab::before { + background: transparent; + } + + /* Hover: just add 
background (no layout change) */ .browser-chrome .tab:hover { background: var(--tab-hover); - border-radius: 8px 8px 0 0; - border-right: 1px solid transparent; } - /* Chrome selected tab — raised, rounded top, solid bg */ + /* Selected: white bg, subtle shadow */ .browser-chrome .tab-active, .browser-chrome .tab-active:hover { background: var(--tab-bg); - border-radius: 10px 10px 0 0; - border-right: 1px solid transparent; box-shadow: 0 -1px 4px rgba(0, 0, 0, 0.1); } - /* Safari — flat pill, very minimal */ - .browser-safari .tab { - border-radius: 6px; - margin: 0 2px; - height: 32px; - padding: 4px 10px; - } - - .browser-safari .tab:hover { - background: var(--tab-hover); - } - - /* Safari selected tab — solid pill, slightly elevated */ - .browser-safari .tab-active, - .browser-safari .tab-active:hover { - background: var(--tab-bg); - box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12); - } - .tab-about .tab-title { font-weight: 600; } @@ -181,36 +185,93 @@ 100% { transform: translateX(0); } } - /* Inline iframe viewer */ + /* ── Inline iframe viewer ── */ .iframe-viewer { width: 100%; height: 75vh; background: var(--viewer-bg); - border-top: 2px solid var(--tab-border); border-bottom: 2px solid var(--tab-border); } - .iframe-header { + /* Toolbar area (address bar) */ + .iframe-toolbar { display: flex; align-items: center; gap: 8px; - padding: 6px 12px; + padding: 8px 12px; background: var(--tab-bg); } - .iframe-header .tab-title { + /* URL bar — rounded pill, looks like a browser address bar */ + .iframe-urlbar { flex: 1; - font-size: 13px; + display: flex; + align-items: center; + gap: 8px; + padding: 10px 16px; + background: var(--bg); + border-radius: 24px; + min-width: 0; } - .iframe-header a { - color: var(--text-muted); - font-size: 12px; - text-decoration: none; + /* Firefox: more rectangular address bar */ + .browser-firefox .iframe-urlbar { + border-radius: 8px; } - .iframe-header a:hover { + .iframe-urlbar .tab-icon { + flex-shrink: 0; + } + + 
/* Copy of the page title that is shown above the URL bar on narrow screens.
   Hidden by default; the media query at the end of this section turns it on. */
.url-title-mobile {
  display: none;
}

/* The URL link inside the pill.
   It must be allowed to shrink (flex-shrink: 1 + min-width: 0): with
   flex-shrink: 0 the item keeps its full content width, so a long URL spills
   out of the pill and text-overflow never gets a chance to draw the ellipsis. */
.iframe-urlbar .url-text {
  font-size: 14px;
  line-height: 1;
  color: var(--text);
  overflow: hidden;
  text-overflow: ellipsis;
  white-space: nowrap;
  text-decoration: none;
  flex: 0 1 auto;
  min-width: 0;
}

.iframe-urlbar .url-text:hover {
  text-decoration: underline;
}

/* Page title, right-aligned in whatever width the URL leaves over. */
.iframe-urlbar .url-title {
  font-size: 12px;
  line-height: 1;
  color: var(--text-muted);
  overflow: hidden;
  text-overflow: ellipsis;
  white-space: nowrap;
  text-align: right;
  flex: 1;
  min-width: 0;
}

/* Mobile: title above address bar */
@media (max-width: 640px) {
  .iframe-toolbar {
    flex-wrap: wrap;
  }
  .iframe-toolbar .url-title-mobile {
    /* Undo the default display: none above; without this the title stays
       hidden on mobile as well, because the in-pill title is hidden below. */
    display: block;
    width: 100%;
    font-size: 13px;
    color: var(--text);
    overflow: hidden;
    text-overflow: ellipsis;
    white-space: nowrap;
    padding: 0 4px 4px;
    /* Full-width item ordered first, so it wraps onto its own row on top. */
    order: -1;
  }
  .iframe-urlbar .url-title {
    display: none;
  }
}
/**
 * Classify a browser from its user-agent string.
 *
 * @param {string} [ua=navigator.userAgent] - User-agent string to inspect.
 *   Defaults to the current browser's, so existing zero-argument calls keep
 *   working.
 * @returns {"firefox"|"edge"|"safari"|"chrome"} Browser family. Anything that
 *   is not recognised falls through to "chrome".
 */
function detectBrowser(ua = navigator.userAgent) {
  if (ua.includes("Firefox")) return "firefox";
  if (ua.includes("Edg/")) return "edge";

  // A "Safari" token alone is not enough: Chromium browsers send it too, and
  // Chrome on iOS identifies itself as "CriOS" rather than "Chrome".
  const isSafari =
    ua.includes("Safari") && !ua.includes("Chrome") && !ua.includes("CriOS");
  if (isSafari) return "safari";

  return "chrome"; // Chrome, Opera, Brave, and anything unrecognised
}

/**
 * Classify an operating system from a user-agent string.
 *
 * @param {string} [ua=navigator.userAgent] - User-agent string to inspect.
 * @returns {"windows"|"mac"|"linux"} OS family. iPhone and iPad are reported
 *   as "mac"; everything else that is not Windows is reported as "linux".
 */
function detectOS(ua = navigator.userAgent) {
  if (ua.includes("Windows")) return "windows";

  const appleTokens = ["Macintosh", "Mac OS", "iPhone", "iPad"];
  if (appleTokens.some((token) => ua.includes(token))) return "mac";

  return "linux"; // Linux, Android, ChromeOS, unknown
}
toolbar.appendChild(urlbar); + + // Mobile: title shown above the URL bar (hidden on desktop via CSS) + const mobileTitle = document.createElement("span"); + mobileTitle.className = "url-title-mobile"; + mobileTitle.textContent = entry.title || ""; + toolbar.appendChild(mobileTitle); const close = document.createElement("button"); close.className = "iframe-close"; close.textContent = "✕"; close.addEventListener("click", closeInlineViewer); - header.appendChild(close); + toolbar.appendChild(close); const iframe = document.createElement("iframe"); iframe.sandbox = "allow-scripts allow-same-origin allow-forms"; iframe.src = url; - viewer.appendChild(header); + viewer.appendChild(toolbar); viewer.appendChild(iframe); // Insert after the row diff --git a/infra/db-setup.sh b/infra/db-setup.sh index 954c6cf..e183480 100755 --- a/infra/db-setup.sh +++ b/infra/db-setup.sh @@ -38,7 +38,7 @@ fi # --- Install Postgres --- echo "--- Installing PostgreSQL 16 ---" if ! command -v pg_isready &>/dev/null; then - sudo dnf install -y postgresql16-server + sudo dnf install -y postgresql16-server tmux htop fi # --- Init database on NVMe --- diff --git a/infra/ec2-userdata.sh b/infra/ec2-userdata.sh index 078c29b..594ccfa 100755 --- a/infra/ec2-userdata.sh +++ b/infra/ec2-userdata.sh @@ -52,6 +52,13 @@ if ! command -v go &>/dev/null; then fi go version +# --- esbuild --- +echo "--- Installing esbuild ---" +if ! 
command -v esbuild &>/dev/null; then + GOBIN=/usr/local/bin /usr/local/go/bin/go install github.com/evanw/esbuild/cmd/esbuild@latest +fi +esbuild --version + # --- DuckDB --- echo "--- Installing DuckDB ---" DUCKDB_VERSION="1.5.2" diff --git a/infra/main.tf b/infra/main.tf index 47ea942..5540b9c 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -190,6 +190,11 @@ resource "aws_iam_role_policy" "s3_access" { "arn:aws:s3:::commoncrawl", "arn:aws:s3:::commoncrawl/*", ] + }, + { + Effect = "Allow" + Action = ["cloudfront:CreateInvalidation", "cloudfront:ListDistributions"] + Resource = "*" } ] }) @@ -278,6 +283,28 @@ resource "aws_cloudfront_distribution" "site" { origin_access_control_id = aws_cloudfront_origin_access_control.site.id } + # Bundles: 2-week cache (immutable between pipeline runs) + ordered_cache_behavior { + path_pattern = "tabs/*" + allowed_methods = ["GET", "HEAD"] + cached_methods = ["GET", "HEAD"] + target_origin_id = "s3-site" + viewer_protocol_policy = "redirect-to-https" + compress = true + + forwarded_values { + query_string = false + cookies { + forward = "none" + } + } + + min_ttl = 0 + default_ttl = 1209600 # 2 weeks + max_ttl = 31536000 # 1 year + } + + # Everything else (HTML, JS): 1-day cache default_cache_behavior { allowed_methods = ["GET", "HEAD"] cached_methods = ["GET", "HEAD"] @@ -292,7 +319,6 @@ resource "aws_cloudfront_distribution" "site" { } } - # Long cache for bundles, short for HTML/JS during development min_ttl = 0 default_ttl = 86400 # 1 day max_ttl = 31536000 # 1 year diff --git a/pipeline/06_frontend/deploy.sh b/pipeline/06_frontend/deploy.sh index ba4641c..750db9a 100755 --- a/pipeline/06_frontend/deploy.sh +++ b/pipeline/06_frontend/deploy.sh @@ -60,16 +60,25 @@ echo "Total bundles: $TOTAL_BUNDLES" echo "S3 bucket: $SITE_BUCKET" echo "" -# Inject TOTAL_BUNDLES into index.html (in a temp copy) +# Build into temp directory TMPDIR=$(mktemp -d) cp "$FRONTEND_DIR/index.html" "$TMPDIR/index.html" cp "$FRONTEND_DIR/site.js" 
# Clean up stale bundles from previous runs.
#
# This step runs after index.html has been uploaded, so the copy in S3 already
# carries the NEW TOTAL_BUNDLES and cannot tell us how many bundles the previous
# run produced (reading it back always yields old == new and nothing is ever
# deleted). List tabs/ instead and remove every NNNN.json whose number is
# >= TOTAL_BUNDLES.
#
# NOTE(review): `aws s3 ls` needs s3:ListBucket on $SITE_BUCKET - confirm the
# deploy role grants it.
STALE_COUNT=0
while read -r _DATE _TIME _SIZE KEY; do
    # Only touch keys shaped like the numbered bundles (digits + .json);
    # "PRE <prefix>/" lines and anything else leave KEY empty or unmatched.
    [[ "$KEY" =~ ^([0-9]+)\.json$ ]] || continue
    # 10# forces base 10 so zero-padded names such as 0042 are not read as octal.
    if (( 10#${BASH_REMATCH[1]} >= TOTAL_BUNDLES )); then
        # </dev/null keeps the CLI from consuming the listing we are iterating.
        aws s3 rm "s3://$SITE_BUCKET/tabs/$KEY" --quiet </dev/null
        STALE_COUNT=$((STALE_COUNT + 1))
    fi
done < <(aws s3 ls "s3://$SITE_BUCKET/tabs/" 2>/dev/null || true)

if [ "$STALE_COUNT" -gt 0 ]; then
    echo "Deleted $STALE_COUNT stale bundles (numbers >= $TOTAL_BUNDLES)"
else
    echo "No stale bundles to clean (new=$TOTAL_BUNDLES)"
fi
$SKIP_INVALIDATION; then if [ -z "$DIST_ID" ]; then diff --git a/pipeline/README.md b/pipeline/README.md index fa0e43a..60e7647 100644 --- a/pipeline/README.md +++ b/pipeline/README.md @@ -11,10 +11,9 @@ Between stages, run the sanity checks to confirm data looks right before proceed export DATABASE_URL='postgres://everytab@:5432/everytab' # Go binaries built on EC2 -cd ~/everytab -go build -o ~/warc_parse ./pipeline/02_warc_parse/ -go build -o ~/icon_download ./pipeline/03_icon_download/ -go build -o ~/bundle_gen ./pipeline/05_bundle_gen/ +go build -o ~/warc_parse ./everytab/pipeline/02_warc_parse/ +go build -o ~/icon_download ./everytab/pipeline/03_icon_download/ +go build -o ~/bundle_gen ./everytab/pipeline/05_bundle_gen/ ``` ## Stage 1: CC-Index Query @@ -22,7 +21,7 @@ go build -o ~/bundle_gen ./pipeline/05_bundle_gen/ Populates the `hosts` table from Common Crawl's columnar index. ```bash -./pipeline/01_cc_index/query.sh --db-url "$DATABASE_URL" --limit 100000 +./everytab/pipeline/01_cc_index/query.sh --db-url "$DATABASE_URL" --limit 100000 # Full run: --limit 0 ``` @@ -47,7 +46,7 @@ GOMEMLIMIT=12GiB ./icon_download --db "$DATABASE_URL" --log-file icon_download.l Picks the best icon per host for display. ```bash -psql $DATABASE_URL -f pipeline/04_best_icon/select.sql +psql $DATABASE_URL -f ./everytab/pipeline/04_best_icon/select.sql ``` ## Stage 5: Bundle Generation @@ -62,12 +61,20 @@ Note the `TOTAL_BUNDLES` number from the summary — this gets baked into the fr ## Stage 6: Frontend Deploy -From your local machine: +From EC2, after bundle gen completes: ```bash -./pipeline/06_frontend/deploy.sh --total-bundles +TOTAL_BUNDLES=$(jq -r '.bundles_created' stats/05_bundle_gen.json) +./everytab/pipeline/06_frontend/deploy.sh --total-bundles "$TOTAL_BUNDLES" ``` +The deploy script: +1. Injects TOTAL_BUNDLES into index.html +2. Minifies site.js (via esbuild, strips comments + whitespace) +3. Uploads frontend files to S3 +4. 
Deletes stale bundles from previous runs (numbers ≥ TOTAL_BUNDLES) +5. Invalidates CloudFront cache + ## Stage 7: Backup to Homelab After the site is deployed and verified, backup data before tearing down scanning infra. @@ -76,7 +83,7 @@ After the site is deployed and verified, backup data before tearing down scannin | Data | Location on EC2 | Size estimate (30M) | Purpose | |------|----------------|---------------------|---------| -| Database | RDS (pg_dump) | ~5-10GB compressed | Full hosts + icons metadata, titles, WARC coordinates | +| Database | pg_dump from i3 instance | ~5-10GB compressed | Full hosts + icons metadata, titles, WARC coordinates | | Icons | `~/icons/` directory | ~500GB-1TB | Complete favicon archive, content-addressed by SHA-256 | | Stats | `~/stats/*.json` | <1MB | Pipeline timing and counts per stage | | Logs | `~/*.log` | varies | Error logs for debugging |