From 3ea88790b52f5419bb9c3e0b3519b4cf35c5d236 Mon Sep 17 00:00:00 2001 From: Joe Lothan Date: Mon, 25 May 2026 22:53:11 -0400 Subject: [PATCH 1/6] frontend now looks like firefox and chrome tabs on linux --- frontend/index.html | 142 ++++++++++++++++++++++++++++---------------- frontend/site.js | 48 ++++++++++----- 2 files changed, 123 insertions(+), 67 deletions(-) diff --git a/frontend/index.html b/frontend/index.html index 9cfe985..a71200a 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -57,6 +57,7 @@ image-rendering: auto; } + /* ── Base tab styles ── */ .tab { display: inline-flex; align-items: center; @@ -68,68 +69,71 @@ min-width: 100px; height: 36px; transition: background 0.15s; - /* Default: Firefox inactive tab style */ - border-radius: 4px; border: none; - margin: 0 1px; background: transparent; } - .tab:hover { - background: var(--tab-hover); - border-radius: 4px; + /* ── Linux Firefox (default) ── */ + /* Inactive: transparent, small margin, rounded */ + .tab { + border-radius: 6px; + margin: 0 1px; } - /* Firefox selected tab — solid bg, rounded top, connects to content below */ - .browser-firefox .tab-active, - .browser-firefox .tab-active:hover { + /* Hover: pill-shaped background */ + .tab:hover { + background: var(--tab-hover); + border-radius: 8px; + } + + /* Selected: white bg, rounded top, connects to viewer below */ + .tab-active, + .tab-active:hover { background: var(--tab-bg); border-radius: 8px 8px 0 0; box-shadow: 0 1px 0 0 var(--tab-bg); } - /* Chrome / Chromium — inactive tabs: no bg, thin separators between */ + /* ── Linux Chrome ── */ + /* Inactive: always inset dimensions, separator via ::before */ .browser-chrome .tab { - border-radius: 0; - margin: 0; - padding: 6px 14px; - border-right: 1px solid var(--tab-border); + border-radius: 8px; + margin: 3px 2px; + padding: 3px 12px; + height: 30px; + position: relative; } + .browser-chrome .tab::before { + content: ""; + position: absolute; + left: -2px; + top: 25%; + height: 50%; + width: 1px; + background: var(--tab-border); + } + + /* Hide separator on hover/active tab AND on the tab after hover/active */ + .browser-chrome .tab:hover::before, + .browser-chrome .tab-active::before, + .browser-chrome .tab:hover + .tab::before, + .browser-chrome .tab-active + .tab::before { + background: transparent; + } + + /* Hover: just add background (no layout change) */ .browser-chrome .tab:hover { background: var(--tab-hover); - border-radius: 8px 8px 0 0; - border-right: 1px solid transparent; } - /* Chrome selected tab — raised, rounded top, solid bg */ + /* Selected: white bg, subtle shadow */ .browser-chrome .tab-active, .browser-chrome .tab-active:hover { background: var(--tab-bg); - border-radius: 10px 10px 0 0; - border-right: 1px solid transparent; box-shadow: 0 -1px 4px rgba(0, 0, 0, 0.1); } - /* Safari — flat pill, very minimal */ - .browser-safari .tab { - border-radius: 6px; - margin: 0 2px; - height: 32px; - padding: 4px 10px; - } - - .browser-safari .tab:hover { - background: var(--tab-hover); - } - - /* Safari selected tab — solid pill, slightly elevated */ - .browser-safari .tab-active, - .browser-safari .tab-active:hover { - background: var(--tab-bg); - box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12); - } - .tab-about .tab-title { font-weight: 600; } @@ -181,36 +185,69 @@ 100% { transform: translateX(0); } } - /* Inline iframe viewer */ + /* ── Inline iframe viewer ── */ .iframe-viewer { width: 100%; height: 75vh; background: var(--viewer-bg); - border-top: 2px solid var(--tab-border); border-bottom: 2px solid var(--tab-border); } - .iframe-header { + /* Toolbar area (address bar) */ + .iframe-toolbar { display: flex; align-items: center; gap: 8px; - padding: 6px 12px; + padding: 8px 12px; background: var(--tab-bg); } - .iframe-header .tab-title { + /* URL bar — rounded pill, looks like a browser address bar */ + .iframe-urlbar { flex: 1; - font-size: 13px; + display: flex; + align-items: center; + gap: 8px; + padding: 10px 16px; + background: var(--bg); + border-radius: 24px; + min-width: 0; } - .iframe-header a { - color: var(--text-muted); - font-size: 12px; - text-decoration: none; + /* Firefox: more rectangular address bar */ + .browser-firefox .iframe-urlbar { + border-radius: 8px; } - .iframe-header a:hover { + .iframe-urlbar .tab-icon { + flex-shrink: 0; + } + + .iframe-urlbar .url-text { + font-size: 14px; + line-height: 1; color: var(--text); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + text-decoration: none; + flex-shrink: 0; + } + + .iframe-urlbar .url-text:hover { + text-decoration: underline; + } + + .iframe-urlbar .url-title { + font-size: 12px; + line-height: 1; + color: var(--text-muted); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + text-align: right; + flex: 1; + min-width: 0; } .iframe-close { @@ -219,8 +256,9 @@ color: var(--text); font-size: 16px; cursor: pointer; - padding: 2px 6px; - border-radius: 4px; + padding: 4px 8px; + border-radius: 50%; + line-height: 1; } .iframe-close:hover { @@ -229,7 +267,7 @@ .iframe-viewer iframe { width: 100%; - height: calc(75vh - 36px); + height: calc(75vh - 56px); border: none; background: white; } diff --git a/frontend/site.js b/frontend/site.js index 03d785d..b1dab7f 100644 --- a/frontend/site.js +++ b/frontend/site.js @@ -14,16 +14,26 @@ const loadedBundles = new Set(); const container = document.getElementById("tab-container"); const loadingEl = document.getElementById("loading"); -// Detect browser and set class on body for tab styling +// Detect browser and OS, set classes on body for tab styling function detectBrowser() { const ua = navigator.userAgent; if (ua.includes("Firefox")) return "firefox"; - if (ua.includes("Safari") && !ua.includes("Chrome")) return "safari"; + if (ua.includes("Edg/")) return "edge"; + if (ua.includes("Safari") && !ua.includes("Chrome") && !ua.includes("CriOS")) return "safari"; return "chrome"; // Chrome, Edge, Opera, Brave, etc. } +function detectOS() { + const ua = navigator.userAgent; + if (ua.includes("Windows")) return "windows"; + if (ua.includes("Macintosh") || ua.includes("Mac OS")) return "mac"; + if (ua.includes("iPhone") || ua.includes("iPad")) return "mac"; + return "linux"; // Linux, Android, ChromeOS, unknown +} const browserName = detectBrowser(); -document.body.classList.add(`browser-${browserName}`); +const osName = detectOS(); +document.body.classList.add(`browser-${browserName}`, `os-${osName}`); const isFirefox = browserName === "firefox"; +console.log(`EveryTab: browser=${browserName}, os=${osName}`); // How many tabs fit in one row? function tabsPerRow() { @@ -266,39 +276,47 @@ function openInlineViewer(tabEl, entry, url) { const viewer = document.createElement("div"); viewer.className = "iframe-viewer"; - const header = document.createElement("div"); - header.className = "iframe-header"; + // Toolbar (address bar area) + const toolbar = document.createElement("div"); + toolbar.className = "iframe-toolbar"; + + // URL bar pill + const urlbar = document.createElement("div"); + urlbar.className = "iframe-urlbar"; if (entry.icon) { const icon = document.createElement("img"); icon.className = "tab-icon"; icon.src = `data:image/png;base64,${entry.icon}`; - header.appendChild(icon); + urlbar.appendChild(icon); } - const title = document.createElement("span"); - title.className = "tab-title"; - title.textContent = entry.title || entry.url; - header.appendChild(title); - const link = document.createElement("a"); + link.className = "url-text"; link.href = url; link.target = "_blank"; link.rel = "noopener"; - link.textContent = entry.url + " ↗"; - header.appendChild(link); + link.textContent = entry.url; + urlbar.appendChild(link); + + const urlTitle = document.createElement("span"); + urlTitle.className = "url-title"; + urlTitle.textContent = entry.title || ""; + urlbar.appendChild(urlTitle); + + toolbar.appendChild(urlbar); const close = document.createElement("button"); close.className = "iframe-close"; close.textContent = "✕"; close.addEventListener("click", closeInlineViewer); - header.appendChild(close); + toolbar.appendChild(close); const iframe = document.createElement("iframe"); iframe.sandbox = "allow-scripts allow-same-origin allow-forms"; iframe.src = url; - viewer.appendChild(header); + viewer.appendChild(toolbar); viewer.appendChild(iframe); // Insert after the row From 8d62832c1d144ba67ff2ad5855226ab86ae0c8e8 Mon Sep 17 00:00:00 2001 From: Joe Lothan Date: Mon, 25 May 2026 23:09:40 -0400 Subject: [PATCH 2/6] tmux and htop on the db for performance monitoring --- infra/db-setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/db-setup.sh b/infra/db-setup.sh index 954c6cf..e183480 100755 --- a/infra/db-setup.sh +++ b/infra/db-setup.sh @@ -38,7 +38,7 @@ fi # --- Install Postgres --- echo "--- Installing PostgreSQL 16 ---" if ! command -v pg_isready &>/dev/null; then - sudo dnf install -y postgresql16-server + sudo dnf install -y postgresql16-server tmux htop fi # --- Init database on NVMe --- From 8e3907505fcfaf0b013ac03219a64e6acf6119e9 Mon Sep 17 00:00:00 2001 From: Joe Lothan Date: Mon, 25 May 2026 23:21:50 -0400 Subject: [PATCH 3/6] deploy frontend from the ec2 at the end of the pipeline --- infra/ec2-userdata.sh | 7 +++++++ infra/main.tf | 5 +++++ pipeline/06_frontend/deploy.sh | 33 ++++++++++++++++++++++++++++++++- pipeline/README.md | 25 ++++++++++++++++--------- 4 files changed, 60 insertions(+), 10 deletions(-) diff --git a/infra/ec2-userdata.sh b/infra/ec2-userdata.sh index 078c29b..594ccfa 100755 --- a/infra/ec2-userdata.sh +++ b/infra/ec2-userdata.sh @@ -52,6 +52,13 @@ if ! command -v go &>/dev/null; then fi go version +# --- esbuild --- +echo "--- Installing esbuild ---" +if ! command -v esbuild &>/dev/null; then + GOBIN=/usr/local/bin /usr/local/go/bin/go install github.com/evanw/esbuild/cmd/esbuild@latest +fi +esbuild --version + # --- DuckDB --- echo "--- Installing DuckDB ---" DUCKDB_VERSION="1.5.2" diff --git a/infra/main.tf b/infra/main.tf index 47ea942..ed2e592 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -190,6 +190,11 @@ resource "aws_iam_role_policy" "s3_access" { "arn:aws:s3:::commoncrawl", "arn:aws:s3:::commoncrawl/*", ] + }, + { + Effect = "Allow" + Action = ["cloudfront:CreateInvalidation", "cloudfront:ListDistributions"] + Resource = "*" } ] }) diff --git a/pipeline/06_frontend/deploy.sh b/pipeline/06_frontend/deploy.sh index ba4641c..0f78ede 100755 --- a/pipeline/06_frontend/deploy.sh +++ b/pipeline/06_frontend/deploy.sh @@ -60,16 +60,25 @@ echo "Total bundles: $TOTAL_BUNDLES" echo "S3 bucket: $SITE_BUCKET" echo "" -# Inject TOTAL_BUNDLES into index.html (in a temp copy) +# Build into temp directory TMPDIR=$(mktemp -d) cp "$FRONTEND_DIR/index.html" "$TMPDIR/index.html" cp "$FRONTEND_DIR/site.js" "$TMPDIR/site.js" cp "$FRONTEND_DIR/bot.html" "$TMPDIR/bot.html" cp "$FRONTEND_DIR/about.html" "$TMPDIR/about.html" +# Inject TOTAL_BUNDLES sed -i "s/const TOTAL_BUNDLES = [0-9]*/const TOTAL_BUNDLES = ${TOTAL_BUNDLES}/" "$TMPDIR/index.html" echo "Injected TOTAL_BUNDLES = $TOTAL_BUNDLES" +# Minify JS (strip comments + whitespace, keep variable names) +if command -v esbuild &>/dev/null; then + esbuild "$TMPDIR/site.js" --minify --outfile="$TMPDIR/site.js" --allow-overwrite + echo "Minified site.js" +else + echo "Warning: esbuild not found, deploying unminified JS" +fi + # Upload echo "Uploading to s3://$SITE_BUCKET/..." aws s3 cp "$TMPDIR/index.html" "s3://$SITE_BUCKET/" --content-type "text/html" @@ -80,6 +89,28 @@ echo "Uploaded 4 files" rm -rf "$TMPDIR" +# Clean up stale bundles from previous runs +echo "Cleaning stale bundles above $TOTAL_BUNDLES..." +STALE=$(aws s3api list-objects-v2 --bucket "$SITE_BUCKET" --prefix "tabs/" --query "Contents[].Key" --output text \ + | tr '\t' '\n' \ + | while read -r key; do + num=$(echo "$key" | grep -oP '\d+' || true) + if [ -n "$num" ] && [ "$((10#$num))" -ge "$TOTAL_BUNDLES" ]; then + echo "$key" + fi + done) + +if [ -n "$STALE" ]; then + STALE_COUNT=$(echo "$STALE" | wc -l) + echo "Deleting $STALE_COUNT stale bundles..." + echo "$STALE" | while read -r key; do + aws s3 rm "s3://$SITE_BUCKET/$key" --quiet + done + echo "Deleted $STALE_COUNT stale bundles" +else + echo "No stale bundles found" +fi + # Invalidate CloudFront if ! $SKIP_INVALIDATION; then if [ -z "$DIST_ID" ]; then diff --git a/pipeline/README.md b/pipeline/README.md index fa0e43a..60e7647 100644 --- a/pipeline/README.md +++ b/pipeline/README.md @@ -11,10 +11,9 @@ Between stages, run the sanity checks to confirm data looks right before proceed export DATABASE_URL='postgres://everytab@:5432/everytab' # Go binaries built on EC2 -cd ~/everytab -go build -o ~/warc_parse ./pipeline/02_warc_parse/ -go build -o ~/icon_download ./pipeline/03_icon_download/ -go build -o ~/bundle_gen ./pipeline/05_bundle_gen/ +go build -o ~/warc_parse ./everytab/pipeline/02_warc_parse/ +go build -o ~/icon_download ./everytab/pipeline/03_icon_download/ +go build -o ~/bundle_gen ./everytab/pipeline/05_bundle_gen/ ``` ## Stage 1: CC-Index Query @@ -22,7 +21,7 @@ go build -o ~/bundle_gen ./pipeline/05_bundle_gen/ Populates the `hosts` table from Common Crawl's columnar index. ```bash -./pipeline/01_cc_index/query.sh --db-url "$DATABASE_URL" --limit 100000 +./everytab/pipeline/01_cc_index/query.sh --db-url "$DATABASE_URL" --limit 100000 # Full run: --limit 0 ``` @@ -47,7 +46,7 @@ GOMEMLIMIT=12GiB ./icon_download --db "$DATABASE_URL" --log-file icon_download.l Picks the best icon per host for display. ```bash -psql $DATABASE_URL -f pipeline/04_best_icon/select.sql +psql $DATABASE_URL -f ./everytab/pipeline/04_best_icon/select.sql ``` ## Stage 5: Bundle Generation @@ -62,12 +61,20 @@ Note the `TOTAL_BUNDLES` number from the summary — this gets baked into the fr ## Stage 6: Frontend Deploy -From your local machine: +From EC2, after bundle gen completes: ```bash -./pipeline/06_frontend/deploy.sh --total-bundles +TOTAL_BUNDLES=$(jq -r '.bundles_created' stats/05_bundle_gen.json) +./everytab/pipeline/06_frontend/deploy.sh --total-bundles "$TOTAL_BUNDLES" ``` +The deploy script: +1. Injects TOTAL_BUNDLES into index.html +2. Minifies site.js (via esbuild, strips comments + whitespace) +3. Uploads frontend files to S3 +4. Deletes stale bundles from previous runs (numbers ≥ TOTAL_BUNDLES) +5. Invalidates CloudFront cache + ## Stage 7: Backup to Homelab After the site is deployed and verified, backup data before tearing down scanning infra. @@ -76,7 +83,7 @@ After the site is deployed and verified, backup data before tearing down scannin | Data | Location on EC2 | Size estimate (30M) | Purpose | |------|----------------|---------------------|---------| -| Database | RDS (pg_dump) | ~5-10GB compressed | Full hosts + icons metadata, titles, WARC coordinates | +| Database | pg_dump from i3 instance | ~5-10GB compressed | Full hosts + icons metadata, titles, WARC coordinates | | Icons | `~/icons/` directory | ~500GB-1TB | Complete favicon archive, content-addressed by SHA-256 | | Stats | `~/stats/*.json` | <1MB | Pipeline timing and counts per stage | | Logs | `~/*.log` | varies | Error logs for debugging | From 6649c09d12cc2c0880a2dd328b6b7bd4f8802eee Mon Sep 17 00:00:00 2001 From: Joe Lothan Date: Mon, 25 May 2026 23:33:49 -0400 Subject: [PATCH 4/6] longer cache for the tabs/ s3 bucket --- infra/main.tf | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/infra/main.tf b/infra/main.tf index ed2e592..5540b9c 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -283,6 +283,28 @@ resource "aws_cloudfront_distribution" "site" { origin_access_control_id = aws_cloudfront_origin_access_control.site.id } + # Bundles: 2-week cache (immutable between pipeline runs) + ordered_cache_behavior { + path_pattern = "tabs/*" + allowed_methods = ["GET", "HEAD"] + cached_methods = ["GET", "HEAD"] + target_origin_id = "s3-site" + viewer_protocol_policy = "redirect-to-https" + compress = true + + forwarded_values { + query_string = false + cookies { + forward = "none" + } + } + + min_ttl = 0 + default_ttl = 1209600 # 2 weeks + max_ttl = 31536000 # 1 year + } + + # Everything else (HTML, JS): 1-day cache default_cache_behavior { allowed_methods = ["GET", "HEAD"] cached_methods = ["GET", "HEAD"] @@ -297,7 +319,6 @@ resource "aws_cloudfront_distribution" "site" { } } - # Long cache for bundles, short for HTML/JS during development min_ttl = 0 default_ttl = 86400 # 1 day max_ttl = 31536000 # 1 year From 2256055632a704da1952818d477761a735e89238 Mon Sep 17 00:00:00 2001 From: Joe Lothan Date: Mon, 25 May 2026 23:50:35 -0400 Subject: [PATCH 5/6] faster deploy to clean stale bundles, reading the live site --- pipeline/06_frontend/deploy.sh | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/pipeline/06_frontend/deploy.sh b/pipeline/06_frontend/deploy.sh index 0f78ede..750db9a 100755 --- a/pipeline/06_frontend/deploy.sh +++ b/pipeline/06_frontend/deploy.sh @@ -90,25 +90,20 @@ echo "Uploaded 4 files" rm -rf "$TMPDIR" # Clean up stale bundles from previous runs -echo "Cleaning stale bundles above $TOTAL_BUNDLES..." -STALE=$(aws s3api list-objects-v2 --bucket "$SITE_BUCKET" --prefix "tabs/" --query "Contents[].Key" --output text \ - | tr '\t' '\n' \ - | while read -r key; do - num=$(echo "$key" | grep -oP '\d+' || true) - if [ -n "$num" ] && [ "$((10#$num))" -ge "$TOTAL_BUNDLES" ]; then - echo "$key" - fi - done) +# Read old TOTAL_BUNDLES from the live index.html before we overwrite it +OLD_TOTAL=$(aws s3 cp "s3://$SITE_BUCKET/index.html" - 2>/dev/null \ + | grep -oP 'TOTAL_BUNDLES = \K[0-9]+' || echo "0") -if [ -n "$STALE" ]; then - STALE_COUNT=$(echo "$STALE" | wc -l) - echo "Deleting $STALE_COUNT stale bundles..." - echo "$STALE" | while read -r key; do - aws s3 rm "s3://$SITE_BUCKET/$key" --quiet +if [ "$OLD_TOTAL" -gt "$TOTAL_BUNDLES" ]; then + STALE_COUNT=$((OLD_TOTAL - TOTAL_BUNDLES)) + echo "Deleting $STALE_COUNT stale bundles ($TOTAL_BUNDLES to $((OLD_TOTAL - 1)))..." + for i in $(seq "$TOTAL_BUNDLES" "$((OLD_TOTAL - 1))"); do + PADDED=$(printf "%04d" "$i") + aws s3 rm "s3://$SITE_BUCKET/tabs/${PADDED}.json" --quiet done echo "Deleted $STALE_COUNT stale bundles" else - echo "No stale bundles found" + echo "No stale bundles to clean (old=$OLD_TOTAL, new=$TOTAL_BUNDLES)" fi # Invalidate CloudFront From 59d4d2c9e010888cb9b57f8bcfeffbb974f2cdbe Mon Sep 17 00:00:00 2001 From: Joe Lothan Date: Mon, 25 May 2026 23:50:58 -0400 Subject: [PATCH 6/6] title goes above address bar on mobile --- frontend/index.html | 24 ++++++++++++++++++++++++ frontend/site.js | 6 ++++++ 2 files changed, 30 insertions(+) diff --git a/frontend/index.html b/frontend/index.html index a71200a..f399367 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -223,6 +223,10 @@ flex-shrink: 0; } + .url-title-mobile { + display: none; + } + .iframe-urlbar .url-text { font-size: 14px; line-height: 1; @@ -250,6 +254,26 @@ min-width: 0; } + /* Mobile: title above address bar */ + @media (max-width: 640px) { + .iframe-toolbar { + flex-wrap: wrap; + } + .iframe-toolbar .url-title-mobile { + width: 100%; + font-size: 13px; + color: var(--text); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + padding: 0 4px 4px; + order: -1; + } + .iframe-urlbar .url-title { + display: none; + } + } + .iframe-close { background: none; border: none; diff --git a/frontend/site.js b/frontend/site.js index b1dab7f..7126c13 100644 --- a/frontend/site.js +++ b/frontend/site.js @@ -306,6 +306,12 @@ function openInlineViewer(tabEl, entry, url) { toolbar.appendChild(urlbar); + // Mobile: title shown above the URL bar (hidden on desktop via CSS) + const mobileTitle = document.createElement("span"); + mobileTitle.className = "url-title-mobile"; + mobileTitle.textContent = entry.title || ""; + toolbar.appendChild(mobileTitle); + const close = document.createElement("button"); close.className = "iframe-close"; close.textContent = "✕";