diff --git a/frontend/index.html b/frontend/index.html index f399367..9cfe985 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -57,7 +57,6 @@ image-rendering: auto; } - /* ── Base tab styles ── */ .tab { display: inline-flex; align-items: center; @@ -69,71 +68,68 @@ min-width: 100px; height: 36px; transition: background 0.15s; + /* Default: Firefox inactive tab style */ + border-radius: 4px; border: none; + margin: 0 1px; background: transparent; } - /* ── Linux Firefox (default) ── */ - /* Inactive: transparent, small margin, rounded */ - .tab { - border-radius: 6px; - margin: 0 1px; - } - - /* Hover: pill-shaped background */ .tab:hover { background: var(--tab-hover); - border-radius: 8px; + border-radius: 4px; } - /* Selected: white bg, rounded top, connects to viewer below */ - .tab-active, - .tab-active:hover { + /* Firefox selected tab — solid bg, rounded top, connects to content below */ + .browser-firefox .tab-active, + .browser-firefox .tab-active:hover { background: var(--tab-bg); border-radius: 8px 8px 0 0; box-shadow: 0 1px 0 0 var(--tab-bg); } - /* ── Linux Chrome ── */ - /* Inactive: always inset dimensions, separator via ::before */ + /* Chrome / Chromium — inactive tabs: no bg, thin separators between */ .browser-chrome .tab { - border-radius: 8px; - margin: 3px 2px; - padding: 3px 12px; - height: 30px; - position: relative; + border-radius: 0; + margin: 0; + padding: 6px 14px; + border-right: 1px solid var(--tab-border); } - .browser-chrome .tab::before { - content: ""; - position: absolute; - left: -2px; - top: 25%; - height: 50%; - width: 1px; - background: var(--tab-border); - } - - /* Hide separator on hover/active tab AND on the tab after hover/active */ - .browser-chrome .tab:hover::before, - .browser-chrome .tab-active::before, - .browser-chrome .tab:hover + .tab::before, - .browser-chrome .tab-active + .tab::before { - background: transparent; - } - - /* Hover: just add background (no layout change) */ .browser-chrome 
.tab:hover { background: var(--tab-hover); + border-radius: 8px 8px 0 0; + border-right: 1px solid transparent; } - /* Selected: white bg, subtle shadow */ + /* Chrome selected tab — raised, rounded top, solid bg */ .browser-chrome .tab-active, .browser-chrome .tab-active:hover { background: var(--tab-bg); + border-radius: 10px 10px 0 0; + border-right: 1px solid transparent; box-shadow: 0 -1px 4px rgba(0, 0, 0, 0.1); } + /* Safari — flat pill, very minimal */ + .browser-safari .tab { + border-radius: 6px; + margin: 0 2px; + height: 32px; + padding: 4px 10px; + } + + .browser-safari .tab:hover { + background: var(--tab-hover); + } + + /* Safari selected tab — solid pill, slightly elevated */ + .browser-safari .tab-active, + .browser-safari .tab-active:hover { + background: var(--tab-bg); + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12); + } + .tab-about .tab-title { font-weight: 600; } @@ -185,93 +181,36 @@ 100% { transform: translateX(0); } } - /* ── Inline iframe viewer ── */ + /* Inline iframe viewer */ .iframe-viewer { width: 100%; height: 75vh; background: var(--viewer-bg); + border-top: 2px solid var(--tab-border); border-bottom: 2px solid var(--tab-border); } - /* Toolbar area (address bar) */ - .iframe-toolbar { + .iframe-header { display: flex; align-items: center; gap: 8px; - padding: 8px 12px; + padding: 6px 12px; background: var(--tab-bg); } - /* URL bar — rounded pill, looks like a browser address bar */ - .iframe-urlbar { + .iframe-header .tab-title { flex: 1; - display: flex; - align-items: center; - gap: 8px; - padding: 10px 16px; - background: var(--bg); - border-radius: 24px; - min-width: 0; + font-size: 13px; } - /* Firefox: more rectangular address bar */ - .browser-firefox .iframe-urlbar { - border-radius: 8px; - } - - .iframe-urlbar .tab-icon { - flex-shrink: 0; - } - - .url-title-mobile { - display: none; - } - - .iframe-urlbar .url-text { - font-size: 14px; - line-height: 1; - color: var(--text); - overflow: hidden; - text-overflow: ellipsis; - 
white-space: nowrap; - text-decoration: none; - flex-shrink: 0; - } - - .iframe-urlbar .url-text:hover { - text-decoration: underline; - } - - .iframe-urlbar .url-title { - font-size: 12px; - line-height: 1; + .iframe-header a { color: var(--text-muted); - overflow: hidden; - text-overflow: ellipsis; - white-space: nowrap; - text-align: right; - flex: 1; - min-width: 0; + font-size: 12px; + text-decoration: none; } - /* Mobile: title above address bar */ - @media (max-width: 640px) { - .iframe-toolbar { - flex-wrap: wrap; - } - .iframe-toolbar .url-title-mobile { - width: 100%; - font-size: 13px; - color: var(--text); - overflow: hidden; - text-overflow: ellipsis; - white-space: nowrap; - padding: 0 4px 4px; - order: -1; - } - .iframe-urlbar .url-title { - display: none; - } + .iframe-header a:hover { + color: var(--text); } .iframe-close { @@ -280,9 +219,8 @@ color: var(--text); font-size: 16px; cursor: pointer; - padding: 4px 8px; - border-radius: 50%; - line-height: 1; + padding: 2px 6px; + border-radius: 4px; } .iframe-close:hover { @@ -291,7 +229,7 @@ .iframe-viewer iframe { width: 100%; - height: calc(75vh - 56px); + height: calc(75vh - 36px); border: none; background: white; } diff --git a/frontend/site.js b/frontend/site.js index 7126c13..03d785d 100644 --- a/frontend/site.js +++ b/frontend/site.js @@ -14,26 +14,16 @@ const loadedBundles = new Set(); const container = document.getElementById("tab-container"); const loadingEl = document.getElementById("loading"); -// Detect browser and OS, set classes on body for tab styling +// Detect browser and set class on body for tab styling function detectBrowser() { const ua = navigator.userAgent; if (ua.includes("Firefox")) return "firefox"; - if (ua.includes("Edg/")) return "edge"; - if (ua.includes("Safari") && !ua.includes("Chrome") && !ua.includes("CriOS")) return "safari"; + if (ua.includes("Safari") && !ua.includes("Chrome")) return "safari"; return "chrome"; // Chrome, Edge, Opera, Brave, etc. 
} -function detectOS() { - const ua = navigator.userAgent; - if (ua.includes("Windows")) return "windows"; - if (ua.includes("Macintosh") || ua.includes("Mac OS")) return "mac"; - if (ua.includes("iPhone") || ua.includes("iPad")) return "mac"; - return "linux"; // Linux, Android, ChromeOS, unknown -} const browserName = detectBrowser(); -const osName = detectOS(); -document.body.classList.add(`browser-${browserName}`, `os-${osName}`); +document.body.classList.add(`browser-${browserName}`); const isFirefox = browserName === "firefox"; -console.log(`EveryTab: browser=${browserName}, os=${osName}`); // How many tabs fit in one row? function tabsPerRow() { @@ -276,53 +266,39 @@ function openInlineViewer(tabEl, entry, url) { const viewer = document.createElement("div"); viewer.className = "iframe-viewer"; - // Toolbar (address bar area) - const toolbar = document.createElement("div"); - toolbar.className = "iframe-toolbar"; - - // URL bar pill - const urlbar = document.createElement("div"); - urlbar.className = "iframe-urlbar"; + const header = document.createElement("div"); + header.className = "iframe-header"; if (entry.icon) { const icon = document.createElement("img"); icon.className = "tab-icon"; icon.src = `data:image/png;base64,${entry.icon}`; - urlbar.appendChild(icon); + header.appendChild(icon); } + const title = document.createElement("span"); + title.className = "tab-title"; + title.textContent = entry.title || entry.url; + header.appendChild(title); + const link = document.createElement("a"); - link.className = "url-text"; link.href = url; link.target = "_blank"; link.rel = "noopener"; - link.textContent = entry.url; - urlbar.appendChild(link); - - const urlTitle = document.createElement("span"); - urlTitle.className = "url-title"; - urlTitle.textContent = entry.title || ""; - urlbar.appendChild(urlTitle); - - toolbar.appendChild(urlbar); - - // Mobile: title shown above the URL bar (hidden on desktop via CSS) - const mobileTitle = 
document.createElement("span"); - mobileTitle.className = "url-title-mobile"; - mobileTitle.textContent = entry.title || ""; - toolbar.appendChild(mobileTitle); + link.textContent = entry.url + " ↗"; + header.appendChild(link); const close = document.createElement("button"); close.className = "iframe-close"; close.textContent = "✕"; close.addEventListener("click", closeInlineViewer); - toolbar.appendChild(close); + header.appendChild(close); const iframe = document.createElement("iframe"); iframe.sandbox = "allow-scripts allow-same-origin allow-forms"; iframe.src = url; - viewer.appendChild(toolbar); + viewer.appendChild(header); viewer.appendChild(iframe); // Insert after the row diff --git a/infra/db-setup.sh b/infra/db-setup.sh index e183480..954c6cf 100755 --- a/infra/db-setup.sh +++ b/infra/db-setup.sh @@ -38,7 +38,7 @@ fi # --- Install Postgres --- echo "--- Installing PostgreSQL 16 ---" if ! command -v pg_isready &>/dev/null; then - sudo dnf install -y postgresql16-server tmux htop + sudo dnf install -y postgresql16-server fi # --- Init database on NVMe --- diff --git a/infra/ec2-userdata.sh b/infra/ec2-userdata.sh index 594ccfa..078c29b 100755 --- a/infra/ec2-userdata.sh +++ b/infra/ec2-userdata.sh @@ -52,13 +52,6 @@ if ! command -v go &>/dev/null; then fi go version -# --- esbuild --- -echo "--- Installing esbuild ---" -if ! 
command -v esbuild &>/dev/null; then - GOBIN=/usr/local/bin /usr/local/go/bin/go install github.com/evanw/esbuild/cmd/esbuild@latest -fi -esbuild --version - # --- DuckDB --- echo "--- Installing DuckDB ---" DUCKDB_VERSION="1.5.2" diff --git a/infra/main.tf b/infra/main.tf index 5540b9c..47ea942 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -190,11 +190,6 @@ resource "aws_iam_role_policy" "s3_access" { "arn:aws:s3:::commoncrawl", "arn:aws:s3:::commoncrawl/*", ] - }, - { - Effect = "Allow" - Action = ["cloudfront:CreateInvalidation", "cloudfront:ListDistributions"] - Resource = "*" } ] }) @@ -283,28 +278,6 @@ resource "aws_cloudfront_distribution" "site" { origin_access_control_id = aws_cloudfront_origin_access_control.site.id } - # Bundles: 2-week cache (immutable between pipeline runs) - ordered_cache_behavior { - path_pattern = "tabs/*" - allowed_methods = ["GET", "HEAD"] - cached_methods = ["GET", "HEAD"] - target_origin_id = "s3-site" - viewer_protocol_policy = "redirect-to-https" - compress = true - - forwarded_values { - query_string = false - cookies { - forward = "none" - } - } - - min_ttl = 0 - default_ttl = 1209600 # 2 weeks - max_ttl = 31536000 # 1 year - } - - # Everything else (HTML, JS): 1-day cache default_cache_behavior { allowed_methods = ["GET", "HEAD"] cached_methods = ["GET", "HEAD"] @@ -319,6 +292,7 @@ resource "aws_cloudfront_distribution" "site" { } } + # One cache policy for all paths, bundles included: 1-day default TTL; origin Cache-Control may extend it up to max_ttl min_ttl = 0 default_ttl = 86400 # 1 day max_ttl = 31536000 # 1 year diff --git a/pipeline/06_frontend/deploy.sh b/pipeline/06_frontend/deploy.sh index 750db9a..ba4641c 100755 --- a/pipeline/06_frontend/deploy.sh +++ b/pipeline/06_frontend/deploy.sh @@ -60,25 +60,16 @@ echo "Total bundles: $TOTAL_BUNDLES" echo "S3 bucket: $SITE_BUCKET" echo "" -# Build into temp directory +# Inject TOTAL_BUNDLES into index.html (in a temp copy) TMPDIR=$(mktemp -d) cp "$FRONTEND_DIR/index.html" "$TMPDIR/index.html" cp "$FRONTEND_DIR/site.js" 
"$TMPDIR/site.js" cp "$FRONTEND_DIR/bot.html" "$TMPDIR/bot.html" cp "$FRONTEND_DIR/about.html" "$TMPDIR/about.html" -# Inject TOTAL_BUNDLES sed -i "s/const TOTAL_BUNDLES = [0-9]*/const TOTAL_BUNDLES = ${TOTAL_BUNDLES}/" "$TMPDIR/index.html" echo "Injected TOTAL_BUNDLES = $TOTAL_BUNDLES" -# Minify JS (strip comments + whitespace, keep variable names) -if command -v esbuild &>/dev/null; then - esbuild "$TMPDIR/site.js" --minify --outfile="$TMPDIR/site.js" --allow-overwrite - echo "Minified site.js" -else - echo "Warning: esbuild not found, deploying unminified JS" -fi - # Upload echo "Uploading to s3://$SITE_BUCKET/..." aws s3 cp "$TMPDIR/index.html" "s3://$SITE_BUCKET/" --content-type "text/html" @@ -89,23 +80,6 @@ echo "Uploaded 4 files" rm -rf "$TMPDIR" -# Clean up stale bundles from previous runs -# Read old TOTAL_BUNDLES from the live index.html before we overwrite it -OLD_TOTAL=$(aws s3 cp "s3://$SITE_BUCKET/index.html" - 2>/dev/null \ - | grep -oP 'TOTAL_BUNDLES = \K[0-9]+' || echo "0") - -if [ "$OLD_TOTAL" -gt "$TOTAL_BUNDLES" ]; then - STALE_COUNT=$((OLD_TOTAL - TOTAL_BUNDLES)) - echo "Deleting $STALE_COUNT stale bundles ($TOTAL_BUNDLES to $((OLD_TOTAL - 1)))..." - for i in $(seq "$TOTAL_BUNDLES" "$((OLD_TOTAL - 1))"); do - PADDED=$(printf "%04d" "$i") - aws s3 rm "s3://$SITE_BUCKET/tabs/${PADDED}.json" --quiet - done - echo "Deleted $STALE_COUNT stale bundles" -else - echo "No stale bundles to clean (old=$OLD_TOTAL, new=$TOTAL_BUNDLES)" -fi - # Invalidate CloudFront if ! 
$SKIP_INVALIDATION; then if [ -z "$DIST_ID" ]; then diff --git a/pipeline/README.md b/pipeline/README.md index 60e7647..fa0e43a 100644 --- a/pipeline/README.md +++ b/pipeline/README.md @@ -11,9 +11,10 @@ Between stages, run the sanity checks to confirm data looks right before proceed export DATABASE_URL='postgres://everytab@:5432/everytab' # Go binaries built on EC2 -go build -o ~/warc_parse ./everytab/pipeline/02_warc_parse/ -go build -o ~/icon_download ./everytab/pipeline/03_icon_download/ -go build -o ~/bundle_gen ./everytab/pipeline/05_bundle_gen/ +cd ~/everytab +go build -o ~/warc_parse ./pipeline/02_warc_parse/ +go build -o ~/icon_download ./pipeline/03_icon_download/ +go build -o ~/bundle_gen ./pipeline/05_bundle_gen/ ``` ## Stage 1: CC-Index Query @@ -21,7 +22,7 @@ go build -o ~/bundle_gen ./everytab/pipeline/05_bundle_gen/ Populates the `hosts` table from Common Crawl's columnar index. ```bash -./everytab/pipeline/01_cc_index/query.sh --db-url "$DATABASE_URL" --limit 100000 +./pipeline/01_cc_index/query.sh --db-url "$DATABASE_URL" --limit 100000 # Full run: --limit 0 ``` @@ -46,7 +47,7 @@ GOMEMLIMIT=12GiB ./icon_download --db "$DATABASE_URL" --log-file icon_download.l Picks the best icon per host for display. ```bash -psql $DATABASE_URL -f ./everytab/pipeline/04_best_icon/select.sql +psql $DATABASE_URL -f pipeline/04_best_icon/select.sql ``` ## Stage 5: Bundle Generation @@ -61,20 +62,12 @@ Note the `TOTAL_BUNDLES` number from the summary — this gets baked into the fr ## Stage 6: Frontend Deploy -From EC2, after bundle gen completes: +From your local machine: ```bash -TOTAL_BUNDLES=$(jq -r '.bundles_created' stats/05_bundle_gen.json) -./everytab/pipeline/06_frontend/deploy.sh --total-bundles "$TOTAL_BUNDLES" +./pipeline/06_frontend/deploy.sh --total-bundles <TOTAL_BUNDLES> ``` -The deploy script: -1. Injects TOTAL_BUNDLES into index.html -2. Minifies site.js (via esbuild, strips comments + whitespace) -3. Uploads frontend files to S3 -4. 
Deletes stale bundles from previous runs (numbers ≥ TOTAL_BUNDLES) -5. Invalidates CloudFront cache - ## Stage 7: Backup to Homelab After the site is deployed and verified, backup data before tearing down scanning infra. @@ -83,7 +76,7 @@ After the site is deployed and verified, backup data before tearing down scannin | Data | Location on EC2 | Size estimate (30M) | Purpose | |------|----------------|---------------------|---------| -| Database | pg_dump from i3 instance | ~5-10GB compressed | Full hosts + icons metadata, titles, WARC coordinates | +| Database | RDS (pg_dump) | ~5-10GB compressed | Full hosts + icons metadata, titles, WARC coordinates | | Icons | `~/icons/` directory | ~500GB-1TB | Complete favicon archive, content-addressed by SHA-256 | | Stats | `~/stats/*.json` | <1MB | Pipeline timing and counts per stage | | Logs | `~/*.log` | varies | Error logs for debugging |