Compare commits

..

No commits in common. "59d4d2c9e010888cb9b57f8bcfeffbb974f2cdbe" and "7c4572aafb9c2634b08bd10c55636176d9e70a76" have entirely different histories.

7 changed files with 77 additions and 229 deletions

View file

@ -57,7 +57,6 @@
image-rendering: auto;
}
/* ── Base tab styles ── */
.tab {
display: inline-flex;
align-items: center;
@ -69,71 +68,68 @@
min-width: 100px;
height: 36px;
transition: background 0.15s;
/* Default: Firefox inactive tab style */
border-radius: 4px;
border: none;
margin: 0 1px;
background: transparent;
}
/* ── Linux Firefox (default) ── */
/* Inactive: transparent, small margin, rounded */
.tab {
border-radius: 6px;
margin: 0 1px;
}
/* Hover: pill-shaped background */
.tab:hover {
background: var(--tab-hover);
border-radius: 8px;
border-radius: 4px;
}
/* Selected: white bg, rounded top, connects to viewer below */
.tab-active,
.tab-active:hover {
/* Firefox selected tab — solid bg, rounded top, connects to content below */
.browser-firefox .tab-active,
.browser-firefox .tab-active:hover {
background: var(--tab-bg);
border-radius: 8px 8px 0 0;
box-shadow: 0 1px 0 0 var(--tab-bg);
}
/* ── Linux Chrome ── */
/* Inactive: always inset dimensions, separator via ::before */
/* Chrome / Chromium — inactive tabs: no bg, thin separators between */
.browser-chrome .tab {
border-radius: 8px;
margin: 3px 2px;
padding: 3px 12px;
height: 30px;
position: relative;
border-radius: 0;
margin: 0;
padding: 6px 14px;
border-right: 1px solid var(--tab-border);
}
.browser-chrome .tab::before {
content: "";
position: absolute;
left: -2px;
top: 25%;
height: 50%;
width: 1px;
background: var(--tab-border);
}
/* Hide separator on hover/active tab AND on the tab after hover/active */
.browser-chrome .tab:hover::before,
.browser-chrome .tab-active::before,
.browser-chrome .tab:hover + .tab::before,
.browser-chrome .tab-active + .tab::before {
background: transparent;
}
/* Hover: just add background (no layout change) */
.browser-chrome .tab:hover {
background: var(--tab-hover);
border-radius: 8px 8px 0 0;
border-right: 1px solid transparent;
}
/* Selected: white bg, subtle shadow */
/* Chrome selected tab — raised, rounded top, solid bg */
.browser-chrome .tab-active,
.browser-chrome .tab-active:hover {
background: var(--tab-bg);
border-radius: 10px 10px 0 0;
border-right: 1px solid transparent;
box-shadow: 0 -1px 4px rgba(0, 0, 0, 0.1);
}
/* Safari — flat pill, very minimal */
.browser-safari .tab {
border-radius: 6px;
margin: 0 2px;
height: 32px;
padding: 4px 10px;
}
.browser-safari .tab:hover {
background: var(--tab-hover);
}
/* Safari selected tab — solid pill, slightly elevated */
.browser-safari .tab-active,
.browser-safari .tab-active:hover {
background: var(--tab-bg);
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12);
}
.tab-about .tab-title {
font-weight: 600;
}
@ -185,93 +181,36 @@
100% { transform: translateX(0); }
}
/* ── Inline iframe viewer ── */
/* Inline iframe viewer */
.iframe-viewer {
width: 100%;
height: 75vh;
background: var(--viewer-bg);
border-top: 2px solid var(--tab-border);
border-bottom: 2px solid var(--tab-border);
}
/* Toolbar area (address bar) */
.iframe-toolbar {
.iframe-header {
display: flex;
align-items: center;
gap: 8px;
padding: 8px 12px;
padding: 6px 12px;
background: var(--tab-bg);
}
/* URL bar — rounded pill, looks like a browser address bar */
.iframe-urlbar {
.iframe-header .tab-title {
flex: 1;
display: flex;
align-items: center;
gap: 8px;
padding: 10px 16px;
background: var(--bg);
border-radius: 24px;
min-width: 0;
font-size: 13px;
}
/* Firefox: more rectangular address bar */
.browser-firefox .iframe-urlbar {
border-radius: 8px;
}
.iframe-urlbar .tab-icon {
flex-shrink: 0;
}
.url-title-mobile {
display: none;
}
.iframe-urlbar .url-text {
font-size: 14px;
line-height: 1;
color: var(--text);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
text-decoration: none;
flex-shrink: 0;
}
.iframe-urlbar .url-text:hover {
text-decoration: underline;
}
.iframe-urlbar .url-title {
font-size: 12px;
line-height: 1;
.iframe-header a {
color: var(--text-muted);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
text-align: right;
flex: 1;
min-width: 0;
font-size: 12px;
text-decoration: none;
}
/* Mobile: title above address bar */
@media (max-width: 640px) {
.iframe-toolbar {
flex-wrap: wrap;
}
.iframe-toolbar .url-title-mobile {
width: 100%;
font-size: 13px;
color: var(--text);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
padding: 0 4px 4px;
order: -1;
}
.iframe-urlbar .url-title {
display: none;
}
.iframe-header a:hover {
color: var(--text);
}
.iframe-close {
@ -280,9 +219,8 @@
color: var(--text);
font-size: 16px;
cursor: pointer;
padding: 4px 8px;
border-radius: 50%;
line-height: 1;
padding: 2px 6px;
border-radius: 4px;
}
.iframe-close:hover {
@ -291,7 +229,7 @@
.iframe-viewer iframe {
width: 100%;
height: calc(75vh - 56px);
height: calc(75vh - 36px);
border: none;
background: white;
}

View file

@ -14,26 +14,16 @@ const loadedBundles = new Set();
const container = document.getElementById("tab-container");
const loadingEl = document.getElementById("loading");
// Detect browser and OS, set classes on body for tab styling
// Detect browser and set class on body for tab styling
// Classify the current browser from navigator.userAgent.
// Returns "firefox", "edge", "safari" or "chrome"; the checks run top to bottom and the first match wins.
function detectBrowser() {
const ua = navigator.userAgent;
if (ua.includes("Firefox")) return "firefox";
// "Edg/" is tested before the Safari checks and the "chrome" fallback below.
if (ua.includes("Edg/")) return "edge";
// NOTE(review): the next two checks differ only in the "CriOS" exclusion. Because the second
// one runs whenever the first fails, a UA containing "Safari" and "CriOS" but not "Chrome"
// still returns "safari" — confirm which of the two lines is meant to stay.
if (ua.includes("Safari") && !ua.includes("Chrome") && !ua.includes("CriOS")) return "safari";
if (ua.includes("Safari") && !ua.includes("Chrome")) return "safari";
return "chrome"; // Chrome, Edge, Opera, Brave, etc.
}
// Map navigator.userAgent to a coarse OS family: "windows", "mac" or "linux".
// "Windows" is tested first; "linux" is the fallback when nothing else matches.
function detectOS() {
  const agent = navigator.userAgent;
  const has = (token) => agent.includes(token);

  if (has("Windows")) {
    return "windows";
  }

  // iPhone and iPad are reported under the same "mac" family as desktop macOS.
  const appleTokens = ["Macintosh", "Mac OS", "iPhone", "iPad"];
  if (appleTokens.some((token) => has(token))) {
    return "mac";
  }

  return "linux"; // Linux, Android, ChromeOS, unknown
}
const browserName = detectBrowser();
const osName = detectOS();
document.body.classList.add(`browser-${browserName}`, `os-${osName}`);
document.body.classList.add(`browser-${browserName}`);
const isFirefox = browserName === "firefox";
console.log(`EveryTab: browser=${browserName}, os=${osName}`);
// How many tabs fit in one row?
function tabsPerRow() {
@ -276,53 +266,39 @@ function openInlineViewer(tabEl, entry, url) {
const viewer = document.createElement("div");
viewer.className = "iframe-viewer";
// Toolbar (address bar area)
const toolbar = document.createElement("div");
toolbar.className = "iframe-toolbar";
// URL bar pill
const urlbar = document.createElement("div");
urlbar.className = "iframe-urlbar";
const header = document.createElement("div");
header.className = "iframe-header";
if (entry.icon) {
const icon = document.createElement("img");
icon.className = "tab-icon";
icon.src = `data:image/png;base64,${entry.icon}`;
urlbar.appendChild(icon);
header.appendChild(icon);
}
const title = document.createElement("span");
title.className = "tab-title";
title.textContent = entry.title || entry.url;
header.appendChild(title);
const link = document.createElement("a");
link.className = "url-text";
link.href = url;
link.target = "_blank";
link.rel = "noopener";
link.textContent = entry.url;
urlbar.appendChild(link);
const urlTitle = document.createElement("span");
urlTitle.className = "url-title";
urlTitle.textContent = entry.title || "";
urlbar.appendChild(urlTitle);
toolbar.appendChild(urlbar);
// Mobile: title shown above the URL bar (hidden on desktop via CSS)
const mobileTitle = document.createElement("span");
mobileTitle.className = "url-title-mobile";
mobileTitle.textContent = entry.title || "";
toolbar.appendChild(mobileTitle);
link.textContent = entry.url + " ↗";
header.appendChild(link);
const close = document.createElement("button");
close.className = "iframe-close";
close.textContent = "✕";
close.addEventListener("click", closeInlineViewer);
toolbar.appendChild(close);
header.appendChild(close);
const iframe = document.createElement("iframe");
iframe.sandbox = "allow-scripts allow-same-origin allow-forms";
iframe.src = url;
viewer.appendChild(toolbar);
viewer.appendChild(header);
viewer.appendChild(iframe);
// Insert after the row

View file

@ -38,7 +38,7 @@ fi
# --- Install Postgres ---
echo "--- Installing PostgreSQL 16 ---"
if ! command -v pg_isready &>/dev/null; then
sudo dnf install -y postgresql16-server tmux htop
sudo dnf install -y postgresql16-server
fi
# --- Init database on NVMe ---

View file

@ -52,13 +52,6 @@ if ! command -v go &>/dev/null; then
fi
go version
# --- esbuild ---
echo "--- Installing esbuild ---"
if ! command -v esbuild &>/dev/null; then
GOBIN=/usr/local/bin /usr/local/go/bin/go install github.com/evanw/esbuild/cmd/esbuild@latest
fi
esbuild --version
# --- DuckDB ---
echo "--- Installing DuckDB ---"
DUCKDB_VERSION="1.5.2"

View file

@ -190,11 +190,6 @@ resource "aws_iam_role_policy" "s3_access" {
"arn:aws:s3:::commoncrawl",
"arn:aws:s3:::commoncrawl/*",
]
},
{
Effect = "Allow"
Action = ["cloudfront:CreateInvalidation", "cloudfront:ListDistributions"]
Resource = "*"
}
]
})
@ -283,28 +278,6 @@ resource "aws_cloudfront_distribution" "site" {
origin_access_control_id = aws_cloudfront_origin_access_control.site.id
}
# Bundles: 2-week cache (immutable between pipeline runs)
ordered_cache_behavior {
path_pattern = "tabs/*"
allowed_methods = ["GET", "HEAD"]
cached_methods = ["GET", "HEAD"]
target_origin_id = "s3-site"
viewer_protocol_policy = "redirect-to-https"
compress = true
forwarded_values {
query_string = false
cookies {
forward = "none"
}
}
min_ttl = 0
default_ttl = 1209600 # 2 weeks
max_ttl = 31536000 # 1 year
}
# Everything else (HTML, JS): 1-day cache
default_cache_behavior {
allowed_methods = ["GET", "HEAD"]
cached_methods = ["GET", "HEAD"]
@ -319,6 +292,7 @@ resource "aws_cloudfront_distribution" "site" {
}
}
# Long cache for bundles, short for HTML/JS during development
min_ttl = 0
default_ttl = 86400 # 1 day
max_ttl = 31536000 # 1 year

View file

@ -60,25 +60,16 @@ echo "Total bundles: $TOTAL_BUNDLES"
echo "S3 bucket: $SITE_BUCKET"
echo ""
# Build into temp directory
# Inject TOTAL_BUNDLES into index.html (in a temp copy)
TMPDIR=$(mktemp -d)
cp "$FRONTEND_DIR/index.html" "$TMPDIR/index.html"
cp "$FRONTEND_DIR/site.js" "$TMPDIR/site.js"
cp "$FRONTEND_DIR/bot.html" "$TMPDIR/bot.html"
cp "$FRONTEND_DIR/about.html" "$TMPDIR/about.html"
# Inject TOTAL_BUNDLES
sed -i "s/const TOTAL_BUNDLES = [0-9]*/const TOTAL_BUNDLES = ${TOTAL_BUNDLES}/" "$TMPDIR/index.html"
echo "Injected TOTAL_BUNDLES = $TOTAL_BUNDLES"
# Minify JS (strip comments + whitespace, keep variable names)
if command -v esbuild &>/dev/null; then
esbuild "$TMPDIR/site.js" --minify --outfile="$TMPDIR/site.js" --allow-overwrite
echo "Minified site.js"
else
echo "Warning: esbuild not found, deploying unminified JS"
fi
# Upload
echo "Uploading to s3://$SITE_BUCKET/..."
aws s3 cp "$TMPDIR/index.html" "s3://$SITE_BUCKET/" --content-type "text/html"
@ -89,23 +80,6 @@ echo "Uploaded 4 files"
rm -rf "$TMPDIR"
# Clean up stale bundles from previous runs
# Read old TOTAL_BUNDLES from the live index.html before we overwrite it
OLD_TOTAL=$(aws s3 cp "s3://$SITE_BUCKET/index.html" - 2>/dev/null \
| grep -oP 'TOTAL_BUNDLES = \K[0-9]+' || echo "0")
if [ "$OLD_TOTAL" -gt "$TOTAL_BUNDLES" ]; then
STALE_COUNT=$((OLD_TOTAL - TOTAL_BUNDLES))
echo "Deleting $STALE_COUNT stale bundles ($TOTAL_BUNDLES to $((OLD_TOTAL - 1)))..."
for i in $(seq "$TOTAL_BUNDLES" "$((OLD_TOTAL - 1))"); do
PADDED=$(printf "%04d" "$i")
aws s3 rm "s3://$SITE_BUCKET/tabs/${PADDED}.json" --quiet
done
echo "Deleted $STALE_COUNT stale bundles"
else
echo "No stale bundles to clean (old=$OLD_TOTAL, new=$TOTAL_BUNDLES)"
fi
# Invalidate CloudFront
if ! $SKIP_INVALIDATION; then
if [ -z "$DIST_ID" ]; then

View file

@ -11,9 +11,10 @@ Between stages, run the sanity checks to confirm data looks right before proceed
export DATABASE_URL='postgres://everytab@<i3-private-ip>:5432/everytab'
# Go binaries built on EC2
go build -o ~/warc_parse ./everytab/pipeline/02_warc_parse/
go build -o ~/icon_download ./everytab/pipeline/03_icon_download/
go build -o ~/bundle_gen ./everytab/pipeline/05_bundle_gen/
cd ~/everytab
go build -o ~/warc_parse ./pipeline/02_warc_parse/
go build -o ~/icon_download ./pipeline/03_icon_download/
go build -o ~/bundle_gen ./pipeline/05_bundle_gen/
```
## Stage 1: CC-Index Query
@ -21,7 +22,7 @@ go build -o ~/bundle_gen ./everytab/pipeline/05_bundle_gen/
Populates the `hosts` table from Common Crawl's columnar index.
```bash
./everytab/pipeline/01_cc_index/query.sh --db-url "$DATABASE_URL" --limit 100000
./pipeline/01_cc_index/query.sh --db-url "$DATABASE_URL" --limit 100000
# Full run: --limit 0
```
@ -46,7 +47,7 @@ GOMEMLIMIT=12GiB ./icon_download --db "$DATABASE_URL" --log-file icon_download.l
Picks the best icon per host for display.
```bash
psql $DATABASE_URL -f ./everytab/pipeline/04_best_icon/select.sql
psql $DATABASE_URL -f pipeline/04_best_icon/select.sql
```
## Stage 5: Bundle Generation
@ -61,20 +62,12 @@ Note the `TOTAL_BUNDLES` number from the summary — this gets baked into the fr
## Stage 6: Frontend Deploy
From EC2, after bundle gen completes:
From your local machine:
```bash
TOTAL_BUNDLES=$(jq -r '.bundles_created' stats/05_bundle_gen.json)
./everytab/pipeline/06_frontend/deploy.sh --total-bundles "$TOTAL_BUNDLES"
./pipeline/06_frontend/deploy.sh --total-bundles <NUMBER>
```
The deploy script:
1. Injects TOTAL_BUNDLES into index.html
2. Minifies site.js (via esbuild, strips comments + whitespace)
3. Uploads frontend files to S3
4. Deletes stale bundles from previous runs (numbers ≥ TOTAL_BUNDLES)
5. Invalidates CloudFront cache
## Stage 7: Backup to Homelab
After the site is deployed and verified, back up data before tearing down scanning infra.
@ -83,7 +76,7 @@ After the site is deployed and verified, backup data before tearing down scannin
| Data | Location on EC2 | Size estimate (30M) | Purpose |
|------|----------------|---------------------|---------|
| Database | pg_dump from i3 instance | ~5-10GB compressed | Full hosts + icons metadata, titles, WARC coordinates |
| Database | RDS (pg_dump) | ~5-10GB compressed | Full hosts + icons metadata, titles, WARC coordinates |
| Icons | `~/icons/` directory | ~500GB-1TB | Complete favicon archive, content-addressed by SHA-256 |
| Stats | `~/stats/*.json` | <1MB | Pipeline timing and counts per stage |
| Logs | `~/*.log` | varies | Error logs for debugging |