-- Best Icon Selection
-- Picks the best completed icon for each host and stores its s3_key in
-- hosts.best_icon_s3_key.
--
-- Ordering (first criterion that differs wins):
--   1. Size tier:
--        0. Standard square sizes (64, 48, 32, 16) — ideal for tab display
--        1. Other square sizes <= 64
--        2. Non-square sizes <= 64 on both axes
--        3. Everything else: anything larger (downloaded because rel_sizes was
--           undeclared) and icons whose width/height is NULL
--   2. Larger width first within a tier (hence 64 > 48 > 32 > 16 in tier 0);
--      a NULL width counts as 0.
--   3. Format: PNG/GIF/ICO, then WebP, then any other content type.
--      SVGs are excluded — not supported in bundle generation.
--   4. Smaller file size (less bandwidth in bundles).
--   5. s3_key, purely so the pick is deterministic across runs.
--
-- Hosts without any eligible icon are not touched: an existing
-- best_icon_s3_key is left as-is rather than reset to NULL.
--
-- Usage: psql $DATABASE_URL -f pipeline/04_best_icon/select.sql

UPDATE hosts AS h
SET best_icon_s3_key = best.s3_key
FROM (
    -- One row per host: DISTINCT ON keeps the first row of each host_id group
    -- according to the ORDER BY below.
    SELECT DISTINCT ON (i.host_id)
        i.host_id,
        i.s3_key
    FROM icons AS i
    WHERE i.scan_state = 'completed'
      AND i.s3_key IS NOT NULL
      -- NOTE(review): rows with a NULL content_type are also filtered out here
      -- (NULL NOT IN (...) is not true) — confirm that is intended.
      AND i.content_type NOT IN ('image/svg+xml')
    ORDER BY
        i.host_id,
        -- Size tier; lower is better.
        CASE
            WHEN i.width = i.height AND i.width IN (64, 48, 32, 16) THEN 0
            WHEN i.width = i.height AND i.width <= 64 THEN 1
            WHEN i.width IS NOT NULL AND i.width <= 64
                 AND i.height IS NOT NULL AND i.height <= 64 THEN 2
            ELSE 3
        END,
        -- NOTE(review): this also prefers the largest icon inside tier 3
        -- (oversized) — confirm that is wanted given the bandwidth goal.
        COALESCE(i.width, 0) DESC,
        -- Format preference; lower is better.
        CASE
            WHEN i.content_type IN (
                'image/png',
                'image/gif',
                'image/x-icon',
                'image/vnd.microsoft.icon'
            ) THEN 0
            WHEN i.content_type = 'image/webp' THEN 1
            ELSE 2
        END,
        -- NULLS LAST is PostgreSQL's default for ASC; spelled out for clarity.
        i.file_size ASC NULLS LAST,
        -- Final tie-breaker: without it, icons equal on every criterion above
        -- would be picked arbitrarily and could change between runs.
        i.s3_key ASC
) AS best
WHERE h.id = best.host_id
  -- Skip hosts whose stored key is already the chosen one, so re-running the
  -- script does not rewrite unchanged rows. The "UPDATE n" count printed by
  -- psql therefore reports only hosts whose key actually changed.
  AND h.best_icon_s3_key IS DISTINCT FROM best.s3_key;

-- Stats
\echo '--- Best Icon Selection Stats ---'
SELECT
    COUNT(*) FILTER (WHERE best_icon_s3_key IS NOT NULL) AS hosts_with_icon,
    COUNT(*) FILTER (WHERE best_icon_s3_key IS NULL) AS hosts_without_icon,
    COUNT(*) FILTER (WHERE html_title IS NOT NULL) AS hosts_with_title,
    COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL) AS title_but_no_icon
FROM hosts
WHERE parsed = TRUE;