-- Best Icon Selection
-- Picks the best completed icon for each host and stores its s3_key in
-- hosts.best_icon_s3_key.
--
-- Target: 32x32 source icon (displayed at 16x16 CSS, crisp on 2x Retina).
--
-- Priority:
--   1. Icons >=32px: prefer smallest first (closest to 32 -- a 32x32 beats a
--      48x48 beats a 180x180).
--   2. Icons <32px: prefer largest first (16x16 beats 8x8).
--   3. Icons with an unknown width or height come after all measured icons.
--   4. Within same size: prefer PNG > ICO > GIF/JPEG/BMP > WebP > anything else.
--   5. Tiebreaker: smaller file size (unknown size last), then s3_key so the
--      choice is stable from run to run.
--
-- For non-square icons the tier (>=32 vs <32) is decided by the smaller side
-- and the ordering inside a tier by the larger side.
--
-- SVGs excluded (not supported in bundle generation).
-- Icons with a known side of 2px or less excluded (tracking pixels).
--
-- Usage: psql $DATABASE_URL -f pipeline/04_best_icon/select.sql

WITH candidates AS (
    -- Eligible icons, with the sort keys computed once per row.
    SELECT
        i.host_id,
        i.s3_key,
        i.file_size,
        -- A NULL side is mapped to 0, so min_side = 0 means "at least one
        -- dimension unknown" (known sides are always > 2 due to the filter).
        LEAST(COALESCE(i.width, 0), COALESCE(i.height, 0)) AS min_side,
        GREATEST(COALESCE(i.width, 0), COALESCE(i.height, 0)) AS max_side,
        -- Format preference (lower is better).
        CASE
            WHEN i.content_type = 'image/png' THEN 0
            WHEN i.content_type IN ('image/x-icon', 'image/vnd.microsoft.icon') THEN 1
            WHEN i.content_type IN ('image/gif', 'image/jpeg', 'image/bmp') THEN 2
            WHEN i.content_type = 'image/webp' THEN 3
            ELSE 4
        END AS format_rank
    FROM icons AS i
    WHERE i.scan_state = 'completed'
      AND i.s3_key IS NOT NULL
      -- NOTE: this comparison yields NULL for a NULL content_type, so icons
      -- without a content type are filtered out here as well.
      AND i.content_type != 'image/svg+xml'
      AND (i.width IS NULL OR i.width > 2)
      AND (i.height IS NULL OR i.height > 2)
),

best AS (
    -- One row per host: the first candidate under the priority ordering.
    SELECT DISTINCT ON (c.host_id)
        c.host_id,
        c.s3_key
    FROM candidates AS c
    ORDER BY
        c.host_id,
        -- Tier: >=32 preferred over <32. Any unknown dimension goes last,
        -- regardless of which side is missing.
        CASE
            WHEN c.min_side >= 32 THEN 0
            WHEN c.min_side > 0 THEN 1
            ELSE 2
        END,
        -- Within >=32: smallest first (closest to 32). Otherwise: largest first.
        CASE
            WHEN c.min_side >= 32 THEN c.max_side
            ELSE -c.max_side
        END,
        c.format_rank,
        c.file_size ASC NULLS LAST,
        -- Final tiebreaker: without it DISTINCT ON may pick a different row
        -- on each run when all preceding keys are equal.
        c.s3_key ASC
)

UPDATE hosts AS h
SET best_icon_s3_key = best.s3_key
FROM best
WHERE h.id = best.host_id
  -- Skip hosts whose stored key is already the winner, so reruns do not
  -- rewrite unchanged rows.
  AND h.best_icon_s3_key IS DISTINCT FROM best.s3_key;
-- NOTE(review): hosts that have no qualifying icon are not touched by the
-- statement above, so a previously stored best_icon_s3_key is never reset to
-- NULL here. Confirm whether that is intended.

-- Stats (human-readable)
\echo '--- Best Icon Selection Stats ---'
SELECT
    COUNT(*) FILTER (WHERE best_icon_s3_key IS NOT NULL) AS hosts_with_icon,
    COUNT(*) FILTER (WHERE best_icon_s3_key IS NULL) AS hosts_without_icon,
    COUNT(*) FILTER (WHERE html_title IS NOT NULL) AS hosts_with_title,
    COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL) AS title_but_no_icon
FROM hosts
WHERE parsed = TRUE;

-- Stats JSON
-- Written to stats/04_best_icon.json relative to the directory psql runs in.
\! mkdir -p stats
-- Tuples-only + unaligned output so the file contains just the JSON value.
-- The format is set explicitly instead of toggled with \a, so the result does
-- not depend on the session's prior setting.
\t on
\pset format unaligned
\o stats/04_best_icon.json
SELECT json_build_object(
    'hosts_with_icon', COUNT(*) FILTER (WHERE best_icon_s3_key IS NOT NULL),
    'hosts_without_icon', COUNT(*) FILTER (WHERE best_icon_s3_key IS NULL),
    'hosts_with_title', COUNT(*) FILTER (WHERE html_title IS NOT NULL),
    'hosts_no_title', COUNT(*) FILTER (WHERE html_title IS NULL),
    'title_but_no_icon', COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL)
)
FROM hosts
WHERE parsed = TRUE;
-- Restore normal output: back to stdout, headers on, aligned format.
\o
\t off
\pset format aligned
\echo 'Stats written to stats/04_best_icon.json'