88 lines
2.9 KiB
SQL
88 lines
2.9 KiB
SQL
-- Best Icon Selection
-- Picks the best completed icon for each host and stores its icon_hash in hosts.best_icon_hash.
--
-- Target: 32x32 source icon (displayed at 16x16 CSS, crisp on 2x Retina).
--
-- Priority:
--   1. Icons ≥32px: prefer smallest first (closest to 32 — a 32x32 beats a 48x48 beats a 180x180)
--   2. Icons <32px: prefer largest first (16x16 beats 8x8)
--   3. Within the same size: prefer PNG > ICO > GIF/JPEG/BMP > WebP
--   4. Tiebreaker: smaller file size
-- SVGs are excluded (not supported in bundle generation).
-- Icons with a known width or height of 2px or less are excluded (tracking pixels).
--
-- Two-step: SELECT into a temp table (index-only scan, no heap), then a single bulk UPDATE.
-- Requires the idx_icons_best covering index on (host_id) INCLUDE (icon_hash, content_type, width, height, file_size).
--
-- Usage: psql $DATABASE_URL -f pipeline/04_best_icon/select.sql
|
|
|
|
-- Give the per-host sort below enough memory for this session.
SET work_mem = '2GB';

\echo 'Step 1: Finding best icon per host...'
\timing on

-- One row per host. DISTINCT ON (host_id) keeps the first row of each
-- host_id group under the ORDER BY below, i.e. the best-ranked icon.
CREATE TEMP TABLE best_icons AS
SELECT DISTINCT ON (host_id)
    host_id,
    icon_hash
FROM icons
WHERE scan_state = 'completed'
  AND icon_hash IS NOT NULL
  -- SVGs are skipped. A NULL content_type also fails this comparison
  -- (NULL != '...' is not true), so such rows are skipped as well.
  AND content_type != 'image/svg+xml'
  -- Drop icons whose known width or height is 2px or less; rows with
  -- unknown (NULL) dimensions pass this filter.
  AND (width IS NULL OR width > 2)
  AND (height IS NULL OR height > 2)
ORDER BY
    host_id,
    -- Tier: 0 = both dimensions >= 32, 1 = smaller with a known width,
    -- 2 = everything else.
    -- NOTE(review): only width is tested for tier 1, so a row with NULL
    -- width but a known height lands in tier 2 — confirm this is intended.
    CASE
        WHEN LEAST(COALESCE(width, 0), COALESCE(height, 0)) >= 32 THEN 0
        WHEN COALESCE(width, 0) > 0 THEN 1
        ELSE 2
    END,
    -- Size key: ascending on the longer side for tier 0 (smallest first),
    -- negated otherwise so that the largest icon sorts first.
    CASE
        WHEN LEAST(COALESCE(width, 0), COALESCE(height, 0)) >= 32
            THEN GREATEST(COALESCE(width, 0), COALESCE(height, 0))
        ELSE -GREATEST(COALESCE(width, 0), COALESCE(height, 0))
    END,
    -- Format preference: PNG, then ICO, then GIF/JPEG/BMP, then WebP,
    -- then any other content type.
    CASE
        WHEN content_type = 'image/png' THEN 0
        WHEN content_type IN ('image/x-icon', 'image/vnd.microsoft.icon') THEN 1
        WHEN content_type IN ('image/gif', 'image/jpeg', 'image/bmp') THEN 2
        WHEN content_type = 'image/webp' THEN 3
        ELSE 4
    END,
    -- Smaller files first; rows with an unknown size go last (this is the
    -- default for ASC, spelled out here for clarity).
    file_size ASC NULLS LAST,
    -- Final tiebreaker: without it, rows equal on every key above are
    -- picked arbitrarily and the result can differ between runs.
    icon_hash ASC;
|
|
|
|
\echo 'Step 2: Updating hosts...'

-- Autovacuum cannot analyze temporary tables, so collect statistics by hand;
-- otherwise the join below is planned with default row estimates.
ANALYZE best_icons;

-- Copy the chosen hash onto each host.
-- Rows that already hold the chosen hash are skipped, so a re-run does not
-- rewrite the whole table; IS DISTINCT FROM also matches hosts whose
-- best_icon_hash is still NULL.
-- NOTE(review): hosts with no row in best_icons are not touched, so a
-- previously stored best_icon_hash is kept even if the host no longer has a
-- qualifying icon — confirm this is intended.
UPDATE hosts AS h
SET best_icon_hash = b.icon_hash
FROM best_icons AS b
WHERE h.id = b.host_id
  AND h.best_icon_hash IS DISTINCT FROM b.icon_hash;

\timing off

DROP TABLE best_icons;
|
|
|
|
-- Summary counters over parsed hosts, printed as a regular psql result table.
\echo '--- Best Icon Selection Stats ---'

-- COUNT(col) counts only rows where col is non-NULL, so it stands in for
-- COUNT(*) FILTER (WHERE col IS NOT NULL).
SELECT
    COUNT(best_icon_hash)                                   AS hosts_with_icon,
    COUNT(*) - COUNT(best_icon_hash)                        AS hosts_without_icon,
    COUNT(html_title)                                       AS hosts_with_title,
    COUNT(html_title) FILTER (WHERE best_icon_hash IS NULL) AS title_but_no_icon
FROM hosts
WHERE parsed = TRUE;
|
|
|
|
-- Stats JSON: write the counters for parsed hosts as a single JSON object
-- to stats/04_best_icon.json (path is relative to psql's working directory).
\! mkdir -p stats

-- Tuples-only, unaligned output so the file holds just the JSON text.
-- The format is set explicitly instead of toggled with \a: a toggle would
-- switch an already-unaligned session to aligned output.
\t on
\pset format unaligned
\o stats/04_best_icon.json
SELECT json_build_object(
    'hosts_with_icon',    COUNT(*) FILTER (WHERE best_icon_hash IS NOT NULL),
    'hosts_without_icon', COUNT(*) FILTER (WHERE best_icon_hash IS NULL),
    'hosts_with_title',   COUNT(*) FILTER (WHERE html_title IS NOT NULL),
    'hosts_no_title',     COUNT(*) FILTER (WHERE html_title IS NULL),
    'title_but_no_icon',  COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_hash IS NULL)
)
FROM hosts
WHERE parsed = TRUE;
-- Stop redirecting query output and return to tabular display.
\o
\t off
\pset format aligned
\echo 'Stats written to stats/04_best_icon.json'
|