two phase best icon selection with a temporary table

This commit is contained in:
Joe Lothan 2026-05-25 20:55:29 -04:00
parent a819dabb57
commit 8c005c4f6c

View file

@ -10,41 +10,52 @@
-- 4. Tiebreaker: smaller file size
-- SVGs excluded (not supported in bundle generation). Icons ≤2x2 excluded (tracking pixels).
--
-- Two-step: SELECT into temp table (index-only scan, no heap), then single bulk UPDATE.
-- Requires idx_icons_best covering index on (host_id) INCLUDE (s3_key, content_type, width, height, file_size).
--
-- Usage: psql $DATABASE_URL -f pipeline/04_best_icon/select.sql
-- Best-icon selection, done in two phases:
--   Step 1 materialises one winning (host_id, s3_key) row per host into a
--          session-local temp table.
--   Step 2 copies those winners onto hosts.best_icon_s3_key in one bulk UPDATE.
--
-- work_mem is raised for this session (presumably so the per-host sort stays
-- in memory -- confirm). It is not reset here, so it also applies to whatever
-- statements follow in the same session.
SET work_mem = '2GB';

\echo 'Step 1: Finding best icon per host...'
\timing on

-- NOTE(review): the filter below reads scan_state, which is not among the
-- columns listed for idx_icons_best in the file header. An index-only scan is
-- only possible if that index is partial on scan_state = 'completed' (or
-- otherwise covers scan_state) -- confirm against the index definition.
CREATE TEMP TABLE best_icons AS
SELECT DISTINCT ON (host_id)
    host_id,
    s3_key
FROM icons
WHERE scan_state = 'completed'
  AND s3_key IS NOT NULL
  -- NOTE(review): `!=` evaluates to NULL for a NULL content_type, so such rows
  -- are dropped here even though the format ranking below has an ELSE branch.
  -- Confirm this is intended.
  AND content_type != 'image/svg+xml'
  -- Icons of 2x2 or smaller are excluded; unknown (NULL) dimensions are kept.
  AND (width IS NULL OR width > 2)
  AND (height IS NULL OR height > 2)
ORDER BY
    host_id,
    -- Tier: >=32 preferred over <32. NULL dimensions go last.
    CASE
        WHEN LEAST(COALESCE(width, 0), COALESCE(height, 0)) >= 32 THEN 0
        WHEN COALESCE(width, 0) > 0 THEN 1
        ELSE 2
    END,
    -- Within >=32: smallest first (closest to 32). Within <32: largest first.
    CASE
        WHEN LEAST(COALESCE(width, 0), COALESCE(height, 0)) >= 32
            THEN GREATEST(COALESCE(width, 0), COALESCE(height, 0))
        ELSE -GREATEST(COALESCE(width, 0), COALESCE(height, 0))
    END,
    -- Format preference: PNG, then ICO, then GIF/JPEG/BMP, then WebP, then anything else.
    CASE
        WHEN content_type = 'image/png' THEN 0
        WHEN content_type IN ('image/x-icon', 'image/vnd.microsoft.icon') THEN 1
        WHEN content_type IN ('image/gif', 'image/jpeg', 'image/bmp') THEN 2
        WHEN content_type = 'image/webp' THEN 3
        ELSE 4
    END,
    -- Smaller file wins; NULL sizes sort last (the PostgreSQL default for ASC, spelled out).
    file_size ASC NULLS LAST,
    -- Final tiebreaker so the winner is deterministic when every criterion above ties.
    s3_key ASC;

\echo 'Step 2: Updating hosts...'
-- Only touch hosts whose stored key actually changes, so unchanged rows are
-- not rewritten. IS DISTINCT FROM treats a NULL stored key as different from
-- any non-NULL winner.
UPDATE hosts AS h
SET best_icon_s3_key = b.s3_key
FROM best_icons AS b
WHERE h.id = b.host_id
  AND h.best_icon_s3_key IS DISTINCT FROM b.s3_key;
\timing off

DROP TABLE best_icons;
-- Stats (human-readable) -- Stats (human-readable)
\echo '--- Best Icon Selection Stats ---' \echo '--- Best Icon Selection Stats ---'