two phase best icon selection with a temporary table

This commit is contained in:
Joe Lothan 2026-05-25 20:55:29 -04:00
parent a819dabb57
commit 8c005c4f6c

View file

@ -10,41 +10,52 @@
-- 4. Tiebreaker: smaller file size
-- SVGs excluded (not supported in bundle generation). Icons ≤2x2 excluded (tracking pixels).
--
-- Two-step: SELECT into temp table (index-only scan, no heap), then single bulk UPDATE.
-- Requires idx_icons_best covering index on (host_id) INCLUDE (s3_key, content_type, width, height, file_size).
--
-- Usage: psql $DATABASE_URL -f pipeline/04_best_icon/select.sql
-- Session-level setting: stays in effect for the remainder of this psql session.
SET work_mem = '2GB';

\echo 'Step 1: Finding best icon per host...'
\timing on

-- Clear any temp table left behind by an aborted run in the same session.
-- Qualified with pg_temp so a permanent table named best_icons is never dropped.
DROP TABLE IF EXISTS pg_temp.best_icons;

-- Step 1: materialize exactly one (host_id, s3_key) row per host.
-- DISTINCT ON keeps the first row of each host_id group under the ORDER BY below,
-- so the sort keys are the ranking rules, most significant first.
CREATE TEMP TABLE best_icons AS
SELECT DISTINCT ON (host_id)
    host_id,
    s3_key
FROM icons
WHERE scan_state = 'completed'
  AND s3_key IS NOT NULL
  -- NOTE(review): rows with a NULL content_type are also dropped by this
  -- comparison (NULL != '...' is not true) -- confirm that is intended.
  AND content_type != 'image/svg+xml'
  -- Unknown (NULL) dimensions are kept; known dimensions must exceed 2px.
  AND (width IS NULL OR width > 2)
  AND (height IS NULL OR height > 2)
ORDER BY
    host_id,
    -- Tier: both sides >= 32 first, then icons with a known positive width,
    -- then everything else (including NULL dimensions).
    CASE
        WHEN LEAST(COALESCE(width, 0), COALESCE(height, 0)) >= 32 THEN 0
        WHEN COALESCE(width, 0) > 0 THEN 1
        ELSE 2
    END,
    -- Within >= 32: smallest first (closest to 32). Otherwise: largest first
    -- (negated so that ascending order puts the biggest on top).
    CASE
        WHEN LEAST(COALESCE(width, 0), COALESCE(height, 0)) >= 32
            THEN GREATEST(COALESCE(width, 0), COALESCE(height, 0))
        ELSE -GREATEST(COALESCE(width, 0), COALESCE(height, 0))
    END,
    -- Format preference: PNG, ICO, GIF/JPEG/BMP, WebP, anything else.
    CASE
        WHEN content_type = 'image/png' THEN 0
        WHEN content_type IN ('image/x-icon', 'image/vnd.microsoft.icon') THEN 1
        WHEN content_type IN ('image/gif', 'image/jpeg', 'image/bmp') THEN 2
        WHEN content_type = 'image/webp' THEN 3
        ELSE 4
    END,
    -- Smaller file wins; rows with unknown size sort after known sizes.
    file_size ASC NULLS LAST,
    -- Final tiebreaker so the winner among otherwise-equal candidates is
    -- deterministic across runs instead of depending on scan order.
    s3_key ASC;

\echo 'Step 2: Updating hosts...'
-- Step 2: single bulk UPDATE from the temp table.
-- The IS DISTINCT FROM guard skips hosts whose stored key already matches
-- (NULL-safe), so unchanged rows are not rewritten.
UPDATE hosts AS h
SET best_icon_s3_key = b.s3_key
FROM best_icons AS b
WHERE h.id = b.host_id
  AND h.best_icon_s3_key IS DISTINCT FROM b.s3_key;
\timing off

DROP TABLE IF EXISTS pg_temp.best_icons;
-- Stats (human-readable)
\echo '--- Best Icon Selection Stats ---' \echo '--- Best Icon Selection Stats ---'