two phase best icon selection with a temporary table
This commit is contained in:
parent
a819dabb57
commit
8c005c4f6c
1 changed files with 44 additions and 33 deletions
|
|
@ -10,41 +10,52 @@
|
|||
-- 4. Tiebreaker: smaller file size
|
||||
-- SVGs excluded (not supported in bundle generation). Icons ≤2x2 excluded (tracking pixels).
|
||||
--
|
||||
-- Two-step: SELECT into temp table (index-only scan, no heap), then single bulk UPDATE.
|
||||
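-- Requires idx_icons_best covering index on (host_id) INCLUDE (s3_key, content_type, width, height, file_size). NOTE: the query also filters on scan_state, so the scan is only index-only if scan_state is covered as well (extra INCLUDE column, or a partial index WHERE scan_state = 'completed') — verify against the index definition.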
|
||||
--
|
||||
-- Usage: psql $DATABASE_URL -f pipeline/04_best_icon/select.sql
|
||||
|
||||
UPDATE hosts h SET best_icon_s3_key = sub.s3_key
|
||||
FROM (
|
||||
SELECT DISTINCT ON (i.host_id) i.host_id, i.s3_key
|
||||
FROM icons i
|
||||
WHERE i.scan_state = 'completed'
|
||||
AND i.s3_key IS NOT NULL
|
||||
AND i.content_type != 'image/svg+xml'
|
||||
AND (i.width IS NULL OR i.width > 2)
|
||||
AND (i.height IS NULL OR i.height > 2)
|
||||
ORDER BY i.host_id,
|
||||
-- Tier: ≥32 preferred over <32. NULL dimensions go last.
|
||||
-- Plain SET (not SET LOCAL): applies for the rest of this psql session, not just one statement.
-- NOTE(review): presumably sized so the DISTINCT ON sort can run in memory — confirm the
-- server can spare 2GB per sort/hash operation before running.
SET work_mem = '2GB';
|
||||
|
||||
\echo 'Step 1: Finding best icon per host...'
|
||||
\timing on
|
||||
|
||||
CREATE TEMP TABLE best_icons AS
|
||||
SELECT DISTINCT ON (host_id) host_id, s3_key
|
||||
FROM icons
|
||||
WHERE scan_state = 'completed'
|
||||
AND s3_key IS NOT NULL
|
||||
AND content_type != 'image/svg+xml'
|
||||
AND (width IS NULL OR width > 2)
|
||||
AND (height IS NULL OR height > 2)
|
||||
ORDER BY host_id,
|
||||
CASE
|
||||
WHEN LEAST(COALESCE(i.width, 0), COALESCE(i.height, 0)) >= 32 THEN 0
|
||||
WHEN COALESCE(i.width, 0) > 0 THEN 1
|
||||
WHEN LEAST(COALESCE(width, 0), COALESCE(height, 0)) >= 32 THEN 0
|
||||
WHEN COALESCE(width, 0) > 0 THEN 1
|
||||
ELSE 2
|
||||
END,
|
||||
-- Within ≥32: smallest first (closest to 32). Within <32: largest first.
|
||||
CASE
|
||||
WHEN LEAST(COALESCE(i.width, 0), COALESCE(i.height, 0)) >= 32
|
||||
THEN GREATEST(COALESCE(i.width, 0), COALESCE(i.height, 0))
|
||||
ELSE -GREATEST(COALESCE(i.width, 0), COALESCE(i.height, 0))
|
||||
WHEN LEAST(COALESCE(width, 0), COALESCE(height, 0)) >= 32
|
||||
THEN GREATEST(COALESCE(width, 0), COALESCE(height, 0))
|
||||
ELSE -GREATEST(COALESCE(width, 0), COALESCE(height, 0))
|
||||
END,
|
||||
-- Format preference
|
||||
CASE
|
||||
WHEN i.content_type = 'image/png' THEN 0
|
||||
WHEN i.content_type IN ('image/x-icon', 'image/vnd.microsoft.icon') THEN 1
|
||||
WHEN i.content_type IN ('image/gif', 'image/jpeg', 'image/bmp') THEN 2
|
||||
WHEN i.content_type = 'image/webp' THEN 3
|
||||
WHEN content_type = 'image/png' THEN 0
|
||||
WHEN content_type IN ('image/x-icon', 'image/vnd.microsoft.icon') THEN 1
|
||||
WHEN content_type IN ('image/gif', 'image/jpeg', 'image/bmp') THEN 2
|
||||
WHEN content_type = 'image/webp' THEN 3
|
||||
ELSE 4
|
||||
END,
|
||||
i.file_size ASC
|
||||
) sub
|
||||
WHERE h.id = sub.host_id;
|
||||
file_size ASC;
|
||||
|
||||
\echo 'Step 2: Updating hosts...'
|
||||
|
||||
-- Step 2: bulk-apply the selected icon to each host by joining hosts to the
-- best_icons temp table on host id.
-- The IS DISTINCT FROM guard is NULL-safe and skips hosts whose best_icon_s3_key
-- already equals the selected key, so re-runs do not rewrite unchanged rows
-- (avoiding needless dead tuples and WAL). The final table contents are the same
-- as an unguarded UPDATE; only the reported row count differs.
UPDATE hosts h
SET best_icon_s3_key = b.s3_key
FROM best_icons b
WHERE h.id = b.host_id
  AND h.best_icon_s3_key IS DISTINCT FROM b.s3_key;
|
||||
|
||||
\timing off
|
||||
|
||||
DROP TABLE best_icons;
|
||||
|
||||
-- Stats (human-readable)
|
||||
\echo '--- Best Icon Selection Stats ---'
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue