updated s3_key name to icon_hash
This commit is contained in:
parent
e308718eb2
commit
33bd0a221e
8 changed files with 31 additions and 31 deletions
|
|
@ -1,5 +1,5 @@
|
|||
-- Best Icon Selection
|
||||
-- Picks the best completed icon for each host and stores its s3_key in hosts.best_icon_s3_key.
|
||||
-- Picks the best completed icon for each host and stores its icon_hash in hosts.best_icon_hash.
|
||||
--
|
||||
-- Target: 32x32 source icon (displayed at 16x16 CSS, crisp on 2x Retina).
|
||||
--
|
||||
|
|
@ -11,7 +11,7 @@
|
|||
-- SVGs excluded (not supported in bundle generation). Icons ≤2x2 excluded (tracking pixels).
|
||||
--
|
||||
-- Two-step: SELECT into temp table (index-only scan, no heap), then single bulk UPDATE.
|
||||
-- Requires idx_icons_best covering index on (host_id) INCLUDE (s3_key, content_type, width, height, file_size).
|
||||
-- Requires idx_icons_best covering index on (host_id) INCLUDE (icon_hash, content_type, width, height, file_size).
|
||||
--
|
||||
-- Usage: psql $DATABASE_URL -f pipeline/04_best_icon/select.sql
|
||||
|
||||
|
|
@ -21,10 +21,10 @@ SET work_mem = '2GB';
|
|||
\timing on
|
||||
|
||||
CREATE TEMP TABLE best_icons AS
|
||||
SELECT DISTINCT ON (host_id) host_id, s3_key
|
||||
SELECT DISTINCT ON (host_id) host_id, icon_hash
|
||||
FROM icons
|
||||
WHERE scan_state = 'completed'
|
||||
AND s3_key IS NOT NULL
|
||||
AND icon_hash IS NOT NULL
|
||||
AND content_type != 'image/svg+xml'
|
||||
AND (width IS NULL OR width > 2)
|
||||
AND (height IS NULL OR height > 2)
|
||||
|
|
@ -50,7 +50,7 @@ ORDER BY host_id,
|
|||
|
||||
\echo 'Step 2: Updating hosts...'
|
||||
|
||||
UPDATE hosts h SET best_icon_s3_key = b.s3_key
|
||||
UPDATE hosts h SET best_icon_hash = b.icon_hash
|
||||
FROM best_icons b WHERE h.id = b.host_id;
|
||||
|
||||
\timing off
|
||||
|
|
@ -61,10 +61,10 @@ DROP TABLE best_icons;
|
|||
\echo '--- Best Icon Selection Stats ---'
|
||||
|
||||
SELECT
|
||||
COUNT(*) FILTER (WHERE best_icon_s3_key IS NOT NULL) AS hosts_with_icon,
|
||||
COUNT(*) FILTER (WHERE best_icon_s3_key IS NULL) AS hosts_without_icon,
|
||||
COUNT(*) FILTER (WHERE best_icon_hash IS NOT NULL) AS hosts_with_icon,
|
||||
COUNT(*) FILTER (WHERE best_icon_hash IS NULL) AS hosts_without_icon,
|
||||
COUNT(*) FILTER (WHERE html_title IS NOT NULL) AS hosts_with_title,
|
||||
COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL) AS title_but_no_icon
|
||||
COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_hash IS NULL) AS title_but_no_icon
|
||||
FROM hosts
|
||||
WHERE parsed = TRUE;
|
||||
|
||||
|
|
@ -74,11 +74,11 @@ WHERE parsed = TRUE;
|
|||
\a
|
||||
\o stats/04_best_icon.json
|
||||
SELECT json_build_object(
|
||||
'hosts_with_icon', COUNT(*) FILTER (WHERE best_icon_s3_key IS NOT NULL),
|
||||
'hosts_without_icon', COUNT(*) FILTER (WHERE best_icon_s3_key IS NULL),
|
||||
'hosts_with_icon', COUNT(*) FILTER (WHERE best_icon_hash IS NOT NULL),
|
||||
'hosts_without_icon', COUNT(*) FILTER (WHERE best_icon_hash IS NULL),
|
||||
'hosts_with_title', COUNT(*) FILTER (WHERE html_title IS NOT NULL),
|
||||
'hosts_no_title', COUNT(*) FILTER (WHERE html_title IS NULL),
|
||||
'title_but_no_icon', COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL)
|
||||
'title_but_no_icon', COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_hash IS NULL)
|
||||
)
|
||||
FROM hosts
|
||||
WHERE parsed = TRUE;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue