added select.sql query
This commit is contained in:
parent
6cf6049698
commit
664197e287
1 changed files with 47 additions and 0 deletions
47
pipeline/04_best_icon/select.sql
Normal file
47
pipeline/04_best_icon/select.sql
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
-- Best Icon Selection
|
||||||
|
-- Picks the best completed icon for each host and stores its s3_key in hosts.best_icon_s3_key.
|
||||||
|
--
|
||||||
|
-- Priority:
|
||||||
|
-- 1. Standard square sizes (64 > 48 > 32 > 16) — ideal for tab display
|
||||||
|
-- 2. Other square sizes ≤64
|
||||||
|
-- 3. Non-square sizes ≤64 on both axes
|
||||||
|
-- 4. Anything larger (downloaded because rel_sizes was undeclared)
|
||||||
|
-- 5. Among equal priority: larger width first, then prefer PNG/GIF/ICO over WebP over SVG
|
||||||
|
-- 6. Tiebreaker: smaller file size (less bandwidth in bundles)
|
||||||
|
--
|
||||||
|
-- Usage: psql $DATABASE_URL -f pipeline/04_best_icon/select.sql
|
||||||
|
|
||||||
|
-- Select one "best" completed icon per host and store its s3_key in
-- hosts.best_icon_s3_key.
--
-- Ranking within each host (DISTINCT ON keeps the first row per host_id):
--   1. Size tier:
--        0 = square with width 64, 48, 32 or 16
--        1 = any other square with width <= 64
--        2 = non-square with width and height both <= 64
--        3 = everything else (larger, or width/height unknown)
--   2. Larger width first inside a tier (a NULL width is treated as 0).
--   3. Format: PNG/GIF/ICO, then WebP, then SVG, then any other content type.
--   4. Smaller file_size first; rows with a NULL file_size sort last.
--   5. s3_key as the final tiebreaker so repeated runs pick the same icon
--      when every criterion above ties.
--
-- Hosts with no qualifying icon are not matched and are left untouched
-- (an existing best_icon_s3_key is not cleared by this statement).
WITH best_icon AS (
    SELECT DISTINCT ON (i.host_id)
        i.host_id,
        i.s3_key
    FROM icons AS i
    WHERE i.scan_state = 'completed'
      AND i.s3_key IS NOT NULL
    ORDER BY
        i.host_id,
        CASE
            WHEN i.width = i.height AND i.width IN (64, 48, 32, 16) THEN 0
            WHEN i.width = i.height AND i.width <= 64 THEN 1
            WHEN i.width IS NOT NULL AND i.width <= 64
                 AND i.height IS NOT NULL AND i.height <= 64 THEN 2
            ELSE 3
        END,
        COALESCE(i.width, 0) DESC,
        CASE
            WHEN i.content_type IN (
                'image/png',
                'image/gif',
                'image/x-icon',
                'image/vnd.microsoft.icon'
            ) THEN 0
            WHEN i.content_type = 'image/webp' THEN 1
            WHEN i.content_type = 'image/svg+xml' THEN 2
            ELSE 3
        END,
        i.file_size ASC NULLS LAST,
        i.s3_key ASC
)
UPDATE hosts AS h
SET best_icon_s3_key = best_icon.s3_key
FROM best_icon
WHERE h.id = best_icon.host_id
  -- Only touch rows whose stored key actually changes; IS DISTINCT FROM is
  -- NULL-safe, so hosts that have no key yet are still updated.
  AND h.best_icon_s3_key IS DISTINCT FROM best_icon.s3_key;
|
||||||
|
|
||||||
|
-- Stats: summary counts over hosts where parsed is true.
\echo '--- Best Icon Selection Stats ---'

-- COUNT(column) counts only rows where the column is non-NULL, so it stands
-- in for COUNT(*) FILTER (WHERE column IS NOT NULL).
SELECT
    COUNT(best_icon_s3_key) AS hosts_with_icon,
    COUNT(*) - COUNT(best_icon_s3_key) AS hosts_without_icon,
    COUNT(html_title) AS hosts_with_title,
    COUNT(html_title) FILTER (WHERE best_icon_s3_key IS NULL) AS title_but_no_icon
FROM hosts
WHERE parsed;
|
||||||
Loading…
Add table
Add a link
Reference in a new issue