improve stats generation

This commit is contained in:
Joe Lothan 2026-05-20 00:31:38 -04:00
parent 0c9ad5bfd6
commit a8177a1583
5 changed files with 61 additions and 20 deletions

View file

@ -71,6 +71,7 @@ func writeStats(stats *Stats, cfg Config) {
"duration_seconds": int(duration.Seconds()),
"processed": stats.Processed.Load(),
"titles_found": stats.TitlesFound.Load(),
"no_title": stats.NoTitle.Load(),
"icons_found": stats.IconsFound.Load(),
"iframe_blocked": stats.IframeBlocked.Load(),
"fetch_errors": stats.FetchErrors.Load(),

View file

@ -26,6 +26,7 @@ type Config struct {
type Stats struct {
Processed atomic.Int64
TitlesFound atomic.Int64
NoTitle atomic.Int64
IconsFound atomic.Int64
IframeBlocked atomic.Int64
ParseErrors atomic.Int64
@ -167,13 +168,18 @@ func main() {
// Update stats
stats.Processed.Add(1)
if result.Err == nil {
if result.Title != "" {
stats.TitlesFound.Add(1)
} else {
stats.NoTitle.Add(1)
}
stats.IconsFound.Add(int64(len(result.Icons)))
if result.Err == nil && !result.IframeAllowed {
// +1 for the /favicon.ico entry added per host
stats.IconsFound.Add(int64(len(result.Icons) + 1))
if !result.IframeAllowed {
stats.IframeBlocked.Add(1)
}
}
if result.Err != nil {
if result.FetchErr {
stats.FetchErrors.Add(1)
@ -195,6 +201,7 @@ func main() {
fmt.Printf("Duration: %s\n", duration.Round(time.Second))
fmt.Printf("Processed: %d\n", stats.Processed.Load())
fmt.Printf("Titles found: %d\n", stats.TitlesFound.Load())
fmt.Printf("No title: %d\n", stats.NoTitle.Load())
fmt.Printf("Icons found: %d\n", stats.IconsFound.Load())
fmt.Printf("Iframe blocked: %d\n", stats.IframeBlocked.Load())
fmt.Printf("Fetch errors: %d\n", stats.FetchErrors.Load())

View file

@ -37,7 +37,7 @@ FROM (
) sub
WHERE h.id = sub.host_id;
-- Stats
-- Stats (human-readable)
\echo '--- Best Icon Selection Stats ---'
SELECT
@ -47,3 +47,22 @@ SELECT
COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL) AS title_but_no_icon
FROM hosts
WHERE parsed = TRUE;
-- Stats JSON
-- Writes icon/title counts for parsed hosts to stats/04_best_icon.json as a
-- single JSON object built by json_build_object.
-- The stats directory is created relative to the directory psql runs in.
\! mkdir -p stats
-- Tuples-only plus unaligned output keeps column headers, row-count footers
-- and padding out of the file, leaving only the JSON text.
-- \pset format is used rather than \a because \a toggles the current format:
-- in a session that is already unaligned (psql -A or a .psqlrc setting) it
-- would switch the output back to aligned right before the file is written.
\t on
\pset format unaligned
\o stats/04_best_icon.json
SELECT json_build_object(
    'hosts_with_icon', COUNT(*) FILTER (WHERE best_icon_s3_key IS NOT NULL),
    'hosts_without_icon', COUNT(*) FILTER (WHERE best_icon_s3_key IS NULL),
    'hosts_with_title', COUNT(*) FILTER (WHERE html_title IS NOT NULL),
    'hosts_no_title', COUNT(*) FILTER (WHERE html_title IS NULL),
    'title_but_no_icon', COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL)
)
FROM hosts
WHERE parsed = TRUE;
-- A bare \o stops writing to the file and sends query output to stdout again.
\o
\t off
-- NOTE(review): restores psql's default aligned format; assumes the session
-- was not deliberately started unaligned -- confirm if later steps depend on it.
\pset format aligned
\echo 'Stats written to stats/04_best_icon.json'

View file

@ -46,6 +46,8 @@ func writeStats(stats *Stats) {
"total_hosts": stats.TotalHosts,
"hosts_with_icon": stats.HostsWithIcon,
"hosts_no_icon": stats.HostsNoIcon,
"bundled_with_icon": stats.BundledWithIcon.Load(),
"bundled_no_icon": stats.BundledNoIcon.Load(),
"convert_errors": stats.ConvertErrors.Load(),
"bundles_created": stats.BundlesCreated,
"total_bytes": stats.TotalBytes,

View file

@ -32,6 +32,8 @@ type Stats struct {
HostsNoIcon int
BundlesCreated int
ConvertErrors atomic.Int64
BundledWithIcon atomic.Int64
BundledNoIcon atomic.Int64
TotalBytes int64
StartedAt time.Time
}
@ -158,6 +160,14 @@ func main() {
}
wg.Wait()
for _, e := range pageEntries {
if e.Icon != "" {
stats.BundledWithIcon.Add(1)
} else {
stats.BundledNoIcon.Add(1)
}
}
entryBuf = append(entryBuf, pageEntries...)
// Write complete bundles from the buffer
@ -226,6 +236,8 @@ func main() {
fmt.Printf("Total hosts: %d\n", stats.TotalHosts)
fmt.Printf("Hosts with icon: %d\n", stats.HostsWithIcon)
fmt.Printf("Hosts without icon: %d\n", stats.HostsNoIcon)
fmt.Printf("Bundled with icon: %d\n", stats.BundledWithIcon.Load())
fmt.Printf("Bundled without icon: %d\n", stats.BundledNoIcon.Load())
fmt.Printf("Convert errors: %d\n", stats.ConvertErrors.Load())
fmt.Printf("Bundles created: %d\n", stats.BundlesCreated)
fmt.Printf("Total size: %.1f MB\n", float64(stats.TotalBytes)/(1024*1024))