improve stats generation

This commit is contained in:
Joe Lothan 2026-05-20 00:31:38 -04:00
parent 0c9ad5bfd6
commit a8177a1583
5 changed files with 61 additions and 20 deletions

View file

@ -71,6 +71,7 @@ func writeStats(stats *Stats, cfg Config) {
"duration_seconds": int(duration.Seconds()),
"processed": stats.Processed.Load(),
"titles_found": stats.TitlesFound.Load(),
"no_title": stats.NoTitle.Load(),
"icons_found": stats.IconsFound.Load(),
"iframe_blocked": stats.IframeBlocked.Load(),
"fetch_errors": stats.FetchErrors.Load(),

View file

@ -26,6 +26,7 @@ type Config struct {
type Stats struct {
Processed atomic.Int64
TitlesFound atomic.Int64
NoTitle atomic.Int64
IconsFound atomic.Int64
IframeBlocked atomic.Int64
ParseErrors atomic.Int64
@ -167,12 +168,17 @@ func main() {
// Update stats
stats.Processed.Add(1)
if result.Title != "" {
stats.TitlesFound.Add(1)
}
stats.IconsFound.Add(int64(len(result.Icons)))
if result.Err == nil && !result.IframeAllowed {
stats.IframeBlocked.Add(1)
if result.Err == nil {
if result.Title != "" {
stats.TitlesFound.Add(1)
} else {
stats.NoTitle.Add(1)
}
// +1 for the /favicon.ico entry added per host
stats.IconsFound.Add(int64(len(result.Icons) + 1))
if !result.IframeAllowed {
stats.IframeBlocked.Add(1)
}
}
if result.Err != nil {
if result.FetchErr {
@ -195,6 +201,7 @@ func main() {
fmt.Printf("Duration: %s\n", duration.Round(time.Second))
fmt.Printf("Processed: %d\n", stats.Processed.Load())
fmt.Printf("Titles found: %d\n", stats.TitlesFound.Load())
fmt.Printf("No title: %d\n", stats.NoTitle.Load())
fmt.Printf("Icons found: %d\n", stats.IconsFound.Load())
fmt.Printf("Iframe blocked: %d\n", stats.IframeBlocked.Load())
fmt.Printf("Fetch errors: %d\n", stats.FetchErrors.Load())

View file

@ -37,7 +37,7 @@ FROM (
) sub
WHERE h.id = sub.host_id;
-- Stats
-- Stats (human-readable)
\echo '--- Best Icon Selection Stats ---'
SELECT
@ -47,3 +47,22 @@ SELECT
COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL) AS title_but_no_icon
FROM hosts
WHERE parsed = TRUE;
-- Stats JSON: write icon/title counters for parsed hosts as one JSON object
-- to stats/04_best_icon.json.
-- Create the output directory through the shell; the path is relative to the
-- directory psql was started from.
\! mkdir -p stats
-- Tuples-only mode: suppress column headers and the row-count footer so the
-- output file contains just the JSON value.
\t on
-- \a TOGGLES between aligned and unaligned output. NOTE(review): this assumes
-- the session is in the default aligned mode at this point, so the toggle
-- selects unaligned output (no padding); confirm the script is never run
-- with psql -A.
\a
-- Redirect query output to the JSON file.
\o stats/04_best_icon.json
-- Only rows with parsed = TRUE are counted; each key is a filtered COUNT(*)
-- over that same set of rows.
SELECT json_build_object(
'hosts_with_icon', COUNT(*) FILTER (WHERE best_icon_s3_key IS NOT NULL),
'hosts_without_icon', COUNT(*) FILTER (WHERE best_icon_s3_key IS NULL),
'hosts_with_title', COUNT(*) FILTER (WHERE html_title IS NOT NULL),
'hosts_no_title', COUNT(*) FILTER (WHERE html_title IS NULL),
'title_but_no_icon', COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL)
)
FROM hosts
WHERE parsed = TRUE;
-- \o with no argument sends query output back to stdout; then undo the
-- formatting changes made above (the second \a toggles the format back).
\o
\t off
\a
\echo 'Stats written to stats/04_best_icon.json'

View file

@ -43,12 +43,14 @@ func writeStats(stats *Stats) {
"started_at": stats.StartedAt.Format(time.RFC3339),
"finished_at": finishedAt.Format(time.RFC3339),
"duration_seconds": int(duration.Seconds()),
"total_hosts": stats.TotalHosts,
"hosts_with_icon": stats.HostsWithIcon,
"hosts_no_icon": stats.HostsNoIcon,
"convert_errors": stats.ConvertErrors.Load(),
"bundles_created": stats.BundlesCreated,
"total_bytes": stats.TotalBytes,
"total_hosts": stats.TotalHosts,
"hosts_with_icon": stats.HostsWithIcon,
"hosts_no_icon": stats.HostsNoIcon,
"bundled_with_icon": stats.BundledWithIcon.Load(),
"bundled_no_icon": stats.BundledNoIcon.Load(),
"convert_errors": stats.ConvertErrors.Load(),
"bundles_created": stats.BundlesCreated,
"total_bytes": stats.TotalBytes,
}
os.MkdirAll("stats", 0755)

View file

@ -27,13 +27,15 @@ type Config struct {
}
type Stats struct {
TotalHosts int
HostsWithIcon int
HostsNoIcon int
BundlesCreated int
ConvertErrors atomic.Int64
TotalBytes int64
StartedAt time.Time
TotalHosts int
HostsWithIcon int
HostsNoIcon int
BundlesCreated int
ConvertErrors atomic.Int64
BundledWithIcon atomic.Int64
BundledNoIcon atomic.Int64
TotalBytes int64
StartedAt time.Time
}
func main() {
@ -158,6 +160,14 @@ func main() {
}
wg.Wait()
for _, e := range pageEntries {
if e.Icon != "" {
stats.BundledWithIcon.Add(1)
} else {
stats.BundledNoIcon.Add(1)
}
}
entryBuf = append(entryBuf, pageEntries...)
// Write complete bundles from the buffer
@ -226,6 +236,8 @@ func main() {
fmt.Printf("Total hosts: %d\n", stats.TotalHosts)
fmt.Printf("Hosts with icon: %d\n", stats.HostsWithIcon)
fmt.Printf("Hosts without icon: %d\n", stats.HostsNoIcon)
fmt.Printf("Bundled with icon: %d\n", stats.BundledWithIcon.Load())
fmt.Printf("Bundled without icon: %d\n", stats.BundledNoIcon.Load())
fmt.Printf("Convert errors: %d\n", stats.ConvertErrors.Load())
fmt.Printf("Bundles created: %d\n", stats.BundlesCreated)
fmt.Printf("Total size: %.1f MB\n", float64(stats.TotalBytes)/(1024*1024))