improve stats generation

This commit is contained in:
Joe Lothan 2026-05-20 00:31:38 -04:00
parent 0c9ad5bfd6
commit a8177a1583
5 changed files with 61 additions and 20 deletions

View file

@@ -71,6 +71,7 @@ func writeStats(stats *Stats, cfg Config) {
"duration_seconds": int(duration.Seconds()), "duration_seconds": int(duration.Seconds()),
"processed": stats.Processed.Load(), "processed": stats.Processed.Load(),
"titles_found": stats.TitlesFound.Load(), "titles_found": stats.TitlesFound.Load(),
"no_title": stats.NoTitle.Load(),
"icons_found": stats.IconsFound.Load(), "icons_found": stats.IconsFound.Load(),
"iframe_blocked": stats.IframeBlocked.Load(), "iframe_blocked": stats.IframeBlocked.Load(),
"fetch_errors": stats.FetchErrors.Load(), "fetch_errors": stats.FetchErrors.Load(),

View file

@@ -26,6 +26,7 @@ type Config struct {
type Stats struct { type Stats struct {
Processed atomic.Int64 Processed atomic.Int64
TitlesFound atomic.Int64 TitlesFound atomic.Int64
NoTitle atomic.Int64
IconsFound atomic.Int64 IconsFound atomic.Int64
IframeBlocked atomic.Int64 IframeBlocked atomic.Int64
ParseErrors atomic.Int64 ParseErrors atomic.Int64
@@ -167,12 +168,17 @@ func main() {
// Update stats // Update stats
stats.Processed.Add(1) stats.Processed.Add(1)
if result.Title != "" { if result.Err == nil {
stats.TitlesFound.Add(1) if result.Title != "" {
} stats.TitlesFound.Add(1)
stats.IconsFound.Add(int64(len(result.Icons))) } else {
if result.Err == nil && !result.IframeAllowed { stats.NoTitle.Add(1)
stats.IframeBlocked.Add(1) }
// +1 for the /favicon.ico entry added per host
stats.IconsFound.Add(int64(len(result.Icons) + 1))
if !result.IframeAllowed {
stats.IframeBlocked.Add(1)
}
} }
if result.Err != nil { if result.Err != nil {
if result.FetchErr { if result.FetchErr {
@@ -195,6 +201,7 @@ func main() {
fmt.Printf("Duration: %s\n", duration.Round(time.Second)) fmt.Printf("Duration: %s\n", duration.Round(time.Second))
fmt.Printf("Processed: %d\n", stats.Processed.Load()) fmt.Printf("Processed: %d\n", stats.Processed.Load())
fmt.Printf("Titles found: %d\n", stats.TitlesFound.Load()) fmt.Printf("Titles found: %d\n", stats.TitlesFound.Load())
fmt.Printf("No title: %d\n", stats.NoTitle.Load())
fmt.Printf("Icons found: %d\n", stats.IconsFound.Load()) fmt.Printf("Icons found: %d\n", stats.IconsFound.Load())
fmt.Printf("Iframe blocked: %d\n", stats.IframeBlocked.Load()) fmt.Printf("Iframe blocked: %d\n", stats.IframeBlocked.Load())
fmt.Printf("Fetch errors: %d\n", stats.FetchErrors.Load()) fmt.Printf("Fetch errors: %d\n", stats.FetchErrors.Load())

View file

@@ -37,7 +37,7 @@ FROM (
) sub ) sub
WHERE h.id = sub.host_id; WHERE h.id = sub.host_id;
-- Stats -- Stats (human-readable)
\echo '--- Best Icon Selection Stats ---' \echo '--- Best Icon Selection Stats ---'
SELECT SELECT
@@ -47,3 +47,22 @@ SELECT
COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL) AS title_but_no_icon COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL) AS title_but_no_icon
FROM hosts FROM hosts
WHERE parsed = TRUE; WHERE parsed = TRUE;
-- Stats JSON
-- Writes the icon/title counters for parsed hosts as a single JSON object
-- to stats/04_best_icon.json.
-- Comments stay on their own lines: psql treats the rest of a
-- backslash-command line as that command's arguments.
\! mkdir -p stats
-- Tuples-only + unaligned output so the file contains just the JSON text
-- (no column header, row-count footer, or alignment padding).
-- The format is set explicitly instead of toggled with \a, so the result
-- does not depend on the output format in effect when this section starts.
\t on
\pset format unaligned
\o stats/04_best_icon.json
SELECT json_build_object(
    'hosts_with_icon', COUNT(*) FILTER (WHERE best_icon_s3_key IS NOT NULL),
    'hosts_without_icon', COUNT(*) FILTER (WHERE best_icon_s3_key IS NULL),
    'hosts_with_title', COUNT(*) FILTER (WHERE html_title IS NOT NULL),
    'hosts_no_title', COUNT(*) FILTER (WHERE html_title IS NULL),
    'title_but_no_icon', COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL)
)
FROM hosts
WHERE parsed = TRUE;
-- Stop redirecting query output to the file, then restore regular
-- (header + aligned) table output for anything that runs afterwards.
\o
\t off
\pset format aligned
\echo 'Stats written to stats/04_best_icon.json'

View file

@@ -43,12 +43,14 @@ func writeStats(stats *Stats) {
"started_at": stats.StartedAt.Format(time.RFC3339), "started_at": stats.StartedAt.Format(time.RFC3339),
"finished_at": finishedAt.Format(time.RFC3339), "finished_at": finishedAt.Format(time.RFC3339),
"duration_seconds": int(duration.Seconds()), "duration_seconds": int(duration.Seconds()),
"total_hosts": stats.TotalHosts, "total_hosts": stats.TotalHosts,
"hosts_with_icon": stats.HostsWithIcon, "hosts_with_icon": stats.HostsWithIcon,
"hosts_no_icon": stats.HostsNoIcon, "hosts_no_icon": stats.HostsNoIcon,
"convert_errors": stats.ConvertErrors.Load(), "bundled_with_icon": stats.BundledWithIcon.Load(),
"bundles_created": stats.BundlesCreated, "bundled_no_icon": stats.BundledNoIcon.Load(),
"total_bytes": stats.TotalBytes, "convert_errors": stats.ConvertErrors.Load(),
"bundles_created": stats.BundlesCreated,
"total_bytes": stats.TotalBytes,
} }
os.MkdirAll("stats", 0755) os.MkdirAll("stats", 0755)

View file

@@ -27,13 +27,15 @@ type Config struct {
} }
type Stats struct { type Stats struct {
TotalHosts int TotalHosts int
HostsWithIcon int HostsWithIcon int
HostsNoIcon int HostsNoIcon int
BundlesCreated int BundlesCreated int
ConvertErrors atomic.Int64 ConvertErrors atomic.Int64
TotalBytes int64 BundledWithIcon atomic.Int64
StartedAt time.Time BundledNoIcon atomic.Int64
TotalBytes int64
StartedAt time.Time
} }
func main() { func main() {
@@ -158,6 +160,14 @@ func main() {
} }
wg.Wait() wg.Wait()
for _, e := range pageEntries {
if e.Icon != "" {
stats.BundledWithIcon.Add(1)
} else {
stats.BundledNoIcon.Add(1)
}
}
entryBuf = append(entryBuf, pageEntries...) entryBuf = append(entryBuf, pageEntries...)
// Write complete bundles from the buffer // Write complete bundles from the buffer
@@ -226,6 +236,8 @@ func main() {
fmt.Printf("Total hosts: %d\n", stats.TotalHosts) fmt.Printf("Total hosts: %d\n", stats.TotalHosts)
fmt.Printf("Hosts with icon: %d\n", stats.HostsWithIcon) fmt.Printf("Hosts with icon: %d\n", stats.HostsWithIcon)
fmt.Printf("Hosts without icon: %d\n", stats.HostsNoIcon) fmt.Printf("Hosts without icon: %d\n", stats.HostsNoIcon)
fmt.Printf("Bundled with icon: %d\n", stats.BundledWithIcon.Load())
fmt.Printf("Bundled without icon: %d\n", stats.BundledNoIcon.Load())
fmt.Printf("Convert errors: %d\n", stats.ConvertErrors.Load()) fmt.Printf("Convert errors: %d\n", stats.ConvertErrors.Load())
fmt.Printf("Bundles created: %d\n", stats.BundlesCreated) fmt.Printf("Bundles created: %d\n", stats.BundlesCreated)
fmt.Printf("Total size: %.1f MB\n", float64(stats.TotalBytes)/(1024*1024)) fmt.Printf("Total size: %.1f MB\n", float64(stats.TotalBytes)/(1024*1024))