improve stats generation
This commit is contained in:
parent
0c9ad5bfd6
commit
a8177a1583
5 changed files with 61 additions and 20 deletions
|
|
@ -71,6 +71,7 @@ func writeStats(stats *Stats, cfg Config) {
|
|||
"duration_seconds": int(duration.Seconds()),
|
||||
"processed": stats.Processed.Load(),
|
||||
"titles_found": stats.TitlesFound.Load(),
|
||||
"no_title": stats.NoTitle.Load(),
|
||||
"icons_found": stats.IconsFound.Load(),
|
||||
"iframe_blocked": stats.IframeBlocked.Load(),
|
||||
"fetch_errors": stats.FetchErrors.Load(),
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ type Config struct {
|
|||
type Stats struct {
|
||||
Processed atomic.Int64
|
||||
TitlesFound atomic.Int64
|
||||
NoTitle atomic.Int64
|
||||
IconsFound atomic.Int64
|
||||
IframeBlocked atomic.Int64
|
||||
ParseErrors atomic.Int64
|
||||
|
|
@ -167,12 +168,17 @@ func main() {
|
|||
|
||||
// Update stats
|
||||
stats.Processed.Add(1)
|
||||
if result.Title != "" {
|
||||
stats.TitlesFound.Add(1)
|
||||
}
|
||||
stats.IconsFound.Add(int64(len(result.Icons)))
|
||||
if result.Err == nil && !result.IframeAllowed {
|
||||
stats.IframeBlocked.Add(1)
|
||||
if result.Err == nil {
|
||||
if result.Title != "" {
|
||||
stats.TitlesFound.Add(1)
|
||||
} else {
|
||||
stats.NoTitle.Add(1)
|
||||
}
|
||||
// +1 for the /favicon.ico entry added per host
|
||||
stats.IconsFound.Add(int64(len(result.Icons) + 1))
|
||||
if !result.IframeAllowed {
|
||||
stats.IframeBlocked.Add(1)
|
||||
}
|
||||
}
|
||||
if result.Err != nil {
|
||||
if result.FetchErr {
|
||||
|
|
@ -195,6 +201,7 @@ func main() {
|
|||
fmt.Printf("Duration: %s\n", duration.Round(time.Second))
|
||||
fmt.Printf("Processed: %d\n", stats.Processed.Load())
|
||||
fmt.Printf("Titles found: %d\n", stats.TitlesFound.Load())
|
||||
fmt.Printf("No title: %d\n", stats.NoTitle.Load())
|
||||
fmt.Printf("Icons found: %d\n", stats.IconsFound.Load())
|
||||
fmt.Printf("Iframe blocked: %d\n", stats.IframeBlocked.Load())
|
||||
fmt.Printf("Fetch errors: %d\n", stats.FetchErrors.Load())
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ FROM (
|
|||
) sub
|
||||
WHERE h.id = sub.host_id;
|
||||
|
||||
-- Stats
|
||||
-- Stats (human-readable)
|
||||
\echo '--- Best Icon Selection Stats ---'
|
||||
|
||||
SELECT
|
||||
|
|
@ -47,3 +47,22 @@ SELECT
|
|||
COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL) AS title_but_no_icon
|
||||
FROM hosts
|
||||
WHERE parsed = TRUE;
|
||||
|
||||
-- Stats JSON
|
||||
\! mkdir -p stats
|
||||
\t on
|
||||
\a
|
||||
\o stats/04_best_icon.json
|
||||
SELECT json_build_object(
|
||||
'hosts_with_icon', COUNT(*) FILTER (WHERE best_icon_s3_key IS NOT NULL),
|
||||
'hosts_without_icon', COUNT(*) FILTER (WHERE best_icon_s3_key IS NULL),
|
||||
'hosts_with_title', COUNT(*) FILTER (WHERE html_title IS NOT NULL),
|
||||
'hosts_no_title', COUNT(*) FILTER (WHERE html_title IS NULL),
|
||||
'title_but_no_icon', COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL)
|
||||
)
|
||||
FROM hosts
|
||||
WHERE parsed = TRUE;
|
||||
\o
|
||||
\t off
|
||||
\a
|
||||
\echo 'Stats written to stats/04_best_icon.json'
|
||||
|
|
|
|||
|
|
@ -43,12 +43,14 @@ func writeStats(stats *Stats) {
|
|||
"started_at": stats.StartedAt.Format(time.RFC3339),
|
||||
"finished_at": finishedAt.Format(time.RFC3339),
|
||||
"duration_seconds": int(duration.Seconds()),
|
||||
"total_hosts": stats.TotalHosts,
|
||||
"hosts_with_icon": stats.HostsWithIcon,
|
||||
"hosts_no_icon": stats.HostsNoIcon,
|
||||
"convert_errors": stats.ConvertErrors.Load(),
|
||||
"bundles_created": stats.BundlesCreated,
|
||||
"total_bytes": stats.TotalBytes,
|
||||
"total_hosts": stats.TotalHosts,
|
||||
"hosts_with_icon": stats.HostsWithIcon,
|
||||
"hosts_no_icon": stats.HostsNoIcon,
|
||||
"bundled_with_icon": stats.BundledWithIcon.Load(),
|
||||
"bundled_no_icon": stats.BundledNoIcon.Load(),
|
||||
"convert_errors": stats.ConvertErrors.Load(),
|
||||
"bundles_created": stats.BundlesCreated,
|
||||
"total_bytes": stats.TotalBytes,
|
||||
}
|
||||
|
||||
os.MkdirAll("stats", 0755)
|
||||
|
|
|
|||
|
|
@ -27,13 +27,15 @@ type Config struct {
|
|||
}
|
||||
|
||||
type Stats struct {
|
||||
TotalHosts int
|
||||
HostsWithIcon int
|
||||
HostsNoIcon int
|
||||
BundlesCreated int
|
||||
ConvertErrors atomic.Int64
|
||||
TotalBytes int64
|
||||
StartedAt time.Time
|
||||
TotalHosts int
|
||||
HostsWithIcon int
|
||||
HostsNoIcon int
|
||||
BundlesCreated int
|
||||
ConvertErrors atomic.Int64
|
||||
BundledWithIcon atomic.Int64
|
||||
BundledNoIcon atomic.Int64
|
||||
TotalBytes int64
|
||||
StartedAt time.Time
|
||||
}
|
||||
|
||||
func main() {
|
||||
|
|
@ -158,6 +160,14 @@ func main() {
|
|||
}
|
||||
wg.Wait()
|
||||
|
||||
for _, e := range pageEntries {
|
||||
if e.Icon != "" {
|
||||
stats.BundledWithIcon.Add(1)
|
||||
} else {
|
||||
stats.BundledNoIcon.Add(1)
|
||||
}
|
||||
}
|
||||
|
||||
entryBuf = append(entryBuf, pageEntries...)
|
||||
|
||||
// Write complete bundles from the buffer
|
||||
|
|
@ -226,6 +236,8 @@ func main() {
|
|||
fmt.Printf("Total hosts: %d\n", stats.TotalHosts)
|
||||
fmt.Printf("Hosts with icon: %d\n", stats.HostsWithIcon)
|
||||
fmt.Printf("Hosts without icon: %d\n", stats.HostsNoIcon)
|
||||
fmt.Printf("Bundled with icon: %d\n", stats.BundledWithIcon.Load())
|
||||
fmt.Printf("Bundled without icon: %d\n", stats.BundledNoIcon.Load())
|
||||
fmt.Printf("Convert errors: %d\n", stats.ConvertErrors.Load())
|
||||
fmt.Printf("Bundles created: %d\n", stats.BundlesCreated)
|
||||
fmt.Printf("Total size: %.1f MB\n", float64(stats.TotalBytes)/(1024*1024))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue