improve stats generation
This commit is contained in:
parent
0c9ad5bfd6
commit
a8177a1583
5 changed files with 61 additions and 20 deletions
|
|
@ -71,6 +71,7 @@ func writeStats(stats *Stats, cfg Config) {
|
||||||
"duration_seconds": int(duration.Seconds()),
|
"duration_seconds": int(duration.Seconds()),
|
||||||
"processed": stats.Processed.Load(),
|
"processed": stats.Processed.Load(),
|
||||||
"titles_found": stats.TitlesFound.Load(),
|
"titles_found": stats.TitlesFound.Load(),
|
||||||
|
"no_title": stats.NoTitle.Load(),
|
||||||
"icons_found": stats.IconsFound.Load(),
|
"icons_found": stats.IconsFound.Load(),
|
||||||
"iframe_blocked": stats.IframeBlocked.Load(),
|
"iframe_blocked": stats.IframeBlocked.Load(),
|
||||||
"fetch_errors": stats.FetchErrors.Load(),
|
"fetch_errors": stats.FetchErrors.Load(),
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ type Config struct {
|
||||||
type Stats struct {
|
type Stats struct {
|
||||||
Processed atomic.Int64
|
Processed atomic.Int64
|
||||||
TitlesFound atomic.Int64
|
TitlesFound atomic.Int64
|
||||||
|
NoTitle atomic.Int64
|
||||||
IconsFound atomic.Int64
|
IconsFound atomic.Int64
|
||||||
IframeBlocked atomic.Int64
|
IframeBlocked atomic.Int64
|
||||||
ParseErrors atomic.Int64
|
ParseErrors atomic.Int64
|
||||||
|
|
@ -167,13 +168,18 @@ func main() {
|
||||||
|
|
||||||
// Update stats
|
// Update stats
|
||||||
stats.Processed.Add(1)
|
stats.Processed.Add(1)
|
||||||
|
if result.Err == nil {
|
||||||
if result.Title != "" {
|
if result.Title != "" {
|
||||||
stats.TitlesFound.Add(1)
|
stats.TitlesFound.Add(1)
|
||||||
|
} else {
|
||||||
|
stats.NoTitle.Add(1)
|
||||||
}
|
}
|
||||||
stats.IconsFound.Add(int64(len(result.Icons)))
|
// +1 for the /favicon.ico entry added per host
|
||||||
if result.Err == nil && !result.IframeAllowed {
|
stats.IconsFound.Add(int64(len(result.Icons) + 1))
|
||||||
|
if !result.IframeAllowed {
|
||||||
stats.IframeBlocked.Add(1)
|
stats.IframeBlocked.Add(1)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if result.Err != nil {
|
if result.Err != nil {
|
||||||
if result.FetchErr {
|
if result.FetchErr {
|
||||||
stats.FetchErrors.Add(1)
|
stats.FetchErrors.Add(1)
|
||||||
|
|
@ -195,6 +201,7 @@ func main() {
|
||||||
fmt.Printf("Duration: %s\n", duration.Round(time.Second))
|
fmt.Printf("Duration: %s\n", duration.Round(time.Second))
|
||||||
fmt.Printf("Processed: %d\n", stats.Processed.Load())
|
fmt.Printf("Processed: %d\n", stats.Processed.Load())
|
||||||
fmt.Printf("Titles found: %d\n", stats.TitlesFound.Load())
|
fmt.Printf("Titles found: %d\n", stats.TitlesFound.Load())
|
||||||
|
fmt.Printf("No title: %d\n", stats.NoTitle.Load())
|
||||||
fmt.Printf("Icons found: %d\n", stats.IconsFound.Load())
|
fmt.Printf("Icons found: %d\n", stats.IconsFound.Load())
|
||||||
fmt.Printf("Iframe blocked: %d\n", stats.IframeBlocked.Load())
|
fmt.Printf("Iframe blocked: %d\n", stats.IframeBlocked.Load())
|
||||||
fmt.Printf("Fetch errors: %d\n", stats.FetchErrors.Load())
|
fmt.Printf("Fetch errors: %d\n", stats.FetchErrors.Load())
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,7 @@ FROM (
|
||||||
) sub
|
) sub
|
||||||
WHERE h.id = sub.host_id;
|
WHERE h.id = sub.host_id;
|
||||||
|
|
||||||
-- Stats
|
-- Stats (human-readable)
|
||||||
\echo '--- Best Icon Selection Stats ---'
|
\echo '--- Best Icon Selection Stats ---'
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
|
|
@ -47,3 +47,22 @@ SELECT
|
||||||
COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL) AS title_but_no_icon
|
COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL) AS title_but_no_icon
|
||||||
FROM hosts
|
FROM hosts
|
||||||
WHERE parsed = TRUE;
|
WHERE parsed = TRUE;
|
||||||
|
|
||||||
|
-- Stats JSON
|
||||||
|
\! mkdir -p stats
|
||||||
|
\t on
|
||||||
|
\a
|
||||||
|
\o stats/04_best_icon.json
|
||||||
|
SELECT json_build_object(
|
||||||
|
'hosts_with_icon', COUNT(*) FILTER (WHERE best_icon_s3_key IS NOT NULL),
|
||||||
|
'hosts_without_icon', COUNT(*) FILTER (WHERE best_icon_s3_key IS NULL),
|
||||||
|
'hosts_with_title', COUNT(*) FILTER (WHERE html_title IS NOT NULL),
|
||||||
|
'hosts_no_title', COUNT(*) FILTER (WHERE html_title IS NULL),
|
||||||
|
'title_but_no_icon', COUNT(*) FILTER (WHERE html_title IS NOT NULL AND best_icon_s3_key IS NULL)
|
||||||
|
)
|
||||||
|
FROM hosts
|
||||||
|
WHERE parsed = TRUE;
|
||||||
|
\o
|
||||||
|
\t off
|
||||||
|
\a
|
||||||
|
\echo 'Stats written to stats/04_best_icon.json'
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,8 @@ func writeStats(stats *Stats) {
|
||||||
"total_hosts": stats.TotalHosts,
|
"total_hosts": stats.TotalHosts,
|
||||||
"hosts_with_icon": stats.HostsWithIcon,
|
"hosts_with_icon": stats.HostsWithIcon,
|
||||||
"hosts_no_icon": stats.HostsNoIcon,
|
"hosts_no_icon": stats.HostsNoIcon,
|
||||||
|
"bundled_with_icon": stats.BundledWithIcon.Load(),
|
||||||
|
"bundled_no_icon": stats.BundledNoIcon.Load(),
|
||||||
"convert_errors": stats.ConvertErrors.Load(),
|
"convert_errors": stats.ConvertErrors.Load(),
|
||||||
"bundles_created": stats.BundlesCreated,
|
"bundles_created": stats.BundlesCreated,
|
||||||
"total_bytes": stats.TotalBytes,
|
"total_bytes": stats.TotalBytes,
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,8 @@ type Stats struct {
|
||||||
HostsNoIcon int
|
HostsNoIcon int
|
||||||
BundlesCreated int
|
BundlesCreated int
|
||||||
ConvertErrors atomic.Int64
|
ConvertErrors atomic.Int64
|
||||||
|
BundledWithIcon atomic.Int64
|
||||||
|
BundledNoIcon atomic.Int64
|
||||||
TotalBytes int64
|
TotalBytes int64
|
||||||
StartedAt time.Time
|
StartedAt time.Time
|
||||||
}
|
}
|
||||||
|
|
@ -158,6 +160,14 @@ func main() {
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
|
||||||
|
for _, e := range pageEntries {
|
||||||
|
if e.Icon != "" {
|
||||||
|
stats.BundledWithIcon.Add(1)
|
||||||
|
} else {
|
||||||
|
stats.BundledNoIcon.Add(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
entryBuf = append(entryBuf, pageEntries...)
|
entryBuf = append(entryBuf, pageEntries...)
|
||||||
|
|
||||||
// Write complete bundles from the buffer
|
// Write complete bundles from the buffer
|
||||||
|
|
@ -226,6 +236,8 @@ func main() {
|
||||||
fmt.Printf("Total hosts: %d\n", stats.TotalHosts)
|
fmt.Printf("Total hosts: %d\n", stats.TotalHosts)
|
||||||
fmt.Printf("Hosts with icon: %d\n", stats.HostsWithIcon)
|
fmt.Printf("Hosts with icon: %d\n", stats.HostsWithIcon)
|
||||||
fmt.Printf("Hosts without icon: %d\n", stats.HostsNoIcon)
|
fmt.Printf("Hosts without icon: %d\n", stats.HostsNoIcon)
|
||||||
|
fmt.Printf("Bundled with icon: %d\n", stats.BundledWithIcon.Load())
|
||||||
|
fmt.Printf("Bundled without icon: %d\n", stats.BundledNoIcon.Load())
|
||||||
fmt.Printf("Convert errors: %d\n", stats.ConvertErrors.Load())
|
fmt.Printf("Convert errors: %d\n", stats.ConvertErrors.Load())
|
||||||
fmt.Printf("Bundles created: %d\n", stats.BundlesCreated)
|
fmt.Printf("Bundles created: %d\n", stats.BundlesCreated)
|
||||||
fmt.Printf("Total size: %.1f MB\n", float64(stats.TotalBytes)/(1024*1024))
|
fmt.Printf("Total size: %.1f MB\n", float64(stats.TotalBytes)/(1024*1024))
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue