95 lines
2.3 KiB
Go
95 lines
2.3 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// LogWriter appends log lines to a single file.
//
// Write serialises access to the file through mu, and errorsOnly makes Write
// drop every line that is not flagged as an error.
type LogWriter struct {
	file       *os.File   // destination file that Write prints to and Close closes
	mu         sync.Mutex // held by Write for the duration of each line
	errorsOnly bool       // when true, Write skips lines with isError == false
}
|
|
|
|
func NewLogWriter(path string, errorsOnly bool) (*LogWriter, error) {
|
|
f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &LogWriter{file: f, errorsOnly: errorsOnly}, nil
|
|
}
|
|
|
|
func (lw *LogWriter) Write(line string, isError bool) {
|
|
if lw.errorsOnly && !isError {
|
|
return
|
|
}
|
|
lw.mu.Lock()
|
|
defer lw.mu.Unlock()
|
|
fmt.Fprintln(lw.file, line)
|
|
}
|
|
|
|
func (lw *LogWriter) Close() error {
|
|
return lw.file.Close()
|
|
}
|
|
|
|
// formatLogLine creates a concise one-line log for a processed host.
|
|
func formatLogLine(host Host, result ProcessResult) string {
|
|
title := result.Title
|
|
if len(title) > 20 {
|
|
title = title[:20] + "..."
|
|
}
|
|
|
|
if result.Err != nil {
|
|
errType := "parse"
|
|
if result.FetchErr {
|
|
errType = "fetch"
|
|
}
|
|
return fmt.Sprintf("parsed: %s err:%s %v", host.Hostname, errType, result.Err)
|
|
}
|
|
|
|
iconCount := len(result.Icons) + 1 // +1 for /favicon.ico
|
|
iframe := "iframe:ok"
|
|
if !result.IframeAllowed {
|
|
iframe = "iframe:no"
|
|
}
|
|
|
|
return fmt.Sprintf("parsed: %s \"%s\" icons:%d %s", host.Hostname, title, iconCount, iframe)
|
|
}
|
|
|
|
// writeStats writes the stage stats to a JSON file.
|
|
func writeStats(stats *Stats, cfg Config) {
|
|
finishedAt := time.Now()
|
|
duration := finishedAt.Sub(stats.StartedAt)
|
|
|
|
data := map[string]interface{}{
|
|
"started_at": stats.StartedAt.Format(time.RFC3339),
|
|
"finished_at": finishedAt.Format(time.RFC3339),
|
|
"duration_seconds": int(duration.Seconds()),
|
|
"processed": stats.Processed.Load(),
|
|
"titles_found": stats.TitlesFound.Load(),
|
|
"no_title": stats.NoTitle.Load(),
|
|
"icons_found": stats.IconsFound.Load(),
|
|
"iframe_blocked": stats.IframeBlocked.Load(),
|
|
"fetch_errors": stats.FetchErrors.Load(),
|
|
"parse_errors": stats.ParseErrors.Load(),
|
|
"db_errors": stats.DBErrors.Load(),
|
|
"panics": stats.Panics.Load(),
|
|
}
|
|
|
|
os.MkdirAll("stats", 0755)
|
|
f, err := os.Create("stats/02_warc_parse.json")
|
|
if err != nil {
|
|
fmt.Printf("Failed to write stats: %v\n", err)
|
|
return
|
|
}
|
|
defer f.Close()
|
|
|
|
enc := json.NewEncoder(f)
|
|
enc.SetIndent("", " ")
|
|
enc.Encode(data)
|
|
fmt.Println("Stats written to stats/02_warc_parse.json")
|
|
}
|