package main

import (
	"encoding/json"
	"fmt"
	"os"
	"sync"
	"time"
)

// LogWriter handles writing log lines to a file.
//
// Write holds mu while it writes, so concurrent Write calls do not interleave
// their output. When errorsOnly is set, lines not flagged as errors are
// dropped before the lock is taken.
type LogWriter struct {
	file       *os.File
	mu         sync.Mutex // held by Write for the duration of each write
	errorsOnly bool       // when true, Write discards non-error lines
}

// NewLogWriter opens path in append mode, creating it with permissions 0644
// if it does not exist, and returns a LogWriter backed by that file.
// errorsOnly selects whether Write keeps only lines flagged as errors.
// The error from os.OpenFile is returned unchanged.
func NewLogWriter(path string, errorsOnly bool) (*LogWriter, error) {
	f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	if err != nil {
		return nil, err
	}
	return &LogWriter{file: f, errorsOnly: errorsOnly}, nil
}

// Write appends line, followed by a newline, to the log file.
// If the writer was created with errorsOnly and isError is false, the line is
// skipped.
func (lw *LogWriter) Write(line string, isError bool) {
	if lw.errorsOnly && !isError {
		return
	}
	lw.mu.Lock()
	defer lw.mu.Unlock()
	// Write has no error return, so a failed write is dropped here on
	// purpose rather than ignored implicitly.
	_, _ = fmt.Fprintln(lw.file, line)
}

// Close closes the underlying log file and returns the error from doing so.
func (lw *LogWriter) Close() error {
	return lw.file.Close()
}

// formatLogLine creates a concise one-line log for a processed host.
//
// When result.Err is set the line has the form
//
//	parsed: <hostname> err:<fetch|parse> <error>
//
// where the error kind is "fetch" if result.FetchErr is true and "parse"
// otherwise. On success the line has the form
//
//	parsed: <hostname> "<title>" icons:<n> iframe:<ok|no>
//
// The title is cut to its first 20 characters followed by "..." when it is
// longer than that. The cut is made on a character boundary so a multi-byte
// UTF-8 sequence is never split.
func formatLogLine(host Host, result ProcessResult) string {
	if result.Err != nil {
		errType := "parse"
		if result.FetchErr {
			errType = "fetch"
		}
		return fmt.Sprintf("parsed: %s err:%s %v", host.Hostname, errType, result.Err)
	}

	const maxTitleRunes = 20
	title := result.Title
	seen := 0
	// Ranging over a string yields the byte offset at which each character
	// starts, so offset is always a safe place to cut.
	for offset := range title {
		if seen == maxTitleRunes {
			title = title[:offset] + "..."
			break
		}
		seen++
	}

	iconCount := len(result.Icons) + 1 // +1 for /favicon.ico

	iframe := "iframe:ok"
	if !result.IframeAllowed {
		iframe = "iframe:no"
	}

	return fmt.Sprintf("parsed: %s \"%s\" icons:%d %s", host.Hostname, title, iconCount, iframe)
}

// writeStats writes the stage stats to a JSON file.
func writeStats(stats *Stats, cfg Config) { finishedAt := time.Now() duration := finishedAt.Sub(stats.StartedAt) data := map[string]interface{}{ "started_at": stats.StartedAt.Format(time.RFC3339), "finished_at": finishedAt.Format(time.RFC3339), "duration_seconds": int(duration.Seconds()), "processed": stats.Processed.Load(), "titles_found": stats.TitlesFound.Load(), "no_title": stats.NoTitle.Load(), "icons_found": stats.IconsFound.Load(), "iframe_blocked": stats.IframeBlocked.Load(), "fetch_errors": stats.FetchErrors.Load(), "parse_errors": stats.ParseErrors.Load(), "db_errors": stats.DBErrors.Load(), "panics": stats.Panics.Load(), } os.MkdirAll("stats", 0755) f, err := os.Create("stats/02_warc_parse.json") if err != nil { fmt.Printf("Failed to write stats: %v\n", err) return } defer f.Close() enc := json.NewEncoder(f) enc.SetIndent("", " ") enc.Encode(data) fmt.Println("Stats written to stats/02_warc_parse.json") }