package main import ( "context" "flag" "fmt" "log" "os" "sync" "sync/atomic" "time" "github.com/jackc/pgx/v5/pgxpool" ) type Config struct { DBUrl string IconsDir string SiteBucket string EntriesPerBundle int Concurrency int DryRun bool OutputDir string Limit int LogFile string LogErrors bool } type Stats struct { TotalHosts int HostsWithIcon int HostsNoIcon int BundlesCreated int ConvertErrors atomic.Int64 BundledWithIcon atomic.Int64 BundledNoIcon atomic.Int64 TotalBytes int64 StartedAt time.Time } func main() { cfg := Config{} flag.StringVar(&cfg.DBUrl, "db", "", "Postgres connection string (required)") flag.StringVar(&cfg.IconsDir, "icons-dir", "icons", "Directory with downloaded icons") flag.StringVar(&cfg.SiteBucket, "site-bucket", "everytab-site", "S3 bucket for the static site") flag.IntVar(&cfg.EntriesPerBundle, "entries-per-bundle", 120, "Tabs per bundle JSON file") flag.IntVar(&cfg.Concurrency, "concurrency", 200, "Concurrent icon conversions") flag.BoolVar(&cfg.DryRun, "dry-run", false, "Write bundles to local disk instead of S3") flag.StringVar(&cfg.OutputDir, "output-dir", "bundles", "Local output dir for dry-run mode") flag.IntVar(&cfg.Limit, "limit", 0, "Max hosts to process (0 = all)") flag.StringVar(&cfg.LogFile, "log-file", "", "Mirror log lines to this file") flag.BoolVar(&cfg.LogErrors, "log-errors-only", false, "Only write errors to log file") flag.Parse() if cfg.DBUrl == "" { fmt.Println("Usage: bundle_gen --db DATABASE_URL [OPTIONS]") flag.PrintDefaults() os.Exit(1) } ctx := context.Background() // Init S3 (for uploading bundles) if err := initS3(); err != nil { log.Fatalf("Failed to init S3: %v", err) } // Init DB pool, err := pgxpool.New(ctx, cfg.DBUrl) if err != nil { log.Fatalf("Failed to connect to database: %v", err) } defer pool.Close() // Setup log file var logWriter *LogWriter if cfg.LogFile != "" { logWriter, err = NewLogWriter(cfg.LogFile, cfg.LogErrors) if err != nil { log.Fatalf("Failed to open log file: %v", err) } defer logWriter.Close() } stats := &Stats{StartedAt: time.Now()} // Count hosts fmt.Println("=== Bundle Generator ===") var totalHosts, hostsWithIcon int err = pool.QueryRow(ctx, `SELECT COUNT(*) FROM hosts WHERE html_title IS NOT NULL`).Scan(&totalHosts) if err != nil { log.Fatalf("Failed to count hosts: %v", err) } err = pool.QueryRow(ctx, `SELECT COUNT(*) FROM hosts WHERE html_title IS NOT NULL AND best_icon_s3_key IS NOT NULL`).Scan(&hostsWithIcon) if err != nil { log.Fatalf("Failed to count icons: %v", err) } stats.TotalHosts = totalHosts stats.HostsWithIcon = hostsWithIcon stats.HostsNoIcon = totalHosts - hostsWithIcon fmt.Printf("Total hosts: %d (with icon: %d, no icon: %d)\n", totalHosts, hostsWithIcon, totalHosts-hostsWithIcon) fmt.Printf("Entries per bundle: %d\n", cfg.EntriesPerBundle) fmt.Printf("Concurrency: %d\n", cfg.Concurrency) fmt.Printf("Dry run: %v\n\n", cfg.DryRun) if cfg.DryRun { os.MkdirAll(cfg.OutputDir, 0755) } // Stream hosts from DB in pages, convert icons, write bundles incrementally // Bundles are written in-place (overwriting previous run). No delete-first step, // so the live site always has valid data even if bundle gen crashes midway. fmt.Println("Processing hosts and writing bundles...") bundleCount := 0 var totalBytes int64 var lastRandom float64 = -1 pageSize := cfg.EntriesPerBundle * 50 // fetch 50 bundles worth at a time var entryBuf []BundleEntry hostsProcessed := 0 for { // Fetch a page of hosts limit := pageSize if cfg.Limit > 0 { remaining := cfg.Limit - hostsProcessed if remaining <= 0 { break } if limit > remaining { limit = remaining } } hosts, err := fetchHostsPage(ctx, pool, lastRandom, limit) if err != nil { log.Fatalf("Failed to fetch hosts: %v", err) } if len(hosts) == 0 { break } lastRandom = hosts[len(hosts)-1].RandomOrder hostsProcessed += len(hosts) // Convert icons concurrently for this page pageEntries := make([]BundleEntry, len(hosts)) var wg sync.WaitGroup sem := make(chan struct{}, cfg.Concurrency) for i, host := range hosts { wg.Add(1) sem <- struct{}{} go func(idx int, h HostRow) { defer wg.Done() defer func() { <-sem }() pageEntries[idx] = buildEntry(h, cfg.IconsDir, logWriter, stats) }(i, host) } wg.Wait() for _, e := range pageEntries { if e.Icon != "" { stats.BundledWithIcon.Add(1) } else { stats.BundledNoIcon.Add(1) } } entryBuf = append(entryBuf, pageEntries...) // Write complete bundles from the buffer for len(entryBuf) >= cfg.EntriesPerBundle { chunk := entryBuf[:cfg.EntriesPerBundle] entryBuf = entryBuf[cfg.EntriesPerBundle:] data, err := serializeBundle(chunk) if err != nil { log.Fatalf("Failed to serialize bundle %d: %v", bundleCount, err) } if cfg.DryRun { err = writeBundleLocal(cfg.OutputDir, bundleCount, data) } else { err = writeBundleS3(cfg.SiteBucket, bundleCount, data) } if err != nil { log.Fatalf("Failed to write bundle %d: %v", bundleCount, err) } logLine := fmt.Sprintf("bundle: %04d.json %d entries %dKB", bundleCount, len(chunk), len(data)/1024) fmt.Println(logLine) if logWriter != nil { logWriter.Write(logLine, false) } totalBytes += int64(len(data)) bundleCount++ } } // Write final partial bundle if len(entryBuf) > 0 { data, err := serializeBundle(entryBuf) if err != nil { log.Fatalf("Failed to serialize final bundle: %v", err) } if cfg.DryRun { err = writeBundleLocal(cfg.OutputDir, bundleCount, data) } else { err = writeBundleS3(cfg.SiteBucket, bundleCount, data) } if err != nil { log.Fatalf("Failed to write final bundle: %v", err) } logLine := fmt.Sprintf("bundle: %04d.json %d entries %dKB", bundleCount, len(entryBuf), len(data)/1024) fmt.Println(logLine) if logWriter != nil { logWriter.Write(logLine, false) } totalBytes += int64(len(data)) bundleCount++ } stats.BundlesCreated = bundleCount stats.TotalBytes = totalBytes // Summary duration := time.Since(stats.StartedAt) fmt.Printf("\n=== Summary ===\n") fmt.Printf("Duration: %s\n", duration.Round(time.Second)) fmt.Printf("Total hosts: %d\n", stats.TotalHosts) fmt.Printf("Hosts with icon: %d\n", stats.HostsWithIcon) fmt.Printf("Hosts without icon: %d\n", stats.HostsNoIcon) fmt.Printf("Bundled with icon: %d\n", stats.BundledWithIcon.Load()) fmt.Printf("Bundled without icon: %d\n", stats.BundledNoIcon.Load()) fmt.Printf("Convert errors: %d\n", stats.ConvertErrors.Load()) fmt.Printf("Bundles created: %d\n", stats.BundlesCreated) fmt.Printf("Total size: %.1f MB\n", float64(stats.TotalBytes)/(1024*1024)) fmt.Printf("Avg bundle size: %.0f KB\n", float64(stats.TotalBytes)/float64(max(stats.BundlesCreated, 1))/1024) fmt.Printf("TOTAL_BUNDLES = %d (bake this into the frontend)\n", stats.BundlesCreated) writeStats(stats) }