From cf17fc42b1c326023e71075baadf31bd23cbe4bc Mon Sep 17 00:00:00 2001 From: Joe Lothan Date: Tue, 19 May 2026 10:32:34 -0400 Subject: [PATCH] fixed icon downloading performance issues --- pipeline/03_icon_download/db.go | 4 ---- pipeline/03_icon_download/main.go | 10 +++------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/pipeline/03_icon_download/db.go b/pipeline/03_icon_download/db.go index 2e142dd..12a56c7 100644 --- a/pipeline/03_icon_download/db.go +++ b/pipeline/03_icon_download/db.go @@ -20,10 +20,6 @@ func claimBatch(ctx context.Context, pool *pgxpool.Pool, limit int) ([]IconRow, WHERE id IN ( SELECT id FROM icons WHERE scan_state = 'unscanned' - AND (source = 'favicon_ico' - OR rel_sizes IS NULL - OR rel_sizes IN ('16x16','32x32','48x48','64x64')) - ORDER BY md5(id::text) LIMIT $1 FOR UPDATE SKIP LOCKED ) diff --git a/pipeline/03_icon_download/main.go b/pipeline/03_icon_download/main.go index b345e1b..0a67d9d 100644 --- a/pipeline/03_icon_download/main.go +++ b/pipeline/03_icon_download/main.go @@ -46,7 +46,7 @@ func main() { cfg := Config{} flag.StringVar(&cfg.DBUrl, "db", "", "Postgres connection string (required)") flag.StringVar(&cfg.IconsDir, "icons-dir", "icons", "Directory to store downloaded icons") - flag.IntVar(&cfg.BatchSize, "batch-size", 200, "Rows to claim per batch") + flag.IntVar(&cfg.BatchSize, "batch-size", 5000, "Rows to claim per batch") flag.IntVar(&cfg.Concurrency, "concurrency", 200, "Number of concurrent goroutines") flag.IntVar(&cfg.Limit, "limit", 0, "Max icons to process (0 = all)") flag.DurationVar(&cfg.Timeout, "timeout", 10*time.Second, "HTTP request timeout") @@ -79,11 +79,7 @@ func main() { // Count eligible icons var total int64 err = pool.QueryRow(ctx, ` - SELECT COUNT(*) FROM icons - WHERE scan_state = 'unscanned' - AND (source = 'favicon_ico' - OR rel_sizes IS NULL - OR rel_sizes IN ('16x16','32x32','48x48','64x64')) + SELECT COUNT(*) FROM icons WHERE scan_state = 'unscanned' `).Scan(&total) if err != nil { log.Fatalf("Failed to count icons: %v", err) @@ -119,7 +115,7 @@ func main() { stats := &Stats{StartedAt: time.Now()} // Feed icons into a channel so workers never starve waiting for batch claims - iconCh := make(chan IconRow, cfg.Concurrency*2) + iconCh := make(chan IconRow, cfg.BatchSize) go func() { defer close(iconCh) claimed := 0