package main import ( "crypto/sha256" "encoding/hex" "errors" "fmt" "io" "net" "net/http" "strings" "time" ) // processIcon downloads, validates, and stores a single icon. func processIcon(icon IconRow, cfg Config) DownloadResult { // Download data, contentType, err := downloadIcon(icon.URL, cfg.Timeout, cfg.MaxSize) if err != nil { errType := classifyError(err) return DownloadResult{Err: err.Error(), ErrType: errType} } // Validate it's an image detectedType := detectImageType(data) if detectedType == "" { return DownloadResult{Err: "not a valid image", ErrType: "invalid"} } // Use detected type over HTTP Content-Type (more reliable) if contentType == "" || contentType == "application/octet-stream" { contentType = detectedType } // Get dimensions width, height := getImageDimensions(data, detectedType) // Compute SHA-256 for content-addressed storage hash := sha256.Sum256(data) iconHash := hex.EncodeToString(hash[:]) // Write to disk (skip if already exists — dedup) dedup := false if !cfg.DryRun { if iconExists(iconHash) { dedup = true } else { if err := iconWrite(iconHash, data); err != nil { return DownloadResult{Err: fmt.Sprintf("disk write: %v", err), ErrType: "other"} } } } return DownloadResult{ IconHash: iconHash, ContentType: contentType, Width: width, Height: height, FileSize: len(data), Dedup: dedup, } } // httpTransport is shared across all goroutines for connection pooling and TLS session reuse. var httpTransport = &http.Transport{ MaxIdleConns: 0, MaxIdleConnsPerHost: 0, DisableKeepAlives: true, DialContext: (&net.Dialer{ Timeout: 5 * time.Second, KeepAlive: 30 * time.Second, }).DialContext, TLSHandshakeTimeout: 5 * time.Second, } // downloadIcon fetches an icon URL with timeouts and size limits. func downloadIcon(url string, timeout time.Duration, maxSize int64) ([]byte, string, error) { client := &http.Client{ Timeout: timeout, Transport: httpTransport, CheckRedirect: func(req *http.Request, via []*http.Request) error { if len(via) >= 3 { return fmt.Errorf("too many redirects") } return nil }, } req, err := http.NewRequest("GET", url, nil) if err != nil { return nil, "", fmt.Errorf("bad url: %w", err) } req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; EveryTabBot/1.0; +https://everytab.site/bot.html)") resp, err := client.Do(req) if err != nil { return nil, "", err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, "", fmt.Errorf("http %d", resp.StatusCode) } // Read with size limit limited := io.LimitReader(resp.Body, maxSize+1) data, err := io.ReadAll(limited) if err != nil { return nil, "", fmt.Errorf("read: %w", err) } if int64(len(data)) > maxSize { return nil, "", fmt.Errorf("exceeds %dKB", maxSize/1024) } contentType := resp.Header.Get("Content-Type") // Strip charset suffix if idx := strings.Index(contentType, ";"); idx != -1 { contentType = strings.TrimSpace(contentType[:idx]) } return data, contentType, nil } // classifyError categorizes a download error for stats. func classifyError(err error) string { msg := err.Error() // DNS errors var dnsErr *net.DNSError if errors.As(err, &dnsErr) { return "dns" } // Timeouts if netErr, ok := err.(net.Error); ok && netErr.Timeout() { return "timeout" } if strings.Contains(msg, "timeout") || strings.Contains(msg, "deadline") { return "timeout" } // HTTP errors if strings.Contains(msg, "http ") { return "http" } // Too large if strings.Contains(msg, "exceeds") { return "too_large" } return "other" }