157 lines
3.8 KiB
Go
157 lines
3.8 KiB
Go
package main
|
|
|
|
import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"net"
	"net/http"
	"net/url"
	"strings"
	"time"
)
|
|
|
|
// processIcon downloads, validates, and uploads a single icon.
|
|
func processIcon(ctx context.Context, icon IconRow, cfg Config) DownloadResult {
|
|
// Download
|
|
data, contentType, err := downloadIcon(icon.URL, cfg.Timeout, cfg.MaxSize)
|
|
if err != nil {
|
|
errType := classifyError(err)
|
|
return DownloadResult{Err: err.Error(), ErrType: errType}
|
|
}
|
|
|
|
// Validate it's an image
|
|
detectedType := detectImageType(data)
|
|
if detectedType == "" {
|
|
return DownloadResult{Err: "not a valid image", ErrType: "invalid"}
|
|
}
|
|
|
|
// Use detected type over HTTP Content-Type (more reliable)
|
|
if contentType == "" || contentType == "application/octet-stream" {
|
|
contentType = detectedType
|
|
}
|
|
|
|
// Get dimensions
|
|
width, height := getImageDimensions(data, detectedType)
|
|
|
|
// Compute SHA-256 for content-addressed storage
|
|
hash := sha256.Sum256(data)
|
|
s3Key := hex.EncodeToString(hash[:])
|
|
|
|
// Upload to S3 (skip if already exists — dedup)
|
|
dedup := false
|
|
if !cfg.DryRun {
|
|
exists, err := s3Exists(ctx, s3Key)
|
|
if err == nil && exists {
|
|
dedup = true
|
|
} else {
|
|
if err := s3Upload(ctx, s3Key, data, contentType); err != nil {
|
|
return DownloadResult{Err: fmt.Sprintf("s3 upload: %v", err), ErrType: "other"}
|
|
}
|
|
}
|
|
}
|
|
|
|
return DownloadResult{
|
|
S3Key: s3Key,
|
|
ContentType: contentType,
|
|
Width: width,
|
|
Height: height,
|
|
FileSize: len(data),
|
|
Dedup: dedup,
|
|
}
|
|
}
|
|
|
|
// httpTransport is the single package-level transport handed to every HTTP
// client built in this file, so idle connections are pooled and reused
// between downloads instead of being re-established each time.
var httpTransport = &http.Transport{
	// Connection establishment: cap both the TCP dial and the TLS handshake.
	DialContext: (&net.Dialer{
		KeepAlive: 30 * time.Second,
		Timeout:   5 * time.Second,
	}).DialContext,
	TLSHandshakeTimeout: 5 * time.Second,

	// Idle pool: keep-alives stay enabled, with a large overall pool but only
	// a couple of idle connections retained per host.
	DisableKeepAlives:   false,
	IdleConnTimeout:     30 * time.Second,
	MaxIdleConns:        1000,
	MaxIdleConnsPerHost: 2,
}
|
|
|
|
// downloadIcon fetches an icon URL with timeouts and size limits.
|
|
func downloadIcon(url string, timeout time.Duration, maxSize int64) ([]byte, string, error) {
|
|
client := &http.Client{
|
|
Timeout: timeout,
|
|
Transport: httpTransport,
|
|
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
|
if len(via) >= 3 {
|
|
return fmt.Errorf("too many redirects")
|
|
}
|
|
return nil
|
|
},
|
|
}
|
|
|
|
req, err := http.NewRequest("GET", url, nil)
|
|
if err != nil {
|
|
return nil, "", fmt.Errorf("bad url: %w", err)
|
|
}
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; EveryTabBot/1.0; +https://everytab.site/bot)")
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, "", err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, "", fmt.Errorf("http %d", resp.StatusCode)
|
|
}
|
|
|
|
// Read with size limit
|
|
limited := io.LimitReader(resp.Body, maxSize+1)
|
|
data, err := io.ReadAll(limited)
|
|
if err != nil {
|
|
return nil, "", fmt.Errorf("read: %w", err)
|
|
}
|
|
if int64(len(data)) > maxSize {
|
|
return nil, "", fmt.Errorf("exceeds %dKB", maxSize/1024)
|
|
}
|
|
|
|
contentType := resp.Header.Get("Content-Type")
|
|
// Strip charset suffix
|
|
if idx := strings.Index(contentType, ";"); idx != -1 {
|
|
contentType = strings.TrimSpace(contentType[:idx])
|
|
}
|
|
|
|
return data, contentType, nil
|
|
}
|
|
|
|
// classifyError maps a download error to a coarse category used for stats.
//
// The result is one of "dns", "timeout", "http", "too_large" or "other".
// Typed checks use errors.As so they also match errors wrapped by
// *url.Error, *net.OpError or fmt.Errorf("%w"). The substring fallbacks run
// against the message with the *url.Error prefix removed, because that prefix
// embeds the request URL and a URL containing "dns" or "timeout" would
// otherwise decide the category.
//
// err must be non-nil.
func classifyError(err error) string {
	// Text used for substring matching. For errors produced by the HTTP
	// client, drop the `Get "<url>": ` prefix and look at the cause only.
	msg := err.Error()
	var urlErr *url.Error
	if errors.As(err, &urlErr) && urlErr.Err != nil {
		msg = urlErr.Err.Error()
	}

	// DNS errors
	var dnsErr *net.DNSError
	if errors.As(err, &dnsErr) {
		return "dns"
	}
	if strings.Contains(msg, "no such host") || strings.Contains(msg, "dns") {
		return "dns"
	}

	// Timeouts
	var netErr net.Error
	if errors.As(err, &netErr) && netErr.Timeout() {
		return "timeout"
	}
	if strings.Contains(msg, "timeout") || strings.Contains(msg, "deadline") {
		return "timeout"
	}

	// HTTP status errors (formatted as "http <code>")
	if strings.Contains(msg, "http ") {
		return "http"
	}

	// Body larger than the configured limit
	if strings.Contains(msg, "exceeds") {
		return "too_large"
	}

	return "other"
}
|