added bundle generation

This commit is contained in:
Joe Lothan 2026-05-17 23:02:34 -04:00
parent ca06a91dc6
commit f89883e745
8 changed files with 536 additions and 0 deletions

View file

@ -0,0 +1,85 @@
package main
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
)
// BundleEntry is one tab in a bundle JSON file.
//
// buildEntry fills it from a HostRow: Host, Title and IframeOk are copied
// from the row, while the icon fields are only set when the host's icon was
// converted successfully.
type BundleEntry struct {
// Host is the hostname of the site (HostRow.Hostname).
Host string `json:"host"`
// Title is the page's HTML title (HostRow.HtmlTitle).
Title string `json:"title"`
// Icon is the base64-encoded PNG icon, or "" when the host has no icon or
// the conversion failed.
Icon string `json:"icon"`
// IconW and IconH are the pixel dimensions of Icon. They are omitted from
// the JSON when zero.
IconW int `json:"icon_w,omitempty"`
IconH int `json:"icon_h,omitempty"`
// IframeOk mirrors HostRow.IframeAllowed.
IframeOk bool `json:"iframe_ok"`
}
// Bundle is the top-level JSON structure: an object with a single "entries"
// array holding the tabs of one bundle file.
type Bundle struct {
// Entries is the list of tabs in this bundle.
Entries []BundleEntry `json:"entries"`
}
// buildEntry turns one host row into the BundleEntry that will be written to
// a bundle file.
//
// Host, Title and IframeOk are always copied from the row. When the row has an
// icon key, the icon is fetched and converted through safeConvert; on success
// the encoded icon and its dimensions are attached. On failure the entry is
// returned without an icon, stats.ConvertErrors is incremented, and a
// CONVERT_ERROR line is printed to stdout and, if logWriter is non-nil,
// written to it as an error line.
func buildEntry(ctx context.Context, host HostRow, iconsBucket string, logWriter *LogWriter, stats *Stats) BundleEntry {
	result := BundleEntry{
		Host:     host.Hostname,
		Title:    host.HtmlTitle,
		Icon:     "",
		IframeOk: host.IframeAllowed,
	}

	if host.BestIconS3Key != "" {
		icon, iconW, iconH, failure := safeConvert(ctx, host.BestIconS3Key, iconsBucket)
		if failure == "" {
			result.Icon, result.IconW, result.IconH = icon, iconW, iconH
			return result
		}

		// Conversion failed: count it, report it, and fall through to return
		// the icon-less entry.
		stats.ConvertErrors.Add(1)
		msg := fmt.Sprintf("CONVERT_ERROR: %s %s", host.Hostname, failure)
		fmt.Println(msg)
		if logWriter != nil {
			logWriter.Write(msg, true)
		}
	}

	return result
}
// safeConvert calls convertIconToBase64PNG and shields the caller from both
// ordinary errors and panics raised during the conversion.
//
// On success it returns the encoded icon, its width and height, and an empty
// errMsg. On failure the first three results are zero values and errMsg holds
// either the error text or "panic: <value>" for a recovered panic.
func safeConvert(ctx context.Context, s3Key, iconsBucket string) (encoded string, w, h int, errMsg string) {
	// The named result errMsg lets the deferred recover report a panic to the
	// caller; the other named results are still zero at that point.
	defer func() {
		recovered := recover()
		if recovered == nil {
			return
		}
		errMsg = fmt.Sprintf("panic: %v", recovered)
	}()

	b64, iconW, iconH, convErr := convertIconToBase64PNG(ctx, s3Key, iconsBucket)
	if convErr == nil {
		return b64, iconW, iconH, ""
	}
	return "", 0, 0, convErr.Error()
}
// serializeBundle wraps entries in a Bundle and returns its JSON encoding
// as produced by json.Marshal.
func serializeBundle(entries []BundleEntry) ([]byte, error) {
	return json.Marshal(Bundle{Entries: entries})
}
// writeBundleLocal writes one serialized bundle to outputDir as
// "<index>.json", with the index zero-padded to four digits, using file
// mode 0644. It does not create outputDir.
func writeBundleLocal(outputDir string, index int, data []byte) error {
	name := fmt.Sprintf("%04d.json", index)
	return os.WriteFile(filepath.Join(outputDir, name), data, 0644)
}
// writeBundleS3 uploads one serialized bundle to bucket under the key
// "tabs/<index>.json", with the index zero-padded to four digits.
func writeBundleS3(ctx context.Context, bucket string, index int, data []byte) error {
	return s3UploadBundle(ctx, bucket, fmt.Sprintf("tabs/%04d.json", index), data)
}

View file

@ -0,0 +1,82 @@
package main
import (
"bytes"
"context"
"encoding/base64"
"fmt"
"image"
"image/png"
_ "image/gif"
_ "image/jpeg"
_ "github.com/biessek/golang-ico"
_ "golang.org/x/image/webp"
)
// convertIconToBase64PNG downloads the icon stored at s3Key in iconsBucket,
// re-encodes it as PNG and returns the PNG bytes in standard base64 together
// with the dimensions of the encoded image.
//
// Data that isSVG recognises is rejected up front because there is no raster
// decoder for it. Everything else goes through image.Decode, so the accepted
// formats are whatever decoders are registered with the image package.
// Images wider or taller than 128px are shrunk to a fixed 32x32 thumbnail.
func convertIconToBase64PNG(ctx context.Context, s3Key string, iconsBucket string) (encoded string, width, height int, err error) {
	const (
		maxSide   = 128 // largest side kept at original size
		thumbSide = 32  // side length of the downscaled thumbnail
	)

	raw, dlErr := s3Download(ctx, iconsBucket, s3Key)
	if dlErr != nil {
		return "", 0, 0, fmt.Errorf("s3 download: %w", dlErr)
	}

	if isSVG(raw) {
		return "", 0, 0, fmt.Errorf("svg not supported")
	}

	decoded, _, decErr := image.Decode(bytes.NewReader(raw))
	if decErr != nil {
		return "", 0, 0, fmt.Errorf("decode: %w", decErr)
	}

	// Keep bundle sizes reasonable by replacing oversized icons with a
	// thumbnail.
	size := decoded.Bounds().Size()
	width, height = size.X, size.Y
	if width > maxSide || height > maxSide {
		decoded = resizeNearestNeighbor(decoded, thumbSide, thumbSide)
		width, height = thumbSide, thumbSide
	}

	out := new(bytes.Buffer)
	if encErr := png.Encode(out, decoded); encErr != nil {
		return "", 0, 0, fmt.Errorf("png encode: %w", encErr)
	}
	return base64.StdEncoding.EncodeToString(out.Bytes()), width, height, nil
}
// resizeNearestNeighbor scales src to dstW x dstH by sampling, for every
// destination pixel, the source pixel at the proportionally scaled
// coordinate (integer division, no interpolation). The result is a new RGBA
// image whose bounds start at (0, 0); src is not modified.
func resizeNearestNeighbor(src image.Image, dstW, dstH int) image.Image {
	from := src.Bounds()
	fromW, fromH := from.Dx(), from.Dy()

	out := image.NewRGBA(image.Rect(0, 0, dstW, dstH))
	for row := 0; row < dstH; row++ {
		sampleY := from.Min.Y + row*fromH/dstH
		for col := 0; col < dstW; col++ {
			sampleX := from.Min.X + col*fromW/dstW
			out.Set(col, row, src.At(sampleX, sampleY))
		}
	}
	return out
}
// isSVG reports whether data looks like SVG/XML text. It only inspects the
// first 256 bytes and returns true when they contain "<svg" or "<?xml".
// Inputs shorter than five bytes are never treated as SVG.
func isSVG(data []byte) bool {
	if len(data) < 5 {
		return false
	}

	head := data
	if len(head) > 256 {
		head = head[:256]
	}

	for _, marker := range [][]byte{[]byte("<svg"), []byte("<?xml")} {
		if bytes.Contains(head, marker) {
			return true
		}
	}
	return false
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}

View file

@ -0,0 +1,63 @@
package main
import (
"context"
"github.com/jackc/pgx/v5/pgxpool"
)
// HostRow is one row of the hosts table as selected by fetchHosts.
type HostRow struct {
// Hostname is the hosts.hostname column.
Hostname string
// Protocol is the hosts.protocol column.
Protocol string
// HtmlTitle is the hosts.html_title column; fetchHosts only selects rows
// where it is not NULL.
HtmlTitle string
// IframeAllowed is hosts.iframe_allowed, defaulting to true when the column
// is NULL.
IframeAllowed bool
// BestIconS3Key is hosts.best_icon_s3_key, or "" when the column is NULL.
BestIconS3Key string
}
// fetchHosts returns the hosts that have an HTML title, in random order.
//
// NULL iframe_allowed values are read as true and NULL best_icon_s3_key
// values as "". When limit is greater than zero at most limit rows are
// returned; otherwise every matching row is returned.
//
// A query or scan error is returned as-is. If row iteration ends with an
// error, the rows read so far are returned together with that error.
func fetchHosts(ctx context.Context, pool *pgxpool.Pool, limit int) ([]HostRow, error) {
	query := `
SELECT hostname, protocol, html_title, COALESCE(iframe_allowed, true), COALESCE(best_icon_s3_key, '')
FROM hosts
WHERE html_title IS NOT NULL
ORDER BY random()
`
	// The LIMIT clause and its bind argument are added together so the
	// limited and unlimited cases share one query and one scan loop.
	var args []any
	if limit > 0 {
		query += " LIMIT $1"
		args = append(args, limit)
	}

	rows, err := pool.Query(ctx, query, args...)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var hosts []HostRow
	for rows.Next() {
		var h HostRow
		if err := rows.Scan(&h.Hostname, &h.Protocol, &h.HtmlTitle, &h.IframeAllowed, &h.BestIconS3Key); err != nil {
			return nil, err
		}
		hosts = append(hosts, h)
	}
	return hosts, rows.Err()
}

View file

@ -0,0 +1,66 @@
package main
import (
"encoding/json"
"fmt"
"os"
"sync"
"time"
)
// LogWriter appends log lines to a file. Writes are serialized with a mutex
// so several goroutines can share one LogWriter.
type LogWriter struct {
	file       *os.File
	mu         sync.Mutex
	errorsOnly bool // when set, Write drops lines not flagged as errors
}

// NewLogWriter opens path for appending, creating it with mode 0644 if it
// does not exist, and returns a LogWriter backed by it. When errorsOnly is
// true the writer keeps only lines passed to Write with isError set.
func NewLogWriter(path string, errorsOnly bool) (*LogWriter, error) {
	const openFlags = os.O_CREATE | os.O_WRONLY | os.O_APPEND

	handle, err := os.OpenFile(path, openFlags, 0644)
	if err != nil {
		return nil, err
	}

	lw := &LogWriter{errorsOnly: errorsOnly}
	lw.file = handle
	return lw, nil
}

// Write appends line followed by a newline to the log file. In errors-only
// mode, lines with isError == false are dropped. Write errors are ignored.
func (lw *LogWriter) Write(line string, isError bool) {
	if !isError && lw.errorsOnly {
		return
	}

	lw.mu.Lock()
	defer lw.mu.Unlock()
	_, _ = fmt.Fprintln(lw.file, line) // logging is best-effort
}

// Close closes the underlying file.
func (lw *LogWriter) Close() error {
	return lw.file.Close()
}
// writeStats writes a JSON summary of the run to stats/05_bundle_gen.json,
// creating the stats directory if needed.
//
// The summary holds the start and finish times (RFC 3339), the duration in
// whole seconds, and the counters from stats. Any failure (creating the
// directory, creating the file, encoding, or closing) is reported on stdout
// and the success message is printed only when the file was fully written.
func writeStats(stats *Stats) {
	finishedAt := time.Now()
	duration := finishedAt.Sub(stats.StartedAt)
	data := map[string]interface{}{
		"started_at":       stats.StartedAt.Format(time.RFC3339),
		"finished_at":      finishedAt.Format(time.RFC3339),
		"duration_seconds": int(duration.Seconds()),
		"total_hosts":      stats.TotalHosts,
		"hosts_with_icon":  stats.HostsWithIcon,
		"hosts_no_icon":    stats.HostsNoIcon,
		"convert_errors":   stats.ConvertErrors.Load(),
		"bundles_created":  stats.BundlesCreated,
		"total_bytes":      stats.TotalBytes,
	}

	if err := os.MkdirAll("stats", 0755); err != nil {
		fmt.Printf("Failed to write stats: %v\n", err)
		return
	}
	f, err := os.Create("stats/05_bundle_gen.json")
	if err != nil {
		fmt.Printf("Failed to write stats: %v\n", err)
		return
	}

	enc := json.NewEncoder(f)
	enc.SetIndent("", " ")
	encodeErr := enc.Encode(data)
	// Close explicitly rather than via defer so a failed flush is noticed
	// before success is reported.
	closeErr := f.Close()

	if encodeErr != nil {
		fmt.Printf("Failed to write stats: %v\n", encodeErr)
		return
	}
	if closeErr != nil {
		fmt.Printf("Failed to write stats: %v\n", closeErr)
		return
	}
	fmt.Println("Stats written to stats/05_bundle_gen.json")
}

View file

@ -0,0 +1,188 @@
package main
import (
"context"
"flag"
"fmt"
"log"
"os"
"sync"
"sync/atomic"
"time"
"github.com/jackc/pgx/v5/pgxpool"
)
// Config holds the command-line options of the bundle generator. Each field
// is bound to a flag in main.
type Config struct {
// DBUrl is the Postgres connection string (--db, required).
DBUrl string
// IconsBucket is the S3 bucket holding the downloaded icons (--icons-bucket).
IconsBucket string
// SiteBucket is the S3 bucket bundles are uploaded to (--site-bucket).
SiteBucket string
// EntriesPerBundle is the number of tabs per bundle JSON file
// (--entries-per-bundle).
EntriesPerBundle int
// Concurrency is the number of icon conversions run at once (--concurrency).
Concurrency int
// DryRun writes bundles to local disk instead of S3 (--dry-run).
DryRun bool
// OutputDir is the local directory used in dry-run mode (--output-dir).
OutputDir string
// Limit caps the number of hosts processed; 0 means all (--limit).
Limit int
// LogFile, when non-empty, is a file that log lines are mirrored to
// (--log-file).
LogFile string
// LogErrors restricts the log file to error lines (--log-errors-only).
LogErrors bool
}
// Stats collects the counters of one generator run. It is shared by pointer;
// ConvertErrors is atomic because buildEntry increments it from the
// concurrent conversion goroutines started in main, while the remaining
// fields are only written by main itself.
type Stats struct {
// TotalHosts is the number of hosts returned by fetchHosts.
TotalHosts int
// HostsWithIcon counts hosts whose BestIconS3Key is non-empty.
HostsWithIcon int
// HostsNoIcon counts hosts whose BestIconS3Key is empty.
HostsNoIcon int
// BundlesCreated is the number of bundle files written.
BundlesCreated int
// ConvertErrors counts icons that could not be converted.
ConvertErrors atomic.Int64
// TotalBytes is the summed size of all serialized bundles.
TotalBytes int64
// StartedAt is when the run began.
StartedAt time.Time
}
// main runs the bundle generator: it parses flags, loads the qualifying
// hosts from Postgres, converts their icons concurrently, splits the
// resulting entries into fixed-size bundles, writes each bundle to S3 (or to
// local disk in dry-run mode), and finally prints and persists run
// statistics.
//
// --entries-per-bundle and --concurrency must be positive: a zero chunk size
// would never advance the bundling loop, and a zero-capacity semaphore would
// block the first conversion forever.
func main() {
	cfg := Config{}
	flag.StringVar(&cfg.DBUrl, "db", "", "Postgres connection string (required)")
	flag.StringVar(&cfg.IconsBucket, "icons-bucket", "everytab-icons", "S3 bucket with downloaded icons")
	flag.StringVar(&cfg.SiteBucket, "site-bucket", "everytab-site", "S3 bucket for the static site")
	flag.IntVar(&cfg.EntriesPerBundle, "entries-per-bundle", 120, "Tabs per bundle JSON file")
	flag.BoolVar(&cfg.DryRun, "dry-run", false, "Write bundles to local disk instead of S3")
	flag.StringVar(&cfg.OutputDir, "output-dir", "bundles", "Local output dir for dry-run mode")
	flag.IntVar(&cfg.Limit, "limit", 0, "Max hosts to process (0 = all)")
	flag.IntVar(&cfg.Concurrency, "concurrency", 50, "Concurrent icon conversions")
	flag.StringVar(&cfg.LogFile, "log-file", "", "Mirror log lines to this file")
	flag.BoolVar(&cfg.LogErrors, "log-errors-only", false, "Only write errors to log file")
	flag.Parse()

	if cfg.DBUrl == "" {
		fmt.Println("Usage: bundle_gen --db DATABASE_URL [OPTIONS]")
		flag.PrintDefaults()
		os.Exit(1)
	}
	// Reject values that would hang or panic later on.
	if cfg.EntriesPerBundle <= 0 {
		log.Fatalf("--entries-per-bundle must be positive, got %d", cfg.EntriesPerBundle)
	}
	if cfg.Concurrency <= 0 {
		log.Fatalf("--concurrency must be positive, got %d", cfg.Concurrency)
	}

	ctx := context.Background()

	// Init S3
	if err := initS3(); err != nil {
		log.Fatalf("Failed to init S3: %v", err)
	}

	// Init DB
	pool, err := pgxpool.New(ctx, cfg.DBUrl)
	if err != nil {
		log.Fatalf("Failed to connect to database: %v", err)
	}
	defer pool.Close()

	// Setup log file
	var logWriter *LogWriter
	if cfg.LogFile != "" {
		logWriter, err = NewLogWriter(cfg.LogFile, cfg.LogErrors)
		if err != nil {
			log.Fatalf("Failed to open log file: %v", err)
		}
		defer logWriter.Close()
	}

	stats := &Stats{StartedAt: time.Now()}

	// Fetch all qualifying hosts (randomized)
	fmt.Println("=== Bundle Generator ===")
	fmt.Println("Querying hosts...")
	hosts, err := fetchHosts(ctx, pool, cfg.Limit)
	if err != nil {
		log.Fatalf("Failed to fetch hosts: %v", err)
	}
	stats.TotalHosts = len(hosts)
	for _, h := range hosts {
		if h.BestIconS3Key != "" {
			stats.HostsWithIcon++
		} else {
			stats.HostsNoIcon++
		}
	}
	fmt.Printf("Total hosts: %d (with icon: %d, no icon: %d)\n", stats.TotalHosts, stats.HostsWithIcon, stats.HostsNoIcon)
	fmt.Printf("Entries per bundle: %d\n", cfg.EntriesPerBundle)
	fmt.Printf("Dry run: %v\n\n", cfg.DryRun)

	if cfg.DryRun {
		if err := os.MkdirAll(cfg.OutputDir, 0755); err != nil {
			log.Fatalf("Failed to create output dir %s: %v", cfg.OutputDir, err)
		}
	}

	// Process hosts into bundle entries (concurrently for S3 downloads).
	// Each goroutine writes only its own slot of entries, so no lock is
	// needed; sem bounds the number of conversions in flight.
	fmt.Printf("Converting icons and building entries (concurrency: %d)...\n", cfg.Concurrency)
	entries := make([]BundleEntry, len(hosts))
	var wg sync.WaitGroup
	sem := make(chan struct{}, cfg.Concurrency)
	var processed atomic.Int64
	for i, host := range hosts {
		wg.Add(1)
		sem <- struct{}{}
		go func(idx int, h HostRow) {
			defer wg.Done()
			defer func() { <-sem }()
			entries[idx] = buildEntry(ctx, h, cfg.IconsBucket, logWriter, stats)
			n := processed.Add(1)
			if n%5000 == 0 {
				fmt.Printf(" processed %d/%d hosts\n", n, len(hosts))
			}
		}(i, host)
	}
	wg.Wait()

	// Chunk into bundles and write
	fmt.Println("\nWriting bundles...")
	bundleCount := 0
	var totalBytes int64
	for i := 0; i < len(entries); i += cfg.EntriesPerBundle {
		end := i + cfg.EntriesPerBundle
		if end > len(entries) {
			end = len(entries)
		}
		chunk := entries[i:end]
		bundleIndex := bundleCount

		data, err := serializeBundle(chunk)
		if err != nil {
			log.Fatalf("Failed to serialize bundle %d: %v", bundleIndex, err)
		}
		if cfg.DryRun {
			err = writeBundleLocal(cfg.OutputDir, bundleIndex, data)
		} else {
			err = writeBundleS3(ctx, cfg.SiteBucket, bundleIndex, data)
		}
		if err != nil {
			log.Fatalf("Failed to write bundle %d: %v", bundleIndex, err)
		}

		logLine := fmt.Sprintf("bundle: %04d.json %d entries %dKB", bundleIndex, len(chunk), len(data)/1024)
		fmt.Println(logLine)
		if logWriter != nil {
			logWriter.Write(logLine, false)
		}
		totalBytes += int64(len(data))
		bundleCount++
	}
	stats.BundlesCreated = bundleCount
	stats.TotalBytes = totalBytes

	// Summary
	duration := time.Since(stats.StartedAt)
	fmt.Printf("\n=== Summary ===\n")
	fmt.Printf("Duration: %s\n", duration.Round(time.Second))
	fmt.Printf("Total hosts: %d\n", stats.TotalHosts)
	fmt.Printf("Hosts with icon: %d\n", stats.HostsWithIcon)
	fmt.Printf("Hosts without icon: %d\n", stats.HostsNoIcon)
	fmt.Printf("Convert errors: %d\n", stats.ConvertErrors.Load())
	fmt.Printf("Bundles created: %d\n", stats.BundlesCreated)
	fmt.Printf("Total size: %.1f MB\n", float64(stats.TotalBytes)/(1024*1024))
	// With no bundles the average is 0/0 (NaN), so the line is omitted.
	if stats.BundlesCreated > 0 {
		fmt.Printf("Avg bundle size: %.0f KB\n", float64(stats.TotalBytes)/float64(stats.BundlesCreated)/1024)
	}
	fmt.Printf("TOTAL_BUNDLES = %d (bake this into the frontend)\n", stats.BundlesCreated)

	writeStats(stats)
}

View file

@ -0,0 +1,46 @@
package main
import (
"bytes"
"context"
"io"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/service/s3"
)
// s3Client is the package-wide S3 client used by s3Download and
// s3UploadBundle. It is nil until initS3 has succeeded.
var s3Client *s3.Client

// initS3 loads the default AWS configuration for region us-east-1 and stores
// an S3 client built from it in s3Client. On failure the error is returned
// and s3Client is left untouched.
func initS3() error {
	awsCfg, err := config.LoadDefaultConfig(
		context.Background(),
		config.WithRegion("us-east-1"),
	)
	if err == nil {
		s3Client = s3.NewFromConfig(awsCfg)
	}
	return err
}
// s3Download fetches an object from S3.
func s3Download(ctx context.Context, bucket, key string) ([]byte, error) {
resp, err := s3Client.GetObject(ctx, &s3.GetObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
})
if err != nil {
return nil, err
}
defer resp.Body.Close()
return io.ReadAll(resp.Body)
}
// s3UploadBundle uploads a bundle JSON to S3.
func s3UploadBundle(ctx context.Context, bucket, key string, data []byte) error {
_, err := s3Client.PutObject(ctx, &s3.PutObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
Body: bytes.NewReader(data),
ContentType: aws.String("application/json"),
})
return err
}