package main

import (
	"context"
	"time"

	"github.com/jackc/pgx/v5"
	"github.com/jackc/pgx/v5/pgxpool"
)

// HostRow is one row of the hosts table as returned by fetchHostsPage.
type HostRow struct {
	ID            int64
	Hostname      string
	Protocol      string
	HtmlTitle     string // never NULL: every query filters on html_title IS NOT NULL
	IframeAllowed bool   // a NULL iframe_allowed is read as true
	BestIconHash  string // a NULL best_icon_hash is read as ""; always "" for hosts without an icon
	// IconDownloadedAt is nil for hosts returned by the "no icon" phase.
	IconDownloadedAt *time.Time
}

// fetchHostsPage gets a page of at most limit hosts that have a title.
//
// Hosts are returned in two phases. Hosts with an icon come first, ordered by
// icon_downloaded_at for disk locality: icons written to disk at similar times
// are physically adjacent on the EBS volume, so reading in write order
// maximizes OS readahead cache hits. Hosts without an icon follow, ordered by id.
//
// The cursor is the pair (lastTime, lastID):
//
//   - lastTime == nil, lastID <= 0: first page of hosts with an icon.
//   - lastTime != nil, lastID > 0: next page of hosts with an icon, strictly
//     after (lastTime, lastID). lastID must be the ID of the last row of the
//     previous page; it breaks ties between rows sharing a timestamp.
//   - lastTime != nil, lastID <= 0: next page of hosts with an icon whose
//     timestamp is strictly greater than lastTime. Kept for callers that do
//     not track the ID; rows sharing the boundary timestamp are skipped.
//   - lastTime == nil, lastID > 0: hosts without an icon whose id > lastID.
//
// On error the returned slice is nil. An empty page is also returned as a nil
// slice with a nil error.
func fetchHostsPage(ctx context.Context, pool *pgxpool.Pool, lastTime *time.Time, lastID int64, limit int) ([]HostRow, error) {
	query, args := hostsPageQuery(lastTime, lastID, limit)
	rows, err := pool.Query(ctx, query, args...)
	if err != nil {
		return nil, err
	}
	return scanHostRows(rows)
}

// hostsPageQuery picks the SQL statement and its arguments for the cursor
// position described on fetchHostsPage.
func hostsPageQuery(lastTime *time.Time, lastID int64, limit int) (string, []any) {
	switch {
	case lastTime != nil && lastID > 0:
		// Row-wise comparison resumes exactly after the last row seen, so rows
		// that share the boundary timestamp are neither skipped nor repeated.
		// A NULL icon_downloaded_at makes the comparison NULL, which excludes
		// hosts without an icon just as a plain ">" would.
		return `
			SELECT id, hostname, protocol, html_title,
			       COALESCE(iframe_allowed, true),
			       COALESCE(best_icon_hash, ''),
			       icon_downloaded_at
			FROM hosts
			WHERE html_title IS NOT NULL
			  AND (icon_downloaded_at, id) > ($1, $2)
			ORDER BY icon_downloaded_at, id
			LIMIT $3
		`, []any{lastTime, lastID, limit}

	case lastTime != nil:
		// No usable ID: fall back to the timestamp-only cursor.
		return `
			SELECT id, hostname, protocol, html_title,
			       COALESCE(iframe_allowed, true),
			       COALESCE(best_icon_hash, ''),
			       icon_downloaded_at
			FROM hosts
			WHERE html_title IS NOT NULL
			  AND icon_downloaded_at > $1
			ORDER BY icon_downloaded_at, id
			LIMIT $2
		`, []any{lastTime, limit}

	case lastID > 0:
		// No more timestamped hosts: fetch the remaining (no icon) hosts by id.
		return `
			SELECT id, hostname, protocol, html_title,
			       COALESCE(iframe_allowed, true),
			       '',
			       NULL::timestamptz
			FROM hosts
			WHERE html_title IS NOT NULL
			  AND icon_downloaded_at IS NULL
			  AND id > $1
			ORDER BY id
			LIMIT $2
		`, []any{lastID, limit}

	default:
		// Start: fetch timestamped hosts first. The id tie-breaker keeps the
		// page order deterministic and consistent with the continuation queries.
		return `
			SELECT id, hostname, protocol, html_title,
			       COALESCE(iframe_allowed, true),
			       COALESCE(best_icon_hash, ''),
			       icon_downloaded_at
			FROM hosts
			WHERE html_title IS NOT NULL
			  AND icon_downloaded_at IS NOT NULL
			ORDER BY icon_downloaded_at, id
			LIMIT $1
		`, []any{limit}
	}
}

// scanHostRows drains rows into a slice of HostRow and closes rows. It returns
// a nil slice if scanning or iteration fails, so callers never see a partial page.
func scanHostRows(rows pgx.Rows) ([]HostRow, error) {
	defer rows.Close()

	var hosts []HostRow
	for rows.Next() {
		var h HostRow
		if err := rows.Scan(
			&h.ID,
			&h.Hostname,
			&h.Protocol,
			&h.HtmlTitle,
			&h.IframeAllowed,
			&h.BestIconHash,
			&h.IconDownloadedAt,
		); err != nil {
			return nil, err
		}
		hosts = append(hosts, h)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return hosts, nil
}