interleave no-icon hosts and icon hosts for an even mix

This commit is contained in:
Joe Lothan 2026-05-26 23:50:02 -04:00
parent a799c05f81
commit 758ab3080b
2 changed files with 116 additions and 40 deletions

View file

@ -18,19 +18,47 @@ type HostRow struct {
IconDownloadedAt *time.Time
}
// fetchHostsPage gets a page of hosts with titles, ordered by icon_downloaded_at for disk locality.
// Icons written to disk at similar times are physically adjacent — reading in write order
// maximizes OS readahead cache hits. Hosts without icons come last (no disk reads needed).
// random_order is included for bundle bucket assignment (randomized bundles).
func fetchHostsPage(ctx context.Context, pool *pgxpool.Pool, lastDownloaded *time.Time, lastID int64, limit int) ([]HostRow, error) {
rows, err := pool.Query(ctx, `
SELECT id, hostname, protocol, html_title, COALESCE(iframe_allowed, true), COALESCE(best_icon_hash, ''), random_order, icon_downloaded_at
FROM hosts
WHERE html_title IS NOT NULL
AND (icon_downloaded_at, id) > ($1, $2)
ORDER BY icon_downloaded_at NULLS LAST, id
LIMIT $3
`, lastDownloaded, lastID, limit)
// fetchHostsWithIcons returns up to limit hosts that have both a non-NULL
// html_title and a non-NULL icon_downloaded_at, sorted by (icon_downloaded_at, id).
//
// Paging is keyset-based: pass the IconDownloadedAt and ID of the last row of the
// previous page as lastDownloaded / lastID to get the rows strictly after it.
// Passing lastDownloaded == nil together with lastID == 0 requests the first page.
//
// NOTE(review): a nil lastDownloaded combined with a non-zero lastID goes down the
// cursor path, where every comparison is against NULL and therefore matches no
// rows — callers are presumably expected never to pass that combination; confirm.
//
// Any query, scan, or iteration error is returned unchanged.
func fetchHostsWithIcons(ctx context.Context, pool *pgxpool.Pool, lastDownloaded *time.Time, lastID int64, limit int) ([]HostRow, error) {
	// Start from the first-page statement; it is swapped out below when the
	// caller supplied a cursor position.
	sql := `SELECT id, hostname, protocol, html_title, COALESCE(iframe_allowed, true), best_icon_hash, random_order, icon_downloaded_at
FROM hosts WHERE html_title IS NOT NULL AND icon_downloaded_at IS NOT NULL
ORDER BY icon_downloaded_at, id LIMIT $1`
	params := []any{limit}

	if hasCursor := lastDownloaded != nil || lastID != 0; hasCursor {
		// Resume strictly after (lastDownloaded, lastID); id breaks ties between
		// rows that share the same icon_downloaded_at.
		sql = `SELECT id, hostname, protocol, html_title, COALESCE(iframe_allowed, true), best_icon_hash, random_order, icon_downloaded_at
FROM hosts WHERE html_title IS NOT NULL AND icon_downloaded_at IS NOT NULL
AND (icon_downloaded_at > $1 OR (icon_downloaded_at = $1 AND id > $2))
ORDER BY icon_downloaded_at, id LIMIT $3`
		params = []any{lastDownloaded, lastID, limit}
	}

	rows, err := pool.Query(ctx, sql, params...)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var page []HostRow
	for rows.Next() {
		var row HostRow
		// Destination order must mirror the SELECT column order.
		scanErr := rows.Scan(
			&row.ID,
			&row.Hostname,
			&row.Protocol,
			&row.HtmlTitle,
			&row.IframeAllowed,
			&row.BestIconHash,
			&row.RandomOrder,
			&row.IconDownloadedAt,
		)
		if scanErr != nil {
			return nil, scanErr
		}
		page = append(page, row)
	}

	// rows.Err reports any failure that ended iteration early; the rows read so
	// far are returned alongside it.
	return page, rows.Err()
}
// fetchHostsWithoutIcons gets hosts without icons, ordered by id.
func fetchHostsWithoutIcons(ctx context.Context, pool *pgxpool.Pool, lastID int64, limit int) ([]HostRow, error) {
rows, err := pool.Query(ctx, `
SELECT id, hostname, protocol, html_title, COALESCE(iframe_allowed, true), '', random_order, NULL::timestamptz
FROM hosts
WHERE html_title IS NOT NULL AND icon_downloaded_at IS NULL AND id > $1
ORDER BY id LIMIT $2
`, lastID, limit)
if err != nil {
return nil, err
}