More IOPS cheese testing: sort by icon_downloaded_at, not host id

This commit is contained in:
Joe Lothan 2026-05-26 01:09:32 -04:00
parent a30fe104a3
commit 9266c1417d
3 changed files with 62 additions and 22 deletions

View file

@ -2,30 +2,56 @@ package main
import (
"context"
"time"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgxpool"
)
// HostRow is one row of the hosts table as scanned by fetchHostsPage.
//
// NOTE(review): this listing is a rendered diff with the +/- markers stripped,
// so the first six fields appear twice: once as the removed lines and once as
// the re-aligned added lines, followed by the new IconDownloadedAt field.
// Presumably only the second set exists in the real file — confirm against the
// repository before copying this struct.
type HostRow struct {
// Field list as it appears on the removed side of the diff.
ID int64
Hostname string
Protocol string
HtmlTitle string
IframeAllowed bool
BestIconHash string
// Field list as it appears on the added side of the diff.
// ID is the hosts.id column.
ID int64
// Hostname is the hosts.hostname column.
Hostname string
// Protocol is the hosts.protocol column.
Protocol string
// HtmlTitle is the hosts.html_title column; every query filters on
// html_title IS NOT NULL, so it is always populated.
HtmlTitle string
// IframeAllowed is COALESCE(iframe_allowed, true): a NULL column reads as true.
IframeAllowed bool
// BestIconHash is COALESCE(best_icon_hash, ''); the query for hosts without a
// download timestamp selects the empty string unconditionally.
BestIconHash string
// IconDownloadedAt is the hosts.icon_downloaded_at column. The query for hosts
// without a download timestamp selects NULL::timestamptz for it.
// NOTE(review): presumably a NULL scans to a nil pointer — confirm against pgx.
IconDownloadedAt *time.Time
}
// fetchHostsPage gets a page of hosts with titles, ordered by id for disk locality.
// Icons were downloaded roughly in host-ID order, so reading by ID approximates
// the physical write order on disk — improves EBS readahead cache hits.
func fetchHostsPage(ctx context.Context, pool *pgxpool.Pool, lastID int64, limit int) ([]HostRow, error) {
rows, err := pool.Query(ctx, `
SELECT id, hostname, protocol, html_title, COALESCE(iframe_allowed, true), COALESCE(best_icon_hash, '')
FROM hosts
WHERE html_title IS NOT NULL AND id > $1
ORDER BY id
LIMIT $2
`, lastID, limit)
// fetchHostsPage gets a page of hosts with titles, ordered by icon_downloaded_at for disk locality.
// Icons written to disk at similar times are physically adjacent on the EBS volume —
// reading in write order maximizes OS readahead cache hits.
func fetchHostsPage(ctx context.Context, pool *pgxpool.Pool, lastTime *time.Time, lastID int64, limit int) ([]HostRow, error) {
// Two-phase: first hosts with icons (ordered by download time), then hosts without
var rows pgx.Rows
var err error
if lastTime != nil {
rows, err = pool.Query(ctx, `
SELECT id, hostname, protocol, html_title, COALESCE(iframe_allowed, true), COALESCE(best_icon_hash, ''), icon_downloaded_at
FROM hosts
WHERE html_title IS NOT NULL AND icon_downloaded_at > $1
ORDER BY icon_downloaded_at
LIMIT $2
`, lastTime, limit)
} else if lastID > 0 {
// No more timestamped hosts — fetch remaining (no icon) by id
rows, err = pool.Query(ctx, `
SELECT id, hostname, protocol, html_title, COALESCE(iframe_allowed, true), '', NULL::timestamptz
FROM hosts
WHERE html_title IS NOT NULL AND icon_downloaded_at IS NULL AND id > $1
ORDER BY id
LIMIT $2
`, lastID, limit)
} else {
// Start: fetch timestamped hosts first
rows, err = pool.Query(ctx, `
SELECT id, hostname, protocol, html_title, COALESCE(iframe_allowed, true), COALESCE(best_icon_hash, ''), icon_downloaded_at
FROM hosts
WHERE html_title IS NOT NULL AND icon_downloaded_at IS NOT NULL
ORDER BY icon_downloaded_at
LIMIT $1
`, limit)
}
if err != nil {
return nil, err
}
@ -34,7 +60,7 @@ func fetchHostsPage(ctx context.Context, pool *pgxpool.Pool, lastID int64, limit
var hosts []HostRow
for rows.Next() {
var h HostRow
if err := rows.Scan(&h.ID, &h.Hostname, &h.Protocol, &h.HtmlTitle, &h.IframeAllowed, &h.BestIconHash); err != nil {
if err := rows.Scan(&h.ID, &h.Hostname, &h.Protocol, &h.HtmlTitle, &h.IframeAllowed, &h.BestIconHash, &h.IconDownloadedAt); err != nil {
return nil, err
}
hosts = append(hosts, h)