Sort by host ID to test an IOPS trick ("cheese") for improving icon read performance

This commit is contained in:
Joe Lothan 2026-05-26 00:43:11 -04:00
parent 03f6406e18
commit a30fe104a3
2 changed files with 17 additions and 16 deletions

View file

@ -7,24 +7,25 @@ import (
)
type HostRow struct {
ID int64
Hostname string
Protocol string
HtmlTitle string
IframeAllowed bool
ID int64
Hostname string
Protocol string
HtmlTitle string
IframeAllowed bool
BestIconHash string
RandomOrder float64
}
// fetchHostsPage gets a page of hosts with titles, ordered by random_order for shuffled bundles.
func fetchHostsPage(ctx context.Context, pool *pgxpool.Pool, lastRandom float64, limit int) ([]HostRow, error) {
// fetchHostsPage gets a page of hosts with titles, ordered by id for disk locality.
// Icons were downloaded roughly in host-ID order, so reading by ID approximates
// the physical write order on disk — improves EBS readahead cache hits.
func fetchHostsPage(ctx context.Context, pool *pgxpool.Pool, lastID int64, limit int) ([]HostRow, error) {
rows, err := pool.Query(ctx, `
SELECT id, hostname, protocol, html_title, COALESCE(iframe_allowed, true), COALESCE(best_icon_hash, ''), random_order
SELECT id, hostname, protocol, html_title, COALESCE(iframe_allowed, true), COALESCE(best_icon_hash, '')
FROM hosts
WHERE html_title IS NOT NULL AND random_order > $1
ORDER BY random_order
WHERE html_title IS NOT NULL AND id > $1
ORDER BY id
LIMIT $2
`, lastRandom, limit)
`, lastID, limit)
if err != nil {
return nil, err
}
@ -33,7 +34,7 @@ func fetchHostsPage(ctx context.Context, pool *pgxpool.Pool, lastRandom float64,
var hosts []HostRow
for rows.Next() {
var h HostRow
if err := rows.Scan(&h.ID, &h.Hostname, &h.Protocol, &h.HtmlTitle, &h.IframeAllowed, &h.BestIconHash, &h.RandomOrder); err != nil {
if err := rows.Scan(&h.ID, &h.Hostname, &h.Protocol, &h.HtmlTitle, &h.IframeAllowed, &h.BestIconHash); err != nil {
return nil, err
}
hosts = append(hosts, h)

View file

@ -128,7 +128,7 @@ func main() {
// Stage 1: DB fetcher — continuously fetches pages into hostCh
go func() {
defer close(hostCh)
var lastRandom float64 = -1
var lastID int64
pageSize := 50000
fetched := 0
for {
@ -143,14 +143,14 @@ func main() {
}
}
fetchStart := time.Now()
hosts, err := fetchHostsPage(ctx, pool, lastRandom, limit)
hosts, err := fetchHostsPage(ctx, pool, lastID, limit)
if err != nil {
log.Fatalf("Failed to fetch hosts: %v", err)
}
if len(hosts) == 0 {
break
}
lastRandom = hosts[len(hosts)-1].RandomOrder
lastID = hosts[len(hosts)-1].ID
fmt.Printf("[fetcher] %d hosts in %dms (hostCh: %d/%d)\n",
len(hosts), time.Since(fetchStart).Milliseconds(), len(hostCh), cap(hostCh))
for _, h := range hosts {