Sort by host ID to test an IOPS trick for improving icon read performance

This commit is contained in:
Joe Lothan 2026-05-26 00:43:11 -04:00
parent 03f6406e18
commit a30fe104a3
2 changed files with 17 additions and 16 deletions

View file

@ -13,18 +13,19 @@ type HostRow struct {
HtmlTitle string HtmlTitle string
IframeAllowed bool IframeAllowed bool
BestIconHash string BestIconHash string
RandomOrder float64
} }
// fetchHostsPage gets a page of hosts with titles, ordered by random_order for shuffled bundles. // fetchHostsPage gets a page of hosts with titles, ordered by id for disk locality.
func fetchHostsPage(ctx context.Context, pool *pgxpool.Pool, lastRandom float64, limit int) ([]HostRow, error) { // Icons were downloaded roughly in host-ID order, so reading by ID approximates
// the physical write order on disk — improves EBS readahead cache hits.
func fetchHostsPage(ctx context.Context, pool *pgxpool.Pool, lastID int64, limit int) ([]HostRow, error) {
rows, err := pool.Query(ctx, ` rows, err := pool.Query(ctx, `
SELECT id, hostname, protocol, html_title, COALESCE(iframe_allowed, true), COALESCE(best_icon_hash, ''), random_order SELECT id, hostname, protocol, html_title, COALESCE(iframe_allowed, true), COALESCE(best_icon_hash, '')
FROM hosts FROM hosts
WHERE html_title IS NOT NULL AND random_order > $1 WHERE html_title IS NOT NULL AND id > $1
ORDER BY random_order ORDER BY id
LIMIT $2 LIMIT $2
`, lastRandom, limit) `, lastID, limit)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -33,7 +34,7 @@ func fetchHostsPage(ctx context.Context, pool *pgxpool.Pool, lastRandom float64,
var hosts []HostRow var hosts []HostRow
for rows.Next() { for rows.Next() {
var h HostRow var h HostRow
if err := rows.Scan(&h.ID, &h.Hostname, &h.Protocol, &h.HtmlTitle, &h.IframeAllowed, &h.BestIconHash, &h.RandomOrder); err != nil { if err := rows.Scan(&h.ID, &h.Hostname, &h.Protocol, &h.HtmlTitle, &h.IframeAllowed, &h.BestIconHash); err != nil {
return nil, err return nil, err
} }
hosts = append(hosts, h) hosts = append(hosts, h)

View file

@ -128,7 +128,7 @@ func main() {
// Stage 1: DB fetcher — continuously fetches pages into hostCh // Stage 1: DB fetcher — continuously fetches pages into hostCh
go func() { go func() {
defer close(hostCh) defer close(hostCh)
var lastRandom float64 = -1 var lastID int64
pageSize := 50000 pageSize := 50000
fetched := 0 fetched := 0
for { for {
@ -143,14 +143,14 @@ func main() {
} }
} }
fetchStart := time.Now() fetchStart := time.Now()
hosts, err := fetchHostsPage(ctx, pool, lastRandom, limit) hosts, err := fetchHostsPage(ctx, pool, lastID, limit)
if err != nil { if err != nil {
log.Fatalf("Failed to fetch hosts: %v", err) log.Fatalf("Failed to fetch hosts: %v", err)
} }
if len(hosts) == 0 { if len(hosts) == 0 {
break break
} }
lastRandom = hosts[len(hosts)-1].RandomOrder lastID = hosts[len(hosts)-1].ID
fmt.Printf("[fetcher] %d hosts in %dms (hostCh: %d/%d)\n", fmt.Printf("[fetcher] %d hosts in %dms (hostCh: %d/%d)\n",
len(hosts), time.Since(fetchStart).Milliseconds(), len(hostCh), cap(hostCh)) len(hosts), time.Since(fetchStart).Milliseconds(), len(hostCh), cap(hostCh))
for _, h := range hosts { for _, h := range hosts {