interleave no-icon hosts and icon hosts for an even mix

This commit is contained in:
Joe Lothan 2026-05-26 23:50:02 -04:00
parent a799c05f81
commit 758ab3080b
2 changed files with 116 additions and 40 deletions

View file

@ -132,41 +132,89 @@ func main() {
entryCh := make(chan assemblerEntry, 50000)
uploadCh := make(chan bundleJob, cfg.Uploaders*2)
// Stage 1: DB fetcher — continuously fetches pages into hostCh
// Stage 1: DB fetcher — interleaves hosts with icons (ordered by download time for
// disk locality) and hosts without icons (no disk reads needed), matching the natural
// ratio so bundles contain a representative mix.
go func() {
defer close(hostCh)
// Calculate the ratio: for every iconPageSize icon-hosts, fetch noIconPageSize no-icon hosts
iconPageSize := 50000
noIconRatio := float64(stats.HostsNoIcon) / float64(max(stats.HostsWithIcon, 1))
noIconPageSize := int(float64(iconPageSize) * noIconRatio)
if noIconPageSize < 1000 {
noIconPageSize = 1000
}
var lastDownloaded *time.Time
var lastID int64
pageSize := 50000
var lastIconID int64
var lastNoIconID int64
iconsDone := false
noIconsDone := false
fetched := 0
for {
limit := pageSize
if cfg.Limit > 0 {
remaining := cfg.Limit - fetched
if remaining <= 0 {
break
for !iconsDone || !noIconsDone {
// Fetch icon-hosts (ordered by download time for disk locality)
if !iconsDone {
limit := iconPageSize
if cfg.Limit > 0 {
remaining := cfg.Limit - fetched
if remaining <= 0 {
break
}
if limit > remaining {
limit = remaining
}
}
if limit > remaining {
limit = remaining
fetchStart := time.Now()
hosts, err := fetchHostsWithIcons(ctx, pool, lastDownloaded, lastIconID, limit)
if err != nil {
log.Fatalf("Failed to fetch icon hosts: %v", err)
}
if len(hosts) == 0 {
iconsDone = true
} else {
last := hosts[len(hosts)-1]
lastDownloaded = last.IconDownloadedAt
lastIconID = last.ID
fmt.Printf("[fetcher] %d icon hosts in %dms (hostCh: %d/%d)\n",
len(hosts), time.Since(fetchStart).Milliseconds(), len(hostCh), cap(hostCh))
for _, h := range hosts {
hostCh <- h
}
fetched += len(hosts)
}
}
fetchStart := time.Now()
hosts, err := fetchHostsPage(ctx, pool, lastDownloaded, lastID, limit)
if err != nil {
log.Fatalf("Failed to fetch hosts: %v", err)
// Fetch proportional no-icon hosts (interleaved for balanced bundles)
if !noIconsDone {
limit := noIconPageSize
if cfg.Limit > 0 {
remaining := cfg.Limit - fetched
if remaining <= 0 {
break
}
if limit > remaining {
limit = remaining
}
}
fetchStart := time.Now()
hosts, err := fetchHostsWithoutIcons(ctx, pool, lastNoIconID, limit)
if err != nil {
log.Fatalf("Failed to fetch no-icon hosts: %v", err)
}
if len(hosts) == 0 {
noIconsDone = true
} else {
lastNoIconID = hosts[len(hosts)-1].ID
fmt.Printf("[fetcher] %d no-icon hosts in %dms (hostCh: %d/%d)\n",
len(hosts), time.Since(fetchStart).Milliseconds(), len(hostCh), cap(hostCh))
for _, h := range hosts {
hostCh <- h
}
fetched += len(hosts)
}
}
if len(hosts) == 0 {
break
}
last := hosts[len(hosts)-1]
lastDownloaded = last.IconDownloadedAt
lastID = last.ID
fmt.Printf("[fetcher] %d hosts in %dms (hostCh: %d/%d)\n",
len(hosts), time.Since(fetchStart).Milliseconds(), len(hostCh), cap(hostCh))
for _, h := range hosts {
hostCh <- h
}
fetched += len(hosts)
}
}()