From 1d5b7bd374a512f1113953dedb542041177ea7ad Mon Sep 17 00:00:00 2001
From: Joe Lothan
Date: Tue, 19 May 2026 10:47:05 -0400
Subject: [PATCH] added random_order to host table schema

---
Reviewer notes (text in this section is ignored by git am):

* The column default is written as DEFAULT (random()). SQLite rejects an
  expression default that is not wrapped in parentheses; PostgreSQL accepts
  both spellings, so the parenthesised form is safe on either engine.
* NOTE(review): the value range of random() depends on the engine (a 64-bit
  integer in SQLite, a float in [0, 1) in PostgreSQL) - confirm that the
  bundle-generation queries only rely on ordering, not on the range.
* NOTE(review): CREATE TABLE IF NOT EXISTS does not add random_order to a
  hosts table that already exists, so the new CREATE INDEX would fail on
  such a database. Presumably a separate ALTER TABLE / rebuild step is
  needed there - verify against the deployment procedure.
* The post-image blob id on the "index" line predates the parenthesis fix;
  regenerate the patch with git format-patch if exact ids matter.

 pipeline/01_cc_index/schema.sql | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pipeline/01_cc_index/schema.sql b/pipeline/01_cc_index/schema.sql
index e10bd0f..d4367c3 100644
--- a/pipeline/01_cc_index/schema.sql
+++ b/pipeline/01_cc_index/schema.sql
@@ -13,7 +13,8 @@ CREATE TABLE IF NOT EXISTS hosts (
     html_title TEXT,
     iframe_allowed BOOLEAN,
     best_icon_s3_key TEXT,
-    parsed BOOLEAN DEFAULT FALSE
+    parsed BOOLEAN DEFAULT FALSE,
+    random_order REAL DEFAULT (random())  -- expression default kept in parentheses (mandatory in SQLite)
 );
 
 CREATE TABLE IF NOT EXISTS icons (
@@ -40,3 +41,6 @@ CREATE INDEX IF NOT EXISTS idx_icons_host_id ON icons(host_id);
 
 -- For WARC parsing cursor
 CREATE INDEX IF NOT EXISTS idx_hosts_unparsed ON hosts(id) WHERE parsed = FALSE;
+
+-- For bundle generation pagination in random order
+CREATE INDEX IF NOT EXISTS idx_hosts_random ON hosts(random_order);