diff --git a/pipeline/01_cc_index/schema.sql b/pipeline/01_cc_index/schema.sql
index e10bd0f..d4367c3 100644
--- a/pipeline/01_cc_index/schema.sql
+++ b/pipeline/01_cc_index/schema.sql
@@ -13,7 +13,13 @@ CREATE TABLE IF NOT EXISTS hosts (
     html_title TEXT,
     iframe_allowed BOOLEAN,
     best_icon_s3_key TEXT,
-    parsed BOOLEAN DEFAULT FALSE
+    parsed BOOLEAN DEFAULT FALSE,
+    -- Per-row random sort key assigned by default at insert time (see idx_hosts_random).
+    -- The default is parenthesized because SQLite only accepts an expression
+    -- default in the form DEFAULT (expr); PostgreSQL accepts this form as well.
+    -- NOTE(review): IF NOT EXISTS leaves an already-created hosts table
+    -- untouched, so existing databases need this column added separately.
+    random_order REAL DEFAULT (random())
 );
 
 CREATE TABLE IF NOT EXISTS icons (
@@ -40,3 +46,8 @@ CREATE INDEX IF NOT EXISTS idx_icons_host_id ON icons(host_id);
 
 -- For WARC parsing cursor
 CREATE INDEX IF NOT EXISTS idx_hosts_unparsed ON hosts(id) WHERE parsed = FALSE;
+
+-- For bundle generation pagination in random order
+-- NOTE(review): random_order values are not guaranteed unique, so paginating
+-- queries presumably need a tie-breaker such as id; confirm against the caller.
+CREATE INDEX IF NOT EXISTS idx_hosts_random ON hosts(random_order);