From ec33b2e8576b1571c9c5cb9601f08d67f712d361 Mon Sep 17 00:00:00 2001 From: Joe Lothan Date: Wed, 20 May 2026 01:30:46 -0400 Subject: [PATCH] bump up s3 warc retries to 6 to avoid 503 errors --- pipeline/02_warc_parse/warc.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pipeline/02_warc_parse/warc.go b/pipeline/02_warc_parse/warc.go index 246e741..125ccf7 100644 --- a/pipeline/02_warc_parse/warc.go +++ b/pipeline/02_warc_parse/warc.go @@ -10,6 +10,7 @@ import ( "strings" "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/aws/retry" "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/nlnwa/gowarc/v3" @@ -20,7 +21,12 @@ const ccBucket = "commoncrawl" var s3Client *s3.Client func initS3() error { - cfg, err := config.LoadDefaultConfig(context.Background(), config.WithRegion("us-east-1")) + cfg, err := config.LoadDefaultConfig(context.Background(), + config.WithRegion("us-east-1"), + config.WithRetryer(func() aws.Retryer { + return retry.AddWithMaxAttempts(retry.NewStandard(), 6) + }), + ) if err != nil { return fmt.Errorf("load AWS config: %w", err) }