diff --git a/pipeline/02_warc_parse/warc.go b/pipeline/02_warc_parse/warc.go index 246e741..125ccf7 100644 --- a/pipeline/02_warc_parse/warc.go +++ b/pipeline/02_warc_parse/warc.go @@ -10,6 +10,7 @@ import ( "strings" "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/aws/retry" "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/nlnwa/gowarc/v3" @@ -20,7 +21,12 @@ const ccBucket = "commoncrawl" var s3Client *s3.Client func initS3() error { - cfg, err := config.LoadDefaultConfig(context.Background(), config.WithRegion("us-east-1")) + cfg, err := config.LoadDefaultConfig(context.Background(), + config.WithRegion("us-east-1"), + config.WithRetryer(func() aws.Retryer { + return retry.AddWithMaxAttempts(retry.NewStandard(), 6) + }), + ) if err != nil { return fmt.Errorf("load AWS config: %w", err) }