diff --git a/pipeline/02_warc_parse/parser.go b/pipeline/02_warc_parse/parser.go index 0d0460a..37b4339 100644 --- a/pipeline/02_warc_parse/parser.go +++ b/pipeline/02_warc_parse/parser.go @@ -47,7 +47,7 @@ func ParseHTML(body []byte, protocol, hostname string) ParseResult { continue } - if tagName == "link" && hasAttr { + if tagName == "link" && hasAttr && len(result.Icons) < 50 { icon := parseLinkTag(tokenizer, protocol, hostname) if icon != nil { result.Icons = append(result.Icons, *icon)