Unverified commit ce5b9040, authored by Sergey M․
Browse files

[extractor/common] Relax interaction count extraction in _json_ld

parent ad06b99d
...@@ -68,6 +68,7 @@ from ..utils import ( ...@@ -68,6 +68,7 @@ from ..utils import (
sanitized_Request, sanitized_Request,
sanitize_filename, sanitize_filename,
str_or_none, str_or_none,
strip_or_none, strip_or_none,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
...@@ -1248,7 +1249,10 @@ class InfoExtractor(object): ...@@ -1248,7 +1249,10 @@ class InfoExtractor(object):
interaction_type = is_e.get('interactionType') interaction_type = is_e.get('interactionType')
if not isinstance(interaction_type, compat_str): if not isinstance(interaction_type, compat_str):
continue continue
interaction_count = int_or_none(is_e.get('userInteractionCount')) # For interaction count some sites provide string instead of
# an integer (as per spec), containing non-digit characters (e.g. ","),
# so extract the count with the more relaxed str_to_int
interaction_count = str_to_int(is_e.get('userInteractionCount'))
if interaction_count is None: if interaction_count is None:
continue continue
count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1]) count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment