[ard:beta] Improve extraction robustness, fix subtitle extraction, improve extraction of geo-restricted videos

[ard:beta] Improve extraction robustness, fix subtitle extraction, and improve extraction of geo-restricted videos
...@@ -8,13 +8,16 @@ from .generic import GenericIE ...@@ -8,13 +8,16 @@ from .generic import GenericIE
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_duration, parse_duration,
unified_strdate, unified_strdate,
xpath_text, unified_timestamp,
update_url_query, update_url_query,
url_or_none, url_or_none,
) )
from ..compat import compat_etree_fromstring from ..compat import compat_etree_fromstring
...@@ -336,43 +339,62 @@ class ARDBetaMediathekIE(InfoExtractor): ...@@ -336,43 +339,62 @@ class ARDBetaMediathekIE(InfoExtractor):
'display_id': display_id, 'display_id': display_id,
} }
formats = [] formats = []
subtitles = {}
geoblocked = False
for widget in data.values(): for widget in data.values():
if widget.get('_geoblocked'): if widget.get('_geoblocked') is True:
raise ExtractorError('This video is not available due to geoblocking', expected=True) geoblocked = True
if '_duration' in widget: if '_duration' in widget:
res['duration'] = widget['_duration'] res['duration'] = int_or_none(widget['_duration'])
if 'clipTitle' in widget: if 'clipTitle' in widget:
res['title'] = widget['clipTitle'] res['title'] = widget['clipTitle']
if '_previewImage' in widget: if '_previewImage' in widget:
res['thumbnail'] = widget['_previewImage'] res['thumbnail'] = widget['_previewImage']
if 'broadcastedOn' in widget: if 'broadcastedOn' in widget:
res['upload_date'] = unified_strdate(widget['broadcastedOn']) res['timestamp'] = unified_timestamp(widget['broadcastedOn'])
if 'synopsis' in widget: if 'synopsis' in widget:
res['description'] = widget['synopsis'] res['description'] = widget['synopsis']
if '_subtitleUrl' in widget: subtitle_url = url_or_none(widget.get('_subtitleUrl'))
res['subtitles'] = {'de': [{ if subtitle_url:
subtitles.setdefault('de', []).append({
'ext': 'ttml', 'ext': 'ttml',
'url': widget['_subtitleUrl'], 'url': subtitle_url,
}]} })
if '_quality' in widget: if '_quality' in widget:
format_url = widget['_stream']['json'][0] format_url = url_or_none(try_get(
widget, lambda x: x['_stream']['json'][0]))
if format_url.endswith('.f4m'): if not format_url:
ext = determine_ext(format_url)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
format_url + '?hdcore=3.11.0', format_url + '?hdcore=3.11.0',
video_id, f4m_id='hds', fatal=False)) video_id, f4m_id='hds', fatal=False))
elif format_url.endswith('m3u8'): elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) format_url, video_id, 'mp4', m3u8_id='hls',
else: else:
# HTTP formats are not available when geoblocked is True,
# other formats are fine though
if geoblocked:
quality = str_or_none(widget.get('_quality'))
formats.append({ formats.append({
'format_id': 'http-' + widget['_quality'], 'format_id': ('http-' + quality) if quality else 'http',
'url': format_url, 'url': format_url,
'preference': 10, # Plain HTTP, that's nice 'preference': 10, # Plain HTTP, that's nice
}) })
if not formats and geoblocked:
msg='This video is not available due to geoblocking',
self._sort_formats(formats) self._sort_formats(formats)
res['formats'] = formats res.update({
'subtitles': subtitles,
'formats': formats,
return res return res
