Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Backups
youtube-dl
Commits
476cf548
Unverified
Commit
476cf548
authored
Oct 29, 2018
by
Sergey M․
Browse files
[sportbox] Improve extraction, add support for matchtv.ru and fix video id (closes #17978)
parent
bebef109
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
37 additions
and
26 deletions
+37
-26
youtube_dl/extractor/extractors.py
youtube_dl/extractor/extractors.py
+1
-1
youtube_dl/extractor/generic.py
youtube_dl/extractor/generic.py
+3
-3
youtube_dl/extractor/sportbox.py
youtube_dl/extractor/sportbox.py
+33
-22
No files found.
youtube_dl/extractor/extractors.py
View file @
476cf548
...
...
@@ -1043,7 +1043,7 @@ from .spike import (
)
from
.stitcher
import
StitcherIE
from
.sport5
import
Sport5IE
from
.sportbox
import
SportBox
Embed
IE
from
.sportbox
import
SportBoxIE
from
.sportdeutschland
import
SportDeutschlandIE
from
.springboardplatform
import
SpringboardPlatformIE
from
.sprout
import
SproutIE
...
...
youtube_dl/extractor/generic.py
View file @
476cf548
...
...
@@ -47,7 +47,7 @@ from .nbc import NBCSportsVPlayerIE
from
.ooyala
import
OoyalaIE
from
.rutv
import
RUTVIE
from
.tvc
import
TVCIE
from
.sportbox
import
SportBox
Embed
IE
from
.sportbox
import
SportBoxIE
from
.smotri
import
SmotriIE
from
.myvi
import
MyviIE
from
.condenast
import
CondeNastIE
...
...
@@ -2636,9 +2636,9 @@ class GenericIE(InfoExtractor):
return
self
.
url_result
(
tvc_url
,
'TVC'
)
# Look for embedded SportBox player
sportbox_urls
=
SportBox
Embed
IE
.
_extract_urls
(
webpage
)
sportbox_urls
=
SportBoxIE
.
_extract_urls
(
webpage
)
if
sportbox_urls
:
return
self
.
playlist_from_matches
(
sportbox_urls
,
video_id
,
video_title
,
ie
=
'
SportBox
Embed'
)
return
self
.
playlist_from_matches
(
sportbox_urls
,
video_id
,
video_title
,
ie
=
SportBox
IE
.
ie_key
()
)
# Look for embedded XHamster player
xhamster_urls
=
XHamsterEmbedIE
.
_extract_urls
(
webpage
)
...
...
youtube_dl/extractor/sportbox.py
View file @
476cf548
...
...
@@ -8,20 +8,24 @@ from ..utils import (
determine_ext
,
int_or_none
,
js_to_json
,
merge_dicts
,
)
class
SportBox
Embed
IE
(
InfoExtractor
):
_VALID_URL
=
r
'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
class
SportBoxIE
(
InfoExtractor
):
_VALID_URL
=
r
'https?://
(?:
news\.sportbox
|matchtv)
\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
_TESTS
=
[{
'url'
:
'http://news.sportbox.ru/vdl/player/ci/211355'
,
'info_dict'
:
{
'id'
:
'
211355
'
,
'id'
:
'
109158
'
,
'ext'
:
'mp4'
,
'title'
:
'В Новороссийске прошел детский турнир «Поле славы боевой»'
,
'description'
:
'В Новороссийске прошел детский турнир «Поле славы боевой»'
,
'thumbnail'
:
r
're:^https?://.*\.jpg$'
,
'duration'
:
292
,
'view_count'
:
int
,
'timestamp'
:
1426237001
,
'upload_date'
:
'20150313'
,
},
'params'
:
{
# m3u8 download
...
...
@@ -33,12 +37,18 @@ class SportBoxEmbedIE(InfoExtractor):
},
{
'url'
:
'https://news.sportbox.ru/vdl/player/media/193095'
,
'only_matching'
:
True
,
},
{
'url'
:
'https://news.sportbox.ru/vdl/player/media/109158'
,
'only_matching'
:
True
,
},
{
'url'
:
'https://matchtv.ru/vdl/player/media/109158'
,
'only_matching'
:
True
,
}]
@
staticmethod
def
_extract_urls
(
webpage
):
return
re
.
findall
(
r
'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"'
,
r
'<iframe[^>]+src="(https?://
(?:
news\.sportbox
|matchtv)
\.ru/vdl/player[^"]+)"'
,
webpage
)
def
_real_extract
(
self
,
url
):
...
...
@@ -46,22 +56,14 @@ class SportBoxEmbedIE(InfoExtractor):
webpage
=
self
.
_download_webpage
(
url
,
video_id
)
wjplayer_data
=
self
.
_parse_json
(
self
.
_search_regex
(
r
'(?s)var\s+playerOptions\s*=\s*({.+?});'
,
webpage
,
'wjplayer settings'
),
video_id
,
transform_source
=
js_to_json
)
wjplayer_data
[
'sources'
]
=
self
.
_parse_json
(
sources
=
self
.
_parse_json
(
self
.
_search_regex
(
r
'(?s)playerOptions\.sources\s*=\s*(\[.+?\]);'
,
webpage
,
'wjplayer sources'
),
r
'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n'
,
webpage
,
'sources'
),
video_id
,
transform_source
=
js_to_json
)
title
=
self
.
_html_search_meta
(
[
'og:title'
,
'twitter:title'
],
webpage
)
or
self
.
_html_search_regex
(
r
'<title>(.+?)</title>'
,
webpage
,
'title'
,
fatal
=
False
)
or
video_id
formats
=
[]
for
source
in
wjplayer_data
[
'
sources
'
]
:
for
source
in
sources
:
src
=
source
.
get
(
'src'
)
if
not
src
:
continue
...
...
@@ -75,14 +77,23 @@ class SportBoxEmbedIE(InfoExtractor):
})
self
.
_sort_formats
(
formats
)
player
=
self
.
_parse_json
(
self
.
_search_regex
(
r
'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n'
,
webpage
,
'player options'
,
default
=
'{}'
),
video_id
,
transform_source
=
js_to_json
)
media_id
=
player
[
'mediaId'
]
info
=
self
.
_search_json_ld
(
webpage
,
media_id
,
default
=
{})
view_count
=
int_or_none
(
self
.
_search_regex
(
r
'Просмотров\s*:\s*(\d+)'
,
webpage
,
'view count'
,
default
=
None
))
return
{
'id'
:
video
_id
,
'title'
:
title
,
'thumbnail'
:
wj
player
_data
.
get
(
'poster'
),
'duration'
:
int_or_none
(
wj
player
_data
.
get
(
'duration'
)),
return
merge_dicts
(
info
,
{
'id'
:
media
_id
,
'title'
:
self
.
_og_search_title
(
webpage
,
default
=
None
)
or
media_id
,
'thumbnail'
:
player
.
get
(
'poster'
),
'duration'
:
int_or_none
(
player
.
get
(
'duration'
)),
'view_count'
:
view_count
,
'formats'
:
formats
,
}
}
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment