Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Backups
youtube-dl
Commits
bf1317d2
Unverified
Commit
bf1317d2
authored
Sep 11, 2019
by
Sergey M․
Browse files
[youtube] Quick extraction tempfix (closes #22367, closes #22163)
parent
bff90fc5
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
109 additions
and
81 deletions
+109
-81
youtube_dl/extractor/youtube.py
youtube_dl/extractor/youtube.py
+109
-81
No files found.
youtube_dl/extractor/youtube.py
View file @
bf1317d2
...
...
@@ -1915,6 +1915,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return
int_or_none
(
self
.
_search_regex
(
r
'\bclen[=/](\d+)'
,
media_url
,
'filesize'
,
default
=
None
))
streaming_formats
=
try_get
(
player_response
,
lambda
x
:
x
[
'streamingData'
][
'formats'
],
list
)
or
[]
streaming_formats
.
extend
(
try_get
(
player_response
,
lambda
x
:
x
[
'streamingData'
][
'adaptiveFormats'
],
list
)
or
[])
if
'conn'
in
video_info
and
video_info
[
'conn'
][
0
].
startswith
(
'rtmp'
):
self
.
report_rtmp_download
()
formats
=
[{
...
...
@@ -1923,10 +1926,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url'
:
video_info
[
'conn'
][
0
],
'player_url'
:
player_url
,
}]
elif
not
is_live
and
(
len
(
video_info
.
get
(
'url_encoded_fmt_stream_map'
,
[
''
])[
0
])
>=
1
or
len
(
video_info
.
get
(
'adaptive_fmts'
,
[
''
])[
0
])
>=
1
):
elif
not
is_live
and
(
streaming_formats
or
len
(
video_info
.
get
(
'url_encoded_fmt_stream_map'
,
[
''
])[
0
])
>=
1
or
len
(
video_info
.
get
(
'adaptive_fmts'
,
[
''
])[
0
])
>=
1
):
encoded_url_map
=
video_info
.
get
(
'url_encoded_fmt_stream_map'
,
[
''
])[
0
]
+
','
+
video_info
.
get
(
'adaptive_fmts'
,
[
''
])[
0
]
if
'rtmpe%3Dyes'
in
encoded_url_map
:
raise
ExtractorError
(
'rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.'
,
expected
=
True
)
formats
=
[]
formats_spec
=
{}
fmt_list
=
video_info
.
get
(
'fmt_list'
,
[
''
])[
0
]
if
fmt_list
:
...
...
@@ -1941,90 +1945,105 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'height'
:
int_or_none
(
width_height
[
1
]),
}
q
=
qualities
([
'small'
,
'medium'
,
'hd720'
])
streaming_formats
=
try_get
(
player_response
,
lambda
x
:
x
[
'streamingData'
][
'formats'
],
list
)
if
streaming_formats
:
for
fmt
in
streaming_formats
:
itag
=
str_or_none
(
fmt
.
get
(
'itag'
))
if
not
itag
:
continue
quality
=
fmt
.
get
(
'quality'
)
quality_label
=
fmt
.
get
(
'qualityLabel'
)
or
quality
formats_spec
[
itag
]
=
{
'asr'
:
int_or_none
(
fmt
.
get
(
'audioSampleRate'
)),
'filesize'
:
int_or_none
(
fmt
.
get
(
'contentLength'
)),
'format_note'
:
quality_label
,
'fps'
:
int_or_none
(
fmt
.
get
(
'fps'
)),
'height'
:
int_or_none
(
fmt
.
get
(
'height'
)),
'quality'
:
q
(
quality
),
# bitrate for itag 43 is always 2147483647
'tbr'
:
float_or_none
(
fmt
.
get
(
'averageBitrate'
)
or
fmt
.
get
(
'bitrate'
),
1000
)
if
itag
!=
'43'
else
None
,
'width'
:
int_or_none
(
fmt
.
get
(
'width'
)),
}
formats
=
[]
for
url_data_str
in
encoded_url_map
.
split
(
','
):
url_data
=
compat_parse_qs
(
url_data_str
)
if
'itag'
not
in
url_data
or
'url'
not
in
url_data
or
url_data
.
get
(
'drm_families'
):
for
fmt
in
streaming_formats
:
itag
=
str_or_none
(
fmt
.
get
(
'itag'
))
if
not
itag
:
continue
quality
=
fmt
.
get
(
'quality'
)
quality_label
=
fmt
.
get
(
'qualityLabel'
)
or
quality
formats_spec
[
itag
]
=
{
'asr'
:
int_or_none
(
fmt
.
get
(
'audioSampleRate'
)),
'filesize'
:
int_or_none
(
fmt
.
get
(
'contentLength'
)),
'format_note'
:
quality_label
,
'fps'
:
int_or_none
(
fmt
.
get
(
'fps'
)),
'height'
:
int_or_none
(
fmt
.
get
(
'height'
)),
'quality'
:
q
(
quality
),
# bitrate for itag 43 is always 2147483647
'tbr'
:
float_or_none
(
fmt
.
get
(
'averageBitrate'
)
or
fmt
.
get
(
'bitrate'
),
1000
)
if
itag
!=
'43'
else
None
,
'width'
:
int_or_none
(
fmt
.
get
(
'width'
)),
}
for
fmt
in
streaming_formats
:
if
fmt
.
get
(
'drm_families'
):
continue
url
=
url_or_none
(
fmt
.
get
(
'url'
))
if
not
url
:
cipher
=
fmt
.
get
(
'cipher'
)
if
not
cipher
:
continue
url_data
=
compat_parse_qs
(
cipher
)
url
=
url_or_none
(
try_get
(
url_data
,
lambda
x
:
x
[
'url'
][
0
],
compat_str
))
if
not
url
:
continue
else
:
cipher
=
None
url_data
=
compat_parse_qs
(
compat_urllib_parse_urlparse
(
url
).
query
)
stream_type
=
int_or_none
(
try_get
(
url_data
,
lambda
x
:
x
[
'stream_type'
][
0
]))
# Unsupported FORMAT_STREAM_TYPE_OTF
if
stream_type
==
3
:
continue
format_id
=
url_data
[
'itag'
][
0
]
url
=
url_data
[
'url'
][
0
]
if
's'
in
url_data
or
self
.
_downloader
.
params
.
get
(
'youtube_include_dash_manifest'
,
True
):
ASSETS_RE
=
r
'"assets":.+?"js":\s*("[^"]+")'
jsplayer_url_json
=
self
.
_search_regex
(
ASSETS_RE
,
embed_webpage
if
age_gate
else
video_webpage
,
'JS player URL (1)'
,
default
=
None
)
if
not
jsplayer_url_json
and
not
age_gate
:
# We need the embed website after all
if
embed_webpage
is
None
:
embed_url
=
proto
+
'://www.youtube.com/embed/%s'
%
video_id
embed_webpage
=
self
.
_download_webpage
(
embed_url
,
video_id
,
'Downloading embed webpage'
)
jsplayer_url_json
=
self
.
_search_regex
(
ASSETS_RE
,
embed_webpage
,
'JS player URL'
)
player_url
=
json
.
loads
(
jsplayer_url_json
)
if
player_url
is
None
:
player_url_json
=
self
.
_search_regex
(
r
'ytplayer\.config.*?"url"\s*:\s*("[^"]+")'
,
video_webpage
,
'age gate player URL'
)
player_url
=
json
.
loads
(
player_url_json
)
if
'sig'
in
url_data
:
url
+=
'&signature='
+
url_data
[
'sig'
][
0
]
elif
's'
in
url_data
:
encrypted_sig
=
url_data
[
's'
][
0
]
format_id
=
fmt
.
get
(
'itag'
)
or
url_data
[
'itag'
][
0
]
if
not
format_id
:
continue
format_id
=
compat_str
(
format_id
)
if
self
.
_downloader
.
params
.
get
(
'verbose'
):
if
cipher
:
if
's'
in
url_data
or
self
.
_downloader
.
params
.
get
(
'youtube_include_dash_manifest'
,
True
):
ASSETS_RE
=
r
'"assets":.+?"js":\s*("[^"]+")'
jsplayer_url_json
=
self
.
_search_regex
(
ASSETS_RE
,
embed_webpage
if
age_gate
else
video_webpage
,
'JS player URL (1)'
,
default
=
None
)
if
not
jsplayer_url_json
and
not
age_gate
:
# We need the embed website after all
if
embed_webpage
is
None
:
embed_url
=
proto
+
'://www.youtube.com/embed/%s'
%
video_id
embed_webpage
=
self
.
_download_webpage
(
embed_url
,
video_id
,
'Downloading embed webpage'
)
jsplayer_url_json
=
self
.
_search_regex
(
ASSETS_RE
,
embed_webpage
,
'JS player URL'
)
player_url
=
json
.
loads
(
jsplayer_url_json
)
if
player_url
is
None
:
player_version
=
'unknown'
player_desc
=
'unknown'
else
:
if
player_url
.
endswith
(
'swf'
):
player_version
=
self
.
_search_regex
(
r
'-(.+?)(?:/watch_as3)?\.swf$'
,
player_url
,
'flash player'
,
fatal
=
False
)
player_desc
=
'flash player %s'
%
player_version
player_url_json
=
self
.
_search_regex
(
r
'ytplayer\.config.*?"url"\s*:\s*("[^"]+")'
,
video_webpage
,
'age gate player URL'
)
player_url
=
json
.
loads
(
player_url_json
)
if
'sig'
in
url_data
:
url
+=
'&signature='
+
url_data
[
'sig'
][
0
]
elif
's'
in
url_data
:
encrypted_sig
=
url_data
[
's'
][
0
]
if
self
.
_downloader
.
params
.
get
(
'verbose'
):
if
player_url
is
None
:
player_version
=
'unknown'
player_desc
=
'unknown'
else
:
player_version
=
self
.
_search_regex
(
[
r
'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js'
,
r
'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'
],
player_url
,
'html5 player'
,
fatal
=
False
)
player_desc
=
'html5 player %s'
%
player_version
parts_sizes
=
self
.
_signature_cache_id
(
encrypted_sig
)
self
.
to_screen
(
'{%s} signature length %s, %s'
%
(
format_id
,
parts_sizes
,
player_desc
))
signature
=
self
.
_decrypt_signature
(
encrypted_sig
,
video_id
,
player_url
,
age_gate
)
sp
=
try_get
(
url_data
,
lambda
x
:
x
[
'sp'
][
0
],
compat_str
)
or
'signature'
url
+=
'&%s=%s'
%
(
sp
,
signature
)
if
player_url
.
endswith
(
'swf'
):
player_version
=
self
.
_search_regex
(
r
'-(.+?)(?:/watch_as3)?\.swf$'
,
player_url
,
'flash player'
,
fatal
=
False
)
player_desc
=
'flash player %s'
%
player_version
else
:
player_version
=
self
.
_search_regex
(
[
r
'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js'
,
r
'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'
],
player_url
,
'html5 player'
,
fatal
=
False
)
player_desc
=
'html5 player %s'
%
player_version
parts_sizes
=
self
.
_signature_cache_id
(
encrypted_sig
)
self
.
to_screen
(
'{%s} signature length %s, %s'
%
(
format_id
,
parts_sizes
,
player_desc
))
signature
=
self
.
_decrypt_signature
(
encrypted_sig
,
video_id
,
player_url
,
age_gate
)
sp
=
try_get
(
url_data
,
lambda
x
:
x
[
'sp'
][
0
],
compat_str
)
or
'signature'
url
+=
'&%s=%s'
%
(
sp
,
signature
)
if
'ratebypass'
not
in
url
:
url
+=
'&ratebypass=yes'
...
...
@@ -2044,24 +2063,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
mobj
=
re
.
search
(
r
'^(?P<width>\d+)[xX](?P<height>\d+)$'
,
url_data
.
get
(
'size'
,
[
''
])[
0
])
width
,
height
=
(
int
(
mobj
.
group
(
'width'
)),
int
(
mobj
.
group
(
'height'
)))
if
mobj
else
(
None
,
None
)
if
width
is
None
:
width
=
int_or_none
(
fmt
.
get
(
'width'
))
if
height
is
None
:
height
=
int_or_none
(
fmt
.
get
(
'height'
))
filesize
=
int_or_none
(
url_data
.
get
(
'clen'
,
[
None
])[
0
])
or
_extract_filesize
(
url
)
quality
=
url_data
.
get
(
'quality'
,
[
None
])[
0
]
quality
=
url_data
.
get
(
'quality'
,
[
None
])[
0
]
or
fmt
.
get
(
'quality'
)
quality_label
=
url_data
.
get
(
'quality_label'
,
[
None
])[
0
]
or
fmt
.
get
(
'qualityLabel'
)
tbr
=
float_or_none
(
url_data
.
get
(
'bitrate'
,
[
None
])[
0
],
1000
)
or
float_or_none
(
fmt
.
get
(
'bitrate'
),
1000
)
fps
=
int_or_none
(
url_data
.
get
(
'fps'
,
[
None
])[
0
])
or
int_or_none
(
fmt
.
get
(
'fps'
))
more_fields
=
{
'filesize'
:
filesize
,
'tbr'
:
float_or_none
(
url_data
.
get
(
'bitrate'
,
[
None
])[
0
],
1000
)
,
'tbr'
:
tbr
,
'width'
:
width
,
'height'
:
height
,
'fps'
:
int_or_none
(
url_data
.
get
(
'fps'
,
[
None
])[
0
])
,
'format_note'
:
url_data
.
get
(
'quality_label'
,
[
None
])[
0
]
or
quality
,
'fps'
:
fps
,
'format_note'
:
quality_label
or
quality
,
'quality'
:
q
(
quality
),
}
for
key
,
value
in
more_fields
.
items
():
if
value
:
dct
[
key
]
=
value
type_
=
url_data
.
get
(
'type'
,
[
None
])[
0
]
type_
=
url_data
.
get
(
'type'
,
[
None
])[
0
]
or
fmt
.
get
(
'mimeType'
)
if
type_
:
type_split
=
type_
.
split
(
';'
)
kind_ext
=
type_split
[
0
].
split
(
'/'
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment