youtube.py 153 KB
Newer Older
1 2
# coding: utf-8

Philipp Hagemeister's avatar
Philipp Hagemeister committed
3 4 5
from __future__ import unicode_literals


6
import itertools
7
import json
8
import os.path
9
import random
10
import re
11
import time
12
import traceback
13

14
from .common import InfoExtractor, SearchInfoExtractor
15
from ..jsinterp import JSInterpreter
16
from ..swfinterp import SWFInterpreter
Philipp Hagemeister's avatar
Philipp Hagemeister committed
17
from ..compat import (
18
    compat_chr,
19
    compat_HTTPError,
20
    compat_kwargs,
21
    compat_parse_qs,
22 23
    compat_urllib_parse_unquote,
    compat_urllib_parse_unquote_plus,
24
    compat_urllib_parse_urlencode,
25
    compat_urllib_parse_urlparse,
26
    compat_urlparse,
27
    compat_str,
Philipp Hagemeister's avatar
Philipp Hagemeister committed
28 29
)
from ..utils import (
30
    bool_or_none,
31
    clean_html,
32
    error_to_compat_str,
33
    extract_attributes,
34
    ExtractorError,
35
    float_or_none,
Philipp Hagemeister's avatar
Philipp Hagemeister committed
36 37
    get_element_by_attribute,
    get_element_by_id,
38
    int_or_none,
39
    mimetype2ext,
Philipp Hagemeister's avatar
Philipp Hagemeister committed
40
    orderedSet,
41
    parse_codecs,
42
    parse_duration,
43
    remove_quotes,
Sergey M․'s avatar
Sergey M․ committed
44
    remove_start,
45
    smuggle_url,
46
    str_or_none,
47
    str_to_int,
48
    try_get,
49 50
    unescapeHTML,
    unified_strdate,
51
    unsmuggle_url,
52
    uppercase_escape,
53
    url_or_none,
54
    urlencode_postdata,
55 56
)

Jouke Waleson's avatar
Jouke Waleson committed
57

58
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google account endpoints used by _login()
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the TL token taken from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    # Headers sent along with "load more" JSON requests
    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }

    def _set_language(self):
        """Set the PREF cookie on .youtube.com with hl=en."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Failure must be reported as False (not None), as documented
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST the hidden login form fields plus the JSON-encoded f_req
            # payload to url; yields the parsed JSON, or False on failure
            # (fatal=False).
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything preceding the first '[' before JSON parsing
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Embedded verbatim in both the lookup and the challenge requests
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Failure must be reported as False (not None), as documented
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised: '%' binds tighter than the conditional
            # expression, so without the parentheses the prefix is lost
            # for every message other than INCORRECT_ANSWER_ENTERED.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation with disable_polymer=true
        added to the request query.

        The caller's query dict is copied first, so it is never mutated.
        """
        query = kwargs.get('query', {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in.

        Does nothing when no downloader is attached.
        """
        if self._downloader is None:
            return
        self._set_language()
        # The result is deliberately ignored: a falsy return from _login()
        # does not abort initialization.
        self._login()
285

286

287
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield entries from page and from every follow-up "Load more" page.

        Each chunk of HTML is passed to self._process_page(). Follow-up
        chunks are fetched as JSON from the href found in the current
        load-more widget. Iteration ends when no such href is present or
        when the returned content is blank.
        """
        load_more_re = r'data-uix-load-more-href="/?(?P<more>[^"]+)"'
        max_retries = 3

        widget_html = items_html = page
        for page_num in itertools.count(1):
            for entry in self._process_page(items_html):
                yield entry

            next_match = re.search(load_more_re, widget_html)
            if next_match is None:
                return

            for attempt in range(max_retries + 1):
                note = 'Downloading page #%s%s' % (
                    page_num, ' (retry #%d)' % attempt if attempt else '')
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    more = self._download_json(
                        'https://www.youtube.com/%s' % next_match.group('more'),
                        playlist_id, note,
                        transform_source=uppercase_escape,
                        headers=self._YOUTUBE_CLIENT_HEADERS)
                except ExtractorError as e:
                    transient = (
                        isinstance(e.cause, compat_HTTPError)
                        and e.cause.code in (500, 503))
                    if transient and attempt < max_retries:
                        continue
                    # Non-retryable error, or retries exhausted
                    raise
                break

            items_html = more['content_html']
            if not items_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = more['load_more_widget_html']

326 327 328 329 330 331

class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'Youtube' url_result for each (id, title) pair found in content."""
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Collect video ids and titles matched by video_re in page.

        ids_in_page and titles_in_page are parallel lists, extended in
        place. An id that is already listed is not added again; its title
        is filled in only if it was previously empty.
        """
        for mobj in re.finditer(video_re, page):
            groups = mobj.groupdict()
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): this compares the 'id' group, not 'index', to '0' - confirm intent
            if 'index' in groups and mobj.group('id') == '0':
                continue

            video_id = mobj.group('id')

            video_title = None
            if 'title' in groups:
                video_title = unescapeHTML(mobj.group('title'))
            if video_title:
                video_title = video_title.strip()
            if video_title == '► Play all':
                video_title = None

            if video_id in ids_in_page:
                pos = ids_in_page.index(video_id)
                if video_title and not titles_in_page[pos]:
                    titles_in_page[pos] = video_title
            else:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)

    def extract_videos_from_page(self, page):
        """Return zipped (video_id, title) pairs matched by self._VIDEO_RE in page."""
        found_ids, found_titles = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, found_ids, found_titles)
        return zip(found_ids, found_titles)


360 361
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist id
        linked from a yt-lockup-title heading in content."""
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for list_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % list_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download the page at url and return its entries as a playlist result."""
        list_id = self._match_id(url)
        page = self._download_webpage(url, list_id)
        page_title = self._og_search_title(page, fatal=False)
        entries = self._entries(page, list_id)
        return self.playlist_result(entries, list_id, page_title)
373 374


375
class YoutubeIE(YoutubeBaseInfoExtractor):
Philipp Hagemeister's avatar
Philipp Hagemeister committed
376
    IE_DESC = 'YouTube.com'
377
    _VALID_URL = r"""(?x)^
378
                     (
379
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
380
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
381
                            (?:www\.)?deturl\.com/www\.youtube\.com/|
382
                            (?:www\.)?pwnyoutube\.com/|
383
                            (?:www\.)?hooktube\.com/|
384
                            (?:www\.)?yourepeat\.com/|
385
                            tube\.majestyc\.net/|
386
                            # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
387
                            (?:(?:www|dev)\.)?invidio\.us/|
388 389
                            (?:(?:www|no)\.)?invidiou\.sh/|
                            (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
390
                            (?:www\.)?invidious\.kabi\.tk/|
391
                            (?:www\.)?invidious\.13ad\.de/|
392
                            (?:www\.)?invidious\.mastodon\.host/|
393
                            (?:www\.)?invidious\.nixnet\.xyz/|
394
                            (?:www\.)?invidious\.drycat\.fr/|
395
                            (?:www\.)?tube\.poal\.co/|
396
                            (?:www\.)?vid\.wxzm\.sx/|
397
                            (?:www\.)?yewtu\.be/|
398
                            (?:www\.)?yt\.elukerio\.org/|
399
                            (?:www\.)?yt\.lelux\.fi/|
400 401 402 403 404 405
                            (?:www\.)?invidious\.ggc-project\.de/|
                            (?:www\.)?yt\.maisputain\.ovh/|
                            (?:www\.)?invidious\.13ad\.de/|
                            (?:www\.)?invidious\.toot\.koeln/|
                            (?:www\.)?invidious\.fdn\.fr/|
                            (?:www\.)?watch\.nettohikari\.com/|
406 407 408 409 410 411
                            (?:www\.)?kgg2m7yk5aybusll\.onion/|
                            (?:www\.)?qklhadlycap4cnod\.onion/|
                            (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
                            (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
                            (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
                            (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
412
                            (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
413
                            (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
414
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
415 416
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
417
                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
418
                             |(?:                                             # or the v= param in all its forms
419
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
420
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
421
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
422 423
                                 v=
                             )
424
                         ))
425 426
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
427 428
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
429
                         )/
430
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
431
                         )
432
                     )?                                                       # all until now is optional -> you can pass the naked ID
433
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
434 435 436 437 438 439
                     (?!.*?\blist=
                        (?:
                            %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
                            WL                                                # WL are handled by the watch later IE
                        )
                     )
440
                     (?(1).+)?                                                # if we found the ID, everything can follow
441
                     $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
442
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
443 444 445 446
    _PLAYER_INFO_RE = (
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
    )
447
    _formats = {
448 449 450 451 452 453 454 455
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
456
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
457 458 459 460 461 462
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
463
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
464 465
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
466 467 468


        # 3D videos
469 470 471 472
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
473 474 475
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
476

Allan Zhou's avatar
Allan Zhou committed
477
        # Apple HTTP Live Streaming
Yen Chi Hsuan's avatar
Yen Chi Hsuan committed
478
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
479 480 481 482 483
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
484 485
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
486 487

        # DASH mp4 video
488 489 490 491 492
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
Sergey M․'s avatar
Sergey M․ committed
493
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
494 495 496 497 498 499
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
500

501
        # Dash mp4 audio
502 503 504 505 506 507 508
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
509 510

        # Dash webm
511 512 513 514 515 516 517 518 519 520 521 522 523 524 525
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
526
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
527 528 529 530 531 532
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
533 534

        # Dash webm audio
535 536
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
537

538
        # Dash webm audio with opus inside
539 540 541
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
542

543 544
        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},
545 546 547 548 549 550

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
551
    }
552
    # Fixed, ordered tuple of subtitle format identifiers.
    # NOTE(review): presumably the formats offered for each YouTube subtitle
    # track -- the code that consumes this tuple is outside this view; confirm.
    _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
553

554 555
    # Geo-bypass flag, set to False for this extractor.
    # NOTE(review): presumably overrides a default inherited from the base
    # extractor class so that no geo-restriction bypass is attempted; the
    # flag's exact semantics are defined outside this view -- confirm.
    _GEO_BYPASS = False

Philipp Hagemeister's avatar
Philipp Hagemeister committed
556
    # Name of this extractor.
    # NOTE(review): presumably the identifier shown in extractor listings and
    # log prefixes -- where it is read is outside this view; confirm.
    IE_NAME = 'youtube'
557 558
    _TESTS = [
        {
559
            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
Philipp Hagemeister's avatar
Philipp Hagemeister committed
560 561 562 563 564 565
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
566
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
567 568
                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
Philipp Hagemeister's avatar
Philipp Hagemeister committed
569 570 571
                'upload_date': '20121002',
                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
572
                'tags': ['youtube-dl'],
573
                'duration': 10,
574
                'view_count': int,
575 576
                'like_count': int,
                'dislike_count': int,
577
                'start_time': 1,
578
                'end_time': 9,
579
            }
580 581
        },
        {
582
            'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
Philipp Hagemeister's avatar
Philipp Hagemeister committed
583 584 585 586 587 588
            'note': 'Test generic use_cipher_signature video (#897)',
            'info_dict': {
                'id': 'UxxajLWwzqY',
                'ext': 'mp4',
                'upload_date': '20120506',
                'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
589
                'alt_title': 'I Love It (feat. Charli XCX)',
Sergey M․'s avatar
Sergey M․ committed
590
                'description': 'md5:19a2f98d9032b9311e686ed039564f63',
591 592 593
                'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
                         'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
                         'iconic ep', 'iconic', 'love', 'it'],
594
                'duration': 180,
Philipp Hagemeister's avatar
Philipp Hagemeister committed
595 596
                'uploader': 'Icona Pop',
                'uploader_id': 'IconaPop',
597
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
598
                'creator': 'Icona Pop',
599 600
                'track': 'I Love It (feat. Charli XCX)',
                'artist': 'Icona Pop',
601
            }
602 603
        },
        {
Philipp Hagemeister's avatar
Philipp Hagemeister committed
604 605 606 607 608 609
            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
            'note': 'Test VEVO video with age protection (#956)',
            'info_dict': {
                'id': '07FYdnEawAQ',
                'ext': 'mp4',
                'upload_date': '20130703',
610
                'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
611
                'alt_title': 'Tunnel Vision',
612
                'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
613
                'duration': 419,
Philipp Hagemeister's avatar
Philipp Hagemeister committed
614 615
                'uploader': 'justintimberlakeVEVO',
                'uploader_id': 'justintimberlakeVEVO',
616
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
617
                'creator': 'Justin Timberlake',
618
                'track': 'Tunnel Vision',
619
                'artist': 'Justin Timberlake',
Sergey M․'s avatar
Sergey M․ committed
620
                'age_limit': 18,
621 622
            }
        },
623
        {
Philipp Hagemeister's avatar
Philipp Hagemeister committed
624 625 626 627 628 629 630 631 632
            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
            'note': 'Embed-only video (#1746)',
            'info_dict': {
                'id': 'yZIXLfi8CZQ',
                'ext': 'mp4',
                'upload_date': '20120608',
                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
                'uploader': 'SET India',
Sergey M․'s avatar
Sergey M․ committed
633
                'uploader_id': 'setindia',
634
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
Sergey M․'s avatar
Sergey M․ committed
635
                'age_limit': 18,
636 637
            }
        },
638
        {
639
            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
640 641 642 643 644 645 646
            'note': 'Use the first video ID in the URL',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
647
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
648 649 650 651
                'upload_date': '20121002',
                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'tags': ['youtube-dl'],
652
                'duration': 10,
653
                'view_count': int,
654 655
                'like_count': int,
                'dislike_count': int,
656 657 658 659
            },
            'params': {
                'skip_download': True,
            },
660
        },
661
        {
662
            'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
Philipp Hagemeister's avatar
Philipp Hagemeister committed
663 664 665 666 667 668
            'note': '256k DASH audio (format 141) via DASH manifest',
            'info_dict': {
                'id': 'a9LDPn-MO4I',
                'ext': 'm4a',
                'upload_date': '20121002',
                'uploader_id': '8KVIDEO',
669
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
Philipp Hagemeister's avatar
Philipp Hagemeister committed
670 671 672
                'description': '',
                'uploader': '8KVIDEO',
                'title': 'UHDTV TEST 8K VIDEO.mp4'
673
            },
Philipp Hagemeister's avatar
Philipp Hagemeister committed
674 675 676
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141',
677
            },
Sergey M․'s avatar
Sergey M․ committed
678
            'skip': 'format 141 not served anymore',
679
        },
680 681
        # DASH manifest with encrypted signature
        {
Philipp Hagemeister's avatar
Philipp Hagemeister committed
682 683 684 685
            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
            'info_dict': {
                'id': 'IB3lcPjvWLA',
                'ext': 'm4a',
686 687
                'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
                'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
688
                'duration': 244,
Philipp Hagemeister's avatar
Philipp Hagemeister committed
689 690 691
                'uploader': 'AfrojackVEVO',
                'uploader_id': 'AfrojackVEVO',
                'upload_date': '20131011',
692
            },
Philipp Hagemeister's avatar
Philipp Hagemeister committed
693
            'params': {
Philipp Hagemeister's avatar
Philipp Hagemeister committed
694
                'youtube_include_dash_manifest': True,
Sergey M․'s avatar
Sergey M․ committed
695
                'format': '141/bestaudio[ext=m4a]',
696 697
            },
        },
Sergey M․'s avatar
Sergey M․ committed
698 699 700 701 702 703 704
        # JS player signature function name containing $
        {
            'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
            'info_dict': {
                'id': 'nfWlot6h_JM',
                'ext': 'm4a',
                'title': 'Taylor Swift - Shake It Off',
Sergey M․'s avatar
Sergey M․ committed
705
                'description': 'md5:307195cd21ff7fa352270fe884570ef0',
706
                'duration': 242,
Sergey M․'s avatar
Sergey M․ committed
707 708 709 710 711 712
                'uploader': 'TaylorSwiftVEVO',
                'uploader_id': 'TaylorSwiftVEVO',
                'upload_date': '20140818',
            },
            'params': {
                'youtube_include_dash_manifest': True,
Sergey M․'s avatar
Sergey M․ committed
713
                'format': '141/bestaudio[ext=m4a]',
Sergey M․'s avatar
Sergey M․ committed
714 715
            },
        },
716 717 718 719 720 721
        # Controversy video
        {
            'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
            'info_dict': {
                'id': 'T4XJQO3qol8',
                'ext': 'mp4',
722
                'duration': 219,
723
                'upload_date': '20100909',
724
                'uploader': 'Amazing Atheist',
725
                'uploader_id': 'TheAmazingAtheist',
726
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
727 728 729
                'title': 'Burning Everyone\'s Koran',
                'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
            }
730 731 732
        },
        # Normal age-gate video (No vevo, embed allowed)
        {
733
            'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
734 735 736 737
            'info_dict': {
                'id': 'HtVdAasjOgU',
                'ext': 'mp4',
                'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
738
                'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
739
                'duration': 142,
740 741
                'uploader': 'The Witcher',
                'uploader_id': 'WitcherGame',
742
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
743
                'upload_date': '20140605',
Sergey M․'s avatar
Sergey M․ committed
744
                'age_limit': 18,
745 746
            },
        },
747 748
        # Age-gate video with encrypted signature
        {
749
            'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
750 751
            'info_dict': {
                'id': '6kLq3WMV1nU',
752
                'ext': 'mp4',
753 754
                'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
                'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
Sergey M․'s avatar
Sergey M․ committed
755
                'duration': 246,
756 757
                'uploader': 'LloydVEVO',
                'uploader_id': 'LloydVEVO',
758
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
759
                'upload_date': '20110629',
Sergey M․'s avatar
Sergey M․ committed
760
                'age_limit': 18,
761 762
            },
        },
Sergey M․'s avatar
Sergey M․ committed
763
        # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
764
        # YouTube Red ad is not captured for creator
765 766 767 768 769
        {
            'url': '__2ABJjxzNo',
            'info_dict': {
                'id': '__2ABJjxzNo',
                'ext': 'mp4',
770
                'duration': 266,
771 772
                'upload_date': '20100430',
                'uploader_id': 'deadmau5',
773
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
Sergey M․'s avatar
Sergey M․ committed
774
                'creator': 'Dada Life, deadmau5',
775 776 777
                'description': 'md5:12c56784b8032162bb936a5f76d55360',
                'uploader': 'deadmau5',
                'title': 'Deadmau5 - Some Chords (HD)',
Sergey M․'s avatar
Sergey M․ committed
778
                'alt_title': 'This Machine Kills Some Chords',
779 780 781 782
            },
            'expected_warnings': [
                'DASH manifest missing',
            ]
783
        },
Sergey M․'s avatar
Sergey M․ committed
784
        # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
785 786 787 788 789
        {
            'url': 'lqQg6PlCWgI',
            'info_dict': {
                'id': 'lqQg6PlCWgI',
                'ext': 'mp4',
790
                'duration': 6085,
Sergey M․'s avatar
Sergey M․ committed
791
                'upload_date': '20150827',
Philipp Hagemeister's avatar
Philipp Hagemeister committed
792
                'uploader_id': 'olympic',
793
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
Philipp Hagemeister's avatar
Philipp Hagemeister committed
794
                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
795
                'uploader': 'Olympic',
Philipp Hagemeister's avatar
Philipp Hagemeister committed
796 797 798 799
                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
            },
            'params': {
                'skip_download': 'requires avconv',
800
            }
Philipp Hagemeister's avatar
Philipp Hagemeister committed
801
        },
802 803 804 805 806 807 808
        # Non-square pixels
        {
            'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
            'info_dict': {
                'id': '_b-2C3KPAM0',
                'ext': 'mp4',
                'stretched_ratio': 16 / 9.,
809
                'duration': 85,
810 811
                'upload_date': '20110310',
                'uploader_id': 'AllenMeow',
812
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
813
                'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',