Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Backups
youtube-dl
Commits
ce18a19b
Unverified
Commit
ce18a19b
authored
Dec 02, 2018
by
Sergey M․
Browse files
[tiktok] Improve extraction and add support for user pages (closes #18135)
parent
1ead840d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
86 additions
and
45 deletions
+86
-45
youtube_dl/extractor/extractors.py
youtube_dl/extractor/extractors.py
+4
-1
youtube_dl/extractor/tiktok.py
youtube_dl/extractor/tiktok.py
+82
-44
No files found.
youtube_dl/extractor/extractors.py
View file @
ce18a19b
...
...
@@ -1124,7 +1124,10 @@ from .thisamericanlife import ThisAmericanLifeIE
from
.thisav
import
ThisAVIE
from
.thisoldhouse
import
ThisOldHouseIE
from
.threeqsdn
import
ThreeQSDNIE
from
.tiktok
import
TikTokIE
from
.tiktok
import
(
TikTokIE
,
TikTokUserIE
,
)
from
.tinypic
import
TinyPicIE
from
.tmz
import
(
TMZIE
,
...
...
youtube_dl/extractor/tiktok.py
View file @
ce18a19b
...
...
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
from
.common
import
InfoExtractor
from
..utils
import
(
compat_str
,
ExtractorError
,
int_or_none
,
str_or_none
,
try_get
,
...
...
@@ -11,69 +12,106 @@ from ..utils import (
)
class
TikTokIE
(
InfoExtractor
):
_VALID_URL
=
r
'https?://(?:m\.)?tiktok\.com/v/(?P<id>[0-9]+)'
_TEST
=
{
'url'
:
'https://m.tiktok.com/v/6606727368545406213.html'
,
'md5'
:
'd584b572e92fcd48888051f238022420'
,
'info_dict'
:
{
'id'
:
'6606727368545406213'
,
'ext'
:
'mp4'
,
'title'
:
'Zureeal on TikTok'
,
'thumbnail'
:
r
're:^https?://.*~noop.image'
,
'description'
:
'#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay'
,
'uploader'
:
'Zureeal'
,
'width'
:
540
,
'height'
:
960
,
}
}
def
_real_extract
(
self
,
url
):
video_id
=
self
.
_match_id
(
url
)
webpage
=
self
.
_download_webpage
(
url
,
video_id
)
data
=
self
.
_parse_json
(
self
.
_search_regex
(
r
'var\s+data\s*=\s*({.+?});'
,
webpage
,
'data'
),
video_id
)
title
=
self
.
_og_search_title
(
webpage
)
class TikTokBaseIE(InfoExtractor):
    """Shared extraction logic for the TikTok extractors in this module."""

    def _extract_aweme(self, data):
        """Build an info dict from one aweme (video) metadata dict.

        ``data`` must provide the ``video`` and ``aweme_id`` keys; both are
        read with plain subscripting, so a missing one raises ``KeyError``.
        Every other field is looked up through ``try_get`` / ``dict.get``
        and is therefore treated as optional.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``uploader``, ``timestamp``, ``comment_count``,
        ``repost_count`` and ``formats``.
        """
        video = data['video']

        description = str_or_none(try_get(data, lambda x: x['desc']))
        width = int_or_none(try_get(video, lambda x: x['width']))
        height = int_or_none(try_get(video, lambda x: x['height']))

        # Several address groups may list the same URL; remember the URLs
        # already emitted so that each one yields exactly one format.
        seen_urls = set()
        formats = []
        for format_id in (
                'play_addr_lowbr', 'play_addr', 'play_addr_h264',
                'download_addr'):
            url_list = try_get(
                video, lambda x: x[format_id]['url_list'], list) or []
            for candidate in url_list:
                format_url = url_or_none(candidate)
                if not format_url or format_url in seen_urls:
                    continue
                seen_urls.add(format_url)
                formats.append({
                    'url': format_url,
                    'ext': 'mp4',
                    'height': height,
                    'width': width,
                })
        # NOTE(review): presumably this is what raises ExtractorError when
        # no usable format was collected -- confirm against InfoExtractor.
        self._sort_formats(formats)

        thumbnail = url_or_none(try_get(
            video, lambda x: x['cover']['url_list'][0], compat_str))
        uploader = try_get(
            data, lambda x: x['author']['nickname'], compat_str)
        timestamp = int_or_none(data.get('create_time'))
        # Prefer the top-level counter and fall back to the nested
        # "statistics" object when the former is missing (or zero).
        comment_count = int_or_none(data.get('comment_count')) or int_or_none(
            try_get(data, lambda x: x['statistics']['comment_count']))
        repost_count = int_or_none(try_get(
            data, lambda x: x['statistics']['share_count']))

        aweme_id = data['aweme_id']

        return {
            'id': aweme_id,
            # The uploader nickname doubles as the title; fall back to the
            # id when no nickname is available.
            'title': uploader or aweme_id,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'timestamp': timestamp,
            'comment_count': comment_count,
            'repost_count': repost_count,
            'formats': formats,
        }
class TikTokIE(TikTokBaseIE):
    """Extractor for single TikTok video pages (URLs of the form /v/<id>)."""

    _VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P<id>\d+)'
    _TEST = {
        'url': 'https://m.tiktok.com/v/6606727368545406213.html',
        'md5': 'd584b572e92fcd48888051f238022420',
        'info_dict': {
            'id': '6606727368545406213',
            'ext': 'mp4',
            'title': 'Zureeal',
            'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
            'thumbnail': r're:^https?://.*~noop.image',
            'uploader': 'Zureeal',
            'timestamp': 1538248586,
            'upload_date': '20180929',
            'comment_count': int,
            'repost_count': int,
        }
    }

    def _real_extract(self, url):
        """Download the video page and extract the aweme embedded in it.

        The page is searched for a ``data = {...};`` assignment; the matched
        object is parsed as JSON and handed to ``_extract_aweme``.
        """
        page_id = self._match_id(url)
        page = self._download_webpage(url, page_id)

        # Pull the inline JSON object out of the page's script.
        raw_aweme = self._search_regex(
            r'\bdata\s*=\s*({.+?})\s*;', page, 'data')
        aweme = self._parse_json(raw_aweme, page_id)

        return self._extract_aweme(aweme)
class TikTokUserIE(TikTokBaseIE):
    """Extractor for TikTok user pages; yields a playlist of the user's videos."""

    _VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P<id>\d+)'
    _TEST = {
        'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
        'info_dict': {
            'id': '188294915489964032',
        },
        'playlist_mincount': 24,
    }

    def _real_extract(self, url):
        """Fetch the user's aweme list and turn it into a playlist.

        Items for which ``_extract_aweme`` raises ``ExtractorError`` are
        left out of the playlist instead of aborting the whole extraction.
        """
        user_id = self._match_id(url)

        listing_url = 'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id
        listing = self._download_json(
            listing_url, user_id, query={'_signature': '_'})

        playlist_items = []
        for aweme in listing['aweme_list']:
            try:
                info = self._extract_aweme(aweme)
            except ExtractorError:
                # Best effort: drop this item and keep going.
                continue
            else:
                # Mark the entry as coming from the single-video extractor.
                info['extractor_key'] = TikTokIE.ie_key()
                playlist_items.append(info)

        return self.playlist_result(playlist_items, user_id)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment