From 2bdc73b21cedea4c8415376894a2a843543429f4 Mon Sep 17 00:00:00 2001 From: Crypto90 Date: Tue, 30 Jul 2024 12:31:31 +0200 Subject: [PATCH] Update youtube.py --- yt_dlp/extractor/youtube.py | 715 +++++++++++++++++++++--------------- 1 file changed, 410 insertions(+), 305 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 0e135cc3a7ed..7364e8a2eb08 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4,6 +4,7 @@ import copy import datetime as dt import enum +import functools import hashlib import itertools import json @@ -20,7 +21,6 @@ from .common import InfoExtractor, SearchInfoExtractor from .openload import PhantomJSwrapper -from ..compat import functools from ..jsinterp import JSInterpreter from ..networking.exceptions import HTTPError, network_exceptions from ..utils import ( @@ -77,9 +77,9 @@ 'client': { 'clientName': 'WEB', 'clientVersion': '2.20220801.00.00', - } + }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 1 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, }, 'web_embedded': { 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', @@ -89,7 +89,7 @@ 'clientVersion': '1.20220731.00.00', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 56 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 56, }, 'web_music': { 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30', @@ -98,7 +98,7 @@ 'client': { 'clientName': 'WEB_REMIX', 'clientVersion': '1.20220727.01.00', - } + }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 67, }, @@ -108,7 +108,7 @@ 'client': { 'clientName': 'WEB_CREATOR', 'clientVersion': '1.20220726.00.00', - } + }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 62, }, @@ -119,11 +119,11 @@ 'clientName': 'ANDROID', 'clientVersion': '19.09.37', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip' - } + 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip', + }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 3, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'android_embedded': { 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw', @@ -132,11 +132,11 @@ 'clientName': 'ANDROID_EMBEDDED_PLAYER', 'clientVersion': '19.09.37', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip' + 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 55, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'android_music': { 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI', @@ -145,11 +145,11 @@ 'clientName': 'ANDROID_MUSIC', 'clientVersion': '6.42.52', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip' - } + 'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip', + }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'android_creator': { 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8', @@ -158,11 +158,11 @@ 'clientName': 'ANDROID_CREATOR', 'clientVersion': '22.30.100', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip' + 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 14, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, # iOS clients have HLS live streams. Setting device model to get 60fps formats. # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 @@ -173,11 +173,11 @@ 'clientName': 'IOS', 'clientVersion': '19.09.3', 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' - } + 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', + }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'ios_embedded': { 'INNERTUBE_CONTEXT': { @@ -185,11 +185,11 @@ 'clientName': 'IOS_MESSAGES_EXTENSION', 'clientVersion': '19.09.3', 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' + 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 66, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'ios_music': { 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s', @@ -198,11 +198,11 @@ 'clientName': 'IOS_MUSIC', 'clientVersion': '6.33.3', 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' + 'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 26, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'ios_creator': { 'INNERTUBE_CONTEXT': { @@ -210,11 +210,11 @@ 'clientName': 'IOS_CREATOR', 'clientVersion': '22.33.101', 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' + 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 15, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, # mweb has 'ultralow' formats # See: https://github.com/yt-dlp/yt-dlp/pull/557 @@ -224,9 +224,9 @@ 'client': { 'clientName': 'MWEB', 'clientVersion': '2.20220801.00.00', - } + }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 2 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 2, }, # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option) # See: https://github.com/zerodytrash/YouTube-Internal-Clients @@ -238,7 +238,17 @@ 'clientVersion': '2.0', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 85 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 85, + }, + # This client has pre-merged video+audio 720p/1080p streams + 'mediaconnect': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'MEDIA_CONNECT_FRONTEND', + 'clientVersion': '0.1', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 95, }, } @@ -260,7 +270,7 @@ def build_innertube_clients(): THIRD_PARTY = { 'embedUrl': 'https://www.youtube.com/', # Can be any valid URL } - BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb') + BASE_CLIENTS = ('ios', 'web', 'tv', 'mweb', 'android') priority = qualities(BASE_CLIENTS[::-1]) for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()): @@ -455,10 +465,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi', 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw', 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml', - 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko' + 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko', ] - _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'} + _IGNORED_WARNINGS = { + 'Unavailable videos will be hidden during playback', + 'Unavailable videos are hidden', + } _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}' @@ -688,7 +701,7 @@ def generate_api_headers( 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg), 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg), 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg), - 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client) + 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client), } if session_index is None: session_index = self._extract_session_index(ytcfg) @@ -705,7 +718,7 @@ def _download_ytcfg(self, client, video_id): url = { 'web': 'https://www.youtube.com', 'web_music': 'https://music.youtube.com', - 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1' + 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1', }.get(client) if not url: return {} @@ -716,7 +729,7 @@ def _download_ytcfg(self, client, video_id): @staticmethod def _build_api_continuation_query(continuation, ctp=None): query = { - 'continuation': continuation + 'continuation': continuation, } # TODO: Inconsistency with clickTrackingParams. # Currently we have a fixed ctp contained within context (from ytcfg) @@ -756,7 +769,7 @@ def _extract_continuation(cls, renderer): return traverse_obj(renderer, ( ('contents', 'items', 'rows'), ..., 'continuationItemRenderer', - ('continuationEndpoint', ('button', 'buttonRenderer', 'command')) + ('continuationEndpoint', ('button', 'buttonRenderer', 'command')), ), get_all=False, expected_type=cls._extract_continuation_ep_data) @classmethod @@ -783,7 +796,7 @@ def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False): for alert_type, alert_message in (warnings + errors[:-1]): self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once) if errors: - raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected) + raise ExtractorError(f'YouTube said: {errors[-1][1]}', expected=expected) def _extract_and_report_alerts(self, data, *args, **kwargs): return self._report_alerts(self._extract_alerts(data), *args, **kwargs) @@ -875,14 +888,14 @@ def _get_count(self, data, *path_list): return count @staticmethod - def _extract_thumbnails(data, *path_list): + def _extract_thumbnails(data, *path_list, final_key='thumbnails'): """ Extract thumbnails from thumbnails dict @param path_list: path list to level that contains 'thumbnails' key """ thumbnails = [] for path in path_list or [()]: - for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...)): + for thumbnail in traverse_obj(data, (*variadic(path), final_key, ...)): thumbnail_url = url_or_none(thumbnail.get('url')) if not thumbnail_url: continue @@ -917,7 +930,7 @@ def extract_relative_time(relative_time_text): if start: return datetime_from_str(start) try: - return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit'))) + return datetime_from_str('now-{}{}'.format(mobj.group('time'), mobj.group('unit'))) except ValueError: return None @@ -1051,8 +1064,7 @@ def _extract_video(self, renderer): expected_type=str)) or '' url = f'https://www.youtube.com/watch?v={video_id}' if overlay_style == 'SHORTS' or '/shorts/' in navigation_url: - #url = f'https://www.youtube.com/shorts/{video_id}' - return None + url = f'https://www.youtube.com/shorts/{video_id}' time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo') or self._get_text(reel_header_renderer, 'timestampText') or '') @@ -1105,13 +1117,13 @@ def _extract_video(self, renderer): is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None), view_count_field: view_count, 'live_status': live_status, - 'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None + 'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None, } class YoutubeIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube' - _VALID_URL = r"""(?x)^ + _VALID_URL = r'''(?x)^ ( (?:https?://|//) # http(s):// or protocol-independent URL (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com| @@ -1120,7 +1132,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?hooktube\.com| (?:www\.)?yourepeat\.com| tube\.majestyc\.net| - %(invidious)s| + {invidious}| youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: @@ -1136,16 +1148,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): youtu\.be| # just youtu.be/xxxx vid\.plus| # or vid.plus/xxxx zwearz\.com/watch| # or zwearz.com/watch/xxxx - %(invidious)s + {invidious} )/ |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= ) )? # all until now is optional -> you can pass the naked ID - (?P[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID + (?P[0-9A-Za-z_-]{{11}}) # here is it! the YouTube video ID (?(1).+)? # if we found the ID, everything can follow - (?:\#|$)""" % { - 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), - } + (?:\#|$)'''.format( + invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), + ) _EMBED_REGEX = [ r'''(?x) (?: @@ -1172,7 +1184,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'/(?P[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', r'\b(?Pvfl[a-zA-Z0-9_-]+)\b.*?\.js$', ) - _formats = { + _formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'}, @@ -1282,6 +1294,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'}, } _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') + _POTOKEN_EXPERIMENTS = ('51217476', '51217102') _GEO_BYPASS = False @@ -1316,7 +1329,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', 'uploader_id': '@PhilippHagemeister', 'heatmap': 'count:100', - } + 'timestamp': 1349198244, + }, }, { 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ', @@ -1359,6 +1373,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', 'uploader_id': '@PhilippHagemeister', 'heatmap': 'count:100', + 'timestamp': 1349198244, }, 'params': { 'skip_download': True, @@ -1372,7 +1387,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'm4a', 'upload_date': '20121002', 'description': '', - 'title': 'UHDTV TEST 8K VIDEO.mp4' + 'title': 'UHDTV TEST 8K VIDEO.mp4', }, 'params': { 'youtube_include_dash_manifest': True, @@ -1445,6 +1460,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1401991663, }, }, { @@ -1504,6 +1520,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Projekt Melody', 'uploader_url': 'https://www.youtube.com/@ProjektMelody', 'uploader_id': '@ProjektMelody', + 'timestamp': 1577508724, }, }, { @@ -1578,7 +1595,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, 'expected_warnings': [ 'DASH manifest missing', - ] + ], }, # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431) { @@ -1609,10 +1626,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@Olympics', 'uploader_id': '@Olympics', 'channel_is_verified': True, + 'timestamp': 1440707674, }, 'params': { 'skip_download': 'requires avconv', - } + }, }, # Non-square pixels { @@ -1642,6 +1660,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': '孫ᄋᄅ', 'uploader_url': 'https://www.youtube.com/@AllenMeow', 'uploader_id': '@AllenMeow', + 'timestamp': 1299776999, }, }, # url_encoded_fmt_stream_map is empty string @@ -1785,6 +1804,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }], 'params': {'skip_download': True}, + 'skip': 'Not multifeed anymore', }, { # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536) @@ -1834,7 +1854,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'playable_in_embed': True, 'like_count': int, 'age_limit': 0, - 'channel_follower_count': int + 'channel_follower_count': int, }, 'params': { 'skip_download': True, @@ -1893,6 +1913,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'The Berkman Klein Center for Internet & Society', 'uploader_id': '@BKCHarvard', 'uploader_url': 'https://www.youtube.com/@BKCHarvard', + 'timestamp': 1422422076, }, 'params': { 'skip_download': True, @@ -1928,6 +1949,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@BernieSanders', 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1447987198, }, 'params': { 'skip_download': True, @@ -1991,6 +2013,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@Vsauce', 'comment_count': int, 'channel_is_verified': True, + 'timestamp': 1484761047, }, 'params': { 'skip_download': True, @@ -2092,7 +2115,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA', 'tags': 'count:11', 'live_status': 'not_live', - 'channel_follower_count': int + 'channel_follower_count': int, }, 'params': { 'skip_download': True, @@ -2146,6 +2169,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'l\'Or Vert asbl', 'uploader_url': 'https://www.youtube.com/@ElevageOrVert', 'uploader_id': '@ElevageOrVert', + 'timestamp': 1497343210, }, 'params': { 'skip_download': True, @@ -2184,6 +2208,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@Csharp-video-tutorialsBlogspot', 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1377976349, }, 'params': { 'skip_download': True, @@ -2266,7 +2291,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@CBSMornings', 'comment_count': int, 'channel_is_verified': True, - } + 'timestamp': 1405513526, + }, }, { # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685 @@ -2283,7 +2309,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': int, 'channel': 'Walk around Japan', 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'], - 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp', + 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg', 'age_limit': 0, 'availability': 'public', 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw', @@ -2293,6 +2319,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Walk around Japan', 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124', 'uploader_id': '@walkaroundjapan7124', + 'timestamp': 1605884416, }, 'params': { 'skip_download': True, @@ -2300,11 +2327,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, { # Has multiple audio streams 'url': 'WaOKSUlf4TM', - 'only_matching': True + 'only_matching': True, }, { # Requires Premium: has format 141 when requested using YTM url 'url': 'https://music.youtube.com/watch?v=XclachpHxis', - 'only_matching': True + 'only_matching': True, }, { # multiple subtitles with same lang_code 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug', @@ -2344,6 +2371,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'format': '17', # 3gp format available on android 'extractor_args': {'youtube': {'player_client': ['android']}}, }, + 'skip': 'android client broken', }, { # Skip download of additional client configs (remix client config in this case) @@ -2387,7 +2415,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 'heatmap': 'count:100', - }, 'params': {'format': 'mhtml', 'skip_download': True} + 'timestamp': 1395685455, + }, 'params': {'format': 'mhtml', 'skip_download': True}, }, { # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939) 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4', @@ -2416,37 +2445,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@LeonNguyen', 'uploader_id': '@LeonNguyen', 'heatmap': 'count:100', - } - }, { - # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date - 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4', - 'info_dict': { - 'id': '2NUZ8W2llS4', - 'ext': 'mp4', - 'title': 'The NP that test your phone performance 🙂', - 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d', - 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA', - 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA', - 'duration': 21, - 'view_count': int, - 'age_limit': 0, - 'categories': ['Gaming'], - 'tags': 'count:23', - 'playable_in_embed': True, - 'live_status': 'not_live', - 'upload_date': '20220102', - 'like_count': int, - 'availability': 'public', - 'channel': 'Leon Nguyen', - 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp', - 'comment_count': int, - 'channel_follower_count': int, - 'uploader': 'Leon Nguyen', - 'uploader_url': 'https://www.youtube.com/@LeonNguyen', - 'uploader_id': '@LeonNguyen', - 'heatmap': 'count:100', + 'timestamp': 1641170939, }, - 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']} }, { # date text is premiered video, ensure upload date in UTC (published 1641172509) 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM', @@ -2478,38 +2478,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 'heatmap': 'count:100', - } + 'timestamp': 1641172509, + }, }, - { # continuous livestream. Microformat upload date should be preferred. - # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27 - 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU', + { # continuous livestream. + # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00 + 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk', 'info_dict': { - 'id': 'kgx4WGK0oNU', - 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'id': 'jfKfPfyJRdk', 'ext': 'mp4', - 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA', - 'availability': 'public', + 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow', + 'like_count': int, + 'uploader': 'Lofi Girl', + 'categories': ['Music'], + 'concurrent_view_count': int, + 'playable_in_embed': True, + 'timestamp': 1657627949, + 'release_date': '20220712', + 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow', + 'description': 'md5:13a6f76df898f5674f9127139f3df6f7', 'age_limit': 0, - 'release_timestamp': 1637975704, - 'upload_date': '20210619', - 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA', - 'live_status': 'is_live', - 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg', - 'channel': 'Abao in Tokyo', + 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg', + 'release_timestamp': 1657641570, + 'uploader_url': 'https://www.youtube.com/@LofiGirl', 'channel_follower_count': int, - 'release_date': '20211127', - 'tags': 'count:39', - 'categories': ['People & Blogs'], - 'like_count': int, + 'channel_is_verified': True, + 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to', 'view_count': int, - 'playable_in_embed': True, - 'description': 'md5:2ef1d002cad520f65825346e2084e49d', - 'concurrent_view_count': int, - 'uploader': 'Abao in Tokyo', - 'uploader_url': 'https://www.youtube.com/@abaointokyo', - 'uploader_id': '@abaointokyo', + 'live_status': 'is_live', + 'tags': 'count:32', + 'channel': 'Lofi Girl', + 'availability': 'public', + 'upload_date': '20220712', + 'uploader_id': '@LofiGirl', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA', 'info_dict': { @@ -2535,7 +2538,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@lesmiscore', 'uploader': 'Lesmiscore', 'uploader_url': 'https://www.youtube.com/@lesmiscore', - } + 'timestamp': 1648005313, + }, }, { # Prefer primary title+description language metadata by default # Do not prefer translated description if primary is empty @@ -2562,8 +2566,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@coletdjnz', 'uploader_id': '@coletdjnz', 'uploader': 'cole-dlp-test-acc', + 'timestamp': 1662677394, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { # Extractor argument: prefer translated title+description 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng', @@ -2575,7 +2580,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'duration': 5, 'live_status': 'not_live', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', - 'upload_date': '20220728', + 'upload_date': '20220729', 'view_count': int, 'categories': ['People & Blogs'], 'thumbnail': r're:^https?://.*\.jpg', @@ -2588,6 +2593,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@coletdjnz', 'uploader_id': '@coletdjnz', 'uploader': 'cole-dlp-test-acc', + 'timestamp': 1659073275, + 'like_count': int, }, 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}}, 'expected_warnings': [r'Preferring "fr" translated fields'], @@ -2653,6 +2660,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Projekt Melody', 'uploader_id': '@ProjektMelody', 'uploader_url': 'https://www.youtube.com/@ProjektMelody', + 'timestamp': 1577508724, }, 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'}, }, @@ -2687,6 +2695,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@sana_natori', 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1671798112, }, }, { @@ -2721,7 +2730,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'heatmap': 'count:100', }, 'params': { - 'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}}, + 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}}, }, }, ] @@ -2756,10 +2765,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries', 'uploader_id': '@ChristopherSykesDocumentaries', 'heatmap': 'count:100', + 'timestamp': 1211825920, }, 'params': { 'skip_download': True, - } + }, }, ] @@ -2916,7 +2926,7 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate): if not should_continue: known_idx = idx - 1 raise ExtractorError('breaking out of outer loop') - last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx) + last_segment_url = urljoin(fragment_base_url, f'sq/{idx}') yield { 'url': last_segment_url, 'fragment_count': last_seq, @@ -2965,7 +2975,7 @@ def _extract_player_info(cls, player_url): if id_m: break else: - raise ExtractorError('Cannot identify player %r' % player_url) + raise ExtractorError(f'Cannot identify player {player_url!r}') return id_m.group('id') def _load_player(self, video_id, player_url, fatal=True): @@ -2974,7 +2984,7 @@ def _load_player(self, video_id, player_url, fatal=True): code = self._download_webpage( player_url, video_id, fatal=fatal, note='Downloading player ' + player_id, - errnote='Download of %s failed' % player_url) + errnote=f'Download of {player_url} failed') if code: self._code_cache[player_id] = code return self._code_cache.get(player_id) @@ -3035,10 +3045,9 @@ def _genslice(start, end, step): cache_res = func(test_string) cache_spec = [ord(c) for c in cache_res] expr_code = ' + '.join(gen_sig_code(cache_spec)) - signature_id_tuple = '(%s)' % ( - ', '.join(str(len(p)) for p in example_sig.split('.'))) - code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n' - ' return %s\n') % (signature_id_tuple, expr_code) + signature_id_tuple = '({})'.format(', '.join(str(len(p)) for p in example_sig.split('.'))) + code = (f'if tuple(len(p) for p in s.split(\'.\')) == {signature_id_tuple}:\n' + f' return {expr_code}\n') self.to_screen('Extracted signature function:\n' + code) def _parse_sig_js(self, jscode): @@ -3122,7 +3131,15 @@ def _decrypt_nsig(self, s, video_id, player_url): def _extract_n_function_name(self, jscode): funcname, idx = self._search_regex( - r'\.get\("n"\)\)&&\(b=(?P[a-zA-Z0-9$]+)(?:\[(?P\d+)\])?\([a-zA-Z0-9]\)', + r'''(?x) + (?: + \.get\("n"\)\)&&\(b=| + (?: + b=String\.fromCharCode\(110\)| + ([a-zA-Z0-9$.]+)&&\(b="nn"\[\+\1\] + ),c=a\.get\(b\)\)&&\(c= + ) + (?P[a-zA-Z0-9$]+)(?:\[(?P\d+)\])?\([a-zA-Z0-9]\)''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) if not idx: return funcname @@ -3133,7 +3150,7 @@ def _extract_n_function_name(self, jscode): def _extract_n_function_code(self, video_id, player_url): player_id = self._extract_player_info(player_url) - func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1') + func_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09') jscode = func_code or self._load_player(video_id, player_url) jsi = JSInterpreter(jscode) @@ -3142,17 +3159,7 @@ def _extract_n_function_code(self, video_id, player_url): func_name = self._extract_n_function_name(jscode) - # For redundancy - func_code = self._search_regex( - r'''(?xs)%s\s*=\s*function\s*\((?P[\w$]+)\)\s* - # NB: The end of the regex is intentionally kept strict - {(?P.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name, - jscode, 'nsig function', group=('var', 'code'), default=None) - if func_code: - func_code = ([func_code[0]], func_code[1]) - else: - self.write_debug('Extracting nsig function with jsinterp') - func_code = jsi.extract_function_code(func_name) + func_code = jsi.extract_function_code(func_name) self.cache.store('youtube-nsig', player_id, func_code) return jsi, player_id, func_code @@ -3212,7 +3219,7 @@ def _mark_watched(self, video_id, player_responses): # cpn generation algorithm is reverse engineered from base.js. # In fact it works even with dummy cpn. CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' - cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)) + cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)) # # more consistent results setting it to right before the end video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)] @@ -3249,7 +3256,7 @@ def _extract_from_webpage(cls, url, webpage): webpage) if mobj: yield cls.url_result(mobj.group('url'), cls) - raise cls.StopExtraction() + raise cls.StopExtraction yield from super()._extract_from_webpage(url, webpage) @@ -3274,7 +3281,7 @@ def _extract_chapters_from_json(self, data, duration): chapter_list = traverse_obj( data, ( 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', - 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters' + 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters', ), expected_type=list) return self._extract_chapters_helper( @@ -3308,7 +3315,36 @@ def _extract_heatmap(self, data): 'value': ('intensityScoreNormalized', {float_or_none}), })) or None - def _extract_comment(self, comment_renderer, parent=None): + def _extract_comment(self, entities, parent=None): + comment_entity_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict})) + if not (comment_id := traverse_obj(comment_entity_payload, ('properties', 'commentId', {str}))): + return + + toolbar_entity_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict})) + time_text = traverse_obj(comment_entity_payload, ('properties', 'publishedTime', {str})) or '' + + return { + 'id': comment_id, + 'parent': parent or 'root', + **traverse_obj(comment_entity_payload, { + 'text': ('properties', 'content', 'content', {str}), + 'like_count': ('toolbar', 'likeCountA11y', {parse_count}), + 'author_id': ('author', 'channelId', {self.ucid_or_none}), + 'author': ('author', 'displayName', {str}), + 'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}), + 'author_is_uploader': ('author', 'isCreator', {bool}), + 'author_is_verified': ('author', 'isVerified', {bool}), + 'author_url': ('author', 'channelCommand', 'innertubeCommand', ( + ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'), + ), {lambda x: urljoin('https://www.youtube.com', x)}), + }, get_all=False), + 'is_favorited': (None if toolbar_entity_payload is None else + toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'), + '_time_text': time_text, # FIXME: non-standard, but we need a way of showing that it is an estimate. + 'timestamp': self._parse_time_text(time_text), + } + + def _extract_comment_old(self, comment_renderer, parent=None): comment_id = comment_renderer.get('commentId') if not comment_id: return @@ -3385,25 +3421,43 @@ def extract_header(contents): sort_text = str_or_none(sort_menu_item.get('title')) if not sort_text: sort_text = 'top comments' if comment_sort_index == 0 else 'newest first' - self.to_screen('Sorting comments by %s' % sort_text.lower()) + self.to_screen(f'Sorting comments by {sort_text.lower()}') break return _continuation - def extract_thread(contents): + def extract_thread(contents, entity_payloads): if not parent: tracker['current_page_thread'] = 0 for content in contents: if not parent and tracker['total_parent_comments'] >= max_parents: yield comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer']) - comment_renderer = get_first( - (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]], - expected_type=dict, default={}) - comment = self._extract_comment(comment_renderer, parent) + # old comment format + if not entity_payloads: + comment_renderer = get_first( + (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]], + expected_type=dict, default={}) + + comment = self._extract_comment_old(comment_renderer, parent) + + # new comment format + else: + view_model = ( + traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict})) + or traverse_obj(content, ('commentViewModel', {dict}))) + comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str})) + if not comment_keys: + continue + entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys) + comment = self._extract_comment(entities, parent) + if comment: + comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None + if not comment: continue comment_id = comment['id'] + if comment.get('is_pinned'): tracker['pinned_comment_ids'].add(comment_id) # Sometimes YouTube may break and give us infinite looping comments. @@ -3438,15 +3492,15 @@ def extract_thread(contents): # Keeps track of counts across recursive calls if not tracker: - tracker = dict( - running_total=0, - est_total=None, - current_page_thread=0, - total_parent_comments=0, - total_reply_comments=0, - seen_comment_ids=set(), - pinned_comment_ids=set() - ) + tracker = { + 'running_total': 0, + 'est_total': None, + 'current_page_thread': 0, + 'total_parent_comments': 0, + 'total_reply_comments': 0, + 'seen_comment_ids': set(), + 'pinned_comment_ids': set(), + } # TODO: Deprecated # YouTube comments have a max depth of 2 @@ -3457,8 +3511,8 @@ def extract_thread(contents): if max_depth == 1 and parent: return - max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map( - lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4) + max_comments, max_parents, max_replies, max_replies_per_thread, *_ = ( + int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 4) continuation = self._extract_continuation(root_continuation_data) @@ -3487,7 +3541,7 @@ def extract_thread(contents): note_prefix = ' Downloading comment API JSON reply thread %d %s' % ( tracker['current_page_thread'], comment_prog_str) else: - note_prefix = '%sDownloading comment%s API JSON page %d %s' % ( + note_prefix = '{}Downloading comment{} API JSON page {} {}'.format( ' ' if parent else '', ' replies' if parent else '', page_num, comment_prog_str) @@ -3496,7 +3550,7 @@ def extract_thread(contents): check_get_keys = None if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0): check_get_keys = [[*continuation_items_path, ..., ( - 'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]] + 'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]] try: response = self._extract_response( item_id=None, query=continuation, @@ -3520,6 +3574,7 @@ def extract_thread(contents): raise is_forced_continuation = False continuation = None + mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict})) for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]): if is_first_continuation: continuation = extract_header(continuation_items) @@ -3528,7 +3583,7 @@ def extract_thread(contents): break continue - for entry in extract_thread(continuation_items): + for entry in extract_thread(continuation_items, mutations): if not entry: return yield entry @@ -3573,9 +3628,9 @@ def _generate_player_context(cls, sts=None): context['signatureTimestamp'] = sts return { 'playbackContext': { - 'contentPlaybackContext': context + 'contentPlaybackContext': context, }, - **cls._get_checkok_params() + **cls._get_checkok_params(), } @staticmethod @@ -3605,8 +3660,6 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, yt_query = { 'videoId': video_id, } - if _split_innertube_client(client)[0] in ('android', 'android_embedscreen'): - yt_query['params'] = 'CgIIAQ==' pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0] if pp_arg: @@ -3617,24 +3670,29 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, item_id=video_id, ep='player', query=yt_query, ytcfg=player_ytcfg, headers=headers, fatal=True, default_client=client, - note='Downloading %s player API JSON' % client.replace('_', ' ').strip() + note='Downloading {} player API JSON'.format(client.replace('_', ' ').strip()), ) or None def _get_requested_clients(self, url, smuggled_data): requested_clients = [] - default = ['ios', 'android', 'web'] + android_clients = [] + default = ['ios', 'web'] allowed_clients = sorted( - (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'), + (client for client in INNERTUBE_CLIENTS if client[:1] != '_'), key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True) for client in self._configuration_arg('player_client'): - if client in allowed_clients: - requested_clients.append(client) - elif client == 'default': + if client == 'default': requested_clients.extend(default) elif client == 'all': requested_clients.extend(allowed_clients) - else: + elif client not in allowed_clients: self.report_warning(f'Skipping unsupported client {client}') + elif client.startswith('android'): + android_clients.append(client) + else: + requested_clients.append(client) + # Force deprioritization of broken Android clients for format de-duplication + requested_clients.extend(android_clients) if not requested_clients: requested_clients = default @@ -3651,8 +3709,15 @@ def _invalid_player_response(self, pr, video_id): return pr_id def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data): - initial_pr = None + initial_pr = ignore_initial_response = None if webpage: + if 'web' in clients: + experiments = traverse_obj(master_ytcfg, ( + 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'serializedExperimentIds', {lambda x: x.split(',')}, ...)) + if all(x in experiments for x in self._POTOKEN_EXPERIMENTS): + self.report_warning( + 'Webpage contains broken formats (poToken experiment detected). Ignoring initial player response') + ignore_initial_response = True initial_pr = self._search_json( self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False) @@ -3682,8 +3747,10 @@ def append_client(*client_names): skipped_clients = {} while clients: client, base_client, variant = _split_innertube_client(clients.pop()) - player_ytcfg = master_ytcfg if client == 'web' else {} - if 'configs' not in self._configuration_arg('player_skip') and client != 'web': + player_ytcfg = {} + if client == 'web': + player_ytcfg = self._get_default_ytcfg() if ignore_initial_response else master_ytcfg + elif 'configs' not in self._configuration_arg('player_skip'): player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage) @@ -3696,11 +3763,22 @@ def append_client(*client_names): player_url = self._download_player_url(video_id) tried_iframe_fallback = True - try: - pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response( - client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data) - except ExtractorError as e: - self.report_warning(e) + pr = initial_pr if client == 'web' and not ignore_initial_response else None + for retry in self.RetryManager(fatal=False): + try: + pr = pr or self._extract_player_response( + client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, + player_url if require_js_player else None, initial_pr, smuggled_data) + except ExtractorError as e: + self.report_warning(e) + break + experiments = traverse_obj(pr, ( + 'responseContext', 'serviceTrackingParams', lambda _, v: v['service'] == 'GFEEDBACK', + 'params', lambda _, v: v['key'] == 'e', 'value', {lambda x: x.split(',')}, ...)) + if all(x in experiments for x in self._POTOKEN_EXPERIMENTS): + pr = None + retry.error = ExtractorError('API returned broken formats (poToken experiment detected)', expected=True) + if not pr: continue if pr_id := self._invalid_player_response(pr, video_id): @@ -3741,6 +3819,8 @@ def _needs_live_processing(self, live_status, duration): def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration): CHUNK_SIZE = 10 << 20 + PREFERRED_LANG_VALUE = 10 + original_language = None itags, stream_ids = collections.defaultdict(set), [] itag_qualities, res_qualities = {}, {0: None} q = qualities([ @@ -3748,7 +3828,7 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, l # audio-only formats with unknown quality may get tagged as tiny 'tiny', 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats - 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres' + 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres', ]) streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...)) format_types = self._configuration_arg('formats') @@ -3761,8 +3841,8 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, l def build_fragments(f): return LazyList({ 'url': update_url_query(f['url'], { - 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}' - }) + 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}', + }), } for range_start in range(0, f['filesize'], CHUNK_SIZE)) for fmt in streaming_formats: @@ -3789,6 +3869,13 @@ def build_fragments(f): itag_qualities[itag] = quality if height: res_qualities[height] = quality + + is_default = audio_track.get('audioIsDefault') + is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower() + language_code = audio_track.get('id', '').split('.')[0] + if language_code and is_default: + original_language = language_code + # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment # (adding `&sq=0` to the URL) and parsing emsg box to determine the # number of fragment that would subsequently requested with (`&sq=N`) @@ -3803,9 +3890,9 @@ def build_fragments(f): if not all((sc, fmt_url, player_url, encrypted_sig)): continue try: - fmt_url += '&%s=%s' % ( + fmt_url += '&{}={}'.format( traverse_obj(sc, ('sp', -1)) or 'signature', - self._decrypt_signature(encrypted_sig, video_id, player_url) + self._decrypt_signature(encrypted_sig, video_id, player_url), ) except ExtractorError as e: self.report_warning('Signature extraction failed: Some formats may be missing', @@ -3814,12 +3901,11 @@ def build_fragments(f): continue query = parse_qs(fmt_url) - throttled = False if query.get('n'): try: decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0]) fmt_url = update_url_query(fmt_url, { - 'n': decrypt_nsig(query['n'][0], video_id, player_url) + 'n': decrypt_nsig(query['n'][0], video_id, player_url), }) except ExtractorError as e: phantomjs_hint = '' @@ -3828,20 +3914,16 @@ def build_fragments(f): f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n') if player_url: self.report_warning( - f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}' + f'nsig extraction failed: Some formats may be missing\n{phantomjs_hint}' f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True) self.write_debug(e, only_once=True) else: self.report_warning( - 'Cannot decrypt nsig without player_url: You may experience throttling for some formats', + 'Cannot decrypt nsig without player_url: Some formats may be missing', video_id=video_id, only_once=True) - throttled = True + continue tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) - language_preference = ( - 10 if audio_track.get('audioIsDefault') and 10 - else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10 - else -1) format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)})) # Some formats may have much smaller duration than others (possibly damaged during encoding) # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823 @@ -3853,6 +3935,14 @@ def build_fragments(f): f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True) client_name = fmt.get(STREAMING_DATA_CLIENT_NAME) + # Android client formats are broken due to integrity check enforcement + # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554 + is_broken = client_name and client_name.startswith(short_client_name('android')) + if is_broken: + self.report_warning( + f'{video_id}: Android client formats are broken and may yield HTTP Error 403. ' + 'They will be deprioritized', only_once=True) + name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or '' fps = int_or_none(fmt.get('fps')) or 0 dct = { @@ -3860,17 +3950,15 @@ def build_fragments(f): 'filesize': int_or_none(fmt.get('contentLength')), 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}', 'format_note': join_nonempty( - join_nonempty(audio_track.get('displayName'), - language_preference > 0 and ' (default)', delim=''), + join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''), name, fmt.get('isDrc') and 'DRC', try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()), try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()), - throttled and 'THROTTLED', is_damaged and 'DAMAGED', + is_damaged and 'DAMAGED', is_broken and 'BROKEN', (self.get_param('verbose') or all_formats) and client_name, delim=', '), # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372 - 'source_preference': ((-10 if throttled else -5 if itag == '22' else -1) - + (100 if 'Premium' in name else 0)), + 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0), 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1 'audio_channels': fmt.get('audioChannels'), 'height': height, @@ -3880,11 +3968,10 @@ def build_fragments(f): 'filesize_approx': filesize_from_tbr(tbr, format_duration), 'url': fmt_url, 'width': int_or_none(fmt.get('width')), - 'language': join_nonempty(audio_track.get('id', '').split('.')[0], - 'desc' if language_preference < -1 else '') or None, - 'language_preference': language_preference, - # Strictly de-prioritize damaged and 3gp formats - 'preference': -10 if is_damaged else -2 if itag == '17' else None, + 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None, + 'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1, + # Strictly de-prioritize broken, damaged and 3gp formats + 'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None, } mime_mobj = re.match( r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '') @@ -3943,6 +4030,10 @@ def process_manifest_format(f, proto, client_name, itag): elif itag: f['format_id'] = itag + if original_language and f.get('language') == original_language: + f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ') + f['language_preference'] = PREFERRED_LANG_VALUE + if f.get('source_preference') is None: f['source_preference'] = -1 @@ -4117,7 +4208,7 @@ def _real_extract(self, url): expected_type=str) if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'): if self.get_param('noplaylist'): - self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + self.to_screen(f'Downloading just video {video_id} because of --no-playlist') else: entries = [] feed_ids = [] @@ -4138,19 +4229,19 @@ def feed_entry(name): feed_title = feed_entry('title') title = video_title if feed_title: - title += ' (%s)' % feed_title + title += f' ({feed_title})' entries.append({ '_type': 'url_transparent', 'ie_key': 'Youtube', 'url': smuggle_url( - '%swatch?v=%s' % (base_url, feed_data['id'][0]), + '{}watch?v={}'.format(base_url, feed_data['id'][0]), {'force_singlefeed': True}), 'title': title, }) feed_ids.append(feed_id) self.to_screen( - 'Downloading multifeed video (%s) - add --no-playlist to just download video %s' - % (', '.join(feed_ids), video_id)) + 'Downloading multifeed video ({}) - add --no-playlist to just download video {}'.format( + ', '.join(feed_ids), video_id)) return self.playlist_result( entries, video_id, video_title, video_description) @@ -4214,7 +4305,7 @@ def feed_entry(name): # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants # in resolution, these are not the custom thumbnail. So de-prioritize them 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default', - 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3' + 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3', ] n_thumbnail_names = len(thumbnail_names) thumbnails.extend({ @@ -4287,8 +4378,8 @@ def is_bad_format(fmt): 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'), 'live_status': live_status, 'release_timestamp': live_start_time, - '_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats - 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto') + '_format_sort_fields': ( # source_preference is lower for potentially damaged formats + 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'), } subtitles = {} @@ -4366,7 +4457,7 @@ def process_language(container, base_url, lang_code, sub_name, query): for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]: d_k += '_time' if d_k not in info and k in s_ks: - info[d_k] = parse_duration(query[k][0]) + info[d_k] = parse_duration(v[0]) # Youtube Music Auto-generated description if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'): @@ -4418,10 +4509,10 @@ def process_language(container, base_url, lang_code, sub_name, query): info['comment_count'] = traverse_obj(initial_data, ( 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer', - 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount' + 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', ), ( 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section', - 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo' + 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', ), expected_type=self._get_count, get_all=False) try: # This will error if there is no livechat @@ -4553,19 +4644,31 @@ def process_language(container, base_url, lang_code, sub_name, query): 'uploader_id': channel_handle, 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None), }) + + # We only want timestamp IF it has time precision AND a timezone + # Currently the uploadDate in microformats appears to be in US/Pacific timezone. + timestamp = ( + parse_iso8601(get_first(microformats, 'uploadDate'), timezone=NO_DEFAULT) + or parse_iso8601(search_meta('uploadDate'), timezone=NO_DEFAULT) + ) + upload_date = ( + dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else + ( + unified_strdate(get_first(microformats, 'uploadDate')) + or unified_strdate(search_meta('uploadDate')) + )) + + # In the case we cannot get the timestamp: # The upload date for scheduled, live and past live streams / premieres in microformats # may be different from the stream date. Although not in UTC, we will prefer it in this case. # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139 - upload_date = ( - unified_strdate(get_first(microformats, 'uploadDate')) - or unified_strdate(search_meta('uploadDate'))) - if not upload_date or ( - live_status in ('not_live', None) - and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', []) - ): + if not upload_date or (not timestamp and live_status in ('not_live', None)): + # this should be in UTC, as configured in the cookie/client context upload_date = strftime_or_none( self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date + info['upload_date'] = upload_date + info['timestamp'] = timestamp if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'): # Newly uploaded videos' HLS formats are potentially problematic and need to be checked @@ -4639,7 +4742,7 @@ def wrapper(self, url): def _extract_basic_item_renderer(item): # Modified from _extract_grid_item_renderer known_basic_renderers = ( - 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer' + 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer', ) for key, renderer in item.items(): if not isinstance(renderer, dict): @@ -4695,22 +4798,14 @@ def _grid_entries(self, grid_renderer): if not isinstance(renderer, dict): continue title = self._get_text(renderer, 'title') - #print("===========================") - #print(renderer) + # playlist playlist_id = renderer.get('playlistId') if playlist_id: - video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId')) - if video_id: - playlist_uploader = self._get_text(renderer, 'ownerText', 'shortBylineText') - playlist_count = renderer.get('videoCount') - - yield self.url_result(f'https://www.youtube.com/watch?v={video_id}&list={playlist_id}', - ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title, playlist_uploader=playlist_uploader, playlist_count=playlist_count) - continue yield self.url_result( - 'https://www.youtube.com/playlist?list=%s' % playlist_id, - ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title) + f'https://www.youtube.com/playlist?list={playlist_id}', + ie=YoutubeTabIE.ie_key(), video_id=playlist_id, + video_title=title) continue # video video_id = renderer.get('videoId') @@ -4766,7 +4861,7 @@ def _shelf_entries_from_content(self, shelf_renderer): yield from self._grid_entries(renderer) renderer = content.get('horizontalListRenderer') if renderer: - # TODO + # TODO: handle case pass def _shelf_entries(self, shelf_renderer, skip_channels=False): @@ -4843,7 +4938,7 @@ def _post_thread_entries(self, post_thread_renderer): post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str) if playlist_id: yield self.url_result( - 'https://www.youtube.com/playlist?list=%s' % playlist_id, + f'https://www.youtube.com/playlist?list={playlist_id}', ie=YoutubeTabIE.ie_key(), video_id=playlist_id) # inline video links runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or [] @@ -4996,12 +5091,12 @@ def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data): continuation_items = traverse_obj(response, ( ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ..., - 'appendContinuationItemsAction', 'continuationItems' + 'appendContinuationItemsAction', 'continuationItems', ), 'continuationContents', get_all=False) continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={}) video_items_renderer = None - for key in continuation_item.keys(): + for key in continuation_item: if key not in known_renderers: continue func, parent_key = known_renderers[key] @@ -5056,6 +5151,10 @@ def _extract_metadata_from_tabs(self, item_id, data): else: metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict) + # pageHeaderViewModel slow rollout began April 2024 + page_header_view_model = traverse_obj(data, ( + 'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict})) + # We can get the uncropped banner/avatar by replacing the crop params with '=s0' # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714 def _get_uncropped(url): @@ -5068,11 +5167,13 @@ def _get_uncropped(url): avatar_thumbnails.append({ 'url': uncropped_avatar, 'id': 'avatar_uncropped', - 'preference': 1 + 'preference': 1, }) - channel_banners = self._extract_thumbnails( - data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner'))) + channel_banners = ( + self._extract_thumbnails(data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner'))) + or self._extract_thumbnails( + page_header_view_model, ('banner', 'imageBannerViewModel', 'image'), final_key='sources')) for banner in channel_banners: banner['preference'] = -10 @@ -5082,7 +5183,7 @@ def _get_uncropped(url): channel_banners.append({ 'url': uncropped_banner, 'id': 'banner_uncropped', - 'preference': -5 + 'preference': -5, }) # Deprecated - remove primary_sidebar_renderer when layout discontinued @@ -5099,7 +5200,11 @@ def _get_uncropped(url): or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or info['id']), 'availability': self._extract_availability(data), - 'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')), + 'channel_follower_count': ( + self._get_count(data, ('header', ..., 'subscriberCountText')) + or traverse_obj(page_header_view_model, ( + 'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts', + lambda _, v: 'subscribers' in v['text']['content'], 'text', 'content', {parse_count}, any))), 'description': try_get(metadata_renderer, lambda x: x.get('description', '')), 'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str})) or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))), @@ -5152,7 +5257,7 @@ def _get_uncropped(url): info.update({ 'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text), 'channel_id': self.ucid_or_none(browse_ep.get('browseId')), - 'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))) + 'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))), }) info.update({ @@ -5184,12 +5289,12 @@ def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg): 'playlistId': playlist_id, 'videoId': watch_endpoint.get('videoId') or last_id, 'index': watch_endpoint.get('index') or len(videos), - 'params': watch_endpoint.get('params') or 'OAE%3D' + 'params': watch_endpoint.get('params') or 'OAE%3D', } response = self._extract_response( - item_id='%s page %d' % (playlist_id, page_num), + item_id=f'{playlist_id} page {page_num}', query=query, ep='next', headers=headers, ytcfg=ytcfg, - check_get_keys='contents' + check_get_keys='contents', ) playlist = try_get( response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict) @@ -5280,7 +5385,7 @@ def _reload_with_unavailable_videos(self, item_id, data, ytcfg): visitor_data=self._extract_visitor_data(data, ytcfg)) query = { 'params': 'wgYCCAA=', - 'browseId': f'VL{item_id}' + 'browseId': f'VL{item_id}', } return self._extract_response( item_id=item_id, headers=headers, query=query, @@ -5412,7 +5517,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): (?!consent\.)(?:\w+\.)? (?: youtube(?:kids)?\.com| - %(invidious)s + {invidious} )/ (?: (?Pchannel|c|user|browse)/| @@ -5420,13 +5525,13 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): feed/|hashtag/| (?:playlist|watch)\?.*?\blist= )| - (?!(?:%(reserved_names)s)\b) # Direct URLs + (?!(?:{reserved_names})\b) # Direct URLs ) (?P[^/?\#&]+) - )''' % { - 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES, - 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), - } + )'''.format( + reserved_names=YoutubeBaseInfoExtractor._RESERVED_NAMES, + invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), + ) IE_NAME = 'youtube:tab' _TESTS = [{ @@ -5444,7 +5549,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'], 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', - 'channel_follower_count': int + 'channel_follower_count': int, }, }, { 'note': 'playlists, multipage, different order', @@ -5461,7 +5566,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', 'channel': 'Igor Kleiner Ph.D.', 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', - 'channel_follower_count': int + 'channel_follower_count': int, }, }, { 'note': 'playlists, series', @@ -5496,8 +5601,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ', 'tags': 'count:12', 'channel': 'ThirstForScience', - 'channel_follower_count': int - } + 'channel_follower_count': int, + }, }, { 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', 'only_matching': True, @@ -5552,7 +5657,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@lexwill718', 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - 'channel_follower_count': int + 'channel_follower_count': int, }, 'playlist_mincount': 2, }, { @@ -5569,7 +5674,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'uploader_url': 'https://www.youtube.com/@lexwill718', 'channel': 'lex will', - 'channel_follower_count': int + 'channel_follower_count': int, }, 'playlist_mincount': 975, }, { @@ -5586,7 +5691,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel': 'lex will', 'tags': ['bible', 'history', 'prophesy'], 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', - 'channel_follower_count': int + 'channel_follower_count': int, }, 'playlist_mincount': 199, }, { @@ -5603,7 +5708,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'tags': ['bible', 'history', 'prophesy'], - 'channel_follower_count': int + 'channel_follower_count': int, }, 'playlist_mincount': 17, }, { @@ -5926,11 +6031,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): }, { 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist', 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', - 'only_matching': True + 'only_matching': True, }, { 'note': '/browse/ should redirect to /channel/', 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng', - 'only_matching': True + 'only_matching': True, }, { 'note': 'VLPL, should redirect to playlist?list=PL...', 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', @@ -6027,7 +6132,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_id': '@PhilippHagemeister', 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', 'uploader': 'Philipp Hagemeister', - } + }, }], 'playlist_count': 1, 'params': {'extract_flat': True}, @@ -6042,7 +6147,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'playlist_mincount': 50, 'params': { 'skip_download': True, - 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + 'extractor_args': {'youtubetab': {'skip': ['webpage']}}, }, }, { 'note': 'API Fallback: /videos tab, sorted by oldest first', @@ -6055,12 +6160,12 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', 'tags': [], 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw', - 'channel_follower_count': int + 'channel_follower_count': int, }, 'playlist_mincount': 650, 'params': { 'skip_download': True, - 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + 'extractor_args': {'youtubetab': {'skip': ['webpage']}}, }, 'skip': 'Query for sorting no longer works', }, { @@ -6082,13 +6187,13 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'playlist_mincount': 101, 'params': { 'skip_download': True, - 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + 'extractor_args': {'youtubetab': {'skip': ['webpage']}}, }, 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'], }, { 'note': 'non-standard redirect to regional channel', 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ', - 'only_matching': True + 'only_matching': True, }, { 'note': 'collaborative playlist (uploader name in the form "by and x other(s)")', 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', @@ -6107,7 +6212,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_id': '@pukkandan', 'uploader': 'pukkandan', }, - 'playlist_mincount': 2 + 'playlist_mincount': 2, }, { 'note': 'translated tab name', 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists', @@ -6248,7 +6353,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): # No uploads and no UCID given. Should fail with no uploads error # See test_youtube_lists 'url': 'https://www.youtube.com/news', - 'only_matching': True + 'only_matching': True, }, { # No videos tab but has a shorts tab 'url': 'https://www.youtube.com/c/TKFShorts', @@ -6310,7 +6415,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'title': 'SHORT short', 'view_count': int, 'thumbnails': list, - } + }, }], 'params': {'extract_flat': True}, }, { @@ -6318,8 +6423,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live', 'info_dict': { 'id': 'UCQvWX73GQygcwXOTSf_VDVg', - 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live - 'tags': [] + 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be Minecraft - Live or Minecraft - Topic - Live + 'tags': [], }, 'playlist': [{ 'info_dict': { @@ -6337,10 +6442,10 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_url': str, 'uploader_id': str, 'channel_is_verified': bool, # this will keep changing - } + }, }], 'params': {'extract_flat': True, 'playlist_items': '1'}, - 'playlist_mincount': 1 + 'playlist_mincount': 1, }, { # Channel renderer metadata. Contains number of videos on the channel 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels', @@ -6373,7 +6478,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@PewDiePie', 'uploader_id': '@PewDiePie', 'channel_is_verified': True, - } + }, }], 'params': {'extract_flat': True}, }, { @@ -6545,7 +6650,7 @@ def _real_extract(self, url, smuggled_data): # Handle both video/playlist URLs qs = parse_qs(url) - video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')] + video_id, playlist_id = (traverse_obj(qs, (key, 0)) for key in ('v', 'list')) if not video_id and mobj['not_channel'].startswith('watch'): if not playlist_id: # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable @@ -6677,15 +6782,15 @@ class YoutubePlaylistIE(InfoExtractor): (?: (?: youtube(?:kids)?\.com| - %(invidious)s + {invidious} ) /.*?\?.*?\blist= )? - (?P%(playlist_id)s) - )''' % { - 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE, - 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), - } + (?P{playlist_id}) + )'''.format( + playlist_id=YoutubeBaseInfoExtractor._PLAYLIST_ID_RE, + invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), + ) IE_NAME = 'youtube:playlist' _TESTS = [{ 'note': 'issue #673', @@ -6785,7 +6890,7 @@ def _real_extract(self, url): class YoutubeYtBeIE(InfoExtractor): IE_DESC = 'youtu.be' - _VALID_URL = r'https?://youtu\.be/(?P[0-9A-Za-z_-]{11})/*?.*?\blist=(?P%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} + _VALID_URL = rf'https?://youtu\.be/(?P[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})' _TESTS = [{ 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5', 'info_dict': { @@ -6811,7 +6916,7 @@ class YoutubeYtBeIE(InfoExtractor): 'availability': 'public', 'duration': 59, 'comment_count': int, - 'channel_follower_count': int + 'channel_follower_count': int, }, 'params': { 'noplaylist': True, @@ -6985,7 +7090,7 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): 'info_dict': { 'id': 'youtube-dl test video', 'title': 'youtube-dl test video', - } + }, }, { 'note': 'Suicide/self-harm search warning', 'url': 'ytsearch1:i hate myself and i wanna die', @@ -6993,7 +7098,7 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): 'info_dict': { 'id': 'i hate myself and i wanna die', 'title': 'i hate myself and i wanna die', - } + }, }] @@ -7008,7 +7113,7 @@ class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): 'info_dict': { 'id': 'youtube-dl test video', 'title': 'youtube-dl test video', - } + }, }] @@ -7022,14 +7127,14 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): 'info_dict': { 'id': 'youtube-dl test video', 'title': 'youtube-dl test video', - } + }, }, { 'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D', 'playlist_mincount': 5, 'info_dict': { 'id': 'python', 'title': 'python', - } + }, }, { 'url': 'https://www.youtube.com/results?search_query=%23cats', 'playlist_mincount': 1, @@ -7068,7 +7173,7 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): 'uploader': 'Kurzgesagt – In a Nutshell', 'channel_is_verified': True, 'channel_follower_count': int, - } + }, }], 'params': {'extract_flat': True, 'playlist_items': '1'}, 'playlist_mincount': 1, @@ -7093,7 +7198,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): 'info_dict': { 'id': 'royalty free music', 'title': 'royalty free music', - } + }, }, { 'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D', 'playlist_mincount': 30, @@ -7101,7 +7206,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): 'id': 'royalty free music - songs', 'title': 'royalty free music - songs', }, - 'params': {'extract_flat': 'in_playlist'} + 'params': {'extract_flat': 'in_playlist'}, }, { 'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists', 'playlist_mincount': 30, @@ -7109,7 +7214,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): 'id': 'royalty free music - community playlists', 'title': 'royalty free music - community playlists', }, - 'params': {'extract_flat': 'in_playlist'} + 'params': {'extract_flat': 'in_playlist'}, }] _SECTIONS = { @@ -7128,7 +7233,7 @@ def _real_extract(self, url): if params: section = next((k for k, v in self._SECTIONS.items() if v == params), params) else: - section = urllib.parse.unquote_plus((url.split('#') + [''])[1]).lower() + section = urllib.parse.unquote_plus(([*url.split('#'), ''])[1]).lower() params = self._SECTIONS.get(section) if not params: section = None @@ -7148,8 +7253,8 @@ def _real_initialize(self): YoutubeBaseInfoExtractor._check_login_required(self) @classproperty - def IE_NAME(self): - return f'youtube:{self._FEED_NAME}' + def IE_NAME(cls): + return f'youtube:{cls._FEED_NAME}' def _real_extract(self, url): return self.url_result( @@ -7317,7 +7422,7 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor): 'chapters': 'count:20', 'comment_count': int, 'heatmap': 'count:100', - } + }, }] def _real_extract(self, url):