From f4d3e9e6dc25077b79849a31a2f67f93fdc01e62 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 12 Dec 2024 13:39:38 +0000 Subject: [PATCH 01/13] [ie/soundcloud] Fix extraction (#11777) Authored by: bashonly --- yt_dlp/extractor/soundcloud.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index e0dda8ff8995..66bc5f9c587f 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -259,6 +259,8 @@ def invalid_url(url): preset_base = preset.partition('_')[0] protocol = traverse_obj(t, ('format', 'protocol', {str})) or 'http' + if protocol.startswith(('ctr-', 'cbc-')): + continue if protocol == 'progressive': protocol = 'http' if protocol != 'hls' and '/hls' in format_url: From bc262bcad4d3683ceadf61a7eb87e233e72adef3 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 12 Dec 2024 13:44:19 +0000 Subject: [PATCH 02/13] [ie/patreon:campaign] Support /c/ URLs (#11756) Closes #11755 Authored by: bashonly --- yt_dlp/extractor/patreon.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 6bdeaf15710d..a0e831a5cee0 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -457,7 +457,7 @@ class PatreonCampaignIE(PatreonBaseIE): _VALID_URL = r'''(?x) https?://(?:www\.)?patreon\.com/(?: (?:m|api/campaigns)/(?P\d+)| - (?P(?!creation[?/]|posts/|rss[?/])[\w-]+) + (?:c/)?(?P(?!creation[?/]|posts/|rss[?/])[\w-]+) )(?:/posts)?/?(?:$|[?#])''' _TESTS = [{ 'url': 'https://www.patreon.com/dissonancepod/', @@ -509,6 +509,26 @@ class PatreonCampaignIE(PatreonBaseIE): 'thumbnail': r're:^https?://.*$', }, 'playlist_mincount': 201, + }, { + 'url': 'https://www.patreon.com/c/OgSog', + 'info_dict': { + 'id': '8504388', + 'title': 'OGSoG', + 'description': r're:(?s)Hello and welcome to our Patreon page. We are Mari, Lasercorn, .+', + 'channel': 'OGSoG', + 'channel_id': '8504388', + 'channel_url': 'https://www.patreon.com/OgSog', + 'uploader_url': 'https://www.patreon.com/OgSog', + 'uploader_id': '72323575', + 'uploader': 'David Moss', + 'thumbnail': r're:https?://.+/.+', + 'channel_follower_count': int, + 'age_limit': 0, + }, + 'playlist_mincount': 331, + }, { + 'url': 'https://www.patreon.com/c/OgSog/posts', + 'only_matching': True, }, { 'url': 'https://www.patreon.com/dissonancepod/posts', 'only_matching': True, From d5e2a379f2adcb28bc48c7d9e90716d7278f89d2 Mon Sep 17 00:00:00 2001 From: Pew <65320806+MutantPiggieGolem1@users.noreply.github.com> Date: Thu, 12 Dec 2024 05:46:52 -0800 Subject: [PATCH 03/13] [ie/youtube] Fix `release_date` extraction (#11759) Authored by: MutantPiggieGolem1 --- yt_dlp/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index e69373ba2f42..5ba8a62aac95 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4689,7 +4689,7 @@ def process_language(container, base_url, lang_code, sub_name, query): (?=(?P[^\n]+))(?P=artist)\n+ (?=(?P[^\n]+))(?P=album)\n (?:.+?℗\s*(?P\d{4})(?!\d))? - (?:.+?Released on\s*:\s*(?P\d{4}-\d{2}-\d{2}))? + (?:.+?Released\ on\s*:\s*(?P\d{4}-\d{2}-\d{2}))? (.+?\nArtist\s*:\s* (?=(?P[^\n]+))(?P=clean_artist)\n )?.+\nAuto-generated\ by\ YouTube\.\s*$ From f6c73aad5f1a67544bea137ebd9d1e22e0e56567 Mon Sep 17 00:00:00 2001 From: Crypto90 Date: Thu, 12 Dec 2024 14:54:11 +0100 Subject: [PATCH 04/13] [ie/youtube:search_url] Fix playlist searches (#11782) Closes #11666 Authored by: Crypto90 --- yt_dlp/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 5ba8a62aac95..7eb522a47ad4 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -5282,6 +5282,7 @@ def _extract_entries(self, parent_renderer, continuation_list): 'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}), 'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)], 'richGridRenderer': lambda x: self._extract_entries(x, continuation_list), + 'lockupViewModel': lambda x: [self._extract_lockup_view_model(x)], } for key, renderer in isr_content.items(): if key not in known_renderers: From 5460cd91891bf613a2065e2fc278d9903c37a127 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 13 Dec 2024 09:43:08 +0000 Subject: [PATCH 05/13] [ie/youtube] Fix signature function extraction for `2f1832d2` (#11801) Closes #11798 Authored by: bashonly --- test/test_youtube_signature.py | 9 +++++++++ yt_dlp/extractor/youtube.py | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 2a99436a6d7a..13436f088451 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -73,6 +73,11 @@ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', 'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', ), + ( + 'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js', + '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', + '0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q', + ), ] _NSIG_TESTS = [ @@ -192,6 +197,10 @@ 'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js', 'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ', ), + ( + 'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js', + 'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP', + ), ] diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 7eb522a47ad4..e4904965d9a3 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3127,9 +3127,9 @@ def _parse_sig_js(self, jscode): # ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J}; # {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))} funcname = self._search_regex( - (r'\b(?P[a-zA-Z0-9$]+)&&\((?P=var)=(?P[a-zA-Z0-9$]{2,})\(decodeURIComponent\((?P=var)\)\)', - r'(?P[a-zA-Z0-9$]+)\s*=\s*function\(\s*(?P[a-zA-Z0-9$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)', - r'(?:\b|[^a-zA-Z0-9$])(?P[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?', + (r'\b(?P[a-zA-Z0-9_$]+)&&\((?P=var)=(?P[a-zA-Z0-9_$]{2,})\(decodeURIComponent\((?P=var)\)\)', + r'(?P[a-zA-Z0-9_$]+)\s*=\s*function\(\s*(?P[a-zA-Z0-9_$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)', + r'(?:\b|[^a-zA-Z0-9_$])(?P[a-zA-Z0-9_$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9_$]{2}\.[a-zA-Z0-9_$]{2}\(a,\d+\))?', # Old patterns r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', From dc3c4fddcc653989dae71fc563d82a308fc898cc Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:21:48 +0000 Subject: [PATCH 06/13] [ie/youtube] Prioritize original language over auto-dubbed audio (#11803) Closes #11753 Authored by: bashonly --- yt_dlp/extractor/youtube.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index e4904965d9a3..fd9c7107c7f7 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4067,10 +4067,12 @@ def build_fragments(f): if height: res_qualities[height] = quality + display_name = audio_track.get('displayName') or '' + is_original = 'original' in display_name.lower() + is_descriptive = 'descriptive' in display_name.lower() is_default = audio_track.get('audioIsDefault') - is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower() language_code = audio_track.get('id', '').split('.')[0] - if language_code and is_default: + if language_code and (is_original or (is_default and not original_language)): original_language = language_code # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment @@ -4151,7 +4153,7 @@ def build_fragments(f): 'filesize': int_or_none(fmt.get('contentLength')), 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}', 'format_note': join_nonempty( - join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''), + join_nonempty(display_name, is_default and ' (default)', delim=''), name, fmt.get('isDrc') and 'DRC', try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()), try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()), @@ -4170,7 +4172,7 @@ def build_fragments(f): 'url': fmt_url, 'width': int_or_none(fmt.get('width')), 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None, - 'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1, + 'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1, # Strictly de-prioritize broken, damaged and 3gp formats 'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None, } From 54216696261bc07cacd9a837c501d9e0b7fed09e Mon Sep 17 00:00:00 2001 From: sepro Date: Fri, 13 Dec 2024 11:25:29 +0100 Subject: [PATCH 07/13] [cleanup] Make more playlist entries lazy (#11763) Authored by: seproDev --- yt_dlp/extractor/brightcove.py | 5 +++-- yt_dlp/extractor/dvtv.py | 2 +- yt_dlp/extractor/nytimes.py | 2 +- yt_dlp/extractor/vidyard.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 2526f25dac75..3ada1fd5deb8 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -31,6 +31,7 @@ update_url_query, url_or_none, ) +from ..utils.traversal import traverse_obj class BrightcoveLegacyIE(InfoExtractor): @@ -935,8 +936,8 @@ def extract_policy_key(): if content_type == 'playlist': return self.playlist_result( - [self._parse_brightcove_metadata(vid, vid.get('id'), headers) - for vid in json_data.get('videos', []) if vid.get('id')], + (self._parse_brightcove_metadata(vid, vid['id'], headers) + for vid in traverse_obj(json_data, ('videos', lambda _, v: v['id']))), json_data.get('id'), json_data.get('name'), json_data.get('description')) diff --git a/yt_dlp/extractor/dvtv.py b/yt_dlp/extractor/dvtv.py index 3e442b339b93..52d67d2bd0e9 100644 --- a/yt_dlp/extractor/dvtv.py +++ b/yt_dlp/extractor/dvtv.py @@ -162,7 +162,7 @@ def _real_extract(self, url): items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage) if items: return self.playlist_result( - [self._parse_video_metadata(i, video_id, timestamp) for i in items], + (self._parse_video_metadata(i, video_id, timestamp) for i in items), video_id, self._html_search_meta('twitter:title', webpage)) item = self._search_regex( diff --git a/yt_dlp/extractor/nytimes.py b/yt_dlp/extractor/nytimes.py index 9ef57410ac2c..a97add71a44f 100644 --- a/yt_dlp/extractor/nytimes.py +++ b/yt_dlp/extractor/nytimes.py @@ -343,7 +343,7 @@ def _real_extract(self, url): if media_ids: media_ids.append(lead_video_id) return self.playlist_result( - [self._extract_video(media_id) for media_id in media_ids], page_id, title, description) + map(self._extract_video, media_ids), page_id, title, description) return { **self._extract_video(lead_video_id), diff --git a/yt_dlp/extractor/vidyard.py b/yt_dlp/extractor/vidyard.py index 2f6d1f4c5112..89a89b13f1f6 100644 --- a/yt_dlp/extractor/vidyard.py +++ b/yt_dlp/extractor/vidyard.py @@ -421,5 +421,5 @@ def _real_extract(self, url): return self._process_video_json(video_json['chapters'][0], video_id) return self.playlist_result( - [self._process_video_json(chapter, video_id) for chapter in video_json['chapters']], + (self._process_video_json(chapter, video_id) for chapter in video_json['chapters']), str(video_json['playerUuid']), video_json.get('name')) From 2037a6414f81db8080ca724dca506fde91974c5d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:35:40 +0000 Subject: [PATCH 08/13] Release 2024.12.13 Created by: bashonly :ci skip all --- CONTRIBUTORS | 2 ++ Changelog.md | 14 ++++++++++++++ yt_dlp/version.py | 6 +++--- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 240197e8af7c..4b69642603c7 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -711,3 +711,5 @@ gitninja1234 jkruse xiaomac wesson09 +Crypto90 +MutantPiggieGolem1 diff --git a/Changelog.md b/Changelog.md index 9dc905309394..75e8240335d9 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,20 @@ # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2024.12.13 + +#### Extractor changes +- **patreon**: campaign: [Support /c/ URLs](https://github.com/yt-dlp/yt-dlp/commit/bc262bcad4d3683ceadf61a7eb87e233e72adef3) ([#11756](https://github.com/yt-dlp/yt-dlp/issues/11756)) by [bashonly](https://github.com/bashonly) +- **soundcloud**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/f4d3e9e6dc25077b79849a31a2f67f93fdc01e62) ([#11777](https://github.com/yt-dlp/yt-dlp/issues/11777)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Fix `release_date` extraction](https://github.com/yt-dlp/yt-dlp/commit/d5e2a379f2adcb28bc48c7d9e90716d7278f89d2) ([#11759](https://github.com/yt-dlp/yt-dlp/issues/11759)) by [MutantPiggieGolem1](https://github.com/MutantPiggieGolem1) + - [Fix signature function extraction for `2f1832d2`](https://github.com/yt-dlp/yt-dlp/commit/5460cd91891bf613a2065e2fc278d9903c37a127) ([#11801](https://github.com/yt-dlp/yt-dlp/issues/11801)) by [bashonly](https://github.com/bashonly) + - [Prioritize original language over auto-dubbed audio](https://github.com/yt-dlp/yt-dlp/commit/dc3c4fddcc653989dae71fc563d82a308fc898cc) ([#11803](https://github.com/yt-dlp/yt-dlp/issues/11803)) by [bashonly](https://github.com/bashonly) + - search_url: [Fix playlist searches](https://github.com/yt-dlp/yt-dlp/commit/f6c73aad5f1a67544bea137ebd9d1e22e0e56567) ([#11782](https://github.com/yt-dlp/yt-dlp/issues/11782)) by [Crypto90](https://github.com/Crypto90) + +#### Misc. changes +- **cleanup**: [Make more playlist entries lazy](https://github.com/yt-dlp/yt-dlp/commit/54216696261bc07cacd9a837c501d9e0b7fed09e) ([#11763](https://github.com/yt-dlp/yt-dlp/issues/11763)) by [seproDev](https://github.com/seproDev) + ### 2024.12.06 #### Core changes diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 3dec228d3614..f696e1e9d040 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2024.12.06' +__version__ = '2024.12.13' -RELEASE_GIT_HEAD = '4bd2655398aed450456197a6767639114a24eac2' +RELEASE_GIT_HEAD = '54216696261bc07cacd9a837c501d9e0b7fed09e' VARIANT = None @@ -12,4 +12,4 @@ ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.12.06' +_pkg_version = '2024.12.13' From 3d3ee458c1fe49dd5ebd7651a092119d23eb7000 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 15 Dec 2024 19:47:50 +0000 Subject: [PATCH 09/13] [update] Fix endless update loop for `linux_exe` builds (#11827) Closes #11808 Authored by: bashonly --- yt_dlp/update.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index ca2ec5f376a0..dfab132afdfe 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -525,11 +525,16 @@ def filename(self): @functools.cached_property def cmd(self): """The command-line to run the executable, if known""" + argv = None # There is no sys.orig_argv in py < 3.10. Also, it can be [] when frozen if getattr(sys, 'orig_argv', None): - return sys.orig_argv + argv = sys.orig_argv elif getattr(sys, 'frozen', False): - return sys.argv + argv = sys.argv + # linux_static exe's argv[0] will be /tmp/staticx-NNNN/yt-dlp_linux if we don't fixup here + if argv and os.getenv('STATICX_PROG_PATH'): + argv = [self.filename, *argv[1:]] + return argv def restart(self): """Restart the executable""" From b91c3925c2059970daa801cb131c0c2f4f302e72 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 15 Dec 2024 19:55:30 +0000 Subject: [PATCH 10/13] [update] Check 64-bitness when upgrading ARM builds (#11819) Closes #11813 Authored by: bashonly --- yt_dlp/update.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index dfab132afdfe..360f5ad58ccd 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -65,9 +65,14 @@ def _get_variant_and_executable_path(): machine = '_legacy' if version_tuple(platform.mac_ver()[0]) < (10, 15) else '' else: machine = f'_{platform.machine().lower()}' + is_64bits = sys.maxsize > 2**32 # Ref: https://en.wikipedia.org/wiki/Uname#Examples if machine[1:] in ('x86', 'x86_64', 'amd64', 'i386', 'i686'): - machine = '_x86' if platform.architecture()[0][:2] == '32' else '' + machine = '_x86' if not is_64bits else '' + # platform.machine() on 32-bit raspbian OS may return 'aarch64', so check "64-bitness" + # See: https://github.com/yt-dlp/yt-dlp/issues/11813 + elif machine[1:] == 'aarch64' and not is_64bits: + machine = '_armv7l' # sys.executable returns a /tmp/ path for staticx builds (linux_static) # Ref: https://staticx.readthedocs.io/en/latest/usage.html#run-time-information if static_exe_path := os.getenv('STATICX_PROG_PATH'): From 1a8851b689763e5173b96f70f8a71df0e4a44b66 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 15 Dec 2024 20:07:18 +0000 Subject: [PATCH 11/13] [ie/youtube] Fix `uploader_id` extraction (#11818) Closes #11816 Authored by: bashonly --- yt_dlp/extractor/youtube.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index fd9c7107c7f7..e12f728ea323 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -518,11 +518,12 @@ def ucid_or_none(self, ucid): return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None) def handle_or_none(self, handle): - return self._search_regex(rf'^({self._YT_HANDLE_RE})$', handle, '@-handle', default=None) + return self._search_regex(rf'^({self._YT_HANDLE_RE})$', urllib.parse.unquote(handle or ''), + '@-handle', default=None) def handle_from_url(self, url): return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})', - url, 'channel handle', default=None) + urllib.parse.unquote(url or ''), 'channel handle', default=None) def ucid_from_url(self, url): return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})', @@ -2801,6 +2802,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}}, }, }, + { + # uploader_id has non-ASCII characters that are percent-encoded in YT's JSON + 'url': 'https://www.youtube.com/shorts/18NGQq7p3LY', + 'info_dict': { + 'id': '18NGQq7p3LY', + 'ext': 'mp4', + 'title': '아이브 이서 장원영 리즈 삐끼삐끼 챌린지', + 'description': '', + 'uploader': 'ㅇㅇ', + 'uploader_id': '@으아-v1k', + 'uploader_url': 'https://www.youtube.com/@으아-v1k', + 'channel': 'ㅇㅇ', + 'channel_id': 'UCC25oTm2J7ZVoi5TngOHg9g', + 'channel_url': 'https://www.youtube.com/channel/UCC25oTm2J7ZVoi5TngOHg9g', + 'thumbnail': r're:https?://.+/.+\.jpg', + 'playable_in_embed': True, + 'age_limit': 0, + 'duration': 3, + 'timestamp': 1724306170, + 'upload_date': '20240822', + 'availability': 'public', + 'live_status': 'not_live', + 'view_count': int, + 'like_count': int, + 'channel_follower_count': int, + 'categories': ['People & Blogs'], + 'tags': [], + }, + }, ] _WEBPAGE_TESTS = [ From 09a6c687126f04e243fcb105a828787efddd1030 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 15 Dec 2024 20:09:48 +0000 Subject: [PATCH 12/13] [ie/youtube] Add age-gate workaround for some embeddable videos (#11821) Closes #11296 Authored by: bashonly --- yt_dlp/extractor/youtube.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index e12f728ea323..0d3963116ed5 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1496,7 +1496,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000 { - 'note': 'Embed allowed age-gate video', + 'note': 'Embed allowed age-gate video; works with web_embedded', 'url': 'https://youtube.com/watch?v=HtVdAasjOgU', 'info_dict': { 'id': 'HtVdAasjOgU', @@ -1526,7 +1526,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'heatmap': 'count:100', 'timestamp': 1401991663, }, - 'skip': 'Age-restricted; requires authentication', }, { 'note': 'Age-gate video with embed allowed in public site', @@ -4013,10 +4012,20 @@ def append_client(*client_names): else: prs.append(pr) + # web_embedded can work around age-gate and age-verification for some embeddable videos + if self._is_agegated(pr) and variant != 'web_embedded': + append_client(f'web_embedded.{base_client}') + # Unauthenticated users will only get web_embedded client formats if age-gated + if self._is_agegated(pr) and not self.is_authenticated: + self.to_screen( + f'{video_id}: This video is age-restricted; some formats may be missing ' + f'without authentication. {self._login_hint()}', only_once=True) + ''' This code is pointless while web_creator is in _DEFAULT_AUTHED_CLIENTS # EU countries require age-verification for accounts to access age-restricted videos # If account is not age-verified, _is_agegated() will be truthy for non-embedded clients - if self.is_authenticated and self._is_agegated(pr): + embedding_is_disabled = variant == 'web_embedded' and self._is_unplayable(pr) + if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled): self.to_screen( f'{video_id}: This video is age-restricted and YouTube is requiring ' 'account age-verification; some formats may be missing', only_once=True) From d298693b1b266d198e8eeecb90ea17c4a031268f Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 15 Dec 2024 20:16:04 +0000 Subject: [PATCH 13/13] [ie/soundcloud] Various fixes (#11820) - Fix original/download formats so that they are considered bestaudio - Raise appropriate error if track is DRM-protected Authored by: bashonly --- yt_dlp/extractor/soundcloud.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 66bc5f9c587f..85779e91ab47 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -210,6 +210,7 @@ def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_f format_urls = set() formats = [] + has_drm = False query = {'client_id': self._CLIENT_ID} if secret_token: query['secret_token'] = secret_token @@ -245,6 +246,7 @@ def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_f 'url': format_url, 'quality': 10, 'format_note': 'Original', + 'vcodec': 'none', }) def invalid_url(url): @@ -260,6 +262,7 @@ def invalid_url(url): protocol = traverse_obj(t, ('format', 'protocol', {str})) or 'http' if protocol.startswith(('ctr-', 'cbc-')): + has_drm = True continue if protocol == 'progressive': protocol = 'http' @@ -317,8 +320,11 @@ def invalid_url(url): 'preference': -10 if is_preview else None, }) - if not formats and info.get('policy') == 'BLOCK': - self.raise_geo_restricted(metadata_available=True) + if not formats: + if has_drm: + self.report_drm(track_id) + if info.get('policy') == 'BLOCK': + self.raise_geo_restricted(metadata_available=True) user = info.get('user') or {}