Skip to content

Commit

Permalink
Merge branch 'yt-dlp:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
Crypto90 authored Dec 16, 2024
2 parents cf06307 + d298693 commit 49c58f2
Show file tree
Hide file tree
Showing 12 changed files with 132 additions and 27 deletions.
2 changes: 2 additions & 0 deletions CONTRIBUTORS
Original file line number Diff line number Diff line change
Expand Up @@ -711,3 +711,5 @@ gitninja1234
jkruse
xiaomac
wesson09
Crypto90
MutantPiggieGolem1
14 changes: 14 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,20 @@
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
-->

### 2024.12.13

#### Extractor changes
- **patreon**: campaign: [Support /c/ URLs](https://github.com/yt-dlp/yt-dlp/commit/bc262bcad4d3683ceadf61a7eb87e233e72adef3) ([#11756](https://github.com/yt-dlp/yt-dlp/issues/11756)) by [bashonly](https://github.com/bashonly)
- **soundcloud**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/f4d3e9e6dc25077b79849a31a2f67f93fdc01e62) ([#11777](https://github.com/yt-dlp/yt-dlp/issues/11777)) by [bashonly](https://github.com/bashonly)
- **youtube**
- [Fix `release_date` extraction](https://github.com/yt-dlp/yt-dlp/commit/d5e2a379f2adcb28bc48c7d9e90716d7278f89d2) ([#11759](https://github.com/yt-dlp/yt-dlp/issues/11759)) by [MutantPiggieGolem1](https://github.com/MutantPiggieGolem1)
- [Fix signature function extraction for `2f1832d2`](https://github.com/yt-dlp/yt-dlp/commit/5460cd91891bf613a2065e2fc278d9903c37a127) ([#11801](https://github.com/yt-dlp/yt-dlp/issues/11801)) by [bashonly](https://github.com/bashonly)
- [Prioritize original language over auto-dubbed audio](https://github.com/yt-dlp/yt-dlp/commit/dc3c4fddcc653989dae71fc563d82a308fc898cc) ([#11803](https://github.com/yt-dlp/yt-dlp/issues/11803)) by [bashonly](https://github.com/bashonly)
- search_url: [Fix playlist searches](https://github.com/yt-dlp/yt-dlp/commit/f6c73aad5f1a67544bea137ebd9d1e22e0e56567) ([#11782](https://github.com/yt-dlp/yt-dlp/issues/11782)) by [Crypto90](https://github.com/Crypto90)

#### Misc. changes
- **cleanup**: [Make more playlist entries lazy](https://github.com/yt-dlp/yt-dlp/commit/54216696261bc07cacd9a837c501d9e0b7fed09e) ([#11763](https://github.com/yt-dlp/yt-dlp/issues/11763)) by [seproDev](https://github.com/seproDev)

### 2024.12.06

#### Core changes
Expand Down
9 changes: 9 additions & 0 deletions test/test_youtube_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
),
(
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q',
),
]

_NSIG_TESTS = [
Expand Down Expand Up @@ -192,6 +197,10 @@
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ',
),
(
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP',
),
]


Expand Down
5 changes: 3 additions & 2 deletions yt_dlp/extractor/brightcove.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
update_url_query,
url_or_none,
)
from ..utils.traversal import traverse_obj


class BrightcoveLegacyIE(InfoExtractor):
Expand Down Expand Up @@ -935,8 +936,8 @@ def extract_policy_key():

if content_type == 'playlist':
return self.playlist_result(
[self._parse_brightcove_metadata(vid, vid.get('id'), headers)
for vid in json_data.get('videos', []) if vid.get('id')],
(self._parse_brightcove_metadata(vid, vid['id'], headers)
for vid in traverse_obj(json_data, ('videos', lambda _, v: v['id']))),
json_data.get('id'), json_data.get('name'),
json_data.get('description'))

Expand Down
2 changes: 1 addition & 1 deletion yt_dlp/extractor/dvtv.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def _real_extract(self, url):
items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage)
if items:
return self.playlist_result(
[self._parse_video_metadata(i, video_id, timestamp) for i in items],
(self._parse_video_metadata(i, video_id, timestamp) for i in items),
video_id, self._html_search_meta('twitter:title', webpage))

item = self._search_regex(
Expand Down
2 changes: 1 addition & 1 deletion yt_dlp/extractor/nytimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ def _real_extract(self, url):
if media_ids:
media_ids.append(lead_video_id)
return self.playlist_result(
[self._extract_video(media_id) for media_id in media_ids], page_id, title, description)
map(self._extract_video, media_ids), page_id, title, description)

return {
**self._extract_video(lead_video_id),
Expand Down
22 changes: 21 additions & 1 deletion yt_dlp/extractor/patreon.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ class PatreonCampaignIE(PatreonBaseIE):
_VALID_URL = r'''(?x)
https?://(?:www\.)?patreon\.com/(?:
(?:m|api/campaigns)/(?P<campaign_id>\d+)|
(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
(?:c/)?(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
)(?:/posts)?/?(?:$|[?#])'''
_TESTS = [{
'url': 'https://www.patreon.com/dissonancepod/',
Expand Down Expand Up @@ -509,6 +509,26 @@ class PatreonCampaignIE(PatreonBaseIE):
'thumbnail': r're:^https?://.*$',
},
'playlist_mincount': 201,
}, {
'url': 'https://www.patreon.com/c/OgSog',
'info_dict': {
'id': '8504388',
'title': 'OGSoG',
'description': r're:(?s)Hello and welcome to our Patreon page. We are Mari, Lasercorn, .+',
'channel': 'OGSoG',
'channel_id': '8504388',
'channel_url': 'https://www.patreon.com/OgSog',
'uploader_url': 'https://www.patreon.com/OgSog',
'uploader_id': '72323575',
'uploader': 'David Moss',
'thumbnail': r're:https?://.+/.+',
'channel_follower_count': int,
'age_limit': 0,
},
'playlist_mincount': 331,
}, {
'url': 'https://www.patreon.com/c/OgSog/posts',
'only_matching': True,
}, {
'url': 'https://www.patreon.com/dissonancepod/posts',
'only_matching': True,
Expand Down
12 changes: 10 additions & 2 deletions yt_dlp/extractor/soundcloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_f

format_urls = set()
formats = []
has_drm = False
query = {'client_id': self._CLIENT_ID}
if secret_token:
query['secret_token'] = secret_token
Expand Down Expand Up @@ -245,6 +246,7 @@ def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_f
'url': format_url,
'quality': 10,
'format_note': 'Original',
'vcodec': 'none',
})

def invalid_url(url):
Expand All @@ -259,6 +261,9 @@ def invalid_url(url):
preset_base = preset.partition('_')[0]

protocol = traverse_obj(t, ('format', 'protocol', {str})) or 'http'
if protocol.startswith(('ctr-', 'cbc-')):
has_drm = True
continue
if protocol == 'progressive':
protocol = 'http'
if protocol != 'hls' and '/hls' in format_url:
Expand Down Expand Up @@ -315,8 +320,11 @@ def invalid_url(url):
'preference': -10 if is_preview else None,
})

if not formats and info.get('policy') == 'BLOCK':
self.raise_geo_restricted(metadata_available=True)
if not formats:
if has_drm:
self.report_drm(track_id)
if info.get('policy') == 'BLOCK':
self.raise_geo_restricted(metadata_available=True)

user = info.get('user') or {}

Expand Down
2 changes: 1 addition & 1 deletion yt_dlp/extractor/vidyard.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,5 +421,5 @@ def _real_extract(self, url):
return self._process_video_json(video_json['chapters'][0], video_id)

return self.playlist_result(
[self._process_video_json(chapter, video_id) for chapter in video_json['chapters']],
(self._process_video_json(chapter, video_id) for chapter in video_json['chapters']),
str(video_json['playerUuid']), video_json.get('name'))
67 changes: 54 additions & 13 deletions yt_dlp/extractor/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,11 +518,12 @@ def ucid_or_none(self, ucid):
return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None)

def handle_or_none(self, handle):
return self._search_regex(rf'^({self._YT_HANDLE_RE})$', handle, '@-handle', default=None)
return self._search_regex(rf'^({self._YT_HANDLE_RE})$', urllib.parse.unquote(handle or ''),
'@-handle', default=None)

def handle_from_url(self, url):
return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})',
url, 'channel handle', default=None)
urllib.parse.unquote(url or ''), 'channel handle', default=None)

def ucid_from_url(self, url):
return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})',
Expand Down Expand Up @@ -1496,7 +1497,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
{
'note': 'Embed allowed age-gate video',
'note': 'Embed allowed age-gate video; works with web_embedded',
'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
'info_dict': {
'id': 'HtVdAasjOgU',
Expand Down Expand Up @@ -1526,7 +1527,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'heatmap': 'count:100',
'timestamp': 1401991663,
},
'skip': 'Age-restricted; requires authentication',
},
{
'note': 'Age-gate video with embed allowed in public site',
Expand Down Expand Up @@ -2802,6 +2802,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
},
},
{
# uploader_id has non-ASCII characters that are percent-encoded in YT's JSON
'url': 'https://www.youtube.com/shorts/18NGQq7p3LY',
'info_dict': {
'id': '18NGQq7p3LY',
'ext': 'mp4',
'title': '아이브 이서 장원영 리즈 삐끼삐끼 챌린지',
'description': '',
'uploader': 'ㅇㅇ',
'uploader_id': '@으아-v1k',
'uploader_url': 'https://www.youtube.com/@으아-v1k',
'channel': 'ㅇㅇ',
'channel_id': 'UCC25oTm2J7ZVoi5TngOHg9g',
'channel_url': 'https://www.youtube.com/channel/UCC25oTm2J7ZVoi5TngOHg9g',
'thumbnail': r're:https?://.+/.+\.jpg',
'playable_in_embed': True,
'age_limit': 0,
'duration': 3,
'timestamp': 1724306170,
'upload_date': '20240822',
'availability': 'public',
'live_status': 'not_live',
'view_count': int,
'like_count': int,
'channel_follower_count': int,
'categories': ['People & Blogs'],
'tags': [],
},
},
]

_WEBPAGE_TESTS = [
Expand Down Expand Up @@ -3128,9 +3157,9 @@ def _parse_sig_js(self, jscode):
# ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
# {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
funcname = self._search_regex(
(r'\b(?P<var>[a-zA-Z0-9$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\((?P=var)\)\)',
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
(r'\b(?P<var>[a-zA-Z0-9_$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9_$]{2,})\(decodeURIComponent\((?P=var)\)\)',
r'(?P<sig>[a-zA-Z0-9_$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9_$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
r'(?:\b|[^a-zA-Z0-9_$])(?P<sig>[a-zA-Z0-9_$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9_$]{2}\.[a-zA-Z0-9_$]{2}\(a,\d+\))?',
# Old patterns
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
Expand Down Expand Up @@ -3984,10 +4013,20 @@ def append_client(*client_names):
else:
prs.append(pr)

# web_embedded can work around age-gate and age-verification for some embeddable videos
if self._is_agegated(pr) and variant != 'web_embedded':
append_client(f'web_embedded.{base_client}')
# Unauthenticated users will only get web_embedded client formats if age-gated
if self._is_agegated(pr) and not self.is_authenticated:
self.to_screen(
f'{video_id}: This video is age-restricted; some formats may be missing '
f'without authentication. {self._login_hint()}', only_once=True)

''' This code is pointless while web_creator is in _DEFAULT_AUTHED_CLIENTS
# EU countries require age-verification for accounts to access age-restricted videos
# If account is not age-verified, _is_agegated() will be truthy for non-embedded clients
if self.is_authenticated and self._is_agegated(pr):
embedding_is_disabled = variant == 'web_embedded' and self._is_unplayable(pr)
if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled):
self.to_screen(
f'{video_id}: This video is age-restricted and YouTube is requiring '
'account age-verification; some formats may be missing', only_once=True)
Expand Down Expand Up @@ -4068,10 +4107,12 @@ def build_fragments(f):
if height:
res_qualities[height] = quality

display_name = audio_track.get('displayName') or ''
is_original = 'original' in display_name.lower()
is_descriptive = 'descriptive' in display_name.lower()
is_default = audio_track.get('audioIsDefault')
is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower()
language_code = audio_track.get('id', '').split('.')[0]
if language_code and is_default:
if language_code and (is_original or (is_default and not original_language)):
original_language = language_code

# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
Expand Down Expand Up @@ -4152,7 +4193,7 @@ def build_fragments(f):
'filesize': int_or_none(fmt.get('contentLength')),
'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
'format_note': join_nonempty(
join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''),
join_nonempty(display_name, is_default and ' (default)', delim=''),
name, fmt.get('isDrc') and 'DRC',
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
Expand All @@ -4171,7 +4212,7 @@ def build_fragments(f):
'url': fmt_url,
'width': int_or_none(fmt.get('width')),
'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1,
'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
# Strictly de-prioritize broken, damaged and 3gp formats
'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
}
Expand Down Expand Up @@ -4690,7 +4731,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
(?=(?P<artist>[^\n]+))(?P=artist)\n+
(?=(?P<album>[^\n]+))(?P=album)\n
(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
(?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
(.+?\nArtist\s*:\s*
(?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
)?.+\nAuto-generated\ by\ YouTube\.\s*$
Expand Down
16 changes: 13 additions & 3 deletions yt_dlp/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,14 @@ def _get_variant_and_executable_path():
machine = '_legacy' if version_tuple(platform.mac_ver()[0]) < (10, 15) else ''
else:
machine = f'_{platform.machine().lower()}'
is_64bits = sys.maxsize > 2**32
# Ref: https://en.wikipedia.org/wiki/Uname#Examples
if machine[1:] in ('x86', 'x86_64', 'amd64', 'i386', 'i686'):
machine = '_x86' if platform.architecture()[0][:2] == '32' else ''
machine = '_x86' if not is_64bits else ''
# platform.machine() on 32-bit raspbian OS may return 'aarch64', so check "64-bitness"
# See: https://github.com/yt-dlp/yt-dlp/issues/11813
elif machine[1:] == 'aarch64' and not is_64bits:
machine = '_armv7l'
# sys.executable returns a /tmp/ path for staticx builds (linux_static)
# Ref: https://staticx.readthedocs.io/en/latest/usage.html#run-time-information
if static_exe_path := os.getenv('STATICX_PROG_PATH'):
Expand Down Expand Up @@ -525,11 +530,16 @@ def filename(self):
@functools.cached_property
def cmd(self):
"""The command-line to run the executable, if known"""
argv = None
# There is no sys.orig_argv in py < 3.10. Also, it can be [] when frozen
if getattr(sys, 'orig_argv', None):
return sys.orig_argv
argv = sys.orig_argv
elif getattr(sys, 'frozen', False):
return sys.argv
argv = sys.argv
# linux_static exe's argv[0] will be /tmp/staticx-NNNN/yt-dlp_linux if we don't fixup here
if argv and os.getenv('STATICX_PROG_PATH'):
argv = [self.filename, *argv[1:]]
return argv

def restart(self):
"""Restart the executable"""
Expand Down
6 changes: 3 additions & 3 deletions yt_dlp/version.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py

__version__ = '2024.12.06'
__version__ = '2024.12.13'

RELEASE_GIT_HEAD = '4bd2655398aed450456197a6767639114a24eac2'
RELEASE_GIT_HEAD = '54216696261bc07cacd9a837c501d9e0b7fed09e'

VARIANT = None

Expand All @@ -12,4 +12,4 @@

ORIGIN = 'yt-dlp/yt-dlp'

_pkg_version = '2024.12.06'
_pkg_version = '2024.12.13'

0 comments on commit 49c58f2

Please sign in to comment.