Commit

Merge branch 'yt-dlp:master' into master
Crypto90 authored Oct 21, 2024
2 parents e2defb0 + 87408cc commit ed390f7
Showing 30 changed files with 727 additions and 174 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
@@ -240,7 +240,7 @@ jobs:
permissions:
contents: read
actions: write # For cleaning up cache
runs-on: macos-12
runs-on: macos-13

steps:
- uses: actions/checkout@v4
@@ -346,7 +346,7 @@ jobs:
macos_legacy:
needs: process
if: inputs.macos_legacy
runs-on: macos-12
runs-on: macos-13

steps:
- uses: actions/checkout@v4
11 changes: 10 additions & 1 deletion README.md
@@ -278,7 +278,7 @@ py -m bundle.py2exe
* **`devscripts/update-version.py`** - Update the version number based on the current date.
* **`devscripts/set-variant.py`** - Set the build variant of the executable.
* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file.
* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading.
* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS` to something nonempty to forcefully disable lazy extractor loading.

Note: See their `--help` for more info.
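For illustration, here is a minimal sketch of the nonempty-environment-variable convention used by `YTDLP_NO_LAZY_EXTRACTORS` in the list above; the consumer-side check is an assumption about how such a flag is typically read, not a copy of yt-dlp's implementation:

```python
import os

# Assumption: any nonempty value disables lazy extractor loading;
# an unset variable or an empty string leaves it enabled.
if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
    print('lazy extractor loading is disabled')
else:
    print('lazy extractor loading is enabled')
```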

@@ -348,6 +348,13 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
containing directory ("-" for stdin). Can be
used multiple times and inside other
configuration files
--plugin-dirs PATH Path to an additional directory to search
for plugins. This option can be used
multiple times to add multiple directories.
Note that this currently only works for
extractor plugins; postprocessor plugins can
only be loaded from the default plugin
directories
--flat-playlist Do not extract the videos of a playlist,
only list them
--no-flat-playlist Fully extract the videos of a playlist
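The `--plugin-dirs` option above is wired up through an internal hack that appears later in this commit (`yt_dlp/__init__.py` and `test/test_plugins.py`). A hedged sketch of the programmatic equivalent, with a hypothetical plugin directory:

```python
from yt_dlp.plugins import load_plugins
from yt_dlp.utils import Config

# Internal, to-be-replaced mechanism: the CLI copies --plugin-dirs values
# onto Config._plugin_dirs before plugins are loaded.
Config._plugin_dirs = ['/path/to/extra/plugins']  # hypothetical path

# Only extractor plugins are discovered this way for now; postprocessor
# plugins still come from the default plugin directories.
plugin_ies = load_plugins('extractor', 'IE')
print(sorted(plugin_ies))
```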
@@ -1795,6 +1802,7 @@ The following extractors use this feature:
* `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
* `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `safari,chrome-110`. By default any available target will be used. Use `false` to disable impersonation
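A hedged sketch of passing the new `generic` extractor argument `impersonate` through the Python API; the nested dict-of-lists shape follows yt-dlp's documented `extractor_args` option, and the URL and target are placeholders:

```python
import yt_dlp

ydl_opts = {
    'extractor_args': {
        'generic': {
            # Impersonation target(s) for the initial webpage request;
            # ['false'] would disable impersonation entirely.
            'impersonate': ['safari'],
        },
    },
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info('https://example.com/stream.m3u8', download=False)
```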

#### funimation
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
@@ -1897,6 +1905,7 @@ In other words, the file structure on the disk looks something like:
myplugin.py

yt-dlp looks for these `yt_dlp_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them.
Set the environment variable `YTDLP_NO_PLUGINS` to something nonempty to disable loading plugins entirely.

See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins)

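As an illustrative sketch of the `YTDLP_NO_PLUGINS` switch documented above (the same pattern this commit uses in `devscripts/make_lazy_extractors.py`), the variable is set to a nonempty value before yt-dlp is imported:

```python
import os

# Any nonempty value disables plugin loading entirely
os.environ['YTDLP_NO_PLUGINS'] = 'true'

import yt_dlp  # noqa: E402 - imported after the environment is prepared

with yt_dlp.YoutubeDL() as ydl:
    ydl.download(['https://example.com/video'])  # placeholder URL
```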
27 changes: 4 additions & 23 deletions devscripts/make_lazy_extractors.py
@@ -2,7 +2,6 @@

# Allow direct execution
import os
import shutil
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -34,18 +33,14 @@ class {name}({bases}):


def main():
lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py')
if os.path.exists(lazy_extractors_filename):
os.remove(lazy_extractors_filename)
os.environ['YTDLP_NO_PLUGINS'] = 'true'
os.environ['YTDLP_NO_LAZY_EXTRACTORS'] = 'true'

_ALL_CLASSES = get_all_ies() # Must be before import
lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py')

import yt_dlp.plugins
from yt_dlp.extractor.extractors import _ALL_CLASSES
from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor

# Filter out plugins
_ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')]

DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
module_src = '\n'.join((
MODULE_TEMPLATE,
@@ -58,20 +53,6 @@ def main():
write_file(lazy_extractors_filename, f'{module_src}\n')


def get_all_ies():
PLUGINS_DIRNAME = 'ytdlp_plugins'
BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked'
if os.path.exists(PLUGINS_DIRNAME):
# os.rename cannot be used, e.g. in Docker. See https://github.com/yt-dlp/yt-dlp/pull/4958
shutil.move(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
try:
from yt_dlp.extractor.extractors import _ALL_CLASSES
finally:
if os.path.exists(BLOCKED_DIRNAME):
shutil.move(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
return _ALL_CLASSES


def extra_ie_code(ie, base=None):
for var in STATIC_CLASS_PROPERTIES:
val = getattr(ie, var)
7 changes: 4 additions & 3 deletions devscripts/run_tests.py
@@ -16,7 +16,7 @@
def parse_args():
parser = argparse.ArgumentParser(description='Run selected yt-dlp tests')
parser.add_argument(
'test', help='a extractor tests, or one of "core" or "download"', nargs='*')
'test', help='an extractor test, test path, or one of "core" or "download"', nargs='*')
parser.add_argument(
'-k', help='run a test matching EXPRESSION. Same as "pytest -k"', metavar='EXPRESSION')
parser.add_argument(
@@ -27,7 +27,6 @@ def parse_args():
def run_tests(*tests, pattern=None, ci=False):
run_core = 'core' in tests or (not pattern and not tests)
run_download = 'download' in tests
tests = list(map(fix_test_name, tests))

pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '')
arguments = ['pytest', '-Werror', '--tb=short', *shlex.split(pytest_args)]
@@ -41,7 +40,9 @@ def run_tests(*tests, pattern=None, ci=False):
arguments.extend(['-m', 'download'])
else:
arguments.extend(
f'test/test_download.py::TestDownload::test_{test}' for test in tests)
test if '/' in test
else f'test/test_download.py::TestDownload::test_{fix_test_name(test)}'
for test in tests)

print(f'Running {arguments}', flush=True)
try:
19 changes: 19 additions & 0 deletions test/test_plugins.py
@@ -10,6 +10,7 @@
sys.path.append(str(TEST_DATA_DIR))
importlib.invalidate_caches()

from yt_dlp.utils import Config
from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins


@@ -68,6 +69,24 @@ def test_importing_zipped_module(self):
os.remove(zip_path)
importlib.invalidate_caches() # reset the import caches

def test_plugin_dirs(self):
# Internal plugin dirs hack for CLI --plugin-dirs
# To be replaced with proper system later
custom_plugin_dir = TEST_DATA_DIR / 'plugin_packages'
Config._plugin_dirs = [str(custom_plugin_dir)]
importlib.invalidate_caches() # reset the import caches

try:
package = importlib.import_module(f'{PACKAGE_NAME}.extractor')
self.assertIn(custom_plugin_dir / 'testpackage' / PACKAGE_NAME / 'extractor', map(Path, package.__path__))

plugins_ie = load_plugins('extractor', 'IE')
self.assertIn('PackagePluginIE', plugins_ie.keys())

finally:
Config._plugin_dirs = []
importlib.invalidate_caches() # reset the import caches


if __name__ == '__main__':
unittest.main()
79 changes: 77 additions & 2 deletions test/test_traversal.py
@@ -4,8 +4,18 @@

import pytest

from yt_dlp.utils import dict_get, int_or_none, str_or_none
from yt_dlp.utils.traversal import traverse_obj
from yt_dlp.utils import (
ExtractorError,
determine_ext,
dict_get,
int_or_none,
str_or_none,
)
from yt_dlp.utils.traversal import (
traverse_obj,
require,
subs_list_to_dict,
)

_TEST_DATA = {
100: 100,
@@ -420,6 +430,71 @@ def test_traversal_morsel(self):
assert traverse_obj(morsel, [(None,), any]) == morsel, \
'Morsel should not be implicitly changed to dict on usage'

def test_traversal_filter(self):
data = [None, False, True, 0, 1, 0.0, 1.1, '', 'str', {}, {0: 0}, [], [1]]

assert traverse_obj(data, [..., filter]) == [True, 1, 1.1, 'str', {0: 0}, [1]], \
'`filter` should filter falsy values'


class TestTraversalHelpers:
def test_traversal_require(self):
with pytest.raises(ExtractorError):
traverse_obj(_TEST_DATA, ['None', {require('value')}])
assert traverse_obj(_TEST_DATA, ['str', {require('value')}]) == 'str', \
'`require` should pass through non `None` values'

def test_subs_list_to_dict(self):
assert traverse_obj([
{'name': 'de', 'url': 'https://example.com/subs/de.vtt'},
{'name': 'en', 'url': 'https://example.com/subs/en1.ass'},
{'name': 'en', 'url': 'https://example.com/subs/en2.ass'},
], [..., {
'id': 'name',
'url': 'url',
}, all, {subs_list_to_dict}]) == {
'de': [{'url': 'https://example.com/subs/de.vtt'}],
'en': [
{'url': 'https://example.com/subs/en1.ass'},
{'url': 'https://example.com/subs/en2.ass'},
],
}, 'function should build subtitle dict from list of subtitles'
assert traverse_obj([
{'name': 'de', 'url': 'https://example.com/subs/de.ass'},
{'name': 'de'},
{'name': 'en', 'content': 'content'},
{'url': 'https://example.com/subs/en'},
], [..., {
'id': 'name',
'data': 'content',
'url': 'url',
}, all, {subs_list_to_dict}]) == {
'de': [{'url': 'https://example.com/subs/de.ass'}],
'en': [{'data': 'content'}],
}, 'subs with mandatory items missing should be filtered'
assert traverse_obj([
{'url': 'https://example.com/subs/de.ass', 'name': 'de'},
{'url': 'https://example.com/subs/en', 'name': 'en'},
], [..., {
'id': 'name',
'ext': ['url', {lambda x: determine_ext(x, default_ext=None)}],
'url': 'url',
}, all, {subs_list_to_dict(ext='ext')}]) == {
'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],
'en': [{'url': 'https://example.com/subs/en', 'ext': 'ext'}],
}, '`ext` should set default ext but leave existing value untouched'
assert traverse_obj([
{'name': 'en', 'url': 'https://example.com/subs/en2', 'prio': True},
{'name': 'en', 'url': 'https://example.com/subs/en1', 'prio': False},
], [..., {
'id': 'name',
'quality': ['prio', {int}],
'url': 'url',
}, all, {subs_list_to_dict(ext='ext')}]) == {'en': [
{'url': 'https://example.com/subs/en1', 'ext': 'ext'},
{'url': 'https://example.com/subs/en2', 'ext': 'ext'},
]}, '`quality` key should sort subtitle list accordingly'


class TestDictGet:
def test_dict_get(self):
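A hedged sketch of how the traversal helpers exercised above might be used outside the test suite; the sample data and field names are hypothetical:

```python
from yt_dlp.utils.traversal import require, subs_list_to_dict, traverse_obj

api_data = {
    'title': 'Example video',
    'subs': [
        {'name': 'en', 'url': 'https://example.com/subs/en.vtt'},
        {'name': 'de', 'url': 'https://example.com/subs/de.vtt'},
    ],
}

# `require` raises ExtractorError instead of silently yielding None
title = traverse_obj(api_data, ('title', {require('title')}))

# Build a {lang: [{'url': ...}, ...]} subtitles dict from a list of entries
subtitles = traverse_obj(api_data, ('subs', ..., {
    'id': 'name',
    'url': 'url',
}, all, {subs_list_to_dict}))

print(title, subtitles)
```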
10 changes: 8 additions & 2 deletions test/test_utils.py
@@ -221,9 +221,10 @@ def test_sanitize_ids(self):
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')

def test_sanitize_path(self):
if sys.platform != 'win32':
return
with unittest.mock.patch('sys.platform', 'win32'):
self._test_sanitize_path()

def _test_sanitize_path(self):
self.assertEqual(sanitize_path('abc'), 'abc')
self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
@@ -256,6 +257,11 @@ def test_sanitize_path(self):
self.assertEqual(sanitize_path('./abc'), 'abc')
self.assertEqual(sanitize_path('./../abc'), '..\\abc')

self.assertEqual(sanitize_path('\\abc'), '\\abc')
self.assertEqual(sanitize_path('C:abc'), 'C:abc')
self.assertEqual(sanitize_path('C:abc\\..\\'), 'C:..')
self.assertEqual(sanitize_path('C:\\abc:%(title)s.%(ext)s'), 'C:\\abc#%(title)s.%(ext)s')

def test_sanitize_url(self):
self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
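The `test_sanitize_path` change above replaces an early-return platform guard with a mock, so the Windows-specific assertions now run on every OS. A minimal sketch of the same patching technique:

```python
import sys
import unittest.mock

original = sys.platform

# Patch sys.platform for the duration of the block so code that branches
# on it takes the Windows path regardless of the host OS.
with unittest.mock.patch('sys.platform', 'win32'):
    assert sys.platform == 'win32'

assert sys.platform == original  # the patch is undone on exit
```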
@@ -0,0 +1,5 @@
from yt_dlp.extractor.common import InfoExtractor


class PackagePluginIE(InfoExtractor):
pass
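The new test-data extractor above, together with `test_plugin_dirs` in `test/test_plugins.py`, implies a plugin package layout along these lines (a sketch based on the paths asserted in that test; the module name is not shown in this diff):

```python
# Illustrative layout for a plugin package discoverable via --plugin-dirs:
#
#   plugin_packages/
#       testpackage/
#           yt_dlp_plugins/
#               extractor/
#                   <module>.py   (module name not shown in this diff)
#
# where <module>.py contains a minimal extractor plugin such as:
from yt_dlp.extractor.common import InfoExtractor


class PackagePluginIE(InfoExtractor):
    pass
```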
7 changes: 6 additions & 1 deletion yt_dlp/YoutubeDL.py
@@ -4070,6 +4070,10 @@ def get_encoding(stream):

write_debug(f'Proxy map: {self.proxies}')
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
if os.environ.get('YTDLP_NO_PLUGINS'):
write_debug('Plugins are forcibly disabled')
return

for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
display_list = ['{}{}'.format(
klass.__name__, '' if klass.__name__ == name else f' as {name}')
@@ -4120,7 +4124,8 @@ def cookiejar(self):
self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
except CookieLoadError as error:
cause = error.__context__
self.report_error(str(cause), tb=''.join(traceback.format_exception(cause)))
# compat: <=py3.9: `traceback.format_exception` has a different signature
self.report_error(str(cause), tb=''.join(traceback.format_exception(None, cause, cause.__traceback__)))
raise

@property
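A hedged illustration of the compat note above: the three-argument form of `traceback.format_exception` is accepted on Python 3.9 and earlier as well as on newer versions, whereas the single-exception form only exists from 3.10:

```python
import sys
import traceback

try:
    raise ValueError('example')
except ValueError as exc:
    # Three-argument form, accepted on all supported Python versions
    portable = ''.join(traceback.format_exception(type(exc), exc, exc.__traceback__))
    if sys.version_info >= (3, 10):
        # Single-exception form, unavailable before 3.10
        modern = ''.join(traceback.format_exception(exc))
    print(portable)
```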
5 changes: 5 additions & 0 deletions yt_dlp/__init__.py
@@ -34,6 +34,7 @@
)
from .update import Updater
from .utils import (
Config,
NO_DEFAULT,
POSTPROCESS_WHEN,
DateRange,
@@ -967,6 +968,10 @@ def _real_main(argv=None):

parser, opts, all_urls, ydl_opts = parse_options(argv)

# HACK: Set the plugin dirs early on
# TODO(coletdjnz): remove when plugin globals system is implemented
Config._plugin_dirs = opts.plugin_dirs

# Dump user agent
if opts.dump_user_agent:
ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
5 changes: 4 additions & 1 deletion yt_dlp/extractor/_extractors.py
@@ -363,7 +363,10 @@
)
from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
from .cda import (
CDAIE,
CDAFolderIE,
)
from .cellebrite import CellebriteIE
from .ceskatelevize import CeskaTelevizeIE
from .cgtn import CGTNIE
7 changes: 6 additions & 1 deletion yt_dlp/extractor/adobepass.py
@@ -1355,6 +1355,7 @@
class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
_MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0'
_MVPD_CACHE = 'ap-mvpd'

_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
@@ -1454,7 +1455,11 @@ def extract_redirect_url(html, url=None, fatal=False):
'no_iframe': 'false',
'domain_name': 'adobe.com',
'redirect_url': url,
})
}, headers={
# yt-dlp's default user-agent is usually too old for Comcast_SSO
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
'User-Agent': self._MODERN_USER_AGENT,
} if mso_id == 'Comcast_SSO' else None)
elif not self._cookies_passed:
raise_mvpd_required()
