Commit

Merge branch 'yt-dlp:master' into master
Crypto90 authored Oct 21, 2024
2 parents e2defb0 + 87408cc commit ed390f7
Showing 30 changed files with 727 additions and 174 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
@@ -240,7 +240,7 @@ jobs:
permissions:
contents: read
actions: write # For cleaning up cache
runs-on: macos-12
runs-on: macos-13

steps:
- uses: actions/checkout@v4
@@ -346,7 +346,7 @@ jobs:
macos_legacy:
needs: process
if: inputs.macos_legacy
runs-on: macos-12
runs-on: macos-13

steps:
- uses: actions/checkout@v4
11 changes: 10 additions & 1 deletion README.md
@@ -278,7 +278,7 @@ py -m bundle.py2exe
* **`devscripts/update-version.py`** - Update the version number based on the current date.
* **`devscripts/set-variant.py`** - Set the build variant of the executable.
* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file.
* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading.
* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS` to something nonempty to forcefully disable lazy extractor loading.

Note: See their `--help` for more info.
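For illustration, here is a minimal sketch of the nonempty-environment-variable convention used by `YTDLP_NO_LAZY_EXTRACTORS` in the list above; the consumer-side check is an assumption about how such a flag is typically read, not a copy of yt-dlp's implementation:

```python
import os

# Assumption: any nonempty value disables lazy extractor loading;
# an unset variable or an empty string leaves it enabled.
if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
    print('lazy extractor loading is disabled')
else:
    print('lazy extractor loading is enabled')
```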

@@ -348,6 +348,13 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
containing directory ("-" for stdin). Can be
used multiple times and inside other
configuration files
--plugin-dirs PATH Path to an additional directory to search
for plugins. This option can be used
multiple times to add multiple directories.
Note that this currently only works for
extractor plugins; postprocessor plugins can
only be loaded from the default plugin
directories
--flat-playlist Do not extract the videos of a playlist,
only list them
--no-flat-playlist Fully extract the videos of a playlist
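The `--plugin-dirs` option above is wired up through an internal hack that appears later in this commit (`yt_dlp/__init__.py` and `test/test_plugins.py`). A hedged sketch of the programmatic equivalent, with a hypothetical plugin directory:

```python
from yt_dlp.plugins import load_plugins
from yt_dlp.utils import Config

# Internal, to-be-replaced mechanism: the CLI copies --plugin-dirs values
# onto Config._plugin_dirs before plugins are loaded.
Config._plugin_dirs = ['/path/to/extra/plugins']  # hypothetical path

# Only extractor plugins are discovered this way for now; postprocessor
# plugins still come from the default plugin directories.
plugin_ies = load_plugins('extractor', 'IE')
print(sorted(plugin_ies))
```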
@@ -1795,6 +1802,7 @@ The following extractors use this feature:
* `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
* `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `safari,chrome-110`. By default any available target will be used. Use `false` to disable impersonation
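A hedged sketch of passing the new `generic` extractor argument `impersonate` through the Python API; the nested dict-of-lists shape follows yt-dlp's documented `extractor_args` option, and the URL and target are placeholders:

```python
import yt_dlp

ydl_opts = {
    'extractor_args': {
        'generic': {
            # Impersonation target(s) for the initial webpage request;
            # ['false'] would disable impersonation entirely.
            'impersonate': ['safari'],
        },
    },
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info('https://example.com/stream.m3u8', download=False)
```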

#### funimation
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
@@ -1897,6 +1905,7 @@ In other words, the file structure on the disk looks something like:
myplugin.py

yt-dlp looks for these `yt_dlp_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them.
Set the environment variable `YTDLP_NO_PLUGINS` to something nonempty to disable loading plugins entirely.

See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins)

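As an illustrative sketch of the `YTDLP_NO_PLUGINS` switch documented above (the same pattern this commit uses in `devscripts/make_lazy_extractors.py`), the variable is set to a nonempty value before yt-dlp is imported:

```python
import os

# Any nonempty value disables plugin loading entirely
os.environ['YTDLP_NO_PLUGINS'] = 'true'

import yt_dlp  # noqa: E402 - imported after the environment is prepared

with yt_dlp.YoutubeDL() as ydl:
    ydl.download(['https://example.com/video'])  # placeholder URL
```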
27 changes: 4 additions & 23 deletions devscripts/make_lazy_extractors.py
@@ -2,7 +2,6 @@

# Allow direct execution
import os
import shutil
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -34,18 +33,14 @@ class {name}({bases}):


def main():
lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py')
if os.path.exists(lazy_extractors_filename):
os.remove(lazy_extractors_filename)
os.environ['YTDLP_NO_PLUGINS'] = 'true'
os.environ['YTDLP_NO_LAZY_EXTRACTORS'] = 'true'

_ALL_CLASSES = get_all_ies() # Must be before import
lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py')

import yt_dlp.plugins
from yt_dlp.extractor.extractors import _ALL_CLASSES
from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor

# Filter out plugins
_ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')]

DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
module_src = '\n'.join((
MODULE_TEMPLATE,
@@ -58,20 +53,6 @@ def main():
write_file(lazy_extractors_filename, f'{module_src}\n')


def get_all_ies():
PLUGINS_DIRNAME = 'ytdlp_plugins'
BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked'
if os.path.exists(PLUGINS_DIRNAME):
# os.rename cannot be used, e.g. in Docker. See https://github.com/yt-dlp/yt-dlp/pull/4958
shutil.move(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
try:
from yt_dlp.extractor.extractors import _ALL_CLASSES
finally:
if os.path.exists(BLOCKED_DIRNAME):
shutil.move(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
return _ALL_CLASSES


def extra_ie_code(ie, base=None):
for var in STATIC_CLASS_PROPERTIES:
val = getattr(ie, var)
7 changes: 4 additions & 3 deletions devscripts/run_tests.py
@@ -16,7 +16,7 @@
def parse_args():
parser = argparse.ArgumentParser(description='Run selected yt-dlp tests')
parser.add_argument(
'test', help='a extractor tests, or one of "core" or "download"', nargs='*')
'test', help='an extractor test, test path, or one of "core" or "download"', nargs='*')
parser.add_argument(
'-k', help='run a test matching EXPRESSION. Same as "pytest -k"', metavar='EXPRESSION')
parser.add_argument(
@@ -27,7 +27,6 @@ def parse_args():
def run_tests(*tests, pattern=None, ci=False):
run_core = 'core' in tests or (not pattern and not tests)
run_download = 'download' in tests
tests = list(map(fix_test_name, tests))

pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '')
arguments = ['pytest', '-Werror', '--tb=short', *shlex.split(pytest_args)]
@@ -41,7 +40,9 @@ def run_tests(*tests, pattern=None, ci=False):
arguments.extend(['-m', 'download'])
else:
arguments.extend(
f'test/test_download.py::TestDownload::test_{test}' for test in tests)
test if '/' in test
else f'test/test_download.py::TestDownload::test_{fix_test_name(test)}'
for test in tests)

print(f'Running {arguments}', flush=True)
try:
19 changes: 19 additions & 0 deletions test/test_plugins.py
@@ -10,6 +10,7 @@
sys.path.append(str(TEST_DATA_DIR))
importlib.invalidate_caches()

from yt_dlp.utils import Config
from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins


@@ -68,6 +69,24 @@ def test_importing_zipped_module(self):
os.remove(zip_path)
importlib.invalidate_caches() # reset the import caches

def test_plugin_dirs(self):
# Internal plugin dirs hack for CLI --plugin-dirs
# To be replaced with proper system later
custom_plugin_dir = TEST_DATA_DIR / 'plugin_packages'
Config._plugin_dirs = [str(custom_plugin_dir)]
importlib.invalidate_caches() # reset the import caches

try:
package = importlib.import_module(f'{PACKAGE_NAME}.extractor')
self.assertIn(custom_plugin_dir / 'testpackage' / PACKAGE_NAME / 'extractor', map(Path, package.__path__))

plugins_ie = load_plugins('extractor', 'IE')
self.assertIn('PackagePluginIE', plugins_ie.keys())

finally:
Config._plugin_dirs = []
importlib.invalidate_caches() # reset the import caches


if __name__ == '__main__':
unittest.main()
79 changes: 77 additions & 2 deletions test/test_traversal.py
@@ -4,8 +4,18 @@

import pytest

from yt_dlp.utils import dict_get, int_or_none, str_or_none
from yt_dlp.utils.traversal import traverse_obj
from yt_dlp.utils import (
ExtractorError,
determine_ext,
dict_get,
int_or_none,
str_or_none,
)
from yt_dlp.utils.traversal import (
traverse_obj,
require,
subs_list_to_dict,
)

_TEST_DATA = {
100: 100,
@@ -420,6 +430,71 @@ def test_traversal_morsel(self):
assert traverse_obj(morsel, [(None,), any]) == morsel, \
'Morsel should not be implicitly changed to dict on usage'

def test_traversal_filter(self):
data = [None, False, True, 0, 1, 0.0, 1.1, '', 'str', {}, {0: 0}, [], [1]]

assert traverse_obj(data, [..., filter]) == [True, 1, 1.1, 'str', {0: 0}, [1]], \
'`filter` should filter falsy values'


class TestTraversalHelpers:
def test_traversal_require(self):
with pytest.raises(ExtractorError):
traverse_obj(_TEST_DATA, ['None', {require('value')}])
assert traverse_obj(_TEST_DATA, ['str', {require('value')}]) == 'str', \
'`require` should pass through non `None` values'

def test_subs_list_to_dict(self):
assert traverse_obj([
{'name': 'de', 'url': 'https://example.com/subs/de.vtt'},
{'name': 'en', 'url': 'https://example.com/subs/en1.ass'},
{'name': 'en', 'url': 'https://example.com/subs/en2.ass'},
], [..., {
'id': 'name',
'url': 'url',
}, all, {subs_list_to_dict}]) == {
'de': [{'url': 'https://example.com/subs/de.vtt'}],
'en': [
{'url': 'https://example.com/subs/en1.ass'},
{'url': 'https://example.com/subs/en2.ass'},
],
}, 'function should build subtitle dict from list of subtitles'
assert traverse_obj([
{'name': 'de', 'url': 'https://example.com/subs/de.ass'},
{'name': 'de'},
{'name': 'en', 'content': 'content'},
{'url': 'https://example.com/subs/en'},
], [..., {
'id': 'name',
'data': 'content',
'url': 'url',
}, all, {subs_list_to_dict}]) == {
'de': [{'url': 'https://example.com/subs/de.ass'}],
'en': [{'data': 'content'}],
}, 'subs with mandatory items missing should be filtered'
assert traverse_obj([
{'url': 'https://example.com/subs/de.ass', 'name': 'de'},
{'url': 'https://example.com/subs/en', 'name': 'en'},
], [..., {
'id': 'name',
'ext': ['url', {lambda x: determine_ext(x, default_ext=None)}],
'url': 'url',
}, all, {subs_list_to_dict(ext='ext')}]) == {
'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],
'en': [{'url': 'https://example.com/subs/en', 'ext': 'ext'}],
}, '`ext` should set default ext but leave existing value untouched'
assert traverse_obj([
{'name': 'en', 'url': 'https://example.com/subs/en2', 'prio': True},
{'name': 'en', 'url': 'https://example.com/subs/en1', 'prio': False},
], [..., {
'id': 'name',
'quality': ['prio', {int}],
'url': 'url',
}, all, {subs_list_to_dict(ext='ext')}]) == {'en': [
{'url': 'https://example.com/subs/en1', 'ext': 'ext'},
{'url': 'https://example.com/subs/en2', 'ext': 'ext'},
]}, '`quality` key should sort subtitle list accordingly'


class TestDictGet:
def test_dict_get(self):
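A hedged sketch of how the traversal helpers exercised above might be used outside the test suite; the sample data and field names are hypothetical:

```python
from yt_dlp.utils.traversal import require, subs_list_to_dict, traverse_obj

api_data = {
    'title': 'Example video',
    'subs': [
        {'name': 'en', 'url': 'https://example.com/subs/en.vtt'},
        {'name': 'de', 'url': 'https://example.com/subs/de.vtt'},
    ],
}

# `require` raises ExtractorError instead of silently yielding None
title = traverse_obj(api_data, ('title', {require('title')}))

# Build a {lang: [{'url': ...}, ...]} subtitles dict from a list of entries
subtitles = traverse_obj(api_data, ('subs', ..., {
    'id': 'name',
    'url': 'url',
}, all, {subs_list_to_dict}))

print(title, subtitles)
```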
10 changes: 8 additions & 2 deletions test/test_utils.py
@@ -221,9 +221,10 @@ def test_sanitize_ids(self):
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')

def test_sanitize_path(self):
if sys.platform != 'win32':
return
with unittest.mock.patch('sys.platform', 'win32'):
self._test_sanitize_path()

def _test_sanitize_path(self):
self.assertEqual(sanitize_path('abc'), 'abc')
self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
@@ -256,6 +257,11 @@ def test_sanitize_path(self):
self.assertEqual(sanitize_path('./abc'), 'abc')
self.assertEqual(sanitize_path('./../abc'), '..\\abc')

self.assertEqual(sanitize_path('\\abc'), '\\abc')
self.assertEqual(sanitize_path('C:abc'), 'C:abc')
self.assertEqual(sanitize_path('C:abc\\..\\'), 'C:..')
self.assertEqual(sanitize_path('C:\\abc:%(title)s.%(ext)s'), 'C:\\abc#%(title)s.%(ext)s')

def test_sanitize_url(self):
self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
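The `test_sanitize_path` change above replaces an early-return platform guard with a mock, so the Windows-specific assertions now run on every OS. A minimal sketch of the same patching technique:

```python
import sys
import unittest.mock

original = sys.platform

# Patch sys.platform for the duration of the block so code that branches
# on it takes the Windows path regardless of the host OS.
with unittest.mock.patch('sys.platform', 'win32'):
    assert sys.platform == 'win32'

assert sys.platform == original  # the patch is undone on exit
```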
@@ -0,0 +1,5 @@
from yt_dlp.extractor.common import InfoExtractor


class PackagePluginIE(InfoExtractor):
pass
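The new test-data extractor above, together with `test_plugin_dirs` in `test/test_plugins.py`, implies a plugin package layout along these lines (a sketch based on the paths asserted in that test; the module name is not shown in this diff):

```python
# Illustrative layout for a plugin package discoverable via --plugin-dirs:
#
#   plugin_packages/
#       testpackage/
#           yt_dlp_plugins/
#               extractor/
#                   <module>.py   (module name not shown in this diff)
#
# where <module>.py contains a minimal extractor plugin such as:
from yt_dlp.extractor.common import InfoExtractor


class PackagePluginIE(InfoExtractor):
    pass
```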
7 changes: 6 additions & 1 deletion yt_dlp/YoutubeDL.py
@@ -4070,6 +4070,10 @@ def get_encoding(stream):

write_debug(f'Proxy map: {self.proxies}')
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
if os.environ.get('YTDLP_NO_PLUGINS'):
write_debug('Plugins are forcibly disabled')
return

for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
display_list = ['{}{}'.format(
klass.__name__, '' if klass.__name__ == name else f' as {name}')
@@ -4120,7 +4124,8 @@ def cookiejar(self):
self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
except CookieLoadError as error:
cause = error.__context__
self.report_error(str(cause), tb=''.join(traceback.format_exception(cause)))
# compat: <=py3.9: `traceback.format_exception` has a different signature
self.report_error(str(cause), tb=''.join(traceback.format_exception(None, cause, cause.__traceback__)))
raise

@property
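A hedged illustration of the compat note above: the three-argument form of `traceback.format_exception` is accepted on Python 3.9 and earlier as well as on newer versions, whereas the single-exception form only exists from 3.10:

```python
import sys
import traceback

try:
    raise ValueError('example')
except ValueError as exc:
    # Three-argument form, accepted on all supported Python versions
    portable = ''.join(traceback.format_exception(type(exc), exc, exc.__traceback__))
    if sys.version_info >= (3, 10):
        # Single-exception form, unavailable before 3.10
        modern = ''.join(traceback.format_exception(exc))
    print(portable)
```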
5 changes: 5 additions & 0 deletions yt_dlp/__init__.py
@@ -34,6 +34,7 @@
)
from .update import Updater
from .utils import (
Config,
NO_DEFAULT,
POSTPROCESS_WHEN,
DateRange,
@@ -967,6 +968,10 @@ def _real_main(argv=None):

parser, opts, all_urls, ydl_opts = parse_options(argv)

# HACK: Set the plugin dirs early on
# TODO(coletdjnz): remove when plugin globals system is implemented
Config._plugin_dirs = opts.plugin_dirs

# Dump user agent
if opts.dump_user_agent:
ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
5 changes: 4 additions & 1 deletion yt_dlp/extractor/_extractors.py
@@ -363,7 +363,10 @@
)
from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
from .cda import (
CDAIE,
CDAFolderIE,
)
from .cellebrite import CellebriteIE
from .ceskatelevize import CeskaTelevizeIE
from .cgtn import CGTNIE
7 changes: 6 additions & 1 deletion yt_dlp/extractor/adobepass.py
@@ -1355,6 +1355,7 @@
class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
_MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0'
_MVPD_CACHE = 'ap-mvpd'

_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
@@ -1454,7 +1455,11 @@ def extract_redirect_url(html, url=None, fatal=False):
'no_iframe': 'false',
'domain_name': 'adobe.com',
'redirect_url': url,
})
}, headers={
# yt-dlp's default user-agent is usually too old for Comcast_SSO
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
'User-Agent': self._MODERN_USER_AGENT,
} if mso_id == 'Comcast_SSO' else None)
elif not self._cookies_passed:
raise_mvpd_required()
