diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index cc2bd97cb89..ff9751f2b95 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -499,7 +499,7 @@ jobs: run: just api/init - name: Run API tests - run: just api/test + run: just api/test-ci - name: Print API test logs if: success() || failure() diff --git a/api/api/templatetags/get_attr.py b/api/api/templatetags/get_attr.py index aeec2e8cb0f..915c00c9e3f 100644 --- a/api/api/templatetags/get_attr.py +++ b/api/api/templatetags/get_attr.py @@ -3,7 +3,7 @@ from django import template -numeric_test = re.compile("^\d+$") +numeric_test = re.compile(r"^\d+$") register = template.Library() diff --git a/api/conf/settings/databases.py b/api/conf/settings/databases.py index 5473bcc66b1..a48fdcb9775 100644 --- a/api/conf/settings/databases.py +++ b/api/conf/settings/databases.py @@ -2,7 +2,7 @@ # Database -# https://docs.djangoproject.com/en/4.2/ref/settings/#databases +# https://docs.djangoproject.com/en/stable/ref/settings/#databases DATABASES = { "default": { @@ -12,9 +12,6 @@ "USER": config("DJANGO_DATABASE_USER", default="deploy"), "PASSWORD": config("DJANGO_DATABASE_PASSWORD", default="deploy"), "NAME": config("DJANGO_DATABASE_NAME", default="openledger"), - # Default of 30 matches RDS documentation's advised max DNS caching time - # https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_BestPractices.html#CHAP_BestPractices.DiskPerformance - "CONN_MAX_AGE": config("DJANGO_CONN_MAX_AGE", default=30), "CONN_HEALTH_CHECKS": config( "DJANGO_CONN_HEALTH_CHECKS", default=True, cast=bool ), @@ -22,6 +19,7 @@ "application_name": config( "DJANGO_DATABASE_APPLICATION_NAME", default="openverse-api" ), + "pool": True, }, } } diff --git a/api/conf/settings/logging.py b/api/conf/settings/logging.py index 3593ab7bcb5..90797f8c012 100644 --- a/api/conf/settings/logging.py +++ b/api/conf/settings/logging.py @@ -167,6 +167,13 @@ def suppress_unwanted_logs(record: LogRecord) -> bool: "propagate": False, } + # Add connection pool logging + LOGGING["loggers"]["django.db.backends.pool"] = { + "level": "DEBUG", + "handlers": ["console_structured"], + "propagate": False, + } + if not DEBUG: # WARNING: Do not run in production long-term as it can impact performance. middleware = ( diff --git a/api/justfile b/api/justfile index b9c97931f09..31b85ca0988 100644 --- a/api/justfile +++ b/api/justfile @@ -160,6 +160,14 @@ generate-docs doc="media-props" fail_on_diff="true": test *args: wait-up env DC_USER="ov_user" just ../exec web pytest "$@" +# Run the API tests in the CI +test-ci: wait-up + # The order is important here: the unit tests drop the database in the end, + # and when ran concurrently with the integration tests, the integration + # tests' database is dropped. + just test -k unit + just test -k "not unit" + # Run API tests locally [positional-arguments] test-local *args: diff --git a/api/pdm.lock b/api/pdm.lock index 7c10b4912ad..6dcfb15f18d 100644 --- a/api/pdm.lock +++ b/api/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev", "overrides", "test"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:95f8248f465dc7788ff8eb59ea982fdbb8464bd0357313e1aaa7a15030b3a33a" +content_hash = "sha256:4958f42c8951e1c07a6279084b470fd99787be84dc222aa78ff75980b54ebdef" [[metadata.targets]] requires_python = "==3.12.*" @@ -1404,6 +1404,36 @@ files = [ {file = "psycopg_binary-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:261f0031ee6074765096a19b27ed0f75498a8338c3dcd7f4f0d831e38adf12d1"}, ] +[[package]] +name = "psycopg-pool" +version = "3.2.4" +requires_python = ">=3.8" +summary = "Connection Pool for Psycopg" +groups = ["default"] +dependencies = [ + "typing-extensions>=4.6", +] +files = [ + {file = "psycopg_pool-3.2.4-py3-none-any.whl", hash = "sha256:f6a22cff0f21f06d72fb2f5cb48c618946777c49385358e0c88d062c59cbd224"}, + {file = "psycopg_pool-3.2.4.tar.gz", hash = "sha256:61774b5bbf23e8d22bedc7504707135aaf744679f8ef9b3fe29942920746a6ed"}, +] + +[[package]] +name = "psycopg" +version = "3.2.3" +extras = ["pool"] +requires_python = ">=3.8" +summary = "PostgreSQL database adapter for Python" +groups = ["default"] +dependencies = [ + "psycopg-pool", + "psycopg==3.2.3", +] +files = [ + {file = "psycopg-3.2.3-py3-none-any.whl", hash = "sha256:644d3973fe26908c73d4be746074f6e5224b03c1101d302d9a53bf565ad64907"}, + {file = "psycopg-3.2.3.tar.gz", hash = "sha256:a5764f67c27bec8bfac85764d23c534af2c27b893550377e37ce59c12aac47a2"}, +] + [[package]] name = "ptyprocess" version = "0.7.0" diff --git a/api/pyproject.toml b/api/pyproject.toml index 8b97769651c..8c7584028e7 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "future >=1, <1.1", "limit >=0.2.3, <0.3", "pillow >=11, <12", - "psycopg >=3.1.18, <4", + "psycopg[pool] >=3.2.3, <4", "python-decouple >=3.8, <4", "python-xmp-toolkit >=2.0.2, <3", "sentry-sdk >=2.19, <3", diff --git a/api/test/integration/test_auth.py b/api/test/integration/test_auth.py index 69a58e70dfd..8bb5295e764 100644 --- a/api/test/integration/test_auth.py +++ b/api/test/integration/test_auth.py @@ -11,6 +11,8 @@ from api.models import OAuth2Verification, ThrottledApplication +pytestmark = pytest.mark.django_db + cache_availability_params = pytest.mark.parametrize( "is_cache_reachable, cache_name", [(True, "oauth_cache"), (False, "unreachable_oauth_cache")], @@ -73,7 +75,6 @@ def test_auth_token_exchange(api_client, test_auth_tokens_registration): return res_data -@pytest.mark.django_db def test_auth_token_exchange_unsupported_method(api_client): res = api_client.get( "/v1/auth_tokens/token/", @@ -90,7 +91,6 @@ def _integration_verify_most_recent_token(api_client): return api_client.get(path) -@pytest.mark.django_db @pytest.mark.parametrize( "rate_limit_model", [x[0] for x in ThrottledApplication.RATE_LIMIT_MODELS], @@ -125,7 +125,6 @@ def test_auth_email_verification( ) -@pytest.mark.django_db @pytest.mark.parametrize( "rate_limit_model", [x[0] for x in ThrottledApplication.RATE_LIMIT_MODELS], @@ -166,7 +165,6 @@ def test_auth_rate_limit_reporting( assert res_data["verified"] is False -@pytest.mark.django_db @pytest.mark.parametrize( "sort_dir, exp_indexed_on", [ @@ -191,7 +189,6 @@ def test_sorting_authed(api_client, test_auth_token_exchange, sort_dir, exp_inde assert indexed_on == exp_indexed_on -@pytest.mark.django_db @pytest.mark.parametrize( "authority_boost, exp_source", [ @@ -219,7 +216,6 @@ def test_authority_authed( assert source == exp_source -@pytest.mark.django_db def test_invalid_credentials_401(api_client): res = api_client.get( "/v1/images/", HTTP_AUTHORIZATION="Bearer thisIsNot_ARealToken" @@ -227,7 +223,6 @@ def test_invalid_credentials_401(api_client): assert res.status_code == 401 -@pytest.mark.django_db def test_revoked_application_access(api_client, test_auth_token_exchange): token = test_auth_token_exchange["access_token"] application = AccessToken.objects.get(token=token).application @@ -258,7 +253,6 @@ def test_revoked_application_access(api_client, test_auth_token_exchange): ) ), ) -@pytest.mark.django_db def test_page_size_privileges( api_client, test_auth_token_exchange, level, page_size_modification, allowed ): @@ -304,7 +298,6 @@ def test_page_size_privileges( ) ), ) -@pytest.mark.django_db def test_pagination_depth_privileges( api_client, test_auth_token_exchange, level, pagination_depth_modification, allowed ): diff --git a/api/test/unit/management/commands/test_generatewaveforms.py b/api/test/unit/management/commands/test_generatewaveforms.py index c7ca58c6c42..866bace4832 100644 --- a/api/test/unit/management/commands/test_generatewaveforms.py +++ b/api/test/unit/management/commands/test_generatewaveforms.py @@ -14,6 +14,9 @@ from test.factory.models.audio import AudioAddOnFactory, AudioFactory +pytestmark = pytest.mark.django_db + + @mock.patch("api.models.audio.generate_peaks") def call_generatewaveforms(mock_generate_peaks: mock.MagicMock) -> tuple[str, str]: mock_generate_peaks.side_effect = lambda _: WaveformProvider.generate_waveform() @@ -35,7 +38,6 @@ def assert_all_audio_have_waveforms(): ) -@pytest.mark.django_db def test_creates_waveforms_for_audio(): AudioFactory.create_batch(153) @@ -46,7 +48,6 @@ def test_creates_waveforms_for_audio(): assert_all_audio_have_waveforms() -@pytest.mark.django_db def test_does_not_reprocess_existing_waveforms(): waveformless_audio = AudioFactory.create_batch(3) @@ -66,7 +67,6 @@ def test_does_not_reprocess_existing_waveforms(): assert_all_audio_have_waveforms() -@pytest.mark.django_db @mock.patch("api.models.audio.generate_peaks") def test_paginates_audio_waveforms_to_generate( mock_generate_peaks, django_assert_num_queries @@ -101,7 +101,6 @@ def test_paginates_audio_waveforms_to_generate( assert_all_audio_have_waveforms() -@pytest.mark.django_db @pytest.mark.parametrize( ("exception_class", "exception_args", "exception_kwargs"), ( @@ -150,7 +149,6 @@ def test_logs_and_continues_if_waveform_generation_fails( ) -@pytest.mark.django_db @mock.patch("api.models.audio.generate_peaks") def test_keyboard_interrupt_should_halt_processing(mock_generate_peaks): audio_count = 23 diff --git a/api/test/unit/models/test_media_report.py b/api/test/unit/models/test_media_report.py index d11b3338081..dda85b11665 100644 --- a/api/test/unit/models/test_media_report.py +++ b/api/test/unit/models/test_media_report.py @@ -16,7 +16,7 @@ ) -pytestmark = pytest.mark.django_db +pytestmark = pytest.mark.django_db(transaction=True) reason_params = pytest.mark.parametrize("reason", [DMCA, MATURE, OTHER]) diff --git a/api/test/unit/utils/test_moderation.py b/api/test/unit/utils/test_moderation.py index 76a0454c384..0ac8f506b17 100644 --- a/api/test/unit/utils/test_moderation.py +++ b/api/test/unit/utils/test_moderation.py @@ -10,7 +10,7 @@ from test.factory.models.oauth2 import UserFactory -pytestmark = pytest.mark.django_db +pytestmark = pytest.mark.django_db(transaction=True) @pytest.fixture diff --git a/catalog/dags/providers/provider_api_scripts/cleveland_museum.py b/catalog/dags/providers/provider_api_scripts/cleveland_museum.py index f1145fc9633..a39872ac56d 100644 --- a/catalog/dags/providers/provider_api_scripts/cleveland_museum.py +++ b/catalog/dags/providers/provider_api_scripts/cleveland_museum.py @@ -1,13 +1,17 @@ import logging -from common.licenses import get_license_info +from common.licenses import LicenseInfo from common.loader import provider_details as prov from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester logger = logging.getLogger(__name__) -CC0_LICENSE = get_license_info(license_="cc0", license_version="1.0") +CC0_LICENSE = LicenseInfo( + license="cc0", + version="1.0", + url="https://creativecommons.org/publicdomain/zero/1.0/", +) class ClevelandDataIngester(ProviderDataIngester): diff --git a/catalog/dags/providers/provider_api_scripts/metropolitan_museum.py b/catalog/dags/providers/provider_api_scripts/metropolitan_museum.py index d38e7942692..cf925337bd6 100644 --- a/catalog/dags/providers/provider_api_scripts/metropolitan_museum.py +++ b/catalog/dags/providers/provider_api_scripts/metropolitan_museum.py @@ -28,7 +28,7 @@ import argparse import logging -from common.licenses import get_license_info +from common.licenses import LicenseInfo from common.loader import provider_details as prov from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester @@ -42,7 +42,11 @@ class MetMuseumDataIngester(ProviderDataIngester): providers = {"image": prov.METROPOLITAN_MUSEUM_DEFAULT_PROVIDER} endpoint = "https://collectionapi.metmuseum.org/public/collection/v1/objects" - DEFAULT_LICENSE_INFO = get_license_info(license_="cc0", license_version="1.0") + DEFAULT_LICENSE_INFO = LicenseInfo( + license="cc0", + version="1.0", + url="https://creativecommons.org/publicdomain/zero/1.0/", + ) def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/catalog/dags/providers/provider_api_scripts/nappy.py b/catalog/dags/providers/provider_api_scripts/nappy.py index 89e06a79004..5a993d3a144 100644 --- a/catalog/dags/providers/provider_api_scripts/nappy.py +++ b/catalog/dags/providers/provider_api_scripts/nappy.py @@ -14,7 +14,7 @@ import logging from common import constants -from common.licenses import get_license_info +from common.licenses import LicenseInfo from common.loader import provider_details as prov from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester @@ -28,8 +28,11 @@ class NappyDataIngester(ProviderDataIngester): headers = {"Accept": "application/json"} # Hardcoded to CC0, the only license Nappy.co uses - license_info = get_license_info( - "https://creativecommons.org/publicdomain/zero/1.0/" + license_info = LicenseInfo( + license="cc0", + version="1.0", + url="https://creativecommons.org/publicdomain/zero/1.0/", + raw_url=None, ) def get_next_query_params(self, prev_query_params: dict | None) -> dict: diff --git a/catalog/dags/providers/provider_api_scripts/science_museum.py b/catalog/dags/providers/provider_api_scripts/science_museum.py index d97ea7dbb5b..dd680a56265 100644 --- a/catalog/dags/providers/provider_api_scripts/science_museum.py +++ b/catalog/dags/providers/provider_api_scripts/science_museum.py @@ -28,7 +28,11 @@ LIMIT = 100 -CC0_LICENSE = get_license_info(license_="cc0", license_version="1.0") +CC0_LICENSE = LicenseInfo( + license="cc0", + version="1.0", + url="https://creativecommons.org/publicdomain/zero/1.0/", +) class ScienceMuseumDataIngester(ProviderDataIngester): diff --git a/catalog/dags/providers/provider_api_scripts/smithsonian.py b/catalog/dags/providers/provider_api_scripts/smithsonian.py index d23c5130516..60f7d9b8276 100644 --- a/catalog/dags/providers/provider_api_scripts/smithsonian.py +++ b/catalog/dags/providers/provider_api_scripts/smithsonian.py @@ -14,7 +14,7 @@ from airflow.exceptions import AirflowException from airflow.models import Variable -from common.licenses import get_license_info +from common.licenses import LicenseInfo from common.loader import provider_details as prov from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester @@ -110,8 +110,10 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.api_key = Variable.get("API_KEY_DATA_GOV") self.units_endpoint = f"{self.base_endpoint}terms/unit_code" - self.license_info = get_license_info( - license_url="https://creativecommons.org/publicdomain/zero/1.0/" + self.license_info = LicenseInfo( + license="cc0", + version="1.0", + url="https://creativecommons.org/publicdomain/zero/1.0/", ) def get_fixed_query_params(self): diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_nappy.py b/catalog/tests/dags/providers/provider_api_scripts/test_nappy.py index f925cdbd701..8817525507d 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_nappy.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_nappy.py @@ -4,7 +4,7 @@ ) from common.constants import IMAGE -from common.licenses import get_license_info +from common.licenses import LicenseInfo from providers.provider_api_scripts.nappy import NappyDataIngester @@ -105,8 +105,11 @@ def test_get_should_continue(response_json, expected_result): { "foreign_landing_url": "https://nappy.co/photo/9/woman-with-tattoos", "url": "https://images.nappy.co/uploads/large/101591721349meykm7s6hvaswwvslpjrwibeyzru1fcxtxh0hf09cs7kdhmtptef4y3k4ua5z1bkyrbxov8tmagnafm8upwa3hxaxururtx7azaf.jpg", - "license_info": get_license_info( - "https://creativecommons.org/publicdomain/zero/1.0/" + "license_info": LicenseInfo( + license="cc0", + version="1.0", + url="https://creativecommons.org/publicdomain/zero/1.0/", + raw_url=None, ), "foreign_identifier": 9, "filesize": 233500,