From faa094463bed3b6adc867bd70294e7bea9be5592 Mon Sep 17 00:00:00 2001 From: Simon Baird Date: Wed, 11 Sep 2024 11:16:23 -0400 Subject: [PATCH 1/3] Use test filtering consistently in Makefile The one exception is coverage, since the coverage numbers only make sense if you run all the tests. I also changed the ordering of targets for the sake of tidiness. Quality-of-life tweak while doing some TDD for... Ref: https://issues.redhat.com/browse/EC-742 --- Makefile | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 72706966..12031afe 100644 --- a/Makefile +++ b/Makefile @@ -83,23 +83,28 @@ ec-version: @# as the change date but let's show it anyhow @$(EC) version +# Set TEST to only run tests that match the given string. It does a regex match +# on the fully qualified name iiuc, e.g. "policy.release.foo_test.test_thing" +# so you could use TEST=test_thing or TEST=release.foo_, etc +TEST_FILTER=$(if $(TEST),--run $(TEST)) + # Todo maybe: Run tests with conftest verify instead .PHONY: test test: ## Run all tests in verbose mode and check coverage - @$(OPA) test $(TEST_FILES) -v + @$(OPA) test $(TEST_FILES) $(TEST_FILTER) --verbose $(COVERAGE) -.PHONY: coverage -# The cat does nothing but avoids a non-zero exit code from grep -v -coverage: ## Show which lines of rego are not covered by tests - @$(OPA) test $(TEST_FILES) --coverage --format json | jq -r '.files | to_entries | map("\(.key): Uncovered:\(.value.not_covered)") | .[]' | grep -v "Uncovered:null" | cat - .PHONY: quiet-test quiet-test: ## Run all tests in quiet mode and check coverage - @$(OPA) test $(TEST_FILES) + @$(OPA) test $(TEST_FILES) $(TEST_FILTER) $(COVERAGE) +.PHONY: watch +watch: ## Run tests in watch mode, use TEST=package or TEST=test to focus on a single package or test + @$(OPA) test $(TEST_FILES) $(TEST_FILTER) --verbose --watch + # Do `dnf install entr` then run this a separate terminal or split window while hacking +# (live-test 
and watch do similar things in different ways. Use whichever one you like better.) .PHONY: live-test live-test: ## Continuously run tests on changes to any `*.rego` files, `entr` needs to be installed @trap exit SIGINT; \ @@ -107,9 +112,10 @@ live-test: ## Continuously run tests on changes to any `*.rego` files, `entr` ne git ls-files -c -o '*.rego' | entr -r -d -c $(MAKE) --no-print-directory quiet-test; \ done -.PHONY: watch -watch: ## Run tests in watch mode, use TEST=package/test to focus on single package or test - @$(OPA) test $(TEST_FILES) --verbose --watch $(if $(TEST),--run=$(TEST)) +.PHONY: coverage +# The cat does nothing but avoids a non-zero exit code from grep -v +coverage: ## Show which lines of rego are not covered by tests + @$(OPA) test $(TEST_FILES) --coverage --format json | jq -r '.files | to_entries | map("\(.key): Uncovered:\(.value.not_covered)") | .[]' | grep -v "Uncovered:null" | cat .PHONY: fmt fmt: ## Apply default formatting to all rego files. Use before you commit From 26792e199f753f126675c0ca41fa289fe06c264b Mon Sep 17 00:00:00 2001 From: Simon Baird Date: Wed, 11 Sep 2024 11:54:09 -0400 Subject: [PATCH 2/3] Add to_array helper similar to to_set I want to use this in the next commit. (As mentioned in the comments, I think it can make some code a little more readable in some cases.) 
--- policy/lib/set_helpers.rego | 6 ++++-- policy/lib/set_helpers_test.rego | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/policy/lib/set_helpers.rego b/policy/lib/set_helpers.rego index 1fd77df9..b0d1f8b8 100644 --- a/policy/lib/set_helpers.rego +++ b/policy/lib/set_helpers.rego @@ -2,11 +2,13 @@ package lib import rego.v1 -# It's fairly idiomatic rego to just write this inline but still -# I think this makes things a little more readable +# It's fairly idiomatic rego to do this inline but these +# can make your code a little more readable in some cases # to_set(arr) := {member | some member in arr} +to_array(s) := [member | some member in s] + # Without the in keyword it could be done like this: # needle == haystack[_] # diff --git a/policy/lib/set_helpers_test.rego b/policy/lib/set_helpers_test.rego index 99854147..acc7d1a4 100644 --- a/policy/lib/set_helpers_test.rego +++ b/policy/lib/set_helpers_test.rego @@ -13,6 +13,11 @@ test_to_set if { lib.assert_equal(my_set, lib.to_set(my_set)) } +test_to_array if { + lib.assert_equal(my_list, lib.to_array(my_set)) + lib.assert_equal(my_list, lib.to_array(my_list)) +} + test_included_in if { lib.included_in("a", my_list) lib.included_in("a", my_set) From 7ccc5454beaf1aabe78f37ccbb0a0df60351973e Mon Sep 17 00:00:00 2001 From: Simon Baird Date: Fri, 6 Sep 2024 17:11:58 -0400 Subject: [PATCH 3/3] Check all rpms in sboms have a known repo id For all components in all sboms that are rpms, require they have a repository_id value in their purl, and require that the repository_id value is in the big list of known repository_ids. Includes a bit of extra work to limit the number of violations produced, since I'm expecting there to be either none or many hundreds. As mentioned in a Todo in the comments, this is not yet added to the redhat collection, but it will be added soon in an upcoming PR. 
Ref: https://issues.redhat.com/browse/EC-848 --- .../modules/ROOT/pages/release_policy.adoc | 33 ++++ .../ROOT/partials/release_policy_nav.adoc | 3 + example/data/known_rpm_repositories.yml | 24 +++ policy/release/rpm_repos.rego | 149 +++++++++++++++++ policy/release/rpm_repos_test.rego | 155 ++++++++++++++++++ 5 files changed, 364 insertions(+) create mode 100644 example/data/known_rpm_repositories.yml create mode 100644 policy/release/rpm_repos.rego create mode 100644 policy/release/rpm_repos_test.rego diff --git a/antora/docs/modules/ROOT/pages/release_policy.adoc b/antora/docs/modules/ROOT/pages/release_policy.adoc index fb2379eb..13d15c7b 100644 --- a/antora/docs/modules/ROOT/pages/release_policy.adoc +++ b/antora/docs/modules/ROOT/pages/release_policy.adoc @@ -138,6 +138,7 @@ Rules included: * xref:release_policy.adoc#provenance_materials__git_clone_source_matches_provenance[Provenance Materials: Git clone source matches materials provenance] * xref:release_policy.adoc#provenance_materials__git_clone_task_found[Provenance Materials: Git clone task found] * xref:release_policy.adoc#quay_expiration__expires_label[Quay expiration: Expires label] +* xref:release_policy.adoc#rpm_repos__rule_data_provided[RPM Repos: Known repo id list provided] * xref:release_policy.adoc#rpm_signature__allowed[RPM Signature: Allowed RPM signature key] * xref:release_policy.adoc#rpm_signature__result_format[RPM Signature: Result format] * xref:release_policy.adoc#rpm_signature__rule_data_provided[RPM Signature: Rule data provided] @@ -1009,6 +1010,38 @@ Verify an attestation created by the RHTAP Jenkins build pipeline is present. 
* Code: `rhtap_jenkins.attestation_found` * https://github.com/enterprise-contract/ec-policies/blob/{page-origin-refhash}/policy/release/rhtap_jenkins.rego#L17[Source, window="_blank"] +[#rpm_repos_package] +== link:#rpm_repos_package[RPM Repos] + +This package defines rules to confirm that all RPM packages listed in SBOMs specify a known and permitted repository id. + +* Package name: `rpm_repos` +* Package full path: `policy.release.rpm_repos` + +[#rpm_repos__ids_known] +=== link:#rpm_repos__ids_known[All rpms have known repo ids] + +Each RPM package listed in an SBOM must specify the repository id that it comes from, and that repository id must be present in the list of known and permitted repository ids. + +*Solution*: Ensure every rpm comes from a known and permitted repository, and that the data in the SBOM correctly records that. + +* Rule type: [rule-type-indicator failure]#FAILURE# +* FAILURE message: `RPM repo id check failed: %s` +* Code: `rpm_repos.ids_known` +* https://github.com/enterprise-contract/ec-policies/blob/{page-origin-refhash}/policy/release/rpm_repos.rego#L32[Source, window="_blank"] + +[#rpm_repos__rule_data_provided] +=== link:#rpm_repos__rule_data_provided[Known repo id list provided] + +A list of known and permitted repository ids should be available in the rule data. + +*Solution*: Include a data source that provides a list of known repository ids under the 'known_rpm_repositories' key under the top level 'rule_data' key. 
+ +* Rule type: [rule-type-indicator failure]#FAILURE# +* FAILURE message: `Rule data '%s' has unexpected format: %s` +* Code: `rpm_repos.rule_data_provided` +* https://github.com/enterprise-contract/ec-policies/blob/{page-origin-refhash}/policy/release/rpm_repos.rego#L14[Source, window="_blank"] + [#rpm_signature_package] == link:#rpm_signature_package[RPM Signature] diff --git a/antora/docs/modules/ROOT/partials/release_policy_nav.adoc b/antora/docs/modules/ROOT/partials/release_policy_nav.adoc index a7f38970..eac70b78 100644 --- a/antora/docs/modules/ROOT/partials/release_policy_nav.adoc +++ b/antora/docs/modules/ROOT/partials/release_policy_nav.adoc @@ -79,6 +79,9 @@ *** xref:release_policy.adoc#rhtap_jenkins_package[RHTAP Jenkins] **** xref:release_policy.adoc#rhtap_jenkins__invocation_id_found[RHTAP Jenkins SLSA Invocation ID present] **** xref:release_policy.adoc#rhtap_jenkins__attestation_found[RHTAP Jenkins SLSA Provenance Attestation Found] +*** xref:release_policy.adoc#rpm_repos_package[RPM Repos] +**** xref:release_policy.adoc#rpm_repos__ids_known[All rpms have known repo ids] +**** xref:release_policy.adoc#rpm_repos__rule_data_provided[Known repo id list provided] *** xref:release_policy.adoc#rpm_signature_package[RPM Signature] **** xref:release_policy.adoc#rpm_signature__allowed[Allowed RPM signature key] **** xref:release_policy.adoc#rpm_signature__result_format[Result format] diff --git a/example/data/known_rpm_repositories.yml b/example/data/known_rpm_repositories.yml new file mode 100644 index 00000000..5948f3b3 --- /dev/null +++ b/example/data/known_rpm_repositories.yml @@ -0,0 +1,24 @@ +--- +# Copyright The Enterprise Contract Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + +# See also https://github.com/release-engineering/rhtap-ec-policy/blob/main/data/known_rpm_repositories.yml +rule_data: + known_rpm_repositories: + - "rhel-9-for-x86_64-appstream-rpms" + - "rhel-9-for-x86_64-appstream-source-rpms" + - "rhel-9-for-x86_64-baseos-rpms" + - "rhel-9-for-x86_64-baseos-source-rpms" diff --git a/policy/release/rpm_repos.rego b/policy/release/rpm_repos.rego new file mode 100644 index 00000000..b20361aa --- /dev/null +++ b/policy/release/rpm_repos.rego @@ -0,0 +1,149 @@ +# +# METADATA +# title: RPM Repos +# description: >- +# This package defines rules to confirm that all RPM packages listed +# in SBOMs specify a known and permitted repository id. +# +package policy.release.rpm_repos + +import rego.v1 + +import data.lib + +# METADATA +# title: Known repo id list provided +# description: >- +# A list of known and permitted repository ids should be available in the rule data. +# custom: +# short_name: rule_data_provided +# failure_msg: "Rule data '%s' has unexpected format: %s" +# solution: >- +# Include a data source that provides a list of known repository ids under the +# 'known_rpm_repositories' key under the top level 'rule_data' key. 
+# collections: +# - redhat +# +deny contains result if { + some error in _rule_data_errors + result := lib.result_helper(rego.metadata.chain(), [_rule_data_key, error]) +} + +# METADATA +# title: All rpms have known repo ids +# description: >- +# Each RPM package listed in an SBOM must specify the repository id that it comes from, +# and that repository id must be present in the list of known and permitted repository ids. +# custom: +# short_name: ids_known +# failure_msg: 'RPM repo id check failed: %s' +# solution: >- +# Ensure every rpm comes from a known and permitted repository, and that the data in the +# SBOM correctly records that. +# # Todo: Until the sbom generation is updated this will always fail, so don't include it +# # in the redhat collection yet. See https://issues.redhat.com/browse/STONEBLD-2638 +# #collections: +# #- redhat +# +deny contains result if { + # Don't bother with this unless we have valid rule data + count(_rule_data_errors) == 0 + + some error in _repo_id_errors + result := lib.result_helper(rego.metadata.chain(), [error]) +} + +_rule_data_errors contains msg if { + # match_schema expects either a marshaled JSON resource (String) or an Object. It doesn't + # handle an Array directly. 
+ value := json.marshal(_known_repo_ids) + some violation in json.match_schema( + value, + { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "array", + "items": {"type": "string"}, + "uniqueItems": true, + "minItems": 1, + }, + )[1] + msg := violation.error +} + +_repo_id_errors contains msg if { + bad_purls := all_rpm_purls - _plain_purls(all_purls_with_repo_ids) + count(bad_purls) > 0 + + some bad_purl in _truncated_msg_list(bad_purls) + msg := sprintf("An RPM component in the SBOM did not specify a repository_id value in its purl: %s", [bad_purl]) +} + +_repo_id_errors contains msg if { + bad_purls := all_purls_with_repo_ids - all_purls_with_known_repo_ids + count(bad_purls) > 0 + + some bad_purl in _truncated_msg_list(_plain_purls(bad_purls)) + msg := sprintf("An RPM component in the SBOM specified an unknown or disallowed repository_id: %s", [bad_purl]) +} + +all_purls_with_known_repo_ids contains purl_obj if { + some purl_obj in all_purls_with_repo_ids + purl_obj.repo_id in _known_repo_ids +} + +all_purls_with_repo_ids contains purl_obj if { + some purl in all_rpm_purls + ec.purl.is_valid(purl) + + purl_obj := { + "purl": purl, + "repo_id": _purl_qualifier("repository_id", purl), + } +} + +all_rpm_purls contains purl if { + some sbom in _all_sboms + some component in sbom.components + purl := component.purl + + # I'm assuming this is faster than parsing it and checking the type + startswith(purl, "pkg:rpm") +} + +# In future there will be SPDX sboms also +_all_sboms := lib.sbom.cyclonedx_sboms + +_known_repo_ids := lib.rule_data(_rule_data_key) + +_rule_data_key := "known_rpm_repositories" + +# Converts a list of purl objects, as returned by +# all_purls_with_repo_ids, back into a list of purl strings +_plain_purls(purl_objs) := {purl_obj.purl | some purl_obj in purl_objs} + +# Extract a named qualifier from a purl +_purl_qualifier(key, purl) := result if { + parsed_purl := ec.purl.parse(purl) + some qualifier in parsed_purl.qualifiers + 
qualifier.key == key + result := qualifier.value +} + +# SBOMs often list many hundreds of components. Let's avoid producing that +# many violations if none of the purls are passing this test. (In future we +# might move this to a shared library or to the ec-cli.) + +# If there are more than this then truncate the list +_truncate_threshold := 10 + +# ...but not if the N in the "N more" is less than this +_min_remainder_count := 4 + +_truncated_msg_list(all_msgs) := truncated_msgs if { + remainder_count := count(all_msgs) - _truncate_threshold + remainder_count >= _min_remainder_count + truncated_msgs := array.concat( + array.slice(lib.to_array(all_msgs), 0, _truncate_threshold), + [sprintf("%d additional similar violations not separately listed", [remainder_count])], + ) +} else := all_msgs diff --git a/policy/release/rpm_repos_test.rego b/policy/release/rpm_repos_test.rego new file mode 100644 index 00000000..795a27ac --- /dev/null +++ b/policy/release/rpm_repos_test.rego @@ -0,0 +1,155 @@ +package policy.release.rpm_repos_test + +import rego.v1 + +import data.lib +import data.policy.release.rpm_repos + +test_repo_id_data_empty if { + expected := { + "code": "rpm_repos.rule_data_provided", + "msg": "Rule data 'known_rpm_repositories' has unexpected format: (Root): Array must have at least 1 items", + } + + lib.assert_equal_results({expected}, rpm_repos.deny) with data.rule_data.known_rpm_repositories as [] +} + +test_repo_id_data_not_an_array if { + expected := { + "code": "rpm_repos.rule_data_provided", + "msg": sprintf("%s %s", [ + "Rule data 'known_rpm_repositories' has unexpected format:", + "(Root): Invalid type. Expected: array, given: object", + ]), + } + + lib.assert_equal_results({expected}, rpm_repos.deny) with data.rule_data.known_rpm_repositories as {"chunky": "bacon"} +} + +test_repo_id_data_not_strings if { + expected := { + "code": "rpm_repos.rule_data_provided", + "msg": "Rule data 'known_rpm_repositories' has unexpected format: 1: Invalid type. 
Expected: string, given: integer", + } + + lib.assert_equal_results({expected}, rpm_repos.deny) with data.rule_data.known_rpm_repositories as ["spam", 42] +} + +test_repo_id_all if { + lib.assert_equal_results( + {p1, p2, p3, p4, p5}, + rpm_repos.all_rpm_purls, + ) with rpm_repos._all_sboms as fake_sboms +} + +test_repo_id_all_with_repo_id if { + lib.assert_equal_results( + {p1, p2, p3}, + rpm_repos._plain_purls(rpm_repos.all_purls_with_repo_ids), + ) with rpm_repos._all_sboms as fake_sboms +} + +test_repo_id_all_known if { + lib.assert_equal_results( + {p1, p2}, + rpm_repos._plain_purls(rpm_repos.all_purls_with_known_repo_ids), + ) with rpm_repos._all_sboms as fake_sboms with data.rule_data.known_rpm_repositories as fake_repo_id_list +} + +test_repo_id_purls_missing_repo_ids if { + expected := { + { + "code": "rpm_repos.ids_known", + "msg": sprintf("%s %s", [ + "RPM repo id check failed: An RPM component in the SBOM did not specify a repository_id value in its purl:", + "pkg:rpm/redhat/spam@1.2.3?arch=amd64&pastry_id=puff", + ]), + }, + { + "code": "rpm_repos.ids_known", + "msg": sprintf("%s %s", [ + "RPM repo id check failed: An RPM component in the SBOM did not specify a repository_id value in its purl:", + "pkg:rpm_borken", + ]), + }, + } + + lib.assert_equal_results(expected, rpm_repos.deny) with rpm_repos._all_sboms as [fake_sbom({p1, p2, p4, p5, p6})] + with data.rule_data.known_rpm_repositories as fake_repo_id_list +} + +test_repo_id_purls_missing_repo_ids_truncated if { + expected := { + { + "code": "rpm_repos.ids_known", + "msg": sprintf("%s %s", [ + "RPM repo id check failed: An RPM component in the SBOM did not specify a repository_id value in its purl:", + "pkg:rpm/redhat/spam@1.2.3?arch=amd64&pastry_id=puff", + ]), + }, + { + "code": "rpm_repos.ids_known", + "msg": sprintf("%s %s", [ + "RPM repo id check failed: An RPM component in the SBOM did not specify a repository_id value in its purl:", + "1 additional similar violations not separately listed", 
+ ]), + }, + } + + lib.assert_equal_results(expected, rpm_repos.deny) with rpm_repos._all_sboms as [fake_sbom({p1, p2, p4, p5, p6})] + with data.rule_data.known_rpm_repositories as fake_repo_id_list + with rpm_repos._truncate_threshold as 1 with rpm_repos._min_remainder_count as 0 +} + +test_repo_id_purls_unknown_repo_ids if { + expected := { + "code": "rpm_repos.ids_known", + "msg": sprintf("%s %s", [ + "RPM repo id check failed: An RPM component in the SBOM specified an unknown or disallowed repository_id:", + "pkg:rpm/redhat/spam@1.2.3?arch=amd64&repository_id=rhel-23-unrecognized-2-rpms", + ]), + } + + lib.assert_equal_results({expected}, rpm_repos.deny) with rpm_repos._all_sboms as [fake_sbom({p1, p2, p3, p6})] + with data.rule_data.known_rpm_repositories as fake_repo_id_list +} + +test_clamp_violation_strings if { + lib.assert_equal( + ["a", "b", "c", "2 additional similar violations not separately listed"], + rpm_repos._truncated_msg_list(["a", "b", "c", "d", "e"]), + ) with rpm_repos._truncate_threshold as 3 with rpm_repos._min_remainder_count as 0 + + lib.assert_equal( + ["a", "b", "c", "d", "e"], + rpm_repos._truncated_msg_list(["a", "b", "c", "d", "e"]), + ) with rpm_repos._truncate_threshold as 5 + + lib.assert_equal( + ["a", "b", "3 additional similar violations not separately listed"], + rpm_repos._truncated_msg_list(["a", "b", "c", "d", "e"]), + ) with rpm_repos._truncate_threshold as 2 with rpm_repos._min_remainder_count as 3 +} + +test_all_sboms if { + # (Needed for 100% coverage) + lib.assert_equal("spam-1000", rpm_repos._all_sboms) with lib.sbom.cyclonedx_sboms as "spam-1000" +} + +fake_sboms := [fake_sbom({p1, p2, p3, p4, p5, p6})] + +fake_sbom(fake_purls) := {"components": [{"purl": p} | some p in fake_purls]} + +fake_repo_id_list := ["rhel-23-for-spam-9-rpms", "rhel-42-for-bacon-12-rpms"] + +p1 := "pkg:rpm/redhat/spam@1.2.3?arch=amd64&repository_id=rhel-23-for-spam-9-rpms" + +p2 := 
"pkg:rpm/redhat/spam@1.2.3?arch=amd64&repository_id=rhel-42-for-bacon-12-rpms" + +p3 := "pkg:rpm/redhat/spam@1.2.3?arch=amd64&repository_id=rhel-23-unrecognized-2-rpms" + +p4 := "pkg:rpm/redhat/spam@1.2.3?arch=amd64&pastry_id=puff" + +p5 := "pkg:rpm_borken" + +p6 := "pkg:golang/gitplanet.com/bacon@1.2.3?arch=amd64"