diff --git a/CHANGES b/CHANGES index 783b30a6..e17d8d66 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,41 @@ +1.2.0 +Enhancements: +* A new search operator, :, now enables capturing slices of Arrays (by 0-based + element number) and Hashes (by alphanumeric key-name). This looks like: + "some::array[2:15]" or "some::hash[beta:gamma]". +* yaml-get now returns JSON instead of "pretty Python" data objects when the + search returns complex data types (Arrays and Hashes). This change makes the + result more portable to non-Python consumers and ensures the result will be + one per line. +* The separator used for identifying Hash sub-keys can now be customized. If + you prefer your paths to look like "/hash/sub/key" rather than "hash.sub.key", + you can now have it your way. For now, only . and / are allowed. The + separator can be either strictly specified or automatically inferred by + whether the first character of a given YAML Path is /. Command-line tools + like yaml-get and yaml-set have a new --pathsep argument for this; the default + is "auto" and can be set to "fslash" (/) or "dot" (.). + +Bug Fixes: +* EYAML on Windows now works when a batch file is used to wrap the Ruby `eyaml` + command. + +Known Issues: +* Escape symbols in YAML Paths parse correctly and will be properly processed, + resulting in retrieving or setting the expected data. However, the parsed + path cannot be stringified back to its original form (with escape symbols). + This issue affects only logging/printing of the post-parsed path. A unit test + has been created to track this issue, but it is marked xfail until such time + as someone is willing to tackle this (very) low priority issue. Until then, + developers should try to print the pre-parsed version of their paths rather + than rely exclusively on Parser.str_path(). Further, don't do this: + 1. Accept or take a string path that has escaped characters. + 2. Parse that path. + 3. Stringify the parsed path. + 4. 
Parse the stringified, parsed path. This is silly, anyway because you + already have the first (good) parsed result at step 2. + 5. Try to use this parsed-stringified-parsed path result for anything. + Instead, only use the first parsed result that you got at step 2. + 1.1.2 Bug fixes: * When the YAML Path is fully quoted -- a known side-effect of using Jenkins and diff --git a/README.md b/README.md index 8dfcef82..a4506fe6 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,7 @@ Hash members, this YAML Path solution grew to include new syntax for: * Array elements * Anchors by name * Search expressions for single or multiple matches +* Forward-slash notation To illustrate some of these concepts, consider this sample YAML data: @@ -83,34 +84,43 @@ sensitive::accounts: This YAML data sample contains these single-result YAML Paths: -1. `aliases[&commonUsername]` -2. `aliases[&commonPassword]` -3. `configuration::application.'general.settings'.slash\\key` -4. `configuration::application.'general.settings'.'a.dotted.subkey'[0]` -5. `configuration::application.'general.settings'.'a.dotted.subkey'[1]` -6. `configuration::application.'general.settings'.'a.dotted.subkey'[2]` -7. `sensitive::accounts.database.app_user` -8. `sensitive::accounts.database.app_pass` -9. `sensitive::accounts.application.db.users[0].name` -10. `sensitive::accounts.application.db.users[0].pass` -11. `sensitive::accounts.application.db.users[0].access_level` -12. `sensitive::accounts.application.db.users[1].name` -13. `sensitive::accounts.application.db.users[1].pass` -14. 
`sensitive::accounts.application.db.users[1].access_level` +Dot Notation | Forward-Slash Notation +---------------------------------------------------------------------|------------------------------------------------------------------ +`aliases[&commonUsername]` | `/aliases[&commonUsername]` +`aliases[&commonPassword]` | `/aliases[&commonPassword]` +`configuration::application.'general.settings'.slash\\key` | `/configuration::application/general.settings/slash\\key` +`configuration::application.'general.settings'.'a.dotted.subkey'[0]` | `/configuration::application/general.settings/a.dotted.subkey[0]` +`configuration::application.'general.settings'.'a.dotted.subkey'[1]` | `/configuration::application/general.settings/a.dotted.subkey[1]` +`configuration::application.'general.settings'.'a.dotted.subkey'[2]` | `/configuration::application/general.settings/a.dotted.subkey[2]` +`sensitive::accounts.database.app_user` | `/sensitive::accounts/database/app_user` +`sensitive::accounts.database.app_pass` | `/sensitive::accounts/database/app_pass` +`sensitive::accounts.application.db.users[0].name` | `/sensitive::accounts/application/db/users[0]/name` +`sensitive::accounts.application.db.users[0].pass` | `/sensitive::accounts/application/db/users[0]/pass` +`sensitive::accounts.application.db.users[0].access_level` | `/sensitive::accounts/application/db/users[0]/access_level` +`sensitive::accounts.application.db.users[1].name` | `/sensitive::accounts/application/db/users[1]/name` +`sensitive::accounts.application.db.users[1].pass` | `/sensitive::accounts/application/db/users[1]/pass` +`sensitive::accounts.application.db.users[1].access_level` | `/sensitive::accounts/application/db/users[1]/access_level` You could also access some of these sample nodes using search expressions, like: -1. `configuration::application.general\.settings.'a.dotted.subkey'[.=~/^element[1-2]$/]` -2. `sensitive::accounts.application.db.users[name=admin].access_level` -3. 
`sensitive::accounts.application.db.users[access_level<500].name` +Dot Notation | Forward-Slash Notation +--------------------------------------------------------------------------------------|------------------------------------------------------------------ +`configuration::application.general\.settings.'a.dotted.subkey'[.=~/^element[1-2]$/]` | `/configuration::application/general.settings/a.dotted.subkey[.=~/^element[1-2]$/]` +`configuration::application.general\.settings.'a.dotted.subkey'[1:2]` | `/configuration::application/general.settings/a.dotted.subkey[0:-2]` +`sensitive::accounts.application.db.users[name=admin].access_level` | `/sensitive::accounts/application/db/users[name=admin]/access_level` +`sensitive::accounts.application.db.users[access_level<500].name` | `/sensitive::accounts/application/db/users[access_level<500]/name` ## Supported YAML Path Forms YAML Path understands these forms: +* Top-level Array element selection: `[#]` where `#` is the 0-based element number (`#` can also be negative, causing the element to be selected from the end of the Array) +* Top-level Hash key selection: `key` * Dot notation for Hash sub-keys: `hash.child.key` * Demarcation for dotted Hash keys: `hash.'dotted.child.key'` or `hash."dotted.child.key"` -* Array element selection: `array[#]` (where `array` is omitted for top-level Arrays or is the name of the Hash key containing Array data and `#` is the 0-based element number) +* Array element selection: `array[#]` where `array` is omitted for top-level Arrays or is the name of the Hash key containing Array data and `#` is the 0-based element number (`#` can also be negative, causing the element to be selected from the end of the Array) +* Array slicing: `array[start#:stop#]` where `start#` is the first, zero-based element and `stop#` is the last element to select (either or both can be negative, causing the elements to be selected from the end of the Array) +* Hash slicing: `hash[min:max]` where `min` and `max` are 
alphanumeric terms between which the Hash's keys are compared * Escape symbol recognition: `hash.dotted\.child\.key` or `keys_with_\\slashes` * Top-level (Hash) Anchor lookups: `&anchor_name` * Anchor lookups in Arrays: `array[&anchor_name]` @@ -123,12 +133,13 @@ YAML Path understands these forms: * Greater Than match: `sensitive::accounts.application.db.users[access_level>0].pass` * Less Than or Equal match: `sensitive::accounts.application.db.users[access_level<=100].pass` * Greater Than or Equal match: `sensitive::accounts.application.db.users[access_level>=0].pass` - * Regular Expression matches using any delimiter you choose (other than `/`, if you need something else): `sensitive::accounts.application.db.users[access_level=~/^\D+$/].pass` or `some::hash[containing=~#/path/values#]` + * Regular Expression matches using any delimiter you choose (other than `/`, if you need something else): `sensitive::accounts.application.db.users[access_level=~/^\D+$/].pass` or `some::hash[containing=~"/path/values"]` * Invert any match with `!`, like: `sensitive::accounts.application.db.users[name!=admin].pass` * Demarcate and/or escape expression values, like: `sensitive::accounts.application.db.users[full\ name="Some User\'s Name"].pass` * Multi-level matching: `sensitive::accounts.application.db.users[name%admin].pass[encrypted!^ENC\[]` * Array element and Hash key-name searches with all of the search methods above via `.` (yields their values, not the keys themselves): `sensitive::accounts.database[.^app_]` -* Complex combinations: `some::deep.hierarchy[with!=""].'any.valid'[.$yaml][data%structure].or.[!complexity=~/^.{4}$/][2]` +* Complex combinations: `some::deep.hierarchy[with!=""].'any.valid'[.$yaml][data%structure].or[!complexity=~/^.{4}$/][2]` +* Forward-slash rather than dot notation: `/key` up to `/some::deep/hierarchy[with!=""]/any.valid[.$yaml][data%structure]/or[!complexity=~/^.{4}$/][2]` ## Installing diff --git a/bin/yaml-get b/bin/yaml-get index 
82831a49..16613708 100755 --- a/bin/yaml-get +++ b/bin/yaml-get @@ -16,7 +16,7 @@ ################################################################################ import sys import argparse -import pprint +import json from os import access, R_OK from os.path import isfile @@ -24,6 +24,7 @@ from ruamel.yaml import YAML from ruamel.yaml.parser import ParserError from yamlpath.exceptions import YAMLPathException, EYAMLCommandException +from yamlpath.enums import PathSeperators from yamlpath.parser import Parser from yamlpath.eyaml import EYAMLPath @@ -51,6 +52,13 @@ def processcli(): help="YAML Path to query" ) + parser.add_argument("-t", "--pathsep", + default="auto", + choices=[l.lower() for l in PathSeperators.get_names()], + type=str.lower, + help="force the separator in YAML_PATH when inference fails" + ) + eyaml_group = parser.add_argument_group( "EYAML options", "Left unset, the EYAML keys will default to your\ system or user defaults. Both keys must be set either here or in\ @@ -108,7 +116,7 @@ def main(): args = processcli() log = ConsolePrinter(args) validateargs(args, log) - parser = Parser(log) + parser = Parser(log, pathsep=args.pathsep) processor = EYAMLPath( log, eyaml=args.eyaml, @@ -157,9 +165,11 @@ def main(): if not discovered_nodes: log.critical("No matches for {}!".format(yaml_path), 3) - pprinter = pprint.PrettyPrinter(indent=4) for node in discovered_nodes: - pprinter.pprint(node) + if isinstance(node, list) or isinstance(node, dict): + print(json.dumps(node)) + else: + print("{}".format(node)) if __name__ == "__main__": main() diff --git a/bin/yaml-set b/bin/yaml-set index 21259145..cb9493e1 100755 --- a/bin/yaml-set +++ b/bin/yaml-set @@ -28,7 +28,7 @@ from ruamel.yaml.parser import ParserError from yamlpath.exceptions import YAMLPathException, EYAMLCommandException from yamlpath.eyaml import EYAMLPath -from yamlpath.enums import YAMLValueFormats +from yamlpath.enums import YAMLValueFormats, PathSeperators import yamlpath.patches from 
yamlpath.wrappers import ConsolePrinter @@ -89,6 +89,12 @@ def processcli(): help="require that the --change YAML_PATH already exist in YAML_FILE") parser.add_argument("-b", "--backup", action="store_true", help="save a backup YAML_FILE with an extra .bak file-extension") + parser.add_argument("-t", "--pathsep", + default="auto", + choices=[l.lower() for l in PathSeperators.get_names()], + type=str.lower, + help="force the separator in YAML_PATH when inference fails" + ) eyaml_group = parser.add_argument_group( "EYAML options", "Left unset, the EYAML keys will default to your\ @@ -176,7 +182,8 @@ def main(): log, eyaml=args.eyaml, publickey=args.publickey, - privatekey=args.privatekey + privatekey=args.privatekey, + pathsep=args.pathsep, ) backup_file = args.yaml_file + ".bak" diff --git a/setup.py b/setup.py index dbd3c31f..93d848b2 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="yamlpath", - version="1.1.2", + version="1.2.0", description="Generally-useful YAML and EYAML tools employing a human-friendly YAML Path", long_description=long_description, long_description_content_type="text/markdown", diff --git a/tests/test_eyamlpath.py b/tests/test_eyamlpath.py index bbb7f157..90516905 100644 --- a/tests/test_eyamlpath.py +++ b/tests/test_eyamlpath.py @@ -5,12 +5,13 @@ from ruamel.yaml import YAML +import yamlpath.patches from yamlpath.eyaml import EYAMLPath from yamlpath.wrappers import ConsolePrinter from yamlpath.exceptions import EYAMLCommandException requireseyaml = pytest.mark.skipif( - not EYAMLPath.get_eyaml_executable("eyaml") + EYAMLPath.get_eyaml_executable("eyaml") is None , reason="The 'eyaml' command must be installed and accessible on the PATH" + " to test and use EYAML features. Try: 'gem install hiera-eyaml'" + " after intalling ruby and rubygems." 
@@ -170,9 +171,10 @@ def eyamlkeys(tmp_path_factory): with open(old_public_key_file, 'w') as key_file: key_file.write(old_public_key) + eyaml_cmd = EYAMLPath.get_eyaml_executable("eyaml") run( - "eyaml createkeys --pkcs7-private-key={} --pkcs7-public-key={}" - .format(new_private_key_file, new_public_key_file).split() + "{} createkeys --pkcs7-private-key={} --pkcs7-public-key={}" + .format(eyaml_cmd, new_private_key_file, new_public_key_file).split() ) return ( @@ -207,6 +209,15 @@ def fake_run(*args, **kwargs): monkeypatch.setattr(break_module, "run", fake_run) +@pytest.fixture +def force_no_access(monkeypatch): + import yamlpath.eyaml.eyamlpath as break_module + + def fake_access(*args, **kwargs): + return False + + monkeypatch.setattr(break_module, "access", fake_access) + @requireseyaml @pytest.mark.parametrize("search,compare", [ ("aliases[&secretIdentity]", "This is not the identity you are looking for."), @@ -233,12 +244,12 @@ def test_happy_set_eyaml_value(eyamlpath_f, eyamldata, eyamlkeys, search, compar eyamlpath_f.set_eyaml_value(eyamldata, search, compare, mustexist=mustexist, output=output) # Ensure the new value is encrypted + encvalue = None for encnode in eyamlpath_f.get_nodes(eyamldata, search): - assert EYAMLPath.is_eyaml_value(encnode) + encvalue = encnode + break - # Ensure the new value decrypts back to the original value - for decnode in eyamlpath_f.get_eyaml_values(eyamldata, search, mustexist=True): - assert decnode == compare + assert EYAMLPath.is_eyaml_value(encvalue) def test_none_eyaml_value(): assert False == EYAMLPath.is_eyaml_value(None) @@ -295,4 +306,6 @@ def test_decrypt_calledprocesserror(eyamlpath_f, force_subprocess_run_cpe): with pytest.raises(EYAMLCommandException): eyamlpath_f.decrypt_eyaml("ENC[...]") -# 60, 67, 98, 101, 123-128, 138-143, 163, 190-191 +@requireseyaml +def test_non_executable(eyamlkeys, force_no_access): + assert EYAMLPath.get_eyaml_executable(str(eyamlkeys[0])) is None diff --git a/tests/test_parser.py 
b/tests/test_parser.py index 63cd71d9..30ac3e87 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -6,6 +6,7 @@ from yamlpath.parser import Parser from yamlpath.exceptions import YAMLPathException from yamlpath.wrappers import ConsolePrinter +from yamlpath.enums import PathSeperators @pytest.fixture def parser(): @@ -17,7 +18,6 @@ def parser(): def test_empty_str_path(parser): assert parser.str_path("") == "" -# Happy searches @pytest.mark.parametrize("yaml_path,stringified", [ ("aliases[&anchor]", "aliases[&anchor]"), ("a l i a s e s [ & a n c h o r ]", "aliases[&anchor]"), @@ -67,15 +67,29 @@ def test_empty_str_path(parser): ('&topArrayAnchor[0]', '&topArrayAnchor[0]'), ('"&topArrayAnchor[0]"', r'\&topArrayAnchor\[0\]'), ('"&subHashAnchor.child1.attr_tst"', r'\&subHashAnchor\.child1\.attr_tst'), - ("'&topArrayAnchor[!.=~/[Oo]riginal/]'", r"\&topArrayAnchor\[\!\.=\~/\[Oo\]riginal/\]"), + ("'&topArrayAnchor[!.=~/[Oo]riginal/]'", r"\&topArrayAnchor\[!\.=~/\[Oo\]riginal/\]"), ]) def test_happy_str_path_translations(parser, yaml_path, stringified): assert parser.str_path(yaml_path) == stringified +# This will be a KNOWN ISSUE for this release. The fix for this may require a +# deep rethink of the Parser class. The issue here is that escaped characters +# in YAML Paths work perfectly well, but they can't be printed back to the +# screen in their pre-parsed form. So, when a user submits a YAML Path of +# "some\\escaped\\key", all printed forms of the key will become +# "someescapedkey" even though the path WILL find the requested data. This is +# only a stringification (printing) anomaly and hence, it will be LOW PRIORITY, +# tracked as a KNOWN ISSUE, for now. 
+@pytest.mark.xfail +@pytest.mark.parametrize("yaml_path,stringified", [ + ('key\\with\\slashes', 'key\\with\\slashes'), +]) +def test_escaped_translations(parser, yaml_path, stringified): + assert parser.str_path(yaml_path) == stringified + def test_happy_parse_path_list_to_deque(parser): assert isinstance(parser.parse_path(["item1", "item2"]), deque) -# Unhappy searches @pytest.mark.parametrize("yaml_path", [ ('some[search ^^ "Name "]'), ('some[search $$ " Here"]'), @@ -107,8 +121,36 @@ def test_happy_parse_path_list_to_deque(parser): ('some[search = "unterminated demarcation]'), ('some[search =~ /unterminated RegEx]'), ('some[search ^= "meaningless operator"]'), + ('array[4F]'), ({}), ]) def test_uphappy_str_path_translations(parser, yaml_path): with pytest.raises(YAMLPathException): parser.str_path(yaml_path) + +@pytest.mark.parametrize("pathsep,yaml_path,stringified", [ + ('.', "some.hash.key", "some.hash.key"), + ('/', "/some/hash/key", "/some/hash/key"), + ('.', "/some/hash/key", "some.hash.key"), + ('/', "some.hash.key", "/some/hash/key"), + ('/', "/&someAnchoredArray[0]", "/&someAnchoredArray[0]"), + ('.', "&someAnchoredArray[0]", "&someAnchoredArray[0]"), + ('.', "/&someAnchoredArray[0]", "&someAnchoredArray[0]"), + ('/', "&someAnchoredArray[0]", "/&someAnchoredArray[0]"), +]) +def test_pathsep(parser, pathsep, yaml_path, stringified): + assert parser.str_path(yaml_path, pathsep=pathsep) == stringified + +@pytest.mark.parametrize("pathsep,compare", [ + (PathSeperators.DOT, PathSeperators.DOT), + (PathSeperators.FSLASH, PathSeperators.FSLASH), + ('.', PathSeperators.DOT), + ('/', PathSeperators.FSLASH), +]) +def test_pretyped_pathsep(pathsep, compare): + parser = Parser(None, pathsep=pathsep) + assert compare == parser.pathsep + +def test_bad_pathsep(): + with pytest.raises(YAMLPathException): + _ = Parser(None, pathsep="no such seperator!") diff --git a/tests/test_yamlpath.py b/tests/test_yamlpath.py index 1d6dbac6..52406547 100644 --- 
a/tests/test_yamlpath.py +++ b/tests/test_yamlpath.py @@ -16,6 +16,7 @@ YAMLValueFormats, PathSegmentTypes, PathSearchMethods, + PathSeperators, ) from yamlpath.wrappers import ConsolePrinter @@ -23,110 +24,112 @@ # Define a set of single-match inputs that are used for multiple tests single_match_tests = [ ("aliases[&test_scalarstring]", "This is a scalar string."), - ("aliases[&test_foldedstring]", "This is a folded multi-line string."), + ("/aliases[&test_foldedstring]", "This is a folded multi-line string."), ("aliases[&test_literalstring]", "This is a\nliteral multi-line\nstring."), - ("top_scalar", "value"), + ("/top_scalar", "value"), ("top_alias", "This is a scalar string."), - ("top_array_anchor[0]", "This is a scalar string."), + ("/top_array_anchor[0]", "This is a scalar string."), ("top_array_anchor[1]", "An original value"), - ("top_array_anchor[2]", "This is a folded multi-line string."), + ("/top_array_anchor[2]", "This is a folded multi-line string."), ("top_array_anchor[3]", "Another original value"), - ("&topArrayAnchor[0]", "This is a scalar string."), + ("/&topArrayAnchor[0]", "This is a scalar string."), ("&topArrayAnchor[1]", "An original value"), - ("&topArrayAnchor[2]", "This is a folded multi-line string."), + ("/&topArrayAnchor[2]", "This is a folded multi-line string."), ("&topArrayAnchor[3]", "Another original value"), - ("sub_hash_anchor.child1.attr_tst", "child 1"), + ("/sub_hash_anchor/child1/attr_tst", "child 1"), ("sub_hash_anchor.child1.attr_val", 100), - ("sub_hash_anchor.child2.attr_tst", "child 2"), + ("/sub_hash_anchor/child2/attr_tst", "child 2"), ("sub_hash_anchor.child2.attr_val", 200), - ("sub_hash_anchor.child3.attr_tst", "child 3"), + ("/sub_hash_anchor/child3/attr_tst", "child 3"), ("sub_hash_anchor.child3.attr_val", 300), - ("sub_hash_anchor.childN.attr_tst", "child N"), + ("/sub_hash_anchor/childN/attr_tst", "child N"), ("sub_hash_anchor.childN.attr_val", 999), - ("&subHashAnchor.child1.attr_tst", "child 1"), + 
("/&subHashAnchor/child1/attr_tst", "child 1"), ("&subHashAnchor.child1.attr_val", 100), - ("&subHashAnchor.child2.attr_tst", "child 2"), + ("/&subHashAnchor/child2/attr_tst", "child 2"), ("&subHashAnchor.child2.attr_val", 200), - ("&subHashAnchor.child3.attr_tst", "child 3"), + ("/&subHashAnchor/child3/attr_tst", "child 3"), ("&subHashAnchor.child3.attr_val", 300), - ("&subHashAnchor.childN.attr_tst", "child N"), + ("/&subHashAnchor/childN/attr_tst", "child N"), ("&subHashAnchor.childN.attr_val", 999), - ("top_hash_anchor.key1", "value 1"), + ("/top_hash_anchor/key1", "value 1"), ("top_hash_anchor.key2", "value 2"), - ("top_hash_anchor.key3", "value 3"), + ("/top_hash_anchor/key3", "value 3"), ("top_hash_anchor.key_complex.child1.attr_tst", "child 1"), - ("top_hash_anchor.key_complex.child1.attr_val", 100), + ("/top_hash_anchor/key_complex/child1/attr_val", 100), ("top_hash_anchor.key_complex.child2.attr_tst", "child 2"), - ("top_hash_anchor.key_complex.child2.attr_val", 200), + ("/top_hash_anchor/key_complex/child2/attr_val", 200), ("top_hash_anchor.key_complex.child3.attr_tst", "child 3"), - ("top_hash_anchor.key_complex.child3.attr_val", 300), - ("top_hash_anchor.key_complex.childN.attr_tst", "child N"), + ("/top_hash_anchor/key_complex/child3/attr_val", 300), + ("/top_hash_anchor/key_complex/childN/attr_tst", "child N"), ("top_hash_anchor.key_complex.childN.attr_val", 999), ("&topHashAnchor.key1", "value 1"), - ("&topHashAnchor.key2", "value 2"), + ("/&topHashAnchor/key2", "value 2"), ("&topHashAnchor.key3", "value 3"), - ("&topHashAnchor.key_complex.child1.attr_tst", "child 1"), + ("/&topHashAnchor/key_complex/child1/attr_tst", "child 1"), ("&topHashAnchor.key_complex.child1.attr_val", 100), ("&topHashAnchor.key_complex.child2.attr_tst", "child 2"), - ("&topHashAnchor.key_complex.child2.attr_val", 200), + ("/&topHashAnchor/key_complex/child2/attr_val", 200), ("&topHashAnchor.key_complex.child3.attr_tst", "child 3"), - 
("&topHashAnchor.key_complex.child3.attr_val", 300), - ("&topHashAnchor.key_complex.childN.attr_tst", "child N"), + ("/&topHashAnchor/key_complex/child3/attr_val", 300), + ("/&topHashAnchor/key_complex/childN/attr_tst", "child N"), ("&topHashAnchor.key_complex.childN.attr_val", 999), ("namespaced::hash.with_array[0]", "one"), - ("namespaced::hash.with_array[1]", "two"), + ("/namespaced::hash/with_array[1]", "two"), ("namespaced::hash.with_array[2]", "three"), ("namespaced::hash.with_array_of_hashes[0].id", 1), - ("namespaced::hash.with_array_of_hashes[0].name", "ichi"), - ("namespaced::hash.with_array_of_hashes[1].id", 2), + ("/namespaced::hash/with_array_of_hashes[0]/name", "ichi"), + ("/namespaced::hash/with_array_of_hashes[1]/id", 2), ("namespaced::hash.with_array_of_hashes[1].name", "ni"), - ("namespaced::hash.with_array_of_hashes[2].id", 3), + ("/namespaced::hash/with_array_of_hashes[2]/id", 3), ("namespaced::hash.with_array_of_hashes[2].name", "san"), - ("namespaced::hash.with_array_alias[0]", "This is a scalar string."), + ("/namespaced::hash/with_array_alias[0]", "This is a scalar string."), ("namespaced::hash.with_array_alias[1]", "An original value"), - ("namespaced::hash.with_array_alias[2]", "This is a folded multi-line string."), + ("/namespaced::hash/with_array_alias[2]", "This is a folded multi-line string."), ("namespaced::hash.with_array_alias[3]", "Another original value"), - ("namespaced::hash.with_hash_alias.key1", "value 1"), + ("/namespaced::hash/with_hash_alias/key1", "value 1"), ("namespaced::hash.with_hash_alias.key2", "value 2"), - ("namespaced::hash.with_hash_alias.key3", "value 3.2"), + ("/namespaced::hash/with_hash_alias/key3", "value 3.2"), ("namespaced::hash.with_hash_alias.key4", "value 4.0"), - ("namespaced::hash.with_hash_alias.key_complex.child1.attr_tst", "child 1"), + ("/namespaced::hash/with_hash_alias/key_complex/child1/attr_tst", "child 1"), ("namespaced::hash.with_hash_alias.key_complex.child1.attr_val", 100), 
("namespaced::hash.with_hash_alias.key_complex.child2.attr_tst", "child 2"), - ("namespaced::hash.with_hash_alias.key_complex.child2.attr_val", 200), + ("/namespaced::hash/with_hash_alias/key_complex/child2/attr_val", 200), ("namespaced::hash.with_hash_alias.key_complex.child3.attr_tst", "child 3"), - ("namespaced::hash.with_hash_alias.key_complex.child3.attr_val", 300), - ("namespaced::hash.with_hash_alias.key_complex.child4.attr_tst", "child 4"), + ("/namespaced::hash/with_hash_alias/key_complex/child3/attr_val", 300), + ("/namespaced::hash/with_hash_alias/key_complex/child4/attr_tst", "child 4"), ("namespaced::hash.with_hash_alias.key_complex.child4.attr_val", 400), - ("namespaced::hash.with_hash_alias.key_complex.child5.attr_tst", "child 5"), + ("/namespaced::hash/with_hash_alias/key_complex/child5/attr_tst", "child 5"), ("namespaced::hash.with_hash_alias.key_complex.child5.attr_val", 500), - ("namespaced::hash.with_hash_alias.key_complex.childN.attr_tst", "child N2"), + ("/namespaced::hash/with_hash_alias/key_complex/childN/attr_tst", "child N2"), ("namespaced::hash.with_hash_alias.key_complex.childN.attr_val", 0), (r"namespaced::hash.and\.with\.dotted\.child.that", "has it's own"), - (r"namespaced::hash.and\.with\.dotted\.child.child", "nodes"), + (r"/namespaced::hash/and.with.dotted.child/child", "nodes"), ("namespaced::hash.with_array_of_hashes[id=1].name", "ichi"), - ("namespaced::hash.with_array_of_hashes[name=ichi].id", 1), + ("/namespaced::hash/with_array_of_hashes[name=ichi]/id", 1), ("namespaced::hash.with_array_of_hashes[name='ichi'].id", 1), - ("namespaced::hash.with_array_of_hashes[id=2].name", "ni"), + ("/namespaced::hash/with_array_of_hashes[id=2]/name", "ni"), ("namespaced::hash.with_array_of_hashes[name=ni].id", 2), - ("namespaced::hash.with_array_of_hashes[name='ni'].id", 2), - ("namespaced::hash.with_array_of_hashes[id=3].name", "san"), + ("/namespaced::hash/with_array_of_hashes[name='ni']/id", 2), + 
("/namespaced::hash/with_array_of_hashes[id=3]/name", "san"), ("namespaced::hash.with_array_of_hashes[name=san].id", 3), ("namespaced::hash.with_array_of_hashes[name='san'].id", 3), - ("namespaced::hash.with_array_of_hashes[name^ich].id", 1), + ("/namespaced::hash/with_array_of_hashes[name^ich]/id", 1), ("namespaced::hash.with_array_of_hashes[name$n].id", 3), (r"namespaced::hash.with_array_of_hashes[name%a].id", 3), ("namespaced::hash.with_array_of_hashes[id<2].name", "ichi"), - ("namespaced::hash.with_array_of_hashes[id>2].name", "san"), - ("namespaced::hash.with_array_of_hashes[id<=1].name", "ichi"), + ("/namespaced::hash/with_array_of_hashes[id>2]/name", "san"), + ("/namespaced::hash/with_array_of_hashes[id<=1]/name", "ichi"), ("namespaced::hash.with_array_of_hashes[id>=3].name", "san"), (r"namespaced::hash.with_array_of_hashes[name!%i].id", 3), - (r"[.^top_][.^key][.^child][attr_tst=child\ 2]", "child 2"), + (r"/[.^top_][.^key][.^child][attr_tst=child\ 2]", "child 2"), (r"complex.hash_of_hashes[.=~/^child\d+/].children[third=~/^j[^u]\s\w+$/]", "ji ni"), - (r"complex.hash_of_hashes[.=~/^child[0-9]+/].children[third=~/^j[^u] \w+$/]", "ji ni"), + (r"/complex/hash_of_hashes[.=~/^child[0-9]+/]/children[third=~/^j[^u] \w+$/]", "ji ni"), (r"complex.hash_of_hashes[.=~_^child\d+_].children[third=~#^j[^u] \w+$#]", "ji ni"), - (r"complex.hash_of_hashes[ . =~ !^child\d+! ].children[ third =~ a^j[^u] \w+$a ]", "ji ni"), + (r"/complex/hash_of_hashes[ . =~ !^child\d+! 
]/children[ third =~ a^j[^u] \w+$a ]", "ji ni"), (r"complex.hash_of_hashes[.=~ -^child\d+-].children[third =~ $^j[^u] \w+$]", "ji ni"), + ("namespaced::hash.with_array[1:1]", "two"), + ("/namespaced::hash/with_array[1:1]", "two"), ] # Define a set of multiple-match inputs that are used for multiple tests @@ -149,6 +152,12 @@ ("complex.hash_of_hashes[.^child].children.first", ["ichi", "shi", "shichi", "ju"]), (r"complex.hash_of_hashes[.^child].children[first%ichi]", ["ichi", "shichi"]), (r"&topArrayAnchor[.%original]", ["An original value", "Another original value"]), + ("namespaced::hash.with_array[0:2]", [["one", "two"]]), + ("/namespaced::hash/with_array[0:2]", [["one", "two"]]), + ("&topHashAnchor[key1:key2]", ["value 1", "value 2"]), + ("/&topHashAnchor[key1:key2]", ["value 1", "value 2"]), + ("namespaced::hash.with_array_of_hashes[0:2].id", [1, 2]), + ("/namespaced::hash/with_array_of_hashes[0:2]/id", [1, 2]), ] @pytest.fixture @@ -365,6 +374,8 @@ def test_happy_multiple_get_nodes_req(yamlpath, yamldata, search, compare): ("namespaced::hash.with_array_of_hashes[ref>=1.41F].id", "Invalid index"), ("namespaced::hash.with_array_of_hashes[ref<1.41F].id", "Invalid index"), ("namespaced::hash.with_array_of_hashes[ref<=1.41F].id", "Invalid index"), + ("namespaced::hash.with_array[1:4F]", "borked"), + ("/namespaced::hash/with_array[4F:1]", "borken"), ]) def test_unhappy_singular_get_leaf_nodes(yamlpath, yamldata, search, compare): with pytest.raises(YAMLPathException): @@ -502,6 +513,11 @@ def test_nonexistant_path_search_method(yamlpath, yamldata): for _ in yamlpath._search(yamldata["top_scalar"], [True, PathSearchMethods.DNF, ".", "top_scalar"]): pass +def test_nonexistant_path_search_method_operator(): + from yamlpath.enums import PathSearchMethods + with pytest.raises(NotImplementedError): + _ = PathSearchMethods.to_operator("non-existant") + def test_nonexistant_path_segment_types(yamlpath, yamldata): from enum import Enum from yamlpath.enums import 
PathSegmentTypes @@ -512,6 +528,21 @@ def test_nonexistant_path_segment_types(yamlpath, yamldata): for _ in yamlpath._get_elements_by_ref(yamldata, (PathSegmentTypes.DNF, False)): pass +@pytest.mark.parametrize("sep,val", [ + ('.', PathSeperators.DOT), + ('/', PathSeperators.FSLASH), + ("DOT", PathSeperators.DOT), + ("FSLASH", PathSeperators.FSLASH), + (PathSeperators.DOT, PathSeperators.DOT), + (PathSeperators.FSLASH, PathSeperators.FSLASH), +]) +def test_seperators_from_str(sep, val): + assert val == PathSeperators.from_str(sep) + +def test_bad_separator_from_str(): + with pytest.raises(NameError): + _ = PathSeperators.from_str("DNF") + def test_append_list_element_value_error(yamlpath): with pytest.raises(ValueError): yamlpath._append_list_element([], PathSearchMethods, "anchor") @@ -545,3 +576,9 @@ def test_update_value(yamlpath, yamldata, newval, newform): def test_bad_update_value(yamlpath, yamldata, newval, newform): with pytest.raises(SystemExit): yamlpath._update_value(yamldata, yamldata["top_scalar"], newval, newform) + +def test_yamlpath_exception(): + try: + raise YAMLPathException("meh", "/some/path", "/some") + except YAMLPathException as ex: + _ = str(ex) diff --git a/yamlpath/enums/__init__.py b/yamlpath/enums/__init__.py index 79e06885..a8193abe 100644 --- a/yamlpath/enums/__init__.py +++ b/yamlpath/enums/__init__.py @@ -1,3 +1,4 @@ from yamlpath.enums.yamlvalueformats import YAMLValueFormats from yamlpath.enums.pathsegmenttypes import PathSegmentTypes from yamlpath.enums.pathsearchmethods import PathSearchMethods +from yamlpath.enums.pathseperators import PathSeperators diff --git a/yamlpath/enums/pathseperators.py b/yamlpath/enums/pathseperators.py new file mode 100644 index 00000000..baca8a49 --- /dev/null +++ b/yamlpath/enums/pathseperators.py @@ -0,0 +1,56 @@ +"""Implements the PathSeperators enumeration.""" +from enum import Enum, auto + + +class PathSeperators(Enum): + """Supported representation formats for YAML values.""" + AUTO = auto() + 
DOT = auto() + FSLASH = auto() + + @staticmethod + def get_names(): + """Returns all entry names for this enumeration. + + Positional Parameters: N/A + + Returns: (list) Upper-case names from this enumeration + + Raises: N/A + """ + return [entry.name.upper() for entry in PathSeperators] + + @staticmethod + def from_str(name): + """Converts a string value to a value of this enumeration, if valid. + + Positional Parameters: + 1. name (str) The name to convert + + Returns: (PathSeperators) the converted enumeration value + + Raises: + NameError when name doesn't match any enumeration values. + """ + if isinstance(name, PathSeperators): + return name + + check = str(name).upper() + + if check == '.': + check = "DOT" + elif check == '/': + check = "FSLASH" + + if check in PathSeperators.get_names(): + return PathSeperators[check] + else: + raise NameError("PathSeperators has no such item, " + check) + + @staticmethod + def to_seperator(name): + seperator = '.' + if name == PathSeperators.FSLASH: + seperator = '/' + + return seperator diff --git a/yamlpath/exceptions/yamlpath.py b/yamlpath/exceptions/yamlpath.py index f6c3c92d..20142660 100644 --- a/yamlpath/exceptions/yamlpath.py +++ b/yamlpath/exceptions/yamlpath.py @@ -17,12 +17,13 @@ def __init__(self, user_message, yaml_path, segment=None): .format(user_message, yaml_path, segment) ) - def __reduce__(self): - return YAMLPathException, ( - self.user_message, - self.yaml_path, - self.segment - ) + # Should Pickling ever be necessary: + # def __reduce__(self): + # return YAMLPathException, ( + # self.user_message, + # self.yaml_path, + # self.segment + # ) def __str__(self): message = "" diff --git a/yamlpath/eyaml/eyamlpath.py b/yamlpath/eyaml/eyamlpath.py index 1ed5393a..fbc1e579 100644 --- a/yamlpath/eyaml/eyamlpath.py +++ b/yamlpath/eyaml/eyamlpath.py @@ -5,7 +5,7 @@ import re from subprocess import run, PIPE, CalledProcessError from os import access, sep, X_OK -from distutils.spawn import find_executable +from 
shutil import which from yamlpath.enums import YAMLValueFormats from yamlpath.exceptions import EYAMLCommandException @@ -312,7 +312,7 @@ def get_eyaml_executable(binary="eyaml"): return None if str(binary).find(sep) < 0: - binary = find_executable(binary) + binary = which(binary) if not binary: return None diff --git a/yamlpath/parser.py b/yamlpath/parser.py index c2c1d277..1b388750 100644 --- a/yamlpath/parser.py +++ b/yamlpath/parser.py @@ -8,6 +8,7 @@ from yamlpath.enums import ( PathSegmentTypes, PathSearchMethods, + PathSeperators, ) @@ -17,57 +18,101 @@ class Parser: # Cache parsed YAML Path results across instances to avoid repeated parsing _static_parsings = {} - def __init__(self, logger): + def __init__(self, logger, **kwargs): """Init this class. Positional Parameters: 1. logger (ConsoleWriter) Instance of ConsoleWriter or any similar wrapper (say, around stdlib logging modules) + Optional Parameters: + 1. pathsep (string) A PathSeperators value for controlling the YAML + Path seperator + Returns: N/A Raises: N/A """ self.log = logger - def str_path(self, yaml_path): + pathsep = kwargs.pop("pathsep", "auto") + if isinstance(pathsep, PathSeperators): + self.pathsep = pathsep + else: + try: + self.pathsep = PathSeperators.from_str(pathsep) + except NameError: + raise YAMLPathException( + "Unknown YAML Path seperator, {}.".format(pathsep) + , pathsep + ) + + def _infer_pathsep(self, yaml_path): + """Gets the most likely YAML Path seperator to use based on whether one + has already been manually specified or from what can be inferred from a + sample. + + Positional Parameters: + 1. yaml_path (any) The YAML Path to evaluate + + Returns: (str) The stringified YAML Path seperator + + Raises: N/A + """ + seperator = '.' + if self.pathsep is not PathSeperators.AUTO: + seperator = PathSeperators.to_seperator(self.pathsep) + elif not yaml_path: + self.pathsep = PathSeperators.DOT + seperator = '.' 
+ elif '/' == yaml_path[0]: + self.pathsep = PathSeperators.FSLASH + seperator = '/' + + return seperator + + def str_path(self, yaml_path, **kwargs): """Returns the printable, user-friendly version of a YAML Path. Positional Parameters: 1. yaml_path (any) The YAML Path to convert + Optional Parameters: + 1. pathsep (string) A PathSeperators value for controlling the YAML + Path seperator + Returns: (str) The stringified YAML Path Raises: N/A """ parsed_path = self.parse_path(yaml_path) - add_dot = False + add_sep = False ppath = "" + pathsep = kwargs.pop("pathsep", self._infer_pathsep(yaml_path)) + + # FSLASH pathsep requires a path starting with a / + if pathsep == '/': + ppath = "/" for (ptype, element_id) in parsed_path: if ptype == PathSegmentTypes.KEY: - if add_dot: - ppath += "." + if add_sep: + ppath += pathsep + ppath += ( element_id - .replace(".", r"\.") + .replace(pathsep, "\\{}".format(pathsep)) .replace("&", r"\&") - .replace("!", r"\!") - .replace("~", r"\~") .replace("[", r"\[") .replace("]", r"\]") - .replace("{", r"\{") - .replace("}", r"\}") - .replace("(", r"\(") - .replace("(", r"\(") ) elif ptype == PathSegmentTypes.INDEX: ppath += "[{}]".format(element_id) elif ptype == PathSegmentTypes.ANCHOR: - if ppath: + if add_sep: ppath += "[&{}]".format(element_id) else: - ppath = "&{}".format(element_id) + ppath += "&{}".format(element_id) elif ptype == PathSegmentTypes.SEARCH: invert, method, attr, term = element_id if method == PathSearchMethods.REGEX: @@ -83,7 +128,7 @@ def str_path(self, yaml_path): + "]" ) - add_dot = True + add_sep = True return ppath @@ -129,9 +174,15 @@ def parse_path(self, yaml_path): if yaml_path in Parser._static_parsings: return Parser._static_parsings[yaml_path].copy() + # Infer the path seperator + pathsep = self._infer_pathsep(yaml_path) + first_anchor_pos = 0 + if pathsep == '/': + first_anchor_pos = 1 + element_id = "" demarc_stack = [] - seeking_anchor_mark = yaml_path[0] == "&" + seeking_anchor_mark = 
yaml_path[first_anchor_pos] == "&" escape_next = False element_type = None search_inverted = False @@ -160,7 +211,7 @@ def parse_path(self, yaml_path): # the RegEx; thus, users must select a delimiter that won't # appear within the RegEx (which is exactly why the user # gets to choose the delimiter). - pass + pass # pragma: no cover # The escape test MUST come AFTER the RegEx capture test so users # won't be forced into "The Backslash Plague". @@ -228,7 +279,7 @@ def parse_path(self, yaml_path): continue elif demarc_count == 0 and c == "[": - # Array INDEX or SEARCH + # Array INDEX/SLICE or SEARCH if element_id: # Record its predecessor element; unless it has already # been identified as a special type, assume it is a KEY. @@ -294,7 +345,7 @@ def parse_path(self, yaml_path): , yaml_path ) - continue + continue # pragma: no cover elif c == "~": if search_method == PathSearchMethods.EQUALS: @@ -308,7 +359,8 @@ def parse_path(self, yaml_path): ).format(c) , yaml_path ) - continue + + continue # pragma: no cover elif not element_id: # All tests beyond this point require an operand @@ -367,8 +419,11 @@ def parse_path(self, yaml_path): and c == "]" and demarc_stack[-1] == "[" ): - # Store the INDEX or SEARCH parameters - if element_type is PathSegmentTypes.INDEX: + # Store the INDEX, SLICE, or SEARCH parameters + if ( + element_type is PathSegmentTypes.INDEX + and ':' not in element_id + ): try: idx = int(element_id) except ValueError: @@ -401,7 +456,7 @@ def parse_path(self, yaml_path): search_method = None continue - elif demarc_count < 1 and c == ".": + elif demarc_count < 1 and c == pathsep: # Do not store empty elements if element_id: # Unless its type has already been identified as a special diff --git a/yamlpath/yamlpath.py b/yamlpath/yamlpath.py index 3f07b0fa..58e49f40 100644 --- a/yamlpath/yamlpath.py +++ b/yamlpath/yamlpath.py @@ -43,7 +43,11 @@ def __init__(self, logger, **kwargs): Raises: N/A """ self.log = logger - self.parser = kwargs.pop("parser", 
Parser(logger)) + + if "parser" in kwargs: + self.parser = kwargs.pop("parser") + else: + self.parser = Parser(logger, **kwargs) def get_nodes(self, data, yaml_path, **kwargs): """Retrieves zero or more node at YAML Path in YAML data. @@ -348,6 +352,40 @@ def _get_elements_by_ref(self, data, ref): if reftyp == PathSegmentTypes.KEY: if isinstance(data, dict) and refele in data: yield data[refele] + elif isinstance(data, list): + # Pass-through search against possible Array-of-Hashes + for rec in data: + for node in self._get_elements_by_ref(rec, ref): + if node is not None: + yield node + elif ( + reftyp == PathSegmentTypes.INDEX + and isinstance(refele, str) + and ':' in refele + ): + # Array index or Hash key slice + refparts = refele.split(':', 1) + min_match = refparts[0] + max_match = refparts[1] + if isinstance(data, list): + try: + intmin = int(min_match) + intmax = int(max_match) + except ValueError: + raise YAMLPathException( + "{} is not an integer array slice".format(str(refele)) + , str(ref) + ) + + if intmin == intmax and len(data) > intmin: + yield data[intmin] + else: + yield data[intmin:intmax] + + elif isinstance(data, dict): + for key, val in data.items(): + if key >= min_match and key <= max_match: + yield val elif reftyp == PathSegmentTypes.INDEX: try: intele = int(refele) @@ -619,7 +657,10 @@ def _ensure_path(self, data, path, value=None): if node is not None: matched_nodes += 1 yield node - elif curtyp is PathSegmentTypes.INDEX: + elif ( + curtyp is PathSegmentTypes.INDEX + and isinstance(curele, int) + ): for _ in range(len(data) - 1, curele): new_val = self._default_for_child(path, value) self._append_list_element(data, new_val)