From 11b75b436d5c0ff509eb966e21d7de1f3a855e7a Mon Sep 17 00:00:00 2001 From: Nolwenn <28621493+NoB0@users.noreply.github.com> Date: Tue, 20 Feb 2024 12:08:11 +0100 Subject: [PATCH] Add annotation dataclasses to pkg_types.py Fixes #106 --- pkg_api/core/annotation.py | 82 ------------------- pkg_api/core/pkg_types.py | 81 ++++++++++++++++++ pkg_api/nl_to_pkg/annotators/annotator.py | 3 +- .../annotators/three_step_annotator.py | 3 +- .../nl_to_pkg/entity_linking/entity_linker.py | 3 +- .../entity_linking/rel_entity_linking.py | 3 +- .../entity_linking/spotlight_entity_linker.py | 3 +- pkg_api/nl_to_pkg/eval_nl_to_pkg.py | 7 +- pkg_api/nl_to_pkg/nl_to_pkg.py | 2 +- pkg_api/pkg.py | 11 +-- pkg_api/utils.py | 10 ++- tests/nl_to_pkg/test_eval_nl_to_pkg.py | 3 +- tests/nl_to_pkg/test_nl_to_pkg.py | 2 +- tests/nl_to_pkg/test_rel_entity_linker.py | 4 +- .../nl_to_pkg/test_spotlight_entity_linker.py | 4 +- tests/nl_to_pkg/test_three_step_annotator.py | 2 +- tests/pkg_api/test_pkg.py | 5 +- tests/pkg_api/test_utils.py | 5 +- 18 files changed, 119 insertions(+), 114 deletions(-) delete mode 100644 pkg_api/core/annotation.py diff --git a/pkg_api/core/annotation.py b/pkg_api/core/annotation.py deleted file mode 100644 index 43d7a2d..0000000 --- a/pkg_api/core/annotation.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Dataclasses for the annotations used in the PKG API.""" -from __future__ import annotations - -import uuid -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Union - -from pkg_api.core.pkg_types import URI - - -@dataclass -class Concept: - """Class representing a SKOS concept.""" - - description: str - related_entities: List[URI] = field(default_factory=list) - broader_entities: List[URI] = field(default_factory=list) - narrower_entities: List[URI] = field(default_factory=list) - - -@dataclass -class TripleElement: - """Class representing a subject, predicate or object element. - - Attributes: - reference: Raw string reference of the element. - value: URI, Concept or literal value of the element. - """ - - reference: str - value: Union[URI, Concept, str, None] = field(default=None) - - @staticmethod - def from_value(value: Union[URI, Concept, str]) -> TripleElement: - """Creates a TripleElement from a value. - - Args: - value: URI, Concept or literal value. - - Returns: - TripleElement. - """ - if isinstance(value, URI): - return TripleElement("", value) - elif isinstance(value, Concept): - return TripleElement(value.description, value) - return TripleElement(value, value) - - -@dataclass -class Triple: - """Class representing a subject, predicate, object triple.""" - - subject: Optional[TripleElement] = None - predicate: Optional[TripleElement] = None - object: Optional[TripleElement] = None - - -@dataclass -class Preference: - """Class representing a preference. - - Note: In the current version of the PKG API, topic refers to the object of - a triple. - """ - - topic: TripleElement - weight: float - - -@dataclass -class PKGData: - """Class representing an annotated statement. - - Annotations include a triple, a preference, and logging data. - """ - - id: uuid.UUID - statement: str - triple: Optional[Triple] = None - preference: Optional[Preference] = None - logging_data: Dict[str, Any] = field(default_factory=dict) diff --git a/pkg_api/core/pkg_types.py b/pkg_api/core/pkg_types.py index fa6de09..d296723 100644 --- a/pkg_api/core/pkg_types.py +++ b/pkg_api/core/pkg_types.py @@ -1,4 +1,11 @@ """PKG package types.""" + +from __future__ import annotations + +import uuid +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional, Union + from rfc3987 import match SPARQLQuery = str @@ -9,3 +16,77 @@ def __new__(cls, *args, **kwargs): """Creates a new URI.""" assert match(args[0], rule="IRI"), f"Invalid URI: {args[0]}" return super().__new__(cls, *args, **kwargs) + + +@dataclass +class Concept: + """Class representing a SKOS concept.""" + + description: str + related_entities: List[URI] = field(default_factory=list) + broader_entities: List[URI] = field(default_factory=list) + narrower_entities: List[URI] = field(default_factory=list) + + +@dataclass +class TripleElement: + """Class representing a subject, predicate or object element. + + Attributes: + reference: Raw string reference of the element. + value: URI, Concept or literal value of the element. + """ + + reference: str + value: Union[URI, Concept, str, None] = field(default=None) + + @staticmethod + def from_value(value: Union[URI, Concept, str]) -> TripleElement: + """Creates a TripleElement from a value. + + Args: + value: URI, Concept or literal value. + + Returns: + TripleElement. + """ + if isinstance(value, URI): + return TripleElement("", value) + elif isinstance(value, Concept): + return TripleElement(value.description, value) + return TripleElement(value, value) + + +@dataclass +class Triple: + """Class representing a subject, predicate, object triple.""" + + subject: Optional[TripleElement] = None + predicate: Optional[TripleElement] = None + object: Optional[TripleElement] = None + + +@dataclass +class Preference: + """Class representing a preference. + + Note: In the current version of the PKG API, topic refers to the object of + a triple. + """ + + topic: TripleElement + weight: float + + +@dataclass +class PKGData: + """Class representing an annotated statement. + + Annotations include a triple, a preference, and logging data. + """ + + id: uuid.UUID + statement: str + triple: Optional[Triple] = None + preference: Optional[Preference] = None + logging_data: Dict[str, Any] = field(default_factory=dict) diff --git a/pkg_api/nl_to_pkg/annotators/annotator.py b/pkg_api/nl_to_pkg/annotators/annotator.py index d6a938f..5790270 100644 --- a/pkg_api/nl_to_pkg/annotators/annotator.py +++ b/pkg_api/nl_to_pkg/annotators/annotator.py @@ -5,12 +5,11 @@ -1) in the query. """ - from abc import ABC, abstractmethod from typing import Tuple -from pkg_api.core.annotation import PKGData from pkg_api.core.intents import Intent +from pkg_api.core.pkg_types import PKGData from pkg_api.nl_to_pkg.llm.prompt import Prompt diff --git a/pkg_api/nl_to_pkg/annotators/three_step_annotator.py b/pkg_api/nl_to_pkg/annotators/three_step_annotator.py index d4344ca..772c74d 100644 --- a/pkg_api/nl_to_pkg/annotators/three_step_annotator.py +++ b/pkg_api/nl_to_pkg/annotators/three_step_annotator.py @@ -4,13 +4,12 @@ with a triple and a preference using LLM. """ - import re import uuid from typing import Dict, Optional, Tuple -from pkg_api.core.annotation import PKGData, Preference, Triple, TripleElement from pkg_api.core.intents import Intent +from pkg_api.core.pkg_types import PKGData, Preference, Triple, TripleElement from pkg_api.nl_to_pkg.annotators.annotator import StatementAnnotator from pkg_api.nl_to_pkg.llm.llm_connector import LLMConnector from pkg_api.nl_to_pkg.llm.prompt import Prompt diff --git a/pkg_api/nl_to_pkg/entity_linking/entity_linker.py b/pkg_api/nl_to_pkg/entity_linking/entity_linker.py index f3fb9aa..88c311c 100644 --- a/pkg_api/nl_to_pkg/entity_linking/entity_linker.py +++ b/pkg_api/nl_to_pkg/entity_linking/entity_linker.py @@ -1,9 +1,8 @@ """Abstract class for entity linking.""" - from abc import ABC, abstractmethod -from pkg_api.core.annotation import PKGData +from pkg_api.core.pkg_types import PKGData class EntityLinker(ABC): diff --git a/pkg_api/nl_to_pkg/entity_linking/rel_entity_linking.py b/pkg_api/nl_to_pkg/entity_linking/rel_entity_linking.py index a924312..0598cc5 100644 --- a/pkg_api/nl_to_pkg/entity_linking/rel_entity_linking.py +++ b/pkg_api/nl_to_pkg/entity_linking/rel_entity_linking.py @@ -4,8 +4,7 @@ import requests -from pkg_api.core.annotation import Concept, PKGData, TripleElement -from pkg_api.core.pkg_types import URI +from pkg_api.core.pkg_types import URI, Concept, PKGData, TripleElement from pkg_api.nl_to_pkg.entity_linking.entity_linker import EntityLinker _DEFAULT_API_URL = "https://rel.cs.ru.nl/api" diff --git a/pkg_api/nl_to_pkg/entity_linking/spotlight_entity_linker.py b/pkg_api/nl_to_pkg/entity_linking/spotlight_entity_linker.py index 0549f92..b8611f0 100644 --- a/pkg_api/nl_to_pkg/entity_linking/spotlight_entity_linker.py +++ b/pkg_api/nl_to_pkg/entity_linking/spotlight_entity_linker.py @@ -4,8 +4,7 @@ import requests -from pkg_api.core.annotation import Concept, PKGData, TripleElement -from pkg_api.core.pkg_types import URI +from pkg_api.core.pkg_types import URI, Concept, PKGData, TripleElement from pkg_api.nl_to_pkg.entity_linking.entity_linker import EntityLinker from pkg_api.util.load_config import load_yaml_config diff --git a/pkg_api/nl_to_pkg/eval_nl_to_pkg.py b/pkg_api/nl_to_pkg/eval_nl_to_pkg.py index 0d8cb0c..6a6b930 100644 --- a/pkg_api/nl_to_pkg/eval_nl_to_pkg.py +++ b/pkg_api/nl_to_pkg/eval_nl_to_pkg.py @@ -1,10 +1,13 @@ """Evaluates the NL to PKG models.""" + import csv -from typing import Dict, Any, List, Tuple +from typing import Any, Dict, List, Tuple + from sklearn.metrics import f1_score from tqdm import tqdm -from pkg_api.core.annotation import PKGData + from pkg_api.core.intents import Intent +from pkg_api.core.pkg_types import PKGData from pkg_api.nl_to_pkg.annotators.three_step_annotator import ( ThreeStepStatementAnnotator, ) diff --git a/pkg_api/nl_to_pkg/nl_to_pkg.py b/pkg_api/nl_to_pkg/nl_to_pkg.py index c01005a..248be8f 100644 --- a/pkg_api/nl_to_pkg/nl_to_pkg.py +++ b/pkg_api/nl_to_pkg/nl_to_pkg.py @@ -2,8 +2,8 @@ from typing import Tuple -from pkg_api.core.annotation import PKGData from pkg_api.core.intents import Intent +from pkg_api.core.pkg_types import PKGData from pkg_api.nl_to_pkg import EntityLinker, StatementAnnotator diff --git a/pkg_api/pkg.py b/pkg_api/pkg.py index 9883d37..b5b8a7a 100644 --- a/pkg_api/pkg.py +++ b/pkg_api/pkg.py @@ -24,9 +24,8 @@ import pkg_api.utils as utils from pkg_api.connector import Connector, RDFStore -from pkg_api.core.annotation import Concept, PKGData, Triple, TripleElement from pkg_api.core.namespaces import PKGPrefixes -from pkg_api.core.pkg_types import URI +from pkg_api.core.pkg_types import URI, Concept, PKGData, Triple, TripleElement from pkg_api.mapping_vocab import MappingVocab DEFAULT_VISUALIZATION_PATH = "data/pkg_visualizations" @@ -279,9 +278,11 @@ def _parse_statement_node( setattr(_triple, k, TripleElement.from_value(v)) return PKGData( - id=uuid.UUID(f"{{{statement_node_id}}}") - if statement_node_id - else uuid.uuid1(), + id=( + uuid.UUID(f"{{{statement_node_id}}}") + if statement_node_id + else uuid.uuid1() + ), statement=statement_dict.get("statement"), triple=_triple, preference=None, diff --git a/pkg_api/utils.py b/pkg_api/utils.py index 78bccd8..661f10f 100644 --- a/pkg_api/utils.py +++ b/pkg_api/utils.py @@ -11,9 +11,15 @@ import re from typing import List, Optional, Union -from pkg_api.core.annotation import Concept, PKGData, Triple, TripleElement from pkg_api.core.namespaces import PKGPrefixes -from pkg_api.core.pkg_types import URI, SPARQLQuery +from pkg_api.core.pkg_types import ( + URI, + Concept, + PKGData, + SPARQLQuery, + Triple, + TripleElement, +) def _clean_sparql_representation(sparql: str) -> str: diff --git a/tests/nl_to_pkg/test_eval_nl_to_pkg.py b/tests/nl_to_pkg/test_eval_nl_to_pkg.py index 890ccad..f981f11 100644 --- a/tests/nl_to_pkg/test_eval_nl_to_pkg.py +++ b/tests/nl_to_pkg/test_eval_nl_to_pkg.py @@ -1,12 +1,13 @@ """Tests eval_nl_to_pkg.py file.""" + import uuid from typing import Dict, List, Tuple from unittest.mock import MagicMock, patch import pytest -from pkg_api.core.annotation import PKGData, Triple, TripleElement from pkg_api.core.intents import Intent +from pkg_api.core.pkg_types import PKGData, Triple, TripleElement from pkg_api.nl_to_pkg.annotators.three_step_annotator import ( ThreeStepStatementAnnotator, ) diff --git a/tests/nl_to_pkg/test_nl_to_pkg.py b/tests/nl_to_pkg/test_nl_to_pkg.py index c24e05d..9349ffb 100644 --- a/tests/nl_to_pkg/test_nl_to_pkg.py +++ b/tests/nl_to_pkg/test_nl_to_pkg.py @@ -5,8 +5,8 @@ import pytest -from pkg_api.core.annotation import PKGData, Preference, Triple, TripleElement from pkg_api.core.intents import Intent +from pkg_api.core.pkg_types import PKGData, Preference, Triple, TripleElement from pkg_api.nl_to_pkg.nl_to_pkg import NLtoPKG diff --git a/tests/nl_to_pkg/test_rel_entity_linker.py b/tests/nl_to_pkg/test_rel_entity_linker.py index b716b95..773ec7e 100644 --- a/tests/nl_to_pkg/test_rel_entity_linker.py +++ b/tests/nl_to_pkg/test_rel_entity_linker.py @@ -1,11 +1,11 @@ """Tests for REL entity linker.""" + import uuid from unittest.mock import Mock, patch import pytest -from pkg_api.core.annotation import Concept, PKGData, Triple, TripleElement -from pkg_api.core.pkg_types import URI +from pkg_api.core.pkg_types import URI, Concept, PKGData, Triple, TripleElement from pkg_api.nl_to_pkg.entity_linking.rel_entity_linking import RELEntityLinker diff --git a/tests/nl_to_pkg/test_spotlight_entity_linker.py b/tests/nl_to_pkg/test_spotlight_entity_linker.py index d5afbe2..712fc05 100644 --- a/tests/nl_to_pkg/test_spotlight_entity_linker.py +++ b/tests/nl_to_pkg/test_spotlight_entity_linker.py @@ -1,11 +1,11 @@ """Tests the Spotlight entity linker class.""" + import uuid from unittest.mock import Mock, patch import pytest -from pkg_api.core.annotation import Concept, PKGData, Triple, TripleElement -from pkg_api.core.pkg_types import URI +from pkg_api.core.pkg_types import URI, Concept, PKGData, Triple, TripleElement from pkg_api.nl_to_pkg.entity_linking.spotlight_entity_linker import ( SpotlightEntityLinker, ) diff --git a/tests/nl_to_pkg/test_three_step_annotator.py b/tests/nl_to_pkg/test_three_step_annotator.py index 48a5b8f..f1db20a 100644 --- a/tests/nl_to_pkg/test_three_step_annotator.py +++ b/tests/nl_to_pkg/test_three_step_annotator.py @@ -5,8 +5,8 @@ import pytest -from pkg_api.core.annotation import Preference, Triple, TripleElement from pkg_api.core.intents import Intent +from pkg_api.core.pkg_types import Preference, Triple, TripleElement from pkg_api.nl_to_pkg.annotators.three_step_annotator import ( ThreeStepStatementAnnotator, ) diff --git a/tests/pkg_api/test_pkg.py b/tests/pkg_api/test_pkg.py index 4193ae6..339e402 100644 --- a/tests/pkg_api/test_pkg.py +++ b/tests/pkg_api/test_pkg.py @@ -1,18 +1,19 @@ """Tests for the PKG module.""" + import re import uuid import pytest from pkg_api.connector import RDFStore -from pkg_api.core.annotation import ( +from pkg_api.core.pkg_types import ( + URI, Concept, PKGData, Preference, Triple, TripleElement, ) -from pkg_api.core.pkg_types import URI from pkg_api.pkg import PKG from pkg_api.utils import get_statement_node_id diff --git a/tests/pkg_api/test_utils.py b/tests/pkg_api/test_utils.py index bc7bc28..14472ea 100644 --- a/tests/pkg_api/test_utils.py +++ b/tests/pkg_api/test_utils.py @@ -1,6 +1,5 @@ """Tests for utility methods.""" - import re import uuid from typing import Optional, Union @@ -8,14 +7,14 @@ import pytest from pkg_api import utils -from pkg_api.core.annotation import ( +from pkg_api.core.pkg_types import ( + URI, Concept, PKGData, Preference, Triple, TripleElement, ) -from pkg_api.core.pkg_types import URI def strip_string(string: str) -> str: