generated from iai-group/template-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature/82-NLtoAPI-Implement-basic-entity-linker
* Simplify Triple (Only use Concept instead of Union[str, Concept]) Fixes #87 * [NLtoAPI] Implement basic entity linker Fixes #82 * Update tests * Update tests to use Concept * simplify checks in get_preference * simplify check for existence of object in triple * Merge branch 'feature/87-Simplify-Triple-Only-use-Concept-instead-of-Unionstr-Concept' of https://github.com/iai-group/pkg-api into feature/82-NLtoAPI-Implement-basic-entity-linker * Add tests * Add TripleElement class for SPO * Merge branch 'feature/87-Simplify-Triple-Only-use-Concept-instead-of-Unionstr-Concept' of https://github.com/iai-group/pkg-api into feature/82-NLtoAPI-Implement-basic-entity-linker * Migrate to new dataclass structure * Add linking for predicate and object * Address review comments * Merge branch 'main' of https://github.com/iai-group/pkg-api into feature/87-Simplify-Triple-Only-use-Concept-instead-of-Unionstr-Concept * fix imports * Fix pkg inserts with new dataclass structure * [NLtoAPI] Implement basic entity linker Fixes #82 * Merge branch 'main' of https://github.com/iai-group/pkg-api into feature/82-NLtoAPI-Implement-basic-entity-linker * Swap value and reference * Merge branch 'main' of https://github.com/iai-group/pkg-api into feature/82-NLtoAPI-Implement-basic-entity-linker * Address review comments * Address review comments * format docstrings * Add file docstrings * Address review comments
- Loading branch information
Showing
8 changed files
with
242 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Annotation endpoint that the Spotlight entity linker sends its requests to.
url: "https://api.dbpedia-spotlight.org/en/annotate"
# HTTP headers attached to every request.
headers:
  accept: "application/json"
# Query parameters attached to every request; the linker adds the text to
# annotate at call time.
params:
  confidence: 0.5
  support: 50
  types: null
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
93 changes: 93 additions & 0 deletions
93
pkg_api/nl_to_pkg/entity_linking/spotlight_entity_linker.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
"""Contains the DBpedia Spotlight entity linker.""" | ||
|
||
from typing import Any, Dict, Union | ||
|
||
import requests | ||
|
||
from pkg_api.core.annotation import Concept, PKGData, TripleElement | ||
from pkg_api.core.pkg_types import URI | ||
from pkg_api.nl_to_pkg.entity_linking.entity_linker import EntityLinker | ||
from pkg_api.util.load_config import load_yaml_config | ||
|
||
_DEFAULT_CONFIG_PATH = "config/entity_linking/dbpedia_spotlight.yaml" | ||
|
||
|
||
class SpotlightEntityLinker(EntityLinker):
    """Entity linker that annotates text with the DBpedia Spotlight API.

    The endpoint URL, request headers and query parameters are read from a
    YAML config file. An optional top-level ``timeout`` key (seconds)
    overrides the default request timeout.
    """

    # Fallback request timeout in seconds, used when the config file does not
    # define a ``timeout`` key. Without any timeout a stalled server would
    # block the caller indefinitely.
    _DEFAULT_TIMEOUT_SECONDS = 10.0

    def __init__(self, path: str = _DEFAULT_CONFIG_PATH) -> None:
        """Initializes the DBpedia Spotlight entity linker.

        Args:
            path: The path to the config file. Defaults to
              _DEFAULT_CONFIG_PATH.
        """
        self._config = load_yaml_config(path)

    def link_entities(self, pkg_data: PKGData) -> PKGData:
        """Returns the PKG data with linked entities.

        Only the predicate and object of the triple are linked to a public KG,
        as the subject should be retrieved from the PKG. The triple elements
        are updated in place and the same PKGData instance is returned.

        Args:
            pkg_data: The PKG data to be linked.

        Returns:
            The PKG data with linked entities.
        """
        if pkg_data.triple is None:
            return pkg_data

        for attr in ["predicate", "object"]:
            triple_element: TripleElement = getattr(pkg_data.triple, attr)
            if triple_element is not None:
                triple_element.value = self._get_linked_text(
                    triple_element.reference
                )

        return pkg_data

    def _get_linked_text(self, reference: str) -> Union[URI, Concept, str]:
        """Returns the linked object as URI, Concept or literal.

        Args:
            reference: The reference text to be linked.

        Returns:
            A URI if the whole reference is a single linked entity, otherwise
            a Concept holding the URIs of all linked entities (none if the
            linker found nothing or the request failed).
        """
        linked_entities = self._get_linker_response(reference)

        # Return Concept as default as we cannot distinguish between Concept
        # and literal. This also covers error payloads and responses without
        # annotations, which carry no "Resources" key.
        if not isinstance(linked_entities, dict):
            return Concept(reference)
        resources = linked_entities.get("Resources")
        if not resources:
            return Concept(reference)

        # If the entire value is a single entity, return the URI.
        if (
            len(resources) == 1
            and resources[0].get("@surfaceForm") == reference
        ):
            return URI(resources[0]["@URI"])

        # Otherwise, return a concept with the linked entities.
        value = Concept(reference)
        for entity in resources:
            value.related_entities.append(entity["@URI"])

        return value

    def _get_linker_response(self, text: str) -> Dict[str, Any]:
        """Returns the response from the DBpedia Spotlight API.

        Failures never raise: connection problems, timeouts, non-200 status
        codes and non-JSON bodies are all reported as ``{"error": message}``
        so that callers can fall back to an unlinked value.

        Args:
            text: The text to be annotated.

        Returns:
            The decoded JSON response, or a dictionary with a single "error"
            key describing the failure.
        """
        params = {**(self._config.get("params") or {}), "text": text}
        try:
            response = requests.get(
                self._config["url"],
                headers=self._config.get("headers"),
                params=params,
                timeout=self._config.get(
                    "timeout", self._DEFAULT_TIMEOUT_SECONDS
                ),
            )
        except requests.RequestException as exc:
            # Treat network failures like a non-200 response.
            return {"error": str(exc)}

        if response.status_code != 200:
            return {"error": response.text}

        try:
            return response.json()
        except ValueError as exc:
            # A 200 response whose body is not valid JSON.
            return {"error": f"Invalid JSON in response: {exc}"}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
"""Utility function for loading configuration data from YAML files.""" | ||
|
||
import os | ||
from typing import Any, Dict | ||
|
||
import yaml | ||
|
||
|
||
def load_yaml_config(path: str) -> Dict[str, Any]:
    """Loads configuration from a YAML file at the given path.

    Args:
        path: The file path to the YAML configuration file.

    Raises:
        FileNotFoundError: If the specified file does not exist.
        ValueError: If the path is not a file.

    Returns:
        A dictionary containing the configuration data. An empty YAML file
        yields an empty dictionary.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"The file at {path} does not exist.")

    if not os.path.isfile(path):
        raise ValueError(f"The path {path} is not a file.")

    # Pin the encoding so that parsing does not depend on the platform's
    # locale default.
    with open(path, "r", encoding="utf-8") as file:
        config = yaml.safe_load(file)

    # safe_load returns None for an empty document; keep the declared
    # dictionary return type so callers can use `in` and `.get` safely.
    return config if config is not None else {}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
"""Tests the Spotlight entity linker class.""" | ||
from unittest.mock import Mock, patch | ||
|
||
import pytest | ||
|
||
from pkg_api.core.annotation import Concept, PKGData, Triple, TripleElement | ||
from pkg_api.core.pkg_types import URI | ||
from pkg_api.nl_to_pkg.entity_linking.spotlight_entity_linker import ( | ||
SpotlightEntityLinker, | ||
) | ||
|
||
|
||
@pytest.fixture
def sample_pkg_data() -> PKGData:
    """Builds PKG data with a statement and a not-yet-linked SPO triple."""
    subject = TripleElement("Test Subject")
    predicate = TripleElement("Test Predicate")
    obj = TripleElement("Test Object")
    return PKGData("Test statement", Triple(subject, predicate, obj))
|
||
|
||
@pytest.fixture
def linker() -> SpotlightEntityLinker:
    """Provides a linker constructed with its default config path."""
    spotlight_linker = SpotlightEntityLinker()
    return spotlight_linker
|
||
|
||
def test_spotlight_entity_linker_initialization(
    linker: SpotlightEntityLinker,
) -> None:
    """Checks that the loaded config exposes the sections the linker reads."""
    for section in ("url", "params", "headers"):
        assert section in linker._config
|
||
|
||
@patch("pkg_api.nl_to_pkg.entity_linking.spotlight_entity_linker.requests.get")
def test_link_annotation_uri(
    mock_get: Mock, sample_pkg_data: PKGData, linker: SpotlightEntityLinker
) -> None:
    """Checks that an object fully covered by one entity becomes a URI."""
    # The single resource's surface form equals the object's reference text.
    resource = {
        "@surfaceForm": "Test Object",
        "@URI": "http://dbpedia.org/resource/Test_Object",
    }
    spotlight_reply = Mock(status_code=200)
    spotlight_reply.json.return_value = {"Resources": [resource]}
    mock_get.return_value = spotlight_reply

    result = linker.link_entities(sample_pkg_data)

    assert result == sample_pkg_data
    linked_triple = result.triple
    assert isinstance(linked_triple, Triple)
    linked_object = linked_triple.object
    assert isinstance(linked_object, TripleElement)
    assert isinstance(linked_object.value, URI)
    assert linked_object.value == "http://dbpedia.org/resource/Test_Object"
|
||
|
||
@patch("pkg_api.nl_to_pkg.entity_linking.spotlight_entity_linker.requests.get")
def test_link_annotation_concept(
    mock_get: Mock, sample_pkg_data: PKGData, linker: SpotlightEntityLinker
) -> None:
    """Checks that a partially matched object becomes a Concept."""
    # The surface form covers only part of the object's reference text, so the
    # entity is attached as a related entity instead of replacing the value.
    resource = {
        "@surfaceForm": "Object",
        "@URI": "http://dbpedia.org/resource/Object",
    }
    spotlight_reply = Mock(status_code=200)
    spotlight_reply.json.return_value = {"Resources": [resource]}
    mock_get.return_value = spotlight_reply

    result = linker.link_entities(sample_pkg_data)

    linked_triple = result.triple
    assert isinstance(linked_triple, Triple)
    linked_object = linked_triple.object
    assert isinstance(linked_object, TripleElement)
    assert isinstance(linked_object.value, Concept)
    related = linked_object.value.related_entities
    assert len(related) == 1
    assert related[0] == "http://dbpedia.org/resource/Object"
|
||
|
||
@patch("pkg_api.nl_to_pkg.entity_linking.spotlight_entity_linker.requests.get")
def test_link_entities_no_change(
    mock_get: Mock, sample_pkg_data: PKGData, linker: SpotlightEntityLinker
) -> None:
    """Test the link_entities method when no entities are linked.

    The HTTP call is mocked so the test neither depends on nor contacts the
    public Spotlight service.
    """
    # A successful response without a "Resources" key means nothing was found.
    mock_response = Mock()
    mock_response.status_code = 200
    mock_response.json.return_value = {}
    mock_get.return_value = mock_response

    annotated_pkg_data = linker.link_entities(sample_pkg_data)

    # The data is updated in place and the same instance is handed back.
    assert annotated_pkg_data is sample_pkg_data
    # One request each for the predicate and the object; none for the subject.
    assert mock_get.call_count == 2
    triple = annotated_pkg_data.triple
    for element in (triple.predicate, triple.object):
        assert isinstance(element.value, Concept)
        assert len(element.value.related_entities) == 0