Skip to content

Commit

Permalink
Feature/82-NLtoAPI-Implement-basic-entity-linker
Browse files Browse the repository at this point in the history
* Simplify Triple (Only use Concept instead of Union[str, Concept])
Fixes #87

* [NLtoAPI] Implement basic entity linker
Fixes #82

* Update tests

* Update tests to use Concept

* simplify checks in get_preference

* simplify check for existence of object in triple

* Merge branch 'feature/87-Simplify-Triple-Only-use-Concept-instead-of-Unionstr-Concept' of https://github.com/iai-group/pkg-api into feature/82-NLtoAPI-Implement-basic-entity-linker

* Add tests

* Add TripleElement class for SPO

* Merge branch 'feature/87-Simplify-Triple-Only-use-Concept-instead-of-Unionstr-Concept' of https://github.com/iai-group/pkg-api into feature/82-NLtoAPI-Implement-basic-entity-linker

* Migrate to new dataclass structure

* Add linking for predicate and object

* Address review comments

* Merge branch 'main' of https://github.com/iai-group/pkg-api into feature/87-Simplify-Triple-Only-use-Concept-instead-of-Unionstr-Concept

* fix imports

* Fix pkg inserts with new dataclass structure

* [NLtoAPI] Implement basic entity linker
Fixes #82

* Merge branch 'main' of https://github.com/iai-group/pkg-api into feature/82-NLtoAPI-Implement-basic-entity-linker

* Swap value and reference

* Merge branch 'main' of https://github.com/iai-group/pkg-api into feature/82-NLtoAPI-Implement-basic-entity-linker

* Address review comments

* Address review comments

* format docstrings

* Add file docstrings

* Address review comments
  • Loading branch information
IKostric authored Feb 2, 2024
1 parent e83f330 commit cda83a0
Show file tree
Hide file tree
Showing 8 changed files with 242 additions and 4 deletions.
7 changes: 7 additions & 0 deletions config/entity_linking/dbpedia_spotlight.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
url: "https://api.dbpedia-spotlight.org/en/annotate"
headers:
accept: "application/json"
params:
confidence: 0.5
support: 50
types: null
2 changes: 1 addition & 1 deletion pkg_api/nl_to_pkg/entity_linking/entity_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class EntityLinker(ABC):
"""Entity linker for linking entities to the PKG or available KGs."""

@abstractmethod
def link_annotation_entities(self, pkg_data: PKGData) -> PKGData:
def link_entities(self, pkg_data: PKGData) -> PKGData:
"""Resolves the pkg data annotations if possible.
Args:
Expand Down
93 changes: 93 additions & 0 deletions pkg_api/nl_to_pkg/entity_linking/spotlight_entity_linker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""Contains the DBpedia Spotlight entity linker."""

from typing import Any, Dict, Union

import requests

from pkg_api.core.annotation import Concept, PKGData, TripleElement
from pkg_api.core.pkg_types import URI
from pkg_api.nl_to_pkg.entity_linking.entity_linker import EntityLinker
from pkg_api.util.load_config import load_yaml_config

_DEFAULT_CONFIG_PATH = "config/entity_linking/dbpedia_spotlight.yaml"


class SpotlightEntityLinker(EntityLinker):
    """Entity linker backed by the DBpedia Spotlight annotation API.

    The endpoint URL, request headers and query parameters are read from a
    YAML configuration file. An optional top-level "timeout" entry (seconds)
    overrides the default request timeout.
    """

    # Fallback request timeout in seconds. Without a timeout a stalled
    # server would block the caller indefinitely.
    _DEFAULT_TIMEOUT = 10.0

    def __init__(self, path: str = _DEFAULT_CONFIG_PATH) -> None:
        """Initializes the DBpedia Spotlight entity linker.

        Args:
            path: The path to the config file. Defaults to
              _DEFAULT_CONFIG_PATH.
        """
        self._config = load_yaml_config(path)

    def link_entities(self, pkg_data: PKGData) -> PKGData:
        """Returns the PKG data with linked entities.

        Only the predicate and object of the triple are linked to a public KG,
        as the subject should be retrieved from the PKG. The PKG data is
        updated in place and the same object is returned.

        Args:
            pkg_data: The PKG data to be linked.

        Returns:
            The PKG data with linked entities.
        """
        if pkg_data.triple is None:
            return pkg_data

        for attr in ("predicate", "object"):
            triple_element: TripleElement = getattr(pkg_data.triple, attr)
            if triple_element is not None:
                triple_element.value = self._get_linked_text(
                    triple_element.reference
                )

        return pkg_data

    def _get_linked_text(self, reference: str) -> Union[URI, Concept, str]:
        """Returns the linked object as URI, Concept or literal.

        Args:
            reference: The reference text to be linked.

        Returns:
            A URI if the whole reference is a single linked entity, otherwise
            a Concept carrying the URIs of all entities found in the
            reference (possibly none).
        """
        linked_entities = self._get_linker_response(reference)

        # The linker may hand back an error payload or unexpected JSON;
        # only a mapping with a non-empty "Resources" list carries entities.
        resources = (
            linked_entities.get("Resources")
            if isinstance(linked_entities, dict)
            else None
        )
        if not resources:
            # Return Concept as default as we cannot distinguish between
            # Concept and literal.
            return Concept(reference)

        # If the entire value is a single entity, return the URI.
        if len(resources) == 1 and resources[0]["@surfaceForm"] == reference:
            return URI(resources[0]["@URI"])

        # Otherwise, return a concept with the linked entities.
        value = Concept(reference)
        value.related_entities.extend(entity["@URI"] for entity in resources)
        return value

    def _get_linker_response(self, text: str) -> Dict[str, Any]:
        """Returns the response from the DBpedia Spotlight API.

        Linking is best effort: any failure (network error, non-200 status,
        body that is not valid JSON) yields a dictionary with a single
        "error" key instead of raising.

        Args:
            text: The text to be annotated.

        Returns:
            The parsed JSON response, or {"error": <message>} on failure.
        """
        params = {**self._config["params"], "text": text}
        timeout = self._config.get("timeout", self._DEFAULT_TIMEOUT)
        try:
            response = requests.get(
                self._config["url"],
                headers=self._config["headers"],
                params=params,
                timeout=timeout,
            )
        except requests.RequestException as exc:
            # Connection problems and timeouts are treated like any other
            # failed lookup.
            return {"error": str(exc)}

        if response.status_code != 200:
            return {"error": response.text}

        try:
            return response.json()
        except ValueError:
            # The body was not valid JSON despite the 200 status.
            return {"error": response.text}
2 changes: 1 addition & 1 deletion pkg_api/nl_to_pkg/nl_to_pkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@ def annotate(self, statement: str) -> Tuple[Intent, PKGData]:
A tuple of the intent and the annotated and linked statement.
"""
intent, pkg_data = self._annotator.get_annotations(statement)
linked_pkg_data = self._entity_linker.link_annotation_entities(pkg_data)
linked_pkg_data = self._entity_linker.link_entities(pkg_data)

return intent, linked_pkg_data
31 changes: 31 additions & 0 deletions pkg_api/util/load_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Utility function for loading configuration data from YAML files."""

import os
from typing import Any, Dict

import yaml


def load_yaml_config(path: str) -> Dict[str, Any]:
    """Loads configuration from a YAML file at the given path.

    Args:
        path: The file path to the YAML configuration file.

    Raises:
        FileNotFoundError: If the specified file does not exist.
        ValueError: If the path is not a file.

    Returns:
        A dictionary containing the configuration data. An empty YAML file
        yields an empty dictionary.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"The file at {path} does not exist.")

    if not os.path.isfile(path):
        raise ValueError(f"The path {path} is not a file.")

    # Decode explicitly as UTF-8 so parsing does not depend on the
    # platform's default encoding.
    with open(path, "r", encoding="utf-8") as file:
        config = yaml.safe_load(file)

    # safe_load returns None for an empty document; keep the declared
    # dictionary return type in that case.
    return {} if config is None else config
5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ pytest-cov
Flask>=2.3.3
Flask-RESTful>=0.3.10
Flask-SQLAlchemy>=3.1.1
requests
types-requests
pyyaml
types-pyyaml
ollama
types-PyYAML
rfc3987
2 changes: 1 addition & 1 deletion tests/nl_to_pkg/test_nl_to_pkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def link_annotation_side_effect(*args, **kwargs):
pkg_data.triple.object.value = "Linked Object"
return pkg_data

mock.link_annotation_entities.side_effect = link_annotation_side_effect
mock.link_entities.side_effect = link_annotation_side_effect
return mock


Expand Down
104 changes: 104 additions & 0 deletions tests/nl_to_pkg/test_spotlight_entity_linker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""Tests the Spotlight entity linker class."""
from unittest.mock import Mock, patch

import pytest

from pkg_api.core.annotation import Concept, PKGData, Triple, TripleElement
from pkg_api.core.pkg_types import URI
from pkg_api.nl_to_pkg.entity_linking.spotlight_entity_linker import (
SpotlightEntityLinker,
)


@pytest.fixture
def sample_pkg_data() -> PKGData:
    """Provides PKG data holding a test statement and a complete triple."""
    subject, predicate, obj = (
        TripleElement(text)
        for text in ("Test Subject", "Test Predicate", "Test Object")
    )
    return PKGData("Test statement", Triple(subject, predicate, obj))


@pytest.fixture
def linker() -> SpotlightEntityLinker:
    """Provides a SpotlightEntityLinker created with default arguments."""
    entity_linker = SpotlightEntityLinker()
    return entity_linker


def test_spotlight_entity_linker_initialization(
    linker: SpotlightEntityLinker,
) -> None:
    """Checks that the loaded config contains every expected section."""
    for section in ("url", "params", "headers"):
        assert section in linker._config


@patch("pkg_api.nl_to_pkg.entity_linking.spotlight_entity_linker.requests.get")
def test_link_annotation_uri(
    mock_get: Mock, sample_pkg_data: PKGData, linker: SpotlightEntityLinker
) -> None:
    """Tests that a reference matching a single entity is linked to a URI."""
    expected_uri = "http://dbpedia.org/resource/Test_Object"
    payload = {
        "Resources": [{"@surfaceForm": "Test Object", "@URI": expected_uri}]
    }
    # Every request issued by the linker receives the same canned answer.
    mock_get.return_value = Mock(
        status_code=200, json=Mock(return_value=payload)
    )

    result = linker.link_entities(sample_pkg_data)

    assert result == sample_pkg_data
    triple = result.triple
    assert isinstance(triple, Triple)
    linked_object = triple.object
    assert isinstance(linked_object, TripleElement)
    assert isinstance(linked_object.value, URI)
    assert linked_object.value == expected_uri


@patch("pkg_api.nl_to_pkg.entity_linking.spotlight_entity_linker.requests.get")
def test_link_annotation_concept(
    mock_get: Mock, sample_pkg_data: PKGData, linker: SpotlightEntityLinker
) -> None:
    """Tests that a partially matched reference is linked to a Concept."""
    expected_uri = "http://dbpedia.org/resource/Object"
    payload = {
        "Resources": [{"@surfaceForm": "Object", "@URI": expected_uri}]
    }
    # The surface form covers only part of "Test Object", so no URI is
    # expected for the object itself.
    mock_get.return_value = Mock(
        status_code=200, json=Mock(return_value=payload)
    )

    result = linker.link_entities(sample_pkg_data)

    triple = result.triple
    assert isinstance(triple, Triple)
    linked_object = triple.object
    assert isinstance(linked_object, TripleElement)
    concept = linked_object.value
    assert isinstance(concept, Concept)
    assert len(concept.related_entities) == 1
    assert concept.related_entities[0] == expected_uri


@patch("pkg_api.nl_to_pkg.entity_linking.spotlight_entity_linker.requests.get")
def test_link_entities_no_change(
    mock_get: Mock, sample_pkg_data: PKGData, linker: SpotlightEntityLinker
) -> None:
    """Test the link_entities method when no entities are linked.

    The HTTP call is mocked so the test never reaches the real Spotlight
    service; a failed lookup must fall back to a Concept without related
    entities and leave the reference text untouched.
    """
    mock_get.return_value = Mock(status_code=500, text="Service unavailable")

    annotated_pkg_data = linker.link_entities(sample_pkg_data)

    # The linker updates the given PKG data in place and returns it.
    assert annotated_pkg_data is sample_pkg_data
    assert isinstance(annotated_pkg_data.triple, Triple)
    for element in (
        annotated_pkg_data.triple.predicate,
        annotated_pkg_data.triple.object,
    ):
        assert isinstance(element, TripleElement)
        assert isinstance(element.value, Concept)
        assert len(element.value.related_entities) == 0
    assert annotated_pkg_data.triple.predicate.reference == "Test Predicate"
    assert annotated_pkg_data.triple.object.reference == "Test Object"

0 comments on commit cda83a0

Please sign in to comment.