Skip to content

Commit

Permalink
Implementing a number of changes to the tables model to support OOP an…
Browse files Browse the repository at this point in the history
…d an easier to use class
  • Loading branch information
BryanFauble committed Jan 7, 2025
1 parent 8bd5e19 commit 9b9ae83
Show file tree
Hide file tree
Showing 7 changed files with 897 additions and 215 deletions.
30 changes: 30 additions & 0 deletions docs/reference/oop/table_refactor.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
Contained within this file are proposed changes for interacting with Tables via this
client.



::: synapseclient.models.Table
options:
inherited_members: true
members:
- get
- store
- delete
- query
- store_rows
- delete_rows
- delete_column
- add_column
- reorder_column
- set_columns
- get_permissions
- get_acl
- set_permissions

::: synapseclient.models.FacetType
::: synapseclient.models.ColumnType
::: synapseclient.models.table.JsonSubColumn

::: synapseclient.models.Column
options:
members:
7 changes: 5 additions & 2 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,9 @@ nav:
- Core: reference/core.md
- REST Apis: reference/rest_apis.md
- Experimental:
- Object-Orientated Models: reference/oop/models.md
- Async Object-Orientated Models: reference/oop/models_async.md
- Object-Orientated Models: reference/oop/models.md
- Async Object-Orientated Models: reference/oop/models_async.md
- Table refactor: reference/oop/table_refactor.md
- Further Reading:
- Home: explanations/home.md
- Domain Models of Synapse: explanations/domain_models_of_synapse.md
Expand Down Expand Up @@ -120,6 +121,8 @@ theme:
- toc.follow
- navigation.tabs
- navigation.tabs.sticky
- navigation.instant
- navigation.instant.progress

extra_css:
- css/custom.css
Expand Down
3 changes: 3 additions & 0 deletions synapseclient/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
put_file_multipart_add,
put_file_multipart_complete,
)
from .table_services import get_columns

__all__ = [
# annotations
Expand Down Expand Up @@ -78,4 +79,6 @@
"get_transfer_config",
# entity_factory
"get_from_entity_factory",
# columns
"get_columns",
]
93 changes: 93 additions & 0 deletions synapseclient/api/table_services.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""
The purpose of this module is to provide any functions that are needed to interact with
columns in the Synapse REST API.
"""

from typing import TYPE_CHECKING, List, Optional

if TYPE_CHECKING:
from synapseclient import Synapse
from synapseclient.models import Column


async def get_columns(
    table_id: str,
    *,
    synapse_client: Optional["Synapse"] = None,
) -> List["Column"]:
    """Get the columns of a Table from Synapse.

    Issues a GET request to `/entity/{table_id}/column` and converts every entry
    found under the `results` key of the response into a `Column` model.

    Arguments:
        table_id: The ID of the Table to get the columns for.
        synapse_client: If not passed in and caching was not disabled by
            `Synapse.allow_client_caching(False)` this will use the last created
            instance from the Synapse class constructor.

    Returns:
        A list with one `Column` per entry in the response's `results`. The list
            is empty when the response has no `results` key.
    """
    # Imported locally; at module level these names are only imported under
    # TYPE_CHECKING, so they are not available at runtime otherwise.
    from synapseclient import Synapse
    from synapseclient.models import Column

    result = await Synapse.get_client(synapse_client=synapse_client).rest_get_async(
        f"/entity/{table_id}/column",
    )

    return [
        Column().fill_from_dict(synapse_column=column)
        for column in result.get("results", [])
    ]


# TODO: Finish this function, this was copied out of the Synapse class and will be used to implement this API: https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/table/TableSchemaChangeRequest.html
# async def table_updates(
# self,
# table_id: str,
# changes: List[dict] = [],
# create_snapshot: bool = False,
# comment: str = None,
# label: str = None,
# activity: str = None,
# wait: bool = True,
# ) -> dict:
# """
# Creates view updates and snapshots

# Arguments:
# table: The schema of the EntityView or its ID.
# changes: Array of Table changes
# create_snapshot: Create snapshot
# comment: Optional snapshot comment.
# label: Optional snapshot label.
# activity: Optional activity ID applied to snapshot version.
# wait: True to wait for async table update to complete

# Returns:
# A Snapshot Response
# """
# snapshot_options = {
# "snapshotComment": comment,
# "snapshotLabel": label,
# "snapshotActivityId": activity,
# }
# new_snapshot = {
# key: value for key, value in snapshot_options.items() if value is not None
# }
# table_update_body = {
# "changes": changes,
# "createSnapshot": create_snapshot,
# "snapshotOptions": new_snapshot,
# }

# uri = "/entity/{}/table/transaction/async".format(id_of(table))

# if wait:
# result = self._waitForAsync(uri, table_update_body)

# else:
# result = self.restPOST(
# "{}/start".format(uri), body=json.dumps(table_update_body)
# )

# return result
8 changes: 4 additions & 4 deletions synapseclient/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from synapseclient.core.logging_setup import DEFAULT_LOGGER_NAME

if TYPE_CHECKING:
from synapseclient.models import File, Folder, Project
from synapseclient.models import File, Folder, Project, Table

R = TypeVar("R")

Expand Down Expand Up @@ -1376,10 +1376,10 @@ def delete_none_keys(incoming_object: typing.Dict) -> None:


def merge_dataclass_entities(
source: typing.Union["Project", "Folder", "File"],
destination: typing.Union["Project", "Folder", "File"],
source: typing.Union["Project", "Folder", "File", "Table"],
destination: typing.Union["Project", "Folder", "File", "Table"],
fields_to_ignore: typing.List[str] = None,
) -> typing.Union["Project", "Folder", "File"]:
) -> typing.Union["Project", "Folder", "File", "Table"]:
"""
Utility function to merge two dataclass entities together. This is used when we are
upserting an entity from the Synapse service with the requested changes.
Expand Down
86 changes: 44 additions & 42 deletions synapseclient/models/protocols/table_protocol.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,15 @@
"""Protocol for the specific methods of this class that have synchronous counterparts
generated at runtime."""

from typing import TYPE_CHECKING, List, Optional, Protocol, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Protocol, Union

import pandas as pd
from typing_extensions import Self

from synapseclient import Synapse
from synapseclient.table import CsvFileTable as Synapse_CsvFileTable
from synapseclient.table import TableQueryResult as Synaspe_TableQueryResult

if TYPE_CHECKING:
from synapseclient.models.table import (
CsvResultFormat,
Row,
RowsetResultFormat,
Table,
)
from synapseclient.models.table import Table


class ColumnSynchronousProtocol(Protocol):
Expand All @@ -40,55 +34,64 @@ class TableSynchronousProtocol(Protocol):
have a synchronous counterpart that may also be called.
"""

def store_rows_from_csv(
self, csv_path: str, *, synapse_client: Optional[Synapse] = None
) -> str:
"""Takes in a path to a CSV and stores the rows to Synapse.
def store(
self, dry_run: bool = False, *, synapse_client: Optional[Synapse] = None
) -> Self:
"""Store non-row information about a table including the columns and annotations.
Arguments:
csv_path: The path to the CSV to store.
dry_run: If True, will not actually store the table but will log to
the console what would have been stored.
synapse_client: If not passed in and caching was not disabled by
`Synapse.allow_client_caching(False)` this will use the last created
instance from the Synapse class constructor.
Returns:
The path to the CSV that was stored.
The Table instance stored in synapse.
"""
return ""
return self

def delete_rows(
self, rows: List["Row"], *, synapse_client: Optional[Synapse] = None
def store_rows(
self,
values: Union[str, List[Dict[str, Any]], Dict[str, Any], pd.DataFrame],
*,
synapse_client: Optional[Synapse] = None,
) -> None:
"""Delete rows from a table.
"""
Takes in values from the sources defined below and stores the rows to Synapse.
Arguments:
rows: The rows to delete.
synapse_client: If not passed in and caching was not disabled by
`Synapse.allow_client_caching(False)` this will use the last created
instance from the Synapse class constructor.
Returns:
None
"""
return None
values: Supports storing data from the following sources:
def store_schema(self, *, synapse_client: Optional[Synapse] = None) -> "Table":
"""Store non-row information about a table including the columns and annotations.
- A string holding the path to a CSV file
- A list of lists (or tuples) where each element is a row
- A dictionary where the key is the column name and the value is one or more values. The values will be wrapped into a [Pandas DataFrame](http://pandas.pydata.org/pandas-docs/stable/api.html#dataframe).
- A [Pandas DataFrame](http://pandas.pydata.org/pandas-docs/stable/api.html#dataframe)
Arguments:
synapse_client: If not passed in and caching was not disabled by
`Synapse.allow_client_caching(False)` this will use the last created
instance from the Synapse class constructor.
Returns:
The Table instance stored in synapse.
None
"""
return self
return None

def get(self, *, synapse_client: Optional[Synapse] = None) -> "Table":
def get(
self,
include_columns: bool = False,
include_activity: bool = False,
*,
synapse_client: Optional[Synapse] = None,
) -> "Table":
"""Get the metadata about the table from synapse.
Arguments:
include_columns: If True, will include fully filled column objects in the
`.columns` attribute. When False, the columns will not be filled in.
include_activity: If True the activity will be included in the table
if it exists.
synapse_client: If not passed in and caching was not disabled by
`Synapse.allow_client_caching(False)` this will use the last created
instance from the Synapse class constructor.
Expand All @@ -98,7 +101,7 @@ def get(self, *, synapse_client: Optional[Synapse] = None) -> "Table":
"""
return self

def delete(self, *, synapse_client: Optional[Synapse] = None) -> None:
def delete_async(self, *, synapse_client: Optional[Synapse] = None) -> None:
"""Delete the table from synapse.
Arguments:
Expand All @@ -111,15 +114,14 @@ def delete(self, *, synapse_client: Optional[Synapse] = None) -> None:
"""
return None

@classmethod
@staticmethod
def query(
cls,
query: str,
result_format: Union["CsvResultFormat", "RowsetResultFormat"] = None,
*,
synapse_client: Optional[Synapse] = None,
) -> Union[Synapse_CsvFileTable, Synaspe_TableQueryResult, None]:
"""Query for data on a table stored in Synapse.
) -> pd.DataFrame:
"""Query for data on a table stored in Synapse. The results will always be
returned as a Pandas DataFrame.
Arguments:
query: The query to run.
Expand All @@ -129,6 +131,6 @@ def query(
instance from the Synapse class constructor.
Returns:
The results of the query.
The results of the query as a Pandas DataFrame.
"""
return None
return pd.DataFrame()
Loading

0 comments on commit 9b9ae83

Please sign in to comment.