Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve complex forms performance #740

Merged
merged 15 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyxform/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self, survey_object, **kwargs):
# get xpaths
# - prep for xpaths.
self._survey.xml()
self._xpaths = self._survey._xpath.values()
self._xpaths = [x.get_xpath() for x in self._survey._xpath.values()]

# see "answers(self):" below for explanation of this dict
self._answers = {}
Expand Down
33 changes: 19 additions & 14 deletions pyxform/survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,9 @@ def is_parent_a_repeat(survey, xpath):
if not parent_xpath:
return False

if survey.any_repeat(parent_xpath):
return parent_xpath
for item in survey.iter_descendants():
if item.type == constants.REPEAT and item.get_xpath() == parent_xpath:
return parent_xpath

return is_parent_a_repeat(survey, parent_xpath)

Expand All @@ -98,7 +99,7 @@ def share_same_repeat_parent(survey, xpath, context_xpath, reference_parent=Fals
parent.

For example,
xpath = /data/repeat_a/group_a/name
xpath = /data/repeat_a/group_a/name
context_xpath = /data/repeat_a/group_b/age

returns (2, '/group_a/name')'
Expand Down Expand Up @@ -995,13 +996,13 @@ def __unicode__(self):
return f"<pyxform.survey.Survey instance at {hex(id(self))}>"

def _setup_xpath_dictionary(self):
self._xpath = {} # pylint: disable=attribute-defined-outside-init
self._xpath: dict[str, SurveyElement | None] = {} # pylint: disable=attribute-defined-outside-init
for element in self.iter_descendants():
if isinstance(element, Question | Section):
if element.name in self._xpath:
self._xpath[element.name] = None
else:
self._xpath[element.name] = element.get_xpath()
self._xpath[element.name] = element

def _var_repl_function(
self, matchobj, context, use_current=False, reference_parent=False
Expand Down Expand Up @@ -1036,7 +1037,7 @@ def _in_secondary_instance_predicate() -> bool:
def _relative_path(ref_name: str, _use_current: bool) -> str | None:
"""Given name in ${name}, return relative xpath to ${name}."""
return_path = None
xpath = self._xpath[ref_name]
xpath = self._xpath[ref_name].get_xpath()
context_xpath = context.get_xpath()
# share same root i.e repeat_a from /data/repeat_a/...
if (
Expand All @@ -1045,13 +1046,17 @@ def _relative_path(ref_name: str, _use_current: bool) -> str | None:
):
# if context xpath and target xpath fall under the same
# repeat use relative xpath referencing.
steps, ref_path = share_same_repeat_parent(
self, xpath, context_xpath, reference_parent
)
if steps:
ref_path = ref_path if ref_path.endswith(ref_name) else f"/{name}"
prefix = " current()/" if _use_current else " "
return_path = prefix + "/".join([".."] * steps) + ref_path + " "
relation = context.has_common_repeat_parent(self._xpath[ref_name])
if relation[0] == "Unrelated":
return return_path
else:
steps, ref_path = share_same_repeat_parent(
self, xpath, context_xpath, reference_parent
)
if steps:
ref_path = ref_path if ref_path.endswith(ref_name) else f"/{name}"
prefix = " current()/" if _use_current else " "
return_path = prefix + "/".join([".."] * steps) + ref_path + " "

return return_path

Expand Down Expand Up @@ -1122,7 +1127,7 @@ def _is_return_relative_path() -> bool:
last_saved_prefix = (
"instance('" + LAST_SAVED_INSTANCE_NAME + "')" if last_saved else ""
)
return " " + last_saved_prefix + self._xpath[name] + " "
return " " + last_saved_prefix + self._xpath[name].get_xpath() + " "

def insert_xpaths(self, text, context, use_current=False, reference_parent=False):
"""
Expand Down
89 changes: 64 additions & 25 deletions pyxform/survey_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import json
import re
from collections import deque
from functools import lru_cache
from typing import TYPE_CHECKING, Any, ClassVar
from collections.abc import Generator
from typing import TYPE_CHECKING, Any, ClassVar, Optional

from pyxform import aliases as alias
from pyxform import constants as const
Expand Down Expand Up @@ -65,19 +65,8 @@ def _overlay(over, under):
return over if over else under


@lru_cache(maxsize=65536)
def any_repeat(survey_element: "SurveyElement", parent_xpath: str) -> bool:
"""Return True if there ia any repeat in `parent_xpath`."""
for item in survey_element.iter_descendants():
if item.get_xpath() == parent_xpath and item.type == const.REPEAT:
return True

return False


SURVEY_ELEMENT_LOCAL_KEYS = {
SURVEY_ELEMENT_LOCAL_KEYS = {
"_survey_element_default_type",
"_survey_element_repeats",
"_survey_element_xpath",
}

Expand Down Expand Up @@ -115,8 +104,7 @@ def __hash__(self):

def __setattr__(self, key, value):
if key == "parent":
# Object graph local changes invalidate these cached facts.
self._survey_element_repeats = {}
# If object graph position changes then invalidate cached.
self._survey_element_xpath = None
self[key] = value

Expand All @@ -125,7 +113,6 @@ def __init__(self, **kwargs):
"question_type_dict", QUESTION_TYPE_DICT
).get(kwargs.get("type"), {})
self._survey_element_xpath: str | None = None
self._survey_element_repeats: dict = {}
for key, default in self.FIELDS.items():
self[key] = kwargs.get(key, default())
self._link_children()
Expand Down Expand Up @@ -176,14 +163,65 @@ def iter_descendants(self):
for e in self.children:
yield from e.iter_descendants()

def any_repeat(self, parent_xpath: str) -> bool:
"""Return True if there ia any repeat in `parent_xpath`."""
current_value = self._dict_get("_survey_element_repeats")
if parent_xpath not in current_value:
new_value = any_repeat(survey_element=self, parent_xpath=parent_xpath)
current_value[parent_xpath] = new_value
return new_value
return current_value[parent_xpath]
def iter_ancestors(self) -> Generator[tuple["SurveyElement", int], None, None]:
"""Get each self.parent with their distance from self (starting at 1)."""
distance = 1
current = self.parent
while current is not None:
yield current, distance
current = current.parent
distance += 1

def has_common_repeat_parent(
self, other: "SurveyElement"
) -> tuple[str, int | None, Optional["SurveyElement"]]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This return type! I'm particularly pained by the first string part. Could it be a boolean? In fact, could the function return only a boolean as I would expect from the function name? I don't think any of the additional info is currently used, right?

Maybe it's in anticipation of using this approach to replace share_same_repeat_parent? My preference would still be to keep this as simple and straightforward as possible for now. At the end of the day, this is a stylistic detail and it's isolated so fine to leave but I did want to let my displeasure be known. 😅

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes right now it's just a short circuit against a descent into share_same_repeat_parent, and it is part of what I had in mind to replace share_same_repeat_parent et al. Not sure how much of the return branches will be needed but if they aren't then I'd pare it back. Pretty sure the step count is required for the relative path stuff.

"""
Get the relation type, steps (generations), and the common ancestor.
"""
# Quick check for immediate relation.
if self.parent is other:
return "Parent (other)", 1, other
elif other.parent is self:
return "Parent (self)", 1, self

# Traversal tracking
self_ancestors = {}
other_ancestors = {}
self_current = self
other_current = other
self_distance = 0
other_distance = 0

# Traverse up both ancestor chains as far as necessary.
while self_current or other_current:
# Step up the self chain
if self_current:
self_distance += 1
self_current = self_current.parent
if self_current:
self_ancestors[self_current] = self_distance
if (
self_current.type == const.REPEAT
and self_current in other_ancestors
):
max_steps = max(self_distance, other_ancestors[self_current])
return "Common Ancestor Repeat", max_steps, self_current

# Step up the other chain
if other_current:
other_distance += 1
other_current = other_current.parent
if other_current:
other_ancestors[other_current] = other_distance
if (
other_current.type == const.REPEAT
and other_current in self_ancestors
):
max_steps = max(other_distance, self_ancestors[other_current])
return "Common Ancestor Repeat", max_steps, other_current

# No common ancestor found.
return "Unrelated", None, None

def get_lineage(self):
"""
Expand Down Expand Up @@ -244,6 +282,7 @@ def to_json_dict(self):
"parent",
"question_type_dictionary",
"_created",
"_xpath",
*SURVEY_ELEMENT_LOCAL_KEYS,
]
# Delete all keys that may cause a "Circular Reference"
Expand Down