Skip to content

Commit

Permalink
make Searchable properly identifiable, and rename filters to searchab…
Browse files Browse the repository at this point in the history
…le in ConceptsProcessor
  • Loading branch information
awildturtok committed Mar 14, 2023
1 parent 166dac2 commit 3e2e9d0
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
@ToString
@Slf4j
@CPSType(id = "CSV_TEMPLATE", base = SearchIndex.class)
public class FilterTemplate extends IdentifiableImpl<SearchIndexId> implements Searchable, SearchIndex {
public class FilterTemplate extends IdentifiableImpl<SearchIndexId> implements Searchable<SearchIndexId>, SearchIndex {

private static final long serialVersionUID = 1L;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
@Setter
@NoArgsConstructor
@Slf4j
public class Column extends Labeled<ColumnId> implements NamespacedIdentifiable<ColumnId>, Searchable {
public class Column extends Labeled<ColumnId> implements NamespacedIdentifiable<ColumnId>, Searchable<ColumnId> {

public static final int UNKNOWN_POSITION = -1;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import com.bakdata.conquery.io.storage.NamespaceStorage;
import com.bakdata.conquery.models.config.IndexConfig;
import com.bakdata.conquery.models.datasets.Dataset;
import com.bakdata.conquery.models.identifiable.Identifiable;
import com.bakdata.conquery.models.identifiable.ids.Id;
import com.bakdata.conquery.models.query.FilterSearch;
import com.bakdata.conquery.util.search.TrieSearch;
Expand All @@ -18,11 +19,7 @@
* <p>
* Searchable classes describe how a search should be constructed, and provide the values with getSearchValues.
*/
public interface Searchable {


//TODO instead extend Identifiable properly
public Id<?> getId();
public interface Searchable<ID extends Id<? extends Identifiable<? extends ID>>> extends Identifiable<ID> {

public Dataset getDataset();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import com.bakdata.conquery.models.datasets.concepts.filters.SingleColumnFilter;
import com.bakdata.conquery.models.events.MajorTypeId;
import com.bakdata.conquery.models.exceptions.ConceptConfigurationException;
import com.bakdata.conquery.models.identifiable.ids.specific.FilterId;
import com.bakdata.conquery.models.query.FilterSearch;
import com.bakdata.conquery.util.search.TrieSearch;
import com.fasterxml.jackson.annotation.JsonIgnore;
Expand All @@ -35,7 +36,7 @@
@NoArgsConstructor
@Slf4j
@JsonIgnoreProperties({"searchType"})
public abstract class SelectFilter<FE_TYPE> extends SingleColumnFilter<FE_TYPE> implements Searchable {
public abstract class SelectFilter<FE_TYPE> extends SingleColumnFilter<FE_TYPE> implements Searchable<FilterId> {

/**
* user given mapping from the values in the CSVs to shown labels
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@
import com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept;
import com.bakdata.conquery.models.exceptions.ConceptConfigurationException;
import com.bakdata.conquery.models.exceptions.ValidatorHelper;
import com.bakdata.conquery.models.identifiable.Identifiable;
import com.bakdata.conquery.models.identifiable.ids.Id;
import com.bakdata.conquery.models.identifiable.ids.specific.ConceptElementId;
import com.bakdata.conquery.models.identifiable.ids.specific.ConnectorId;
import com.bakdata.conquery.models.identifiable.ids.specific.DatasetId;
Expand Down Expand Up @@ -75,30 +73,30 @@ public FrontendList load(Concept<?> concept) {
/**
* Cache of all search results on SelectFilters.
*/
private final LoadingCache<Pair<Searchable, String>, List<FrontendValue>>
private final LoadingCache<Pair<Searchable<?>, String>, List<FrontendValue>>
searchResults =
CacheBuilder.newBuilder().softValues().build(new CacheLoader<>() {

@Override
public List<FrontendValue> load(Pair<Searchable, String> filterAndSearch) {
public List<FrontendValue> load(Pair<Searchable<?>, String> filterAndSearch) {
final String searchTerm = filterAndSearch.getValue();
final Searchable filter = filterAndSearch.getKey();
final Searchable<?> searchable = filterAndSearch.getKey();

log.trace("Calculating a new search cache for the term \"{}\" on filter[{}]", searchTerm, filter.getId());
log.trace("Calculating a new search cache for the term \"{}\" on Searchable[{}]", searchTerm, searchable.getId());

return autocompleteTextFilter(filter, searchTerm);
return autocompleteTextFilter(searchable, searchTerm);
}

});
/**
* Cache of raw listing of values on a filter.
* We use Cursor here to reduce strain on memory and increase response time.
*/
private final LoadingCache<Searchable, CursorAndLength> listResults = CacheBuilder.newBuilder().softValues().build(new CacheLoader<>() {
private final LoadingCache<Searchable<?>, CursorAndLength> listResults = CacheBuilder.newBuilder().softValues().build(new CacheLoader<>() {
@Override
public CursorAndLength load(Searchable filter) {
log.debug("Creating cursor for `{}`", filter.getId());
return new CursorAndLength(listAllValues(filter), countAllValues(filter));
public CursorAndLength load(Searchable<?> searchable) {
log.debug("Creating cursor for `{}`", searchable.getId());
return new CursorAndLength(listAllValues(searchable), countAllValues(searchable));
}

});
Expand Down Expand Up @@ -156,16 +154,16 @@ public FrontendPreviewConfig getEntityPreviewFrontendConfig(Dataset dataset) {
* Search for all search terms at once, with stricter scoring.
* The user will upload a file and expect only well-corresponding resolutions.
*/
public ResolvedConceptsResult resolveFilterValues(Searchable filter, List<String> searchTerms) {
public ResolvedConceptsResult resolveFilterValues(Searchable<?> searchable, List<String> searchTerms) {

// search in the full text engine
final Set<String> openSearchTerms = new HashSet<>(searchTerms);

final Namespace namespace = namespaces.get(filter.getDataset().getId());
final Namespace namespace = namespaces.get(searchable.getDataset().getId());

final List<FrontendValue> out = new ArrayList<>();

for (TrieSearch<FrontendValue> search : namespace.getFilterSearch().getSearchesFor(filter)) {
for (TrieSearch<FrontendValue> search : namespace.getFilterSearch().getSearchesFor(searchable)) {
for (final Iterator<String> iterator = openSearchTerms.iterator(); iterator.hasNext(); ) {

final String searchTerm = iterator.next();
Expand All @@ -180,21 +178,20 @@ public ResolvedConceptsResult resolveFilterValues(Searchable filter, List<String
}
}

final ConnectorId connectorId = filter instanceof Filter asFilter ? asFilter.getConnector().getId() : null;
final Id<?> id = ((Identifiable<?>) filter).getId();
// Not all Searchables are children of Connectors.
final ConnectorId connectorId = searchable instanceof Filter asFilter ? asFilter.getConnector().getId() : null;


return new ResolvedConceptsResult(null, new ResolvedFilterResult(connectorId, id.toString(), out), openSearchTerms);
return new ResolvedConceptsResult(null, new ResolvedFilterResult(connectorId, searchable.getId().toString(), out), openSearchTerms);
}

public <T extends Searchable & Identifiable<?>> AutoCompleteResult autocompleteTextFilter(T filter, Optional<String> maybeText, OptionalInt pageNumberOpt, OptionalInt itemsPerPageOpt) {
public AutoCompleteResult autocompleteTextFilter(Searchable<?> searchable, Optional<String> maybeText, OptionalInt pageNumberOpt, OptionalInt itemsPerPageOpt) {
final int pageNumber = pageNumberOpt.orElse(0);
final int itemsPerPage = itemsPerPageOpt.orElse(50);

Preconditions.checkArgument(pageNumber >= 0, "Page number must be 0 or a positive integer.");
Preconditions.checkArgument(itemsPerPage > 1, "Must at least have one item per page.");

log.trace("Searching for for `{}` in `{}`. (Page = {}, Items = {})", maybeText, filter.getId(), pageNumber, itemsPerPage);
log.trace("Searching for for `{}` in `{}`. (Page = {}, Items = {})", maybeText, searchable.getId(), pageNumber, itemsPerPage);

final int startIncl = itemsPerPage * pageNumber;
final int endExcl = startIncl + itemsPerPage;
Expand All @@ -203,13 +200,13 @@ public <T extends Searchable & Identifiable<?>> AutoCompleteResult autocompleteT

// If we have none or a blank query string we list all values.
if (maybeText.isEmpty() || maybeText.get().isBlank()) {
final CursorAndLength cursorAndLength = listResults.get(filter);
final CursorAndLength cursorAndLength = listResults.get(searchable);
final Cursor<FrontendValue> cursor = cursorAndLength.values();

return new AutoCompleteResult(cursor.get(startIncl, endExcl), cursorAndLength.size());
}

final List<FrontendValue> fullResult = searchResults.get(Pair.of(filter, maybeText.get()));
final List<FrontendValue> fullResult = searchResults.get(Pair.of(searchable, maybeText.get()));

if (startIncl >= fullResult.size()) {
return new AutoCompleteResult(Collections.emptyList(), fullResult.size());
Expand All @@ -223,8 +220,8 @@ public <T extends Searchable & Identifiable<?>> AutoCompleteResult autocompleteT
}
}

private Cursor<FrontendValue> listAllValues(Searchable filter) {
final Namespace namespace = namespaces.get(filter.getDataset().getId());
private Cursor<FrontendValue> listAllValues(Searchable<?> searchable) {
final Namespace namespace = namespaces.get(searchable.getDataset().getId());
/*
Don't worry, I am as confused as you are!
For some reason, flatMapped streams in conjunction with distinct will be evaluated fully before further operations.
Expand All @@ -235,35 +232,35 @@ private Cursor<FrontendValue> listAllValues(Searchable filter) {

final Iterator<FrontendValue>
iterators =
Iterators.concat(Iterators.transform(namespace.getFilterSearch().getSearchesFor(filter).iterator(), TrieSearch::iterator));
Iterators.concat(Iterators.transform(namespace.getFilterSearch().getSearchesFor(searchable).iterator(), TrieSearch::iterator));

// Use Set to accomplish distinct values
final Set<FrontendValue> seen = new HashSet<>();

return new Cursor<>(Iterators.filter(iterators, seen::add));
}

private long countAllValues(Searchable filter) {
final Namespace namespace = namespaces.get(filter.getDataset().getId());
private long countAllValues(Searchable<?> searchable) {
final Namespace namespace = namespaces.get(searchable.getDataset().getId());


return namespace.getFilterSearch().getTotal(filter);
return namespace.getFilterSearch().getTotal(searchable);
}

/**
* Autocompletion for search terms. For values of {@link SelectFilter}&lt;?&gt;.
* Used by the search cache to load missing items.
*/
private List<FrontendValue> autocompleteTextFilter(Searchable filter, String text) {
final Namespace namespace = namespaces.get(filter.getDataset().getId());
private List<FrontendValue> autocompleteTextFilter(Searchable<?> searchable, String text) {
final Namespace namespace = namespaces.get(searchable.getDataset().getId());

// Note that FEValues is equals/hashcode only on value:
// The different sources might contain duplicate FEValue#values which we want to avoid as
// they are already sorted in terms of information weight by getSearchesFor

// Also note: currently we are still issuing large search requests, but much smaller allocations at once, and querying only when the past is not sufficient
return namespace.getFilterSearch()
.getSearchesFor(filter)
.getSearchesFor(searchable)
.stream()
.map(search -> createSourceSearchResult(search, Collections.singletonList(text), OptionalInt.empty()))
.flatMap(Collection::stream)
Expand Down

0 comments on commit 3e2e9d0

Please sign in to comment.