Skip to content

Commit

Permalink
Merge pull request #2968 from ingef/feature/search-on-references
Browse files Browse the repository at this point in the history
Search on references
  • Loading branch information
awildturtok authored Mar 15, 2023
2 parents 5873b7f + 2828f6b commit 6a8414c
Show file tree
Hide file tree
Showing 11 changed files with 120 additions and 148 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
@ToString
@Slf4j
@CPSType(id = "CSV_TEMPLATE", base = SearchIndex.class)
public class FilterTemplate extends IdentifiableImpl<SearchIndexId> implements Searchable, SearchIndex {
public class FilterTemplate extends IdentifiableImpl<SearchIndexId> implements Searchable<SearchIndexId>, SearchIndex {

private static final long serialVersionUID = 1L;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
@Setter
@NoArgsConstructor
@Slf4j
public class Column extends Labeled<ColumnId> implements NamespacedIdentifiable<ColumnId>, Searchable {
public class Column extends Labeled<ColumnId> implements NamespacedIdentifiable<ColumnId>, Searchable<ColumnId> {

public static final int UNKNOWN_POSITION = -1;

Expand Down Expand Up @@ -170,7 +170,7 @@ public List<TrieSearch<FrontendValue>> getSearches(IndexConfig config, Namespace
}

@Override
public List<Searchable> getSearchReferences() {
public List<Searchable<?>> getSearchReferences() {
return List.of(this);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
import com.bakdata.conquery.apiv1.frontend.FrontendValue;
import com.bakdata.conquery.io.storage.NamespaceStorage;
import com.bakdata.conquery.models.config.IndexConfig;
import com.bakdata.conquery.models.datasets.Dataset;
import com.bakdata.conquery.models.identifiable.Identifiable;
import com.bakdata.conquery.models.identifiable.ids.Id;
import com.bakdata.conquery.models.query.FilterSearch;
import com.bakdata.conquery.util.search.TrieSearch;
import com.fasterxml.jackson.annotation.JsonIgnore;
Expand All @@ -16,7 +19,10 @@
* <p>
* Searchable classes describe how a search should be constructed, and provide the values with getSearchValues.
*/
public interface Searchable {
public interface Searchable<ID extends Id<? extends Identifiable<? extends ID>>> extends Identifiable<ID> {

public Dataset getDataset();

/**
* All available {@link FrontendValue}s for searching in a {@link TrieSearch}.
*/
Expand All @@ -28,7 +34,7 @@ public interface Searchable {
* @implSpec The order of the returned objects is also used to sort search results from different sources.
*/
@JsonIgnore
default List<Searchable> getSearchReferences() {
default List<Searchable<?>> getSearchReferences() {
//Hopefully the only candidate will be Column
return List.of(this);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import com.bakdata.conquery.models.datasets.concepts.filters.SingleColumnFilter;
import com.bakdata.conquery.models.events.MajorTypeId;
import com.bakdata.conquery.models.exceptions.ConceptConfigurationException;
import com.bakdata.conquery.models.identifiable.ids.specific.FilterId;
import com.bakdata.conquery.models.query.FilterSearch;
import com.bakdata.conquery.util.search.TrieSearch;
import com.fasterxml.jackson.annotation.JsonIgnore;
Expand All @@ -35,7 +36,7 @@
@NoArgsConstructor
@Slf4j
@JsonIgnoreProperties({"searchType"})
public abstract class SelectFilter<FE_TYPE> extends SingleColumnFilter<FE_TYPE> implements Searchable {
public abstract class SelectFilter<FE_TYPE> extends SingleColumnFilter<FE_TYPE> implements Searchable<FilterId> {

/**
* User-given mapping from the values in the CSVs to the labels shown.
Expand Down Expand Up @@ -88,8 +89,8 @@ public boolean isNotUsingTemplateAndLabels() {
private boolean generateSearchSuffixes = true;

@Override
public List<Searchable> getSearchReferences() {
final List<Searchable> out = new ArrayList<>();
public List<Searchable<?>> getSearchReferences() {
final List<Searchable<?>> out = new ArrayList<>();

if (getTemplate() != null) {
out.add(getTemplate());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ public class UpdateFilterSearchJob extends Job {
private final NamespaceStorage storage;

@NonNull
private final Map<Searchable, TrieSearch<FrontendValue>> searchCache;
private final Map<Searchable<?>, TrieSearch<FrontendValue>> searchCache;

@NonNull
private final IndexConfig indexConfig;

@NonNull
private final Object2LongMap<SelectFilter<?>> totals;
private final Object2LongMap<Searchable<?>> totals;

@Override
public void execute() throws Exception {
Expand All @@ -58,7 +58,7 @@ public void execute() throws Exception {
.collect(Collectors.toList());


final Set<Searchable> collectedSearchables =
final Set<Searchable<?>> collectedSearchables =
allSelectFilters.stream()
.map(SelectFilter::getSearchReferences)
.flatMap(Collection::stream)
Expand All @@ -71,12 +71,12 @@ public void execute() throws Exception {
// Most computations are cheap but data-intensive: we fork here to use as many cores as possible.
final ExecutorService service = Executors.newCachedThreadPool();

final Map<Searchable, TrieSearch<FrontendValue>> synchronizedResult = Collections.synchronizedMap(searchCache);
final Map<Searchable<?>, TrieSearch<FrontendValue>> synchronizedResult = Collections.synchronizedMap(searchCache);

log.debug("Found {} searchable Objects.", collectedSearchables.size());


for (Searchable searchable : collectedSearchables) {
for (Searchable<?> searchable : collectedSearchables) {

service.submit(() -> {

Expand Down Expand Up @@ -121,17 +121,18 @@ public void execute() throws Exception {

// Precompute totals, as computing them on demand can be slow.
totals.putAll(
allSelectFilters.parallelStream()
.collect(Collectors.toMap(
Functions.identity(),
filter -> filter.getSearchReferences().stream()
.map(searchCache::get)
.filter(Objects::nonNull) // Failed or disabled searches are null
.flatMap(TrieSearch::stream)
.mapToInt(FrontendValue::hashCode)
.distinct()
.count()
))
synchronizedResult.keySet()
.parallelStream()
.collect(Collectors.toMap(
Functions.identity(),
filter -> filter.getSearchReferences().stream()
.map(searchCache::get)
.filter(Objects::nonNull) // Failed or disabled searches are null
.flatMap(TrieSearch::stream)
.mapToInt(FrontendValue::hashCode)
.distinct()
.count()
))
);


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@ public class FilterSearch {
* In the code below, the keys of this map will usually be called "reference".
*/
@JsonIgnore
private final Map<Searchable, TrieSearch<FrontendValue>> searchCache = new HashMap<>();
private Object2LongMap<SelectFilter<?>> totals = Object2LongMaps.emptyMap();
private final Map<Searchable<?>, TrieSearch<FrontendValue>> searchCache = new HashMap<>();
private Object2LongMap<Searchable<?>> totals = Object2LongMaps.emptyMap();

/**
* From a given {@link FrontendValue} extract all relevant keywords.
*/
public static List<String> extractKeywords(FrontendValue value) {
List<String> keywords = new ArrayList<>(3);
final List<String> keywords = new ArrayList<>(3);

keywords.add(value.getLabel());
keywords.add(value.getValue());
Expand All @@ -62,15 +62,15 @@ public static List<String> extractKeywords(FrontendValue value) {
/**
* For a {@link Searchable}, collect all relevant {@link TrieSearch} instances of its search references.
*/
public List<TrieSearch<FrontendValue>> getSearchesFor(SelectFilter<?> filter) {
return filter.getSearchReferences().stream()
public List<TrieSearch<FrontendValue>> getSearchesFor(Searchable<?> searchable) {
return searchable.getSearchReferences().stream()
.map(searchCache::get)
.filter(Objects::nonNull)
.collect(Collectors.toList());
}

public long getTotal(SelectFilter<?> filter) {
return totals.getOrDefault(filter, 0);
public long getTotal(Searchable<?> searchable) {
return totals.getOrDefault(searchable, 0);
}


Expand Down
Loading

0 comments on commit 6a8414c

Please sign in to comment.