Skip to content

Commit

Permalink
Propagate minimum competitive score in ReqOptSumScorer. (apache#13026)
Browse files Browse the repository at this point in the history
If the required clause doesn't contribute scores, which typically happens if
the required clause is a `FILTER` clause, then the minimum competitive score
can be propagated directly to the optional clause.
  • Loading branch information
jpountz authored Jan 26, 2024
1 parent 082d318 commit 7d35ae4
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 20 deletions.
5 changes: 5 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,11 @@ Optimizations
clause of a boolean query, this helps save work on other required clauses of
the same boolean query. (Adrien Grand)

* GITHUB#13026: ReqOptSumScorer will now propagate minimum competitive scores
to the optional clause if the required clause doesn't score. In practice,
this will help boolean queries that consist of a mix OF FILTER clauses and
SHOULD clauses. (Adrien Grand)

Bug Fixes
---------------------
* GITHUB#12866: Prevent extra similarity computation for single-level HNSW graphs. (Kaival Parikh)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class ReqOptSumScorer extends Scorer {
private final TwoPhaseIterator twoPhase;

private float minScore = 0;
private float reqMaxScore;
private final float reqMaxScore;
private boolean optIsRequired;

/**
Expand Down Expand Up @@ -295,6 +295,15 @@ public void setMinCompetitiveScore(float minScore) throws IOException {
// Potentially move to a conjunction
if (reqMaxScore < minScore) {
optIsRequired = true;
if (reqMaxScore == 0) {
// If the required clause doesn't contribute scores, we can propagate the minimum
// competitive score to the optional clause. This happens when the required clause is a
// FILTER clause.
// In theory we could generalize this and set minScore - reqMaxScore as a minimum
// competitive score, but it's unlikely to help in practice unless reqMaxScore is much
// smaller than typical scores of the optional clause.
optScorer.setMinCompetitiveScore(minScore);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,15 @@

public class TestReqOptSumScorer extends LuceneTestCase {

public void testBasics() throws IOException {
public void testBasicsMust() throws IOException {
doTestBasics(Occur.MUST);
}

public void testBasicsFilter() throws IOException {
doTestBasics(Occur.FILTER);
}

private void doTestBasics(Occur reqOccur) throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w =
new RandomIndexWriter(
Expand Down Expand Up @@ -75,7 +83,7 @@ public void testBasics() throws IOException {
IndexSearcher searcher = newSearcher(reader);
Query query =
new BooleanQuery.Builder()
.add(new ConstantScoreQuery(new TermQuery(new Term("f", "foo"))), Occur.MUST)
.add(new ConstantScoreQuery(new TermQuery(new Term("f", "foo"))), reqOccur)
.add(new ConstantScoreQuery(new TermQuery(new Term("f", "bar"))), Occur.SHOULD)
.build();
Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1);
Expand All @@ -92,27 +100,35 @@ public void testBasics() throws IOException {
ss.setTopLevelScoringClause();
scorer = ss.get(Long.MAX_VALUE);
scorer.setMinCompetitiveScore(Math.nextDown(1f));
assertEquals(0, scorer.iterator().nextDoc());
if (reqOccur == Occur.MUST) {
assertEquals(0, scorer.iterator().nextDoc());
}
assertEquals(1, scorer.iterator().nextDoc());
assertEquals(2, scorer.iterator().nextDoc());
if (reqOccur == Occur.MUST) {
assertEquals(2, scorer.iterator().nextDoc());
}
assertEquals(4, scorer.iterator().nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

ss = weight.scorerSupplier(context);
ss.setTopLevelScoringClause();
scorer = ss.get(Long.MAX_VALUE);
scorer.setMinCompetitiveScore(Math.nextUp(1f));
assertEquals(1, scorer.iterator().nextDoc());
assertEquals(4, scorer.iterator().nextDoc());
if (reqOccur == Occur.MUST) {
assertEquals(1, scorer.iterator().nextDoc());
assertEquals(4, scorer.iterator().nextDoc());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

ss = weight.scorerSupplier(context);
ss.setTopLevelScoringClause();
scorer = ss.get(Long.MAX_VALUE);
assertEquals(0, scorer.iterator().nextDoc());
scorer.setMinCompetitiveScore(Math.nextUp(1f));
assertEquals(1, scorer.iterator().nextDoc());
assertEquals(4, scorer.iterator().nextDoc());
if (reqOccur == Occur.MUST) {
assertEquals(1, scorer.iterator().nextDoc());
assertEquals(4, scorer.iterator().nextDoc());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

reader.close();
Expand Down Expand Up @@ -235,15 +251,23 @@ public void testMaxScoreSegment() throws IOException {
dir.close();
}

public void testRandomFrequentOpt() throws IOException {
doTestRandom(0.5);
public void testMustRandomFrequentOpt() throws IOException {
doTestRandom(Occur.MUST, 0.5);
}

public void testRandomRareOpt() throws IOException {
doTestRandom(0.05);
public void testMustRandomRareOpt() throws IOException {
doTestRandom(Occur.MUST, 0.05);
}

private void doTestRandom(double optFreq) throws IOException {
public void testFilterRandomFrequentOpt() throws IOException {
doTestRandom(Occur.FILTER, 0.5);
}

public void testFilterRandomRareOpt() throws IOException {
doTestRandom(Occur.FILTER, 0.05);
}

private void doTestRandom(Occur reqOccur, double optFreq) throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
int numDocs = atLeast(1000);
Expand All @@ -269,7 +293,7 @@ private void doTestRandom(double optFreq) throws IOException {
Query mustTerm = new TermQuery(new Term("f", "A"));
Query shouldTerm = new TermQuery(new Term("f", "B"));
Query query =
new BooleanQuery.Builder().add(mustTerm, Occur.MUST).add(shouldTerm, Occur.SHOULD).build();
new BooleanQuery.Builder().add(mustTerm, reqOccur).add(shouldTerm, Occur.SHOULD).build();

TopScoreDocCollectorManager collectorManager =
new TopScoreDocCollectorManager(10, Integer.MAX_VALUE);
Expand All @@ -293,7 +317,7 @@ private void doTestRandom(double optFreq) throws IOException {
{
Query q =
new BooleanQuery.Builder()
.add(new RandomApproximationQuery(mustTerm, random()), Occur.MUST)
.add(new RandomApproximationQuery(mustTerm, random()), reqOccur)
.add(shouldTerm, Occur.SHOULD)
.build();

Expand All @@ -304,7 +328,7 @@ private void doTestRandom(double optFreq) throws IOException {

q =
new BooleanQuery.Builder()
.add(mustTerm, Occur.MUST)
.add(mustTerm, reqOccur)
.add(new RandomApproximationQuery(shouldTerm, random()), Occur.SHOULD)
.build();
collectorManager = new TopScoreDocCollectorManager(10, 1);
Expand All @@ -314,7 +338,7 @@ private void doTestRandom(double optFreq) throws IOException {

q =
new BooleanQuery.Builder()
.add(new RandomApproximationQuery(mustTerm, random()), Occur.MUST)
.add(new RandomApproximationQuery(mustTerm, random()), reqOccur)
.add(new RandomApproximationQuery(shouldTerm, random()), Occur.SHOULD)
.build();
collectorManager = new TopScoreDocCollectorManager(10, 1);
Expand Down Expand Up @@ -348,15 +372,15 @@ private void doTestRandom(double optFreq) throws IOException {
{
query =
new BooleanQuery.Builder()
.add(query, Occur.MUST)
.add(query, reqOccur)
.add(new TermQuery(new Term("f", "C")), Occur.SHOULD)
.build();

CheckHits.checkTopScores(random(), query, searcher);

query =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("f", "C")), Occur.MUST)
.add(new TermQuery(new Term("f", "C")), reqOccur)
.add(query, Occur.SHOULD)
.build();

Expand Down

0 comments on commit 7d35ae4

Please sign in to comment.