-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathQuery.cs
419 lines (341 loc) · 10.9 KB
/
Query.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace Sylphe.IR
{
public abstract class Query
{
public bool Negated { get; private set; }
public abstract Query Rewrite(bool negate = false);
public abstract DocSetIterator Search(IInvertedIndex index, bool ignoreNegation = false);
public static Query Parse(string text)
{
// Expression: Disjunction
// Disjunction: Conjunction { , Conjunction }
// Conjunction: Factor { . Factor }
// Factor: [-] ( Literal | ( Expression ) )
// Literal: /[A-Za-z][A-Za-z0-9:]*/
if (string.IsNullOrWhiteSpace(text)) return new NoDocsQuery();
var index = 0;
var query = ParseDisjunction(text, ref index);
SkipWhite(text, ref index);
if (index < text.Length)
throw SyntaxError("Unexpected input after expression", index);
return query;
}
#region Parser
// ReSharper disable InconsistentNaming
private const char OR = ',';
private const char AND = '.';
private const char NOT = '-';
private const char LPAREN = '(';
private const char RPAREN = ')';
// ReSharper restore InconsistentNaming
private static Query ParseDisjunction(string text, ref int index)
{
var first = ParseConjunction(text, ref index);
SkipWhite(text, ref index);
CompoundQuery compound = null;
while (index < text.Length && text[index] == OR)
{
index += 1;
if (compound == null)
compound = CompoundQuery.Or(first);
var query = ParseConjunction(text, ref index);
SkipWhite(text, ref index);
compound.AddClause(query);
}
return compound ?? first;
}
private static Query ParseConjunction(string text, ref int index)
{
var first = ParseFactor(text, ref index);
SkipWhite(text, ref index);
CompoundQuery compound = null;
while (index < text.Length && text[index] == AND)
{
index += 1;
if (compound == null)
compound = CompoundQuery.And(first);
var query = ParseFactor(text, ref index);
SkipWhite(text, ref index);
compound.AddClause(query);
}
return compound ?? first;
}
private static Query ParseFactor(string text, ref int index)
{
var negated = false;
SkipWhite(text, ref index);
if (index < text.Length && text[index] == NOT)
{
negated = true;
index += 1;
SkipWhite(text, ref index);
}
if (index < text.Length && text[index] == LPAREN)
{
index += 1; // skip the paren
var query = ParseDisjunction(text, ref index);
query.Negated = negated;
SkipWhite(text, ref index);
if (index >= text.Length)
throw SyntaxError($"Expect '{RPAREN}' but got end-of-input", index);
if (text[index] != RPAREN)
throw SyntaxError($"Expect '{RPAREN}' but got '{text[index]}'", index);
index += 1; // skip the paren
return query;
}
return ParseLiteral(text, ref index, negated);
}
private static Query ParseLiteral(string text, ref int index, bool negate = false)
{
SkipWhite(text, ref index);
if (index >= text.Length)
throw SyntaxError("Expect a literal but got end-of-input", index);
if (!char.IsLetter(text, index))
throw SyntaxError($"Expect a literal but got '{text[index]}'", index);
var anchor = index++;
while (index < text.Length && (char.IsLetterOrDigit(text, index) || text[index] == ':')) index += 1;
var literal = text.Substring(anchor, index - anchor);
return new LiteralQuery(literal, negate);
}
private static void SkipWhite(string text, ref int index)
{
while (index < text.Length && char.IsWhiteSpace(text, index)) index += 1;
}
private static FormatException SyntaxError(string message, int position)
{
return new FormatException($"{message} (near position {position})");
}
#endregion
private class NoDocsQuery : Query
{
public override Query Rewrite(bool negate = false)
{
return negate ? (Query) new AllDocsQuery() : this;
}
public override DocSetIterator Search(IInvertedIndex index, bool ignoreNegation = false)
{
if (ignoreNegation) return new EmptyIterator();
return Negated ? index.All() : new EmptyIterator();
}
public override string ToString()
{
return "(NoDocs)";
}
}
private class AllDocsQuery : Query
{
public override Query Rewrite(bool negate = false)
{
return negate ? (Query) new NoDocsQuery() : this;
}
public override DocSetIterator Search(IInvertedIndex index, bool ignoreNegation = false)
{
if (ignoreNegation) return index.All();
return Negated ? new EmptyIterator() : index.All();
}
public override string ToString()
{
return "(AllDocs)";
}
}
private class LiteralQuery : Query
{
public string Literal { get; }
public LiteralQuery(string literal, bool negated = false)
{
Literal = literal ?? throw new ArgumentNullException(nameof(literal));
Negated = negated;
}
public override Query Rewrite(bool negate = false)
{
return negate ? new LiteralQuery(Literal, !Negated) : this;
}
public override DocSetIterator Search(IInvertedIndex index, bool ignoreNegation = false)
{
var iterator = index.Get(Literal);
if (Negated && !ignoreNegation)
{
if (!index.AllowAll)
throw new NotSupportedException("Negative literal outside conjunction not supported");
iterator = new ButNotIterator(index.All(), iterator);
}
return iterator;
}
public override string ToString()
{
return Negated ? $"(NOT {Literal})" : $"{Literal}";
}
}
private enum BooleanBinOp
{
And,
Or
}
public class CompoundQuery : Query
{
private BooleanBinOp Junctor { get; }
private List<Query> Components { get; }
public static CompoundQuery And(Query component, bool negated = false)
{
return new CompoundQuery(BooleanBinOp.And, component, negated);
}
public static CompoundQuery Or(Query component, bool negated = false)
{
return new CompoundQuery(BooleanBinOp.Or, component, negated);
}
private CompoundQuery(BooleanBinOp junctor, Query component, bool negated = false)
{
if (component == null)
throw new ArgumentNullException(nameof(component));
Junctor = junctor;
Components = new List<Query>();
Negated = negated;
Components.Add(component);
}
private CompoundQuery(BooleanBinOp junctor, IEnumerable<Query> components, bool negated = false)
{
Junctor = junctor;
Components = components?.ToList() ?? throw new ArgumentNullException(nameof(components));
Negated = negated;
}
public void AddClause(Query query)
{
if (query == null)
throw new ArgumentNullException();
Components.Add(query);
}
public override Query Rewrite(bool negate = false)
{
if (Negated) negate = !negate;
if (Components.Count == 1)
{
return Components[0].Rewrite(negate);
}
var components = new List<Query>();
var junctor = negate ? Negate(Junctor) : Junctor;
foreach (var component in Components)
{
var rewritten = component.Rewrite(negate);
if (rewritten is CompoundQuery compound && compound.Junctor == junctor)
{
components.AddRange(compound.Components); // splice in
}
else if (rewritten is AllDocsQuery)
{
// X.T = X (omit) and X,T = T (return early)
if (junctor == BooleanBinOp.Or)
{
return negate ? (Query) new NoDocsQuery() : new AllDocsQuery();
}
}
else if (rewritten is NoDocsQuery)
{
// X,F = X (omit) and X.F = F (return early)
if (junctor == BooleanBinOp.And)
{
return negate ? (Query) new AllDocsQuery() : new NoDocsQuery();
}
}
else
{
components.Add(rewritten);
}
}
// negation has been pushed down
if (components.Count == 0)
{
if (junctor == BooleanBinOp.And)
return new AllDocsQuery();
if (junctor == BooleanBinOp.Or)
return new NoDocsQuery();
throw BadJunctor(junctor);
}
if (components.Count == 1) return components[0];
return new CompoundQuery(junctor, components);
// 1. if one component: extract, apply negated, rewrite, return
// 2. if negated: swap and/or, negate each component
// 3. rewrite each component
// 4. splice in sub components with same junctor (associativity)
// 5. remove neutral elements (X.T=X and X,F=X)
// 6. simplify idempotency (X.X=X and X,X=X) TODO
// 7. if no components remain: And => Verum, Or => Falsum
}
public override DocSetIterator Search(IInvertedIndex index, bool ignoreNegation = false)
{
switch (Junctor)
{
case BooleanBinOp.And:
return CreateConjunctionIterator(index);
case BooleanBinOp.Or:
return CreateDisjunctionIterator(index);
default:
throw new NotSupportedException($"Unknown junctor '{Junctor}'");
}
}
private DocSetIterator CreateConjunctionIterator(IInvertedIndex index)
{
var nrequired = Components.Count(c => !c.Negated);
if (nrequired == 0)
{
if (!index.AllowAll)
throw new NotSupportedException("All negative conjunction is not supported");
return new ButNotIterator(index.All(),
new DisjunctionIterator(Components.Where(c => c.Negated).Select(c => c.Search(index, true))
.ToArray()));
}
var required = nrequired == 1
? Components.Single(c => !c.Negated).Search(index)
: new ConjunctionIterator(Components.Where(c => !c.Negated).Select(c => c.Search(index)).ToArray());
var nexcluded = Components.Count(c => c.Negated);
if (nexcluded == 0)
return required;
var excluded = nexcluded == 1
? Components.Single(c => c.Negated).Search(index, true)
: new DisjunctionIterator(Components.Where(c => c.Negated).Select(c => c.Search(index)).ToArray());
return new ButNotIterator(required, excluded);
}
private DocSetIterator CreateDisjunctionIterator(IInvertedIndex index)
{
//if (Components.Count(c => c.Negated) > 0)
// throw new NotSupportedException("Disjunction with negative terms is not supported");
var iterators = Components.Select(c => c.Search(index)).ToArray();
return new DisjunctionIterator(iterators);
}
private static Exception BadJunctor(BooleanBinOp junctor)
{
return new InvalidOperationException($"Junctor '{junctor}' not supported");
}
private static BooleanBinOp Negate(BooleanBinOp junctor)
{
if (junctor == BooleanBinOp.And)
return BooleanBinOp.Or;
if (junctor == BooleanBinOp.Or)
return BooleanBinOp.And;
throw new InvalidOperationException($"Cannot negate BinOp '{junctor}'");
}
private string Format(bool negated, string junctor)
{
const string sep = " ";
var sb = new StringBuilder();
if (negated) sb.Append("(NOT ");
sb.Append("(").Append(junctor ?? "*");
foreach (var query in Components)
{
sb.Append(sep);
sb.Append(query);
}
sb.Append(")");
if (negated) sb.Append(")");
return sb.ToString();
}
public override string ToString()
{
return Format(Negated, Junctor == BooleanBinOp.And ? "AND" : "OR");
}
}
}
}