diff --git a/qendpoint-backend/pom.xml b/qendpoint-backend/pom.xml index 6b75e3315..156c3185c 100644 --- a/qendpoint-backend/pom.xml +++ b/qendpoint-backend/pom.xml @@ -46,12 +46,23 @@ 5.0.2 3.4.0 1.5.6 + 2.18.1 UTF-8 UTF-8 + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + ch.qos.logback logback-core diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/Bitmap375Big.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/Bitmap375Big.java index 113501c2e..01cb9c217 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/Bitmap375Big.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/Bitmap375Big.java @@ -23,6 +23,8 @@ import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath; import com.the_qa_company.qendpoint.core.util.io.Closer; import com.the_qa_company.qendpoint.core.util.io.IOUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.Closeable; import java.io.IOException; @@ -39,6 +41,24 @@ * @author mario.arias */ public class Bitmap375Big extends Bitmap64Big { + + private static final Logger logger = LoggerFactory.getLogger(Bitmap375Big.class); + + private static final boolean oldBinarySearch; + + static { + // check if the system property "useOldBinarySeearch" is set to true + String useOldBinarySearch = System.getProperty("useOldBinarySearch"); + if (useOldBinarySearch != null && useOldBinarySearch.equalsIgnoreCase("true")) { + oldBinarySearch = true; + logger.debug("Using old binary search"); + } else { + logger.debug("Using new binary search"); + oldBinarySearch = false; + } + + } + /** * create disk version bitmap with in memory super index * @@ -46,7 +66,6 @@ public class Bitmap375Big extends Bitmap64Big { * @param nbits number of bits * @return bitmap */ - public static Bitmap375Big disk(Path location, long nbits) { return disk(location, nbits, false); } @@ -181,6 +200,7 @@ public void updateIndex() { } pop = countSuperBlock + countBlock; indexUpToDate = true; + superBlocks.recalculateEstimatedValueLocation(); } /* @@ -189,8 +209,9 @@ public void updateIndex() { */ @Override public boolean access(long bitIndex) { - if (bitIndex < 0) + if (bitIndex < 0) { throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex); + } long wordIndex = wordIndex(bitIndex); if (wordIndex >= words.length()) { @@ -324,7 +345,7 @@ public long select1(long x) { return 0; } // Search superblock (binary Search) - long superBlockIndex = binarySearch(superBlocks, x); + long superBlockIndex = oldBinarySearch ? binarySearch(superBlocks, x) : binarySearchNew(superBlocks, x); // If there is a run of many zeros, two correlative superblocks may have // the same value, @@ -332,7 +353,6 @@ public long select1(long x) { while (superBlockIndex > 0 && (superBlocks.get(superBlockIndex) >= x)) { superBlockIndex--; - } long countdown = x - superBlocks.get(superBlockIndex); @@ -444,6 +464,7 @@ public static long binarySearch0(LongArray arr, long fromIndex, long toIndex, lo * @param val val * @return index */ + public static long binarySearch(LongArray arr, long val) { long min = 0, max = arr.length(), mid; @@ -460,6 +481,42 @@ public static long binarySearch(LongArray arr, long val) { return min; } + public static long binarySearchNew(LongArray arr, long val) { + + long min = arr.getEstimatedLocationLowerBound(val); + long max = arr.getEstimatedLocationUpperBound(val); + long mid = arr.getEstimatedLocation(val, min, max); + + int i = 0; + while (min + 1 < max) { + // After the first iteration, the value that we are looking for is + // typically very close to the min value. Using linear search for + // the next two iterations improves the chances that we find the + // value faster than with binary search. + if (i == 1 || i == 2) { + long v = arr.get(min + 1); + if (v >= val) { + max = min + 1; + } else { + min = min + 1; + } + } else { + long v = arr.get(mid); + if (v >= val) { + max = mid; + } else { + min = mid; + } + } + mid = (min + max) / 2; + i++; + } + + arr.updateEstimatedValueLocation(val, min); + + return min; + } + public CloseSuppressPath getBlocksPath() { return blocksPath; } @@ -467,4 +524,9 @@ public CloseSuppressPath getBlocksPath() { public CloseSuppressPath getSuperBlocksPath() { return superBlocksPath; } + + @Override + public String toString() { + return "Bitmap375Big{}"; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/BitUtil.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/BitUtil.java index c2e55eaca..535a42cd4 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/BitUtil.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/BitUtil.java @@ -64,9 +64,10 @@ public static void writeLowerBitsByteAligned(long value, long numbits, OutputStr public static int select1(long value, int rank) { int bitpos = 0; while (rank > 0 && value != 0) { - rank -= value & 1; - bitpos++; - value >>>= 1; + int trailingZeros = Long.numberOfTrailingZeros(value); + bitpos += trailingZeros + 1; + value >>>= trailingZeros + 1; + rank--; } return bitpos; } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/AbstractLongArray.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/AbstractLongArray.java new file mode 100644 index 000000000..f22b60b43 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/AbstractLongArray.java @@ -0,0 +1,109 @@ +package com.the_qa_company.qendpoint.core.util.disk; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class AbstractLongArray implements LongArray { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private static final int ESTIMATED_LOCATION_ARRAY_SIZE; + + static { + // get total amount of memory that this java program is allowed to use + long maxMemory = Runtime.getRuntime().maxMemory(); + + if (maxMemory >= 1024 * 1024 * 512) { + ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 128; + } else if (maxMemory >= 1024 * 1024 * 256) { + ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 64; + } else if (maxMemory >= 1024 * 1024 * 128) { + ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 32; + } else { + ESTIMATED_LOCATION_ARRAY_SIZE = 1024 * 16; + } + + } + + private final long[] estimatedLocationMax = new long[ESTIMATED_LOCATION_ARRAY_SIZE]; + private final long[] estimatedLocationMin = new long[ESTIMATED_LOCATION_ARRAY_SIZE]; + private final long[] estimatedLocation = new long[ESTIMATED_LOCATION_ARRAY_SIZE]; + + private int estimatedLocationBucketSize; + + long maxValue = 1; + + @Override + public int getEstimatedLocationArrayBucketSize() { + return estimatedLocationBucketSize; + } + + private void updateEstimatedLocationArrayBucketSize() { + int minBucketSize = (int) (maxValue / ESTIMATED_LOCATION_ARRAY_SIZE); + // we want to have the next power of 2 + int next = 1; + while (next < minBucketSize) { + next <<= 1; + } + this.estimatedLocationBucketSize = next; + } + + @Override + public long[] getEstimatedLocationArray() { + return estimatedLocation; + } + + @Override + public long[] getEstimatedLocationArrayMin() { + return estimatedLocationMin; + } + + @Override + public long[] getEstimatedLocationArrayMax() { + return estimatedLocationMax; + } + + @Override + public void recalculateEstimatedValueLocation() { + updateEstimatedLocationArrayBucketSize(); + int estimatedLocationBucketSize = getEstimatedLocationArrayBucketSize(); + long len = length(); + boolean shouldLog = len > 1024 * 1024 * 2; + if (shouldLog) { + logger.info("Recalculating estimated location array 0%"); + } + + for (int i = 0; i < len; i++) { + long val = get(i); + if (val == 0) { + continue; + } + + int index = (int) (val / estimatedLocationBucketSize + 1); + estimatedLocationMax[index] = Math.max(estimatedLocationMax[index], i); + if (estimatedLocationMin[index] == 0) { + estimatedLocationMin[index] = i; + } else { + estimatedLocationMin[index] = Math.min(estimatedLocationMin[index], i); + } + estimatedLocation[index] = (estimatedLocationMax[index] + estimatedLocationMin[index]) / 2; + + if (shouldLog && i % (1024 * 1024) == 0) { + logger.info("Recalculating estimated location array {}%", (int) Math.floor(100.0 / len * i)); + } + } + + if (shouldLog) { + logger.info("Recalculating estimated location array 100%"); + } + } + + @Override + public final void set(long index, long value) { + maxValue = Math.max(maxValue, value); + innerSet(index, value); + } + + abstract protected void innerSet(long index, long value); + +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LargeLongArray.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LargeLongArray.java index f0b0e874b..0593bdcef 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LargeLongArray.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LargeLongArray.java @@ -55,4 +55,5 @@ public void resize(long newSize) throws IOException { public void clear() { array.clear(); } + } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArray.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArray.java index 77b0add59..aa9ca68c4 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArray.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/LongArray.java @@ -1,6 +1,8 @@ package com.the_qa_company.qendpoint.core.util.disk; import com.the_qa_company.qendpoint.core.util.io.IOUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.Iterator; @@ -10,6 +12,10 @@ * Describe a large array of longs */ public interface LongArray extends Iterable { + + Logger logger = LoggerFactory.getLogger(LongArray.class); + long[] EMPTY_ARRAY = new long[0]; + /** * create an in memory long array * @@ -208,4 +214,84 @@ public Long next() { } }; } + + /** + * @return the estimated location array that contains the highest location + * for a given value + */ + default long[] getEstimatedLocationArrayMax() { + return getEstimatedLocationArray(); + } + + /** + * @return the estimated location array that contains the lowest location + * for a given value + */ + default long[] getEstimatedLocationArrayMin() { + return getEstimatedLocationArray(); + } + + /** + * @return the estimated location array + */ + default long[] getEstimatedLocationArray() { + return EMPTY_ARRAY; + } + + default int getEstimatedLocationArrayBucketSize() { + return 65536; + } + + default long getEstimatedLocationLowerBound(long val) { + int index = (int) (val / getEstimatedLocationArrayBucketSize() + 1); + if (index - 1 >= 0) { + long t = getEstimatedLocationArrayMax()[index - 1]; + if (t > 0) { + return t; + } + } + return 0; + } + + default long getEstimatedLocationUpperBound(long val) { + int index = (int) (val / getEstimatedLocationArrayBucketSize() + 1); + long[] estimatedLocationMin = getEstimatedLocationArrayMin(); + if (index + 1 < estimatedLocationMin.length) { + long t = estimatedLocationMin[index + 1]; + if (t > 0) { + return Math.min(length(), t); + } + } + + return length(); + } + + default long getEstimatedLocation(long val, long min, long max) { + int index = (int) (val / getEstimatedLocationArrayBucketSize() + 1); + var estimatedLocation = getEstimatedLocationArray(); + + if (index >= estimatedLocation.length) { + return (min + max) / 2; + } + long t = estimatedLocation[index]; + if (t > min && t < max) { + return t; + } else { + return (min + max) / 2; + } + } + + default void recalculateEstimatedValueLocation() { + logger.info("Class {} does not support recalculateEstimatedValueLocation()", + this.getClass().getCanonicalName()); + } + + default void updateEstimatedValueLocation(long val, long min) { + int index = (int) (val / getEstimatedLocationArrayBucketSize() + 1); + long[] estimatedLocation = getEstimatedLocationArray(); + if (index >= estimatedLocation.length) { + return; + } + estimatedLocation[index] = min; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleSplitLongArray.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleSplitLongArray.java index b9e2ab8fa..982485f64 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleSplitLongArray.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SimpleSplitLongArray.java @@ -10,7 +10,7 @@ /** * Implementation of LongArray for simple int64 splits */ -public class SimpleSplitLongArray implements LongArray, Closeable { +public class SimpleSplitLongArray extends AbstractLongArray implements Closeable { final LongArray array; private final int shift; private final long max; @@ -30,6 +30,7 @@ private SimpleSplitLongArray(LongArray array, int numbits, long size) { max = (~0L) >>> (64 - numbits); indexMask = (1 << shift) - 1; this.numbits = numbits; + } public static SimpleSplitLongArray int8Array(long size) { @@ -80,7 +81,7 @@ public long get(long index) { } @Override - public void set(long index, long value) { + public void innerSet(long index, long value) { long rindex = index >>> shift; int sindex = (int) (index & indexMask) << (6 - shift); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SyncLongArray.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SyncLongArray.java index 9d2ccf536..80f7dc1e2 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SyncLongArray.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/disk/SyncLongArray.java @@ -8,6 +8,7 @@ * @author Antoine Willerval */ public class SyncLongArray implements LongArray { + /** * Sync a long array * @@ -57,4 +58,49 @@ public synchronized void resize(long newSize) throws IOException { public synchronized void clear() { array.clear(); } + + @Override + public void updateEstimatedValueLocation(long val, long min) { + array.updateEstimatedValueLocation(val, min); + } + + @Override + public void recalculateEstimatedValueLocation() { + array.recalculateEstimatedValueLocation(); + } + + @Override + public long getEstimatedLocation(long val, long min, long max) { + return array.getEstimatedLocation(val, min, max); + } + + @Override + public long getEstimatedLocationUpperBound(long val) { + return array.getEstimatedLocationUpperBound(val); + } + + @Override + public long getEstimatedLocationLowerBound(long val) { + return array.getEstimatedLocationLowerBound(val); + } + + @Override + public int getEstimatedLocationArrayBucketSize() { + return array.getEstimatedLocationArrayBucketSize(); + } + + @Override + public long[] getEstimatedLocationArray() { + return array.getEstimatedLocationArray(); + } + + @Override + public long[] getEstimatedLocationArrayMin() { + return array.getEstimatedLocationArrayMin(); + } + + @Override + public long[] getEstimatedLocationArrayMax() { + return array.getEstimatedLocationArrayMax(); + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/WriteLongArrayBuffer.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/WriteLongArrayBuffer.java index 6502676f3..7b10ac3c5 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/WriteLongArrayBuffer.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/WriteLongArrayBuffer.java @@ -33,9 +33,9 @@ public WriteLongArrayBuffer(LongArray array, long maxValue, int maxElement) { this.array = array; if (!DISABLE_BUFFER) { int bits = BitUtil.log2(maxValue + 2) + CompressUtil.INDEX_SHIFT; // + - // 1 - // for - // shared + // 1 + // for + // shared if (bits > 31) { bufferLong = new ArrayElementLong[maxElement / 3]; diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java index fb3f40f78..131a687ff 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java @@ -37,6 +37,7 @@ public class EndpointTripleSource implements TripleSource { private static final Logger logger = LoggerFactory.getLogger(EndpointTripleSource.class); + public static final EmptyIteration EMPTY_ITERATION = new EmptyIteration<>(); private final EndpointStore endpoint; private long numberOfCurrentTriples; // count the number of times rdf4j is called within a triple pattern.. @@ -95,68 +96,22 @@ public CloseableIteration getStatements(StatementOrder stat boolean graph = endpoint.getHdt().getDictionary().supportGraphs(); // convert uris into ids if needed - Resource newSubj; - IRI newPred; - Value newObj; - Resource[] newContextes; + long subjectID = this.endpoint.getHdtConverter().subjectToID(subj); long predicateID = this.endpoint.getHdtConverter().predicateToID(pred); long objectID = this.endpoint.getHdtConverter().objectToID(obj); long[] graphID; - if (subjectID == 0 || subjectID == -1) { - newSubj = subj; - } else { - newSubj = this.endpoint.getHdtConverter().subjectIdToIRI(subjectID); - } - if (predicateID == 0 || predicateID == -1) { - newPred = pred; - } else { - newPred = this.endpoint.getHdtConverter().predicateIdToIRI(predicateID); - } - if (objectID == 0 || objectID == -1) { - newObj = obj; - } else { - newObj = this.endpoint.getHdtConverter().objectIdToIRI(objectID); - } - if (graph) { graphID = new long[contexts.length]; - newContextes = this.endpoint.getHdtConverter().graphIdToIRI(contexts, graphID); } else { graphID = null; - newContextes = contexts; } // logger.debug("SEARCH {} {} {}", newSubj, newPred, newObj); - // check if we need to search over the delta and if yes, search - CloseableIteration repositoryResult; - if (shouldSearchOverNativeStore(subjectID, predicateID, objectID)) { - if (statementOrder != null) { - throw new UnsupportedOperationException( - "Statement ordering is not supported when searching over the native store"); - } - logger.debug("Searching over native store"); - count++; - if (endpoint.isMergeTriggered) { - // query both native stores - logger.debug("Query both RDF4j stores!"); - CloseableIteration repositoryResult1 = this.endpointStoreConnection.getConnA_read() - .getStatements(newSubj, newPred, newObj, false, newContextes); - CloseableIteration repositoryResult2 = this.endpointStoreConnection.getConnB_read() - .getStatements(newSubj, newPred, newObj, false, newContextes); - repositoryResult = new CombinedNativeStoreResult(repositoryResult1, repositoryResult2); - - } else { - logger.debug("Query only one RDF4j stores!"); - repositoryResult = this.endpointStoreConnection.getCurrentConnectionRead().getStatements(newSubj, - newPred, newObj, false, newContextes); - } - } else { - logger.debug("Not searching over native store"); - repositoryResult = new EmptyIteration<>(); - } + var nativeStoreRepoResults = getNativeStoreIterator(statementOrder, subj, pred, obj, contexts, subjectID, + predicateID, objectID, graph, graphID); // iterate over the HDT file IteratorTripleID iterator; @@ -203,7 +158,66 @@ public CloseableIteration getStatements(StatementOrder stat // iterate over hdt result, delete the triples marked as deleted and add // the triples from the delta - return new EndpointStoreTripleIterator(endpointStoreConnection, this, iterator, repositoryResult); + return new EndpointStoreTripleIterator(endpointStoreConnection, this, iterator, nativeStoreRepoResults); + } + + private CloseableIteration getNativeStoreIterator(StatementOrder statementOrder, Resource subj, + IRI pred, Value obj, Resource[] contexts, long subjectID, long predicateID, long objectID, boolean graph, + long[] graphID) { + // check if we need to search over the delta and if yes, search + CloseableIteration repositoryResult; + if (shouldSearchOverNativeStore(subjectID, predicateID, objectID)) { + Resource newSubj; + IRI newPred; + Value newObj; + Resource[] newContextes; + if (subjectID == 0 || subjectID == -1) { + newSubj = subj; + } else { + newSubj = this.endpoint.getHdtConverter().subjectIdToIRI(subjectID); + } + if (predicateID == 0 || predicateID == -1) { + newPred = pred; + } else { + newPred = this.endpoint.getHdtConverter().predicateIdToIRI(predicateID); + } + if (objectID == 0 || objectID == -1) { + newObj = obj; + } else { + newObj = this.endpoint.getHdtConverter().objectIdToIRI(objectID); + } + + if (graph) { + newContextes = this.endpoint.getHdtConverter().graphIdToIRI(contexts, graphID); + } else { + newContextes = contexts; + } + + if (statementOrder != null) { + throw new UnsupportedOperationException( + "Statement ordering is not supported when searching over the native store"); + } + logger.debug("Searching over native store"); + count++; + if (endpoint.isMergeTriggered) { + // query both native stores + logger.debug("Query both RDF4j stores!"); + CloseableIteration repositoryResult1 = this.endpointStoreConnection.getConnA_read() + .getStatements(newSubj, newPred, newObj, false, newContextes); + CloseableIteration repositoryResult2 = this.endpointStoreConnection.getConnB_read() + .getStatements(newSubj, newPred, newObj, false, newContextes); + repositoryResult = new CombinedNativeStoreResult(repositoryResult1, repositoryResult2); + + } else { + logger.debug("Query only one RDF4j stores!"); + repositoryResult = this.endpointStoreConnection.getCurrentConnectionRead().getStatements(newSubj, + newPred, newObj, false, newContextes); + } + } else { + logger.debug("Not searching over native store"); + repositoryResult = EMPTY_ITERATION; + } + return repositoryResult; } // this function determines if a triple pattern should be searched over the