From ff9e7f9e795ade52977e1dc01194e33bd1d50cd3 Mon Sep 17 00:00:00 2001 From: qaate47 Date: Sun, 17 Sep 2023 23:30:21 +0200 Subject: [PATCH 1/5] GH-420 Port RDFHDT hdtq implementation to the core --- .../compact/bitmap/BitmapFactoryImpl.java | 2 +- .../core/compact/bitmap/RoaringBitmap.java | 54 ++- .../qendpoint/core/dictionary/Dictionary.java | 19 + .../core/dictionary/DictionaryFactory.java | 128 +++---- .../core/dictionary/DictionaryType.java | 1 + .../core/dictionary/TempDictionary.java | 9 + .../core/dictionary/impl/BaseDictionary.java | 74 +++- .../dictionary/impl/BaseTempDictionary.java | 5 + .../impl/CompressFourSectionDictionary.java | 10 + .../impl/DictionaryPFCOptimizedExtractor.java | 11 +- .../impl/FourQuadSectionDictionary.java | 188 ++++++++++ .../core/dictionary/impl/HashDictionary.java | 5 + .../dictionary/impl/HashQuadDictionary.java | 159 ++++++++ .../impl/MultipleSectionDictionary.java | 12 +- .../impl/MultipleSectionDictionaryBig.java | 15 +- .../impl/MultipleSectionDictionaryLang.java | 12 + .../dictionary/impl/PSFCTempDictionary.java | 11 + .../dictionary/impl/QuadTempDictionary.java | 228 ++++++++++++ .../impl/WriteMultipleSectionDictionary.java | 11 + .../WriteMultipleSectionDictionaryLang.java | 11 + .../core/enums/DictionarySectionRole.java | 3 +- .../core/enums/TripleComponentRole.java | 27 +- .../qendpoint/core/hdt/HDTManagerImpl.java | 10 + .../qendpoint/core/hdt/HDTVocabulary.java | 4 +- .../qendpoint/core/hdt/impl/HDTImpl.java | 71 ++++ .../core/hdt/impl/TempHDTImporterOnePass.java | 45 ++- .../hdt/impl/diskimport/MapOnCallHDT.java | 6 + .../core/hdt/writer/TripleWriterHDT.java | 31 +- .../qendpoint/core/header/PlainHeader.java | 17 + .../iterator/DictionaryTranslateIterator.java | 21 +- .../DictionaryTranslateIteratorBuffer.java | 78 +++- .../qendpoint/core/options/HDTOptions.java | 11 + .../core/options/HDTOptionsKeys.java | 9 + .../core/{quads => quad}/QuadString.java | 36 +- .../core/quad/impl/BitmapQuadsIterator.java | 139 
+++++++ .../core/quad/impl/BitmapQuadsIteratorG.java | 116 ++++++ .../quad/impl/BitmapQuadsIteratorYFOQ.java | 97 +++++ .../quad/impl/BitmapQuadsIteratorYGFOQ.java | 96 +++++ .../quad/impl/BitmapQuadsIteratorZFOQ.java | 223 +++++++++++ .../quad/impl/BitmapQuadsIteratorZGFOQ.java | 202 ++++++++++ .../qendpoint/core/rdf/RDFAccess.java | 25 +- .../core/rdf/parsers/JenaNodeFormatter.java | 3 +- .../core/rdf/parsers/RDFParserRIOT.java | 2 +- .../core/rdf/parsers/RDFParserSimple.java | 2 +- .../qendpoint/core/storage/QEPCore.java | 4 +- .../qendpoint/core/storage/QEPMap.java | 18 +- .../qendpoint/core/tools/RDF2HDT.java | 23 +- .../qendpoint/core/triples/TempTriples.java | 13 + .../qendpoint/core/triples/TripleID.java | 90 ++++- .../core/triples/TripleIDComparatorInt.java | 29 +- .../core/triples/TriplesFactory.java | 12 + .../core/triples/impl/BitmapQuadTriples.java | 349 ++++++++++++++++++ .../core/triples/impl/BitmapTriples.java | 57 +-- .../triples/impl/BitmapTriplesIterator.java | 28 +- .../impl/BitmapTriplesIteratorYFOQ.java | 8 + .../impl/BitmapTriplesIteratorZFOQ.java | 22 +- .../core/triples/impl/OneReadTempTriples.java | 14 +- .../core/triples/impl/TripleIDInt.java | 118 +++++- .../core/triples/impl/TriplesList.java | 74 ++-- .../core/triples/impl/TriplesListLong.java | 35 +- .../util/LargeFakeDataSetStreamSupplier.java | 16 +- .../qendpoint/core/util/crc/CRC32.java | 10 +- .../qendpoint/core/hdt/HDTManagerTest.java | 239 +++++++++++- .../qendpoint/core/storage/QEPMapTest.java | 2 +- .../BitmapTriplesIteratorPositionTest.java | 23 ++ .../qendpoint/model/SimpleIRIHDT.java | 3 + .../qendpoint/store/HDTConverter.java | 2 + 67 files changed, 3116 insertions(+), 312 deletions(-) create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/FourQuadSectionDictionary.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/HashQuadDictionary.java create mode 100644 
qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/QuadTempDictionary.java rename qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/{quads => quad}/QuadString.java (67%) create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIterator.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorG.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYFOQ.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYGFOQ.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZFOQ.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZGFOQ.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/BitmapFactoryImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/BitmapFactoryImpl.java index 055a343c..da3cf5eb 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/BitmapFactoryImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/BitmapFactoryImpl.java @@ -36,7 +36,7 @@ public class BitmapFactoryImpl extends BitmapFactory { protected ModifiableBitmap doCreateModifiableBitmap(String type) { return switch (Objects.requireNonNullElse(type, HDTVocabulary.BITMAP_TYPE_PLAIN)) { case HDTVocabulary.BITMAP_TYPE_PLAIN -> Bitmap375Big.memory(0); - case HDTVocabulary.BITMAP_TYPE_ROAR -> new RoaringBitmap(); + case HDTVocabulary.BITMAP_TYPE_ROARING -> new RoaringBitmap(); default -> throw new IllegalArgumentException("Implementation not found 
for Bitmap with type " + type); }; } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/RoaringBitmap.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/RoaringBitmap.java index 6b20cb77..0371cd78 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/RoaringBitmap.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/RoaringBitmap.java @@ -2,8 +2,15 @@ import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; +import com.the_qa_company.qendpoint.core.util.io.IOUtil; import org.roaringbitmap.longlong.Roaring64Bitmap; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; + /** * {@link ModifiableBitmap} wrapper of the {@link Roaring64Bitmap} class, it * supports the {@link #set(long, boolean)}, {@link #access(long)}, @@ -35,12 +42,29 @@ public long getNumBits() { @Override public long getSizeBytes() { - return 0; + return rbm.serializedSizeInBytes(); + } + + @Override + public void save(OutputStream output, ProgressListener listener) throws IOException { + long size = getSizeBytes(); + IOUtil.writeLong(output, size); + ByteBuffer b2 = ByteBuffer.allocate((int) size); + rbm.serialize(b2); + output.write(b2.array()); + } + + @Override + public void load(InputStream input, ProgressListener listener) throws IOException { + long size = IOUtil.readLong(input); + ByteBuffer b2 = ByteBuffer.allocate((int) size); + input.read(b2.array()); + rbm.deserialize(b2); } @Override public String getType() { - return HDTVocabulary.BITMAP_TYPE_ROAR; + return HDTVocabulary.BITMAP_TYPE_ROARING; } @Override @@ -54,12 +78,21 @@ public void set(long position, boolean value) { @Override public long select1(long n) { - return rbm.select(n); + 
long position = n - 1; + if (position == -1) + return -1; + if (position < rbm.getLongCardinality()) { + return rbm.select(position); + } else { + return rbm.select(rbm.getLongCardinality() - 1) + 1; + } } @Override public long rank1(long position) { - return rbm.rankLong(position); + if (position >= 0) + return rbm.rankLong(position); + return 0; } @Override @@ -71,4 +104,17 @@ public long countOnes() { public void append(boolean value) { set(rbm.last() + 1, value); } + + @Override + public long selectPrev1(long start) { + return select1(rank1(start)); + } + + @Override + public long selectNext1(long start) { + long pos = rank1(start - 1); + if (pos < rbm.getLongCardinality()) + return select1(pos + 1); + return -1; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java index cff67e9a..aafb65c5 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java @@ -132,6 +132,15 @@ default boolean supportsLanguageOfId() { return false; } + /** + * Returns whether the dictionary supports graphs + * + * @return true if it supports graphs, false otherwise + */ + default boolean supportGraphs() { + return false; + } + /** * Returns the number of elements in the dictionary */ @@ -157,6 +166,11 @@ default boolean supportsLanguageOfId() { */ long getNobjects(); + /** + * Returns the number of graphs in the dictionary. + */ + long getNgraphs(); + /** * Returns the number of subjects/objects in the dictionary.
*/ @@ -200,6 +214,9 @@ default long getNSection(TripleComponentRole role, boolean includeShared) { return getNobjects() - getNshared(); } } + case GRAPH -> { + return getNgraphs(); + } default -> throw new AssertionError(); } } @@ -210,6 +227,8 @@ default long getNSection(TripleComponentRole role, boolean includeShared) { DictionarySection getObjects(); + DictionarySection getGraphs(); + Map getAllObjects(); DictionarySection getShared(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryFactory.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryFactory.java index 0f53b6c4..c9f0da84 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryFactory.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryFactory.java @@ -19,10 +19,12 @@ package com.the_qa_company.qendpoint.core.dictionary; +import com.the_qa_company.qendpoint.core.dictionary.impl.FourQuadSectionDictionary; import com.the_qa_company.qendpoint.core.dictionary.impl.FourSectionDictionary; import com.the_qa_company.qendpoint.core.dictionary.impl.FourSectionDictionaryBig; import com.the_qa_company.qendpoint.core.dictionary.impl.FourSectionDictionaryDiff; import com.the_qa_company.qendpoint.core.dictionary.impl.HashDictionary; +import com.the_qa_company.qendpoint.core.dictionary.impl.HashQuadDictionary; import com.the_qa_company.qendpoint.core.dictionary.impl.MultipleSectionDictionary; import com.the_qa_company.qendpoint.core.dictionary.impl.MultipleSectionDictionaryDiff; import com.the_qa_company.qendpoint.core.dictionary.impl.MultipleSectionDictionaryLang; @@ -108,6 +110,7 @@ public static TempDictionary createTempDictionary(HDTOptions spec) { return switch (name) { case "", HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH, HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_MULT_HASH -> new HashDictionary(spec); + case 
HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD -> new HashQuadDictionary(spec); case HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_PSFC -> new PSFCTempDictionary(new HashDictionary(spec)); default -> throw new IllegalFormatException("Implementation of triples not found for " + name); }; @@ -121,21 +124,15 @@ public static TempDictionary createTempDictionary(HDTOptions spec) { */ public static DictionaryPrivate createDictionary(HDTOptions spec) { String name = spec.get(HDTOptionsKeys.DICTIONARY_TYPE_KEY, ""); - switch (name) { - case "": - case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION: - return new FourSectionDictionary(spec); - case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_PSFC_SECTION: - return new PSFCFourSectionDictionary(spec); - case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION_BIG: - return new FourSectionDictionaryBig(spec); - case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS: - return new MultipleSectionDictionary(spec); - case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG: - return new MultipleSectionDictionaryLang(spec); - default: - throw new IllegalFormatException("Implementation of dictionary not found for " + name); - } + return switch (name) { + case "", HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION -> new FourSectionDictionary(spec); + case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION -> new FourQuadSectionDictionary(spec); + case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_PSFC_SECTION -> new PSFCFourSectionDictionary(spec); + case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION_BIG -> new FourSectionDictionaryBig(spec); + case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS -> new MultipleSectionDictionary(spec); + case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG -> new MultipleSectionDictionaryLang(spec); + default -> throw new IllegalFormatException("Implementation of dictionary not found for " + name); + }; } /** @@ -161,17 +158,15 @@ public static DictionaryPrivate 
createWriteDictionary(HDTOptions spec, Path loca * @return WriteDictionary */ public static DictionaryPrivate createWriteDictionary(String name, HDTOptions spec, Path location, int bufferSize) { - switch (name) { - case "": - case HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION_BIG: - return new WriteFourSectionDictionary(spec, location, bufferSize); - case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS: - return new WriteMultipleSectionDictionary(spec, location, bufferSize); - case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG: - return new WriteMultipleSectionDictionaryLang(spec, location, bufferSize); - default: - throw new IllegalFormatException("Implementation of write dictionary not found for " + name); - } + return switch (name) { + case "", HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION_BIG -> + new WriteFourSectionDictionary(spec, location, bufferSize); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS -> + new WriteMultipleSectionDictionary(spec, location, bufferSize); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG -> + new WriteMultipleSectionDictionaryLang(spec, location, bufferSize); + default -> throw new IllegalFormatException("Implementation of write dictionary not found for " + name); + }; } public static SectionCompressor createSectionCompressor(HDTOptions spec, CloseSuppressPath baseFileName, @@ -179,20 +174,16 @@ public static SectionCompressor createSectionCompressor(HDTOptions spec, CloseSu int k, boolean debugSleepKwayDict) { String name = spec.get(HDTOptionsKeys.DICTIONARY_TYPE_KEY, ""); - switch (name) { - case "": - case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION: - case 
HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION_BIG: - return new SectionCompressor(baseFileName, source, listener, bufferSize, chunkSize, k, debugSleepKwayDict); - case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS: - return new MultiSectionSectionCompressor(baseFileName, source, listener, bufferSize, chunkSize, k, - debugSleepKwayDict); - case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG: - return new MultiSectionLangSectionCompressor(baseFileName, source, listener, bufferSize, chunkSize, k, - debugSleepKwayDict); - default: - throw new IllegalFormatException("Implementation of section compressor not found for " + name); - } + return switch (name) { + case "", HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION_BIG -> + new SectionCompressor(baseFileName, source, listener, bufferSize, chunkSize, k, debugSleepKwayDict); + case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS -> new MultiSectionSectionCompressor(baseFileName, + source, listener, bufferSize, chunkSize, k, debugSleepKwayDict); + case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG -> new MultiSectionLangSectionCompressor( + baseFileName, source, listener, bufferSize, chunkSize, k, debugSleepKwayDict); + default -> throw new IllegalFormatException("Implementation of section compressor not found for " + name); + }; } /** @@ -203,18 +194,15 @@ public static SectionCompressor createSectionCompressor(HDTOptions spec, CloseSu */ public static DictionaryPrivate createDictionary(ControlInfo ci) { String name = ci.getFormat(); - switch (name) { - case HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION: - return new FourSectionDictionary(new HDTSpecification()); - case HDTVocabulary.DICTIONARY_TYPE_FOUR_PSFC_SECTION: - return new PSFCFourSectionDictionary(new HDTSpecification()); - case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION: - return new MultipleSectionDictionary(new HDTSpecification()); - case 
HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG: - return new MultipleSectionDictionaryLang(new HDTSpecification()); - default: - throw new IllegalFormatException("Implementation of dictionary not found for " + name); - } + return switch (name) { + case HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION -> new FourSectionDictionary(new HDTSpecification()); + case HDTVocabulary.DICTIONARY_TYPE_FOUR_QUAD_SECTION -> new FourQuadSectionDictionary(new HDTSpecification()); + case HDTVocabulary.DICTIONARY_TYPE_FOUR_PSFC_SECTION -> new PSFCFourSectionDictionary(new HDTSpecification()); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION -> new MultipleSectionDictionary(new HDTSpecification()); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG -> + new MultipleSectionDictionaryLang(new HDTSpecification()); + default -> throw new IllegalFormatException("Implementation of dictionary not found for " + name); + }; } /** @@ -242,31 +230,25 @@ public static DictionaryDiff createDictionaryDiff(Dictionary dictionary, String */ public static DictionaryKCat createDictionaryKCat(Dictionary dictionary) { String type = dictionary.getType(); - switch (type) { - case HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION: - return new FourSectionDictionaryKCat(dictionary); - case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION: - return new MultipleSectionDictionaryKCat(dictionary); - case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG: - return new MultipleSectionLangDictionaryKCat(dictionary); - default: - throw new IllegalArgumentException("Implementation of DictionaryKCat not found for " + type); - } + return switch (type) { + case HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION -> new FourSectionDictionaryKCat(dictionary); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION -> new MultipleSectionDictionaryKCat(dictionary); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG -> new MultipleSectionLangDictionaryKCat(dictionary); + default -> throw new IllegalArgumentException("Implementation of DictionaryKCat 
not found for " + type); + }; } public static DictionaryPrivate createWriteDictionary(String type, HDTOptions spec, DictionarySectionPrivate subject, DictionarySectionPrivate predicate, DictionarySectionPrivate object, DictionarySectionPrivate shared, TreeMap sub) { - switch (type) { - case HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION: - case HDTVocabulary.DICTIONARY_TYPE_FOUR_PSFC_SECTION: - return new WriteFourSectionDictionary(spec, subject, predicate, object, shared); - case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION: - return new WriteMultipleSectionDictionary(spec, subject, predicate, shared, sub); - case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG: - return new WriteMultipleSectionDictionaryLang(spec, subject, predicate, shared, sub); - default: - throw new IllegalArgumentException("Unknown dictionary type " + type); - } + return switch (type) { + case HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION, HDTVocabulary.DICTIONARY_TYPE_FOUR_PSFC_SECTION -> + new WriteFourSectionDictionary(spec, subject, predicate, object, shared); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION -> + new WriteMultipleSectionDictionary(spec, subject, predicate, shared, sub); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG -> + new WriteMultipleSectionDictionaryLang(spec, subject, predicate, shared, sub); + default -> throw new IllegalArgumentException("Unknown dictionary type " + type); + }; } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryType.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryType.java index bb1d2a41..307b52b9 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryType.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryType.java @@ -32,6 +32,7 @@ public static DictionaryType fromDictionaryType(HDTOptions options) { public static DictionaryType fromDictionaryType(String dictType) { 
return switch (Objects.requireNonNullElse(dictType, "")) { case "", HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION_BIG, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_PSFC_SECTION -> FSD; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/TempDictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/TempDictionary.java index a9749b03..2a61694b 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/TempDictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/TempDictionary.java @@ -38,6 +38,15 @@ public interface TempDictionary extends Closeable { TempDictionarySection getShared(); + TempDictionarySection getGraphs(); + + /** + * Returns whether the dictionary supports graphs + * + * @return true if it supports graphs, false otherwise + */ + boolean supportGraphs(); + /** * To be executed at the start of the processing */ diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/BaseDictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/BaseDictionary.java index 8e1d7780..56b9dff3 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/BaseDictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/BaseDictionary.java @@ -51,6 +51,7 @@ public abstract class BaseDictionary implements DictionaryPrivate { protected DictionarySectionPrivate predicates; protected DictionarySectionPrivate objects; protected DictionarySectionPrivate shared; + protected DictionarySectionPrivate graphs; public BaseDictionary(HDTOptions spec) { this.spec = spec; @@ -59,8 +60,7 @@ public BaseDictionary(HDTOptions spec) { protected long getGlobalId(long id, DictionarySectionRole position) { return 
switch (position) { case SUBJECT, OBJECT -> shared.getNumberOfElements() + id; - case PREDICATE, SHARED -> id; - default -> throw new IllegalArgumentException(); + case PREDICATE, SHARED, GRAPH -> id; }; } @@ -73,7 +73,7 @@ protected long getLocalId(long id, TripleComponentRole position) { return id - shared.getNumberOfElements(); } } - case PREDICATE -> { + case PREDICATE, GRAPH -> { return id; } default -> throw new IllegalArgumentException(); @@ -122,9 +122,9 @@ public long stringToId(CharSequence str, TripleComponentRole position) { str = new CompactString(str); } - long ret = 0; + long ret; switch (position) { - case SUBJECT: + case SUBJECT -> { ret = shared.locate(str); if (ret != 0) { return getGlobalId(ret, DictionarySectionRole.SHARED); @@ -134,13 +134,22 @@ public long stringToId(CharSequence str, TripleComponentRole position) { return getGlobalId(ret, DictionarySectionRole.SUBJECT); } return -1; - case PREDICATE: + } + case PREDICATE -> { ret = predicates.locate(str); if (ret != 0) { return getGlobalId(ret, DictionarySectionRole.PREDICATE); } return -1; - case OBJECT: + } + case GRAPH -> { + ret = graphs.locate(str); + if (ret != 0) { + return getGlobalId(ret, DictionarySectionRole.GRAPH); + } + return -1; + } + case OBJECT -> { if (str.charAt(0) != '"') { ret = shared.locate(str); if (ret != 0) { @@ -152,20 +161,33 @@ public long stringToId(CharSequence str, TripleComponentRole position) { return getGlobalId(ret, DictionarySectionRole.OBJECT); } return -1; - default: - throw new IllegalArgumentException(); + } + default -> throw new IllegalArgumentException(); } } @Override public long getNumberOfElements() { - return subjects.getNumberOfElements() + predicates.getNumberOfElements() + objects.getNumberOfElements() - + shared.getNumberOfElements(); + long s = subjects.getNumberOfElements(); + long p = predicates.getNumberOfElements(); + long o = objects.getNumberOfElements(); + if (!supportGraphs()) { + return s + p + o; + } + long g = 
graphs.getNumberOfElements(); + return s + p + o + g; } @Override public long size() { - return subjects.size() + predicates.size() + objects.size() + shared.size(); + long s = subjects.size(); + long p = predicates.size(); + long o = objects.size(); + if (!supportGraphs()) { + return s + p + o; + } + long g = graphs.size(); + return s + p + o + g; } @Override @@ -183,6 +205,14 @@ public long getNobjects() { return objects.getNumberOfElements() + shared.getNumberOfElements(); } + @Override + public long getNgraphs() { + if (graphs == null) { + return 0; + } + return graphs.getNumberOfElements(); + } + @Override public long getNshared() { return shared.getNumberOfElements(); @@ -203,6 +233,11 @@ public DictionarySection getObjects() { return objects; } + @Override + public DictionarySection getGraphs() { + return graphs; + } + @Override public DictionarySection getShared() { return shared; @@ -210,22 +245,27 @@ public DictionarySection getShared() { private DictionarySectionPrivate getSection(long id, TripleComponentRole role) { switch (role) { - case SUBJECT: + case SUBJECT -> { if (id <= shared.getNumberOfElements()) { return shared; } else { return subjects; } - case PREDICATE: + } + case PREDICATE -> { return predicates; - case OBJECT: + } + case OBJECT -> { if (id <= shared.getNumberOfElements()) { return shared; } else { return objects; } - default: - throw new IllegalArgumentException(); + } + case GRAPH -> { + return graphs; + } + default -> throw new IllegalArgumentException(); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/BaseTempDictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/BaseTempDictionary.java index 11c746d3..dad0134f 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/BaseTempDictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/BaseTempDictionary.java @@ -149,6 
+149,11 @@ public TempDictionarySection getShared() { return shared; } + @Override + public TempDictionarySection getGraphs() { + throw new NotImplementedException(); + } + protected long getGlobalId(long id, DictionarySectionRole position) { switch (position) { case SUBJECT: diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/CompressFourSectionDictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/CompressFourSectionDictionary.java index e08a7321..876f556c 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/CompressFourSectionDictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/CompressFourSectionDictionary.java @@ -180,6 +180,16 @@ public TempDictionarySection getShared() { return shared; } + @Override + public TempDictionarySection getGraphs() { + throw new NotImplementedException(); + } + + @Override + public boolean supportGraphs() { + return false; + } + @Override public void startProcessing() { } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/DictionaryPFCOptimizedExtractor.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/DictionaryPFCOptimizedExtractor.java index e8b3cd83..0c2b3677 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/DictionaryPFCOptimizedExtractor.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/DictionaryPFCOptimizedExtractor.java @@ -5,7 +5,7 @@ import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; public class DictionaryPFCOptimizedExtractor implements OptimizedExtractor { - private final PFCOptimizedExtractor shared, subjects, predicates, objects; + private final PFCOptimizedExtractor shared, subjects, predicates, objects, graphs; private final long numshared; public 
DictionaryPFCOptimizedExtractor(BaseDictionary origDict) { @@ -14,6 +14,11 @@ public DictionaryPFCOptimizedExtractor(BaseDictionary origDict) { subjects = new PFCOptimizedExtractor((PFCDictionarySectionMap) origDict.subjects); predicates = new PFCOptimizedExtractor((PFCDictionarySectionMap) origDict.predicates); objects = new PFCOptimizedExtractor((PFCDictionarySectionMap) origDict.objects); + if (origDict.graphs == null) { + graphs = null; + } else { + graphs = new PFCOptimizedExtractor((PFCDictionarySectionMap) origDict.graphs); + } } public CharSequence idToString(long id, TripleComponentRole role) { @@ -38,6 +43,8 @@ private PFCOptimizedExtractor getSection(long id, TripleComponentRole role) { } else { return objects; } + case GRAPH: + return graphs; } throw new IllegalArgumentException(); } @@ -53,6 +60,8 @@ private long getLocalId(long id, TripleComponentRole position) { } case PREDICATE: return id; + case GRAPH: + return id; } throw new IllegalArgumentException(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/FourQuadSectionDictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/FourQuadSectionDictionary.java new file mode 100644 index 00000000..e194be45 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/FourQuadSectionDictionary.java @@ -0,0 +1,188 @@ +/* + * File: $HeadURL: + * https://hdt-java.googlecode.com/svn/trunk/hdt-java/src/org/rdfhdt/hdt/ + * dictionary/impl/FourSectionDictionary.java $ Revision: $Rev: 191 $ Last + * modified: $Date: 2013-03-03 11:41:43 +0000 (dom, 03 mar 2013) $ Last modified + * by: $Author: mario.arias $ This library is free software; you can + * redistribute it and/or modify it under the terms of the GNU Lesser General + * Public License as published by the Free Software Foundation; version 3.0 of + * the License. 
This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + * General Public License for more details. You should have received a copy of + * the GNU Lesser General Public License along with this library; if not, write + * to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA Contacting the authors: Mario Arias: mario.arias@deri.org + * Javier D. Fernandez: jfergar@infor.uva.es Miguel A. Martinez-Prieto: + * migumar2@infor.uva.es Alejandro Andres: fuzzy.alej@gmail.com + */ + +package com.the_qa_company.qendpoint.core.dictionary.impl; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import com.the_qa_company.qendpoint.core.dictionary.DictionarySectionPrivate; +import com.the_qa_company.qendpoint.core.dictionary.TempDictionary; +import com.the_qa_company.qendpoint.core.dictionary.impl.section.DictionarySectionFactory; +import com.the_qa_company.qendpoint.core.dictionary.impl.section.PFCDictionarySection; +import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; +import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary; +import com.the_qa_company.qendpoint.core.header.Header; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; +import com.the_qa_company.qendpoint.core.options.ControlInfo; +import com.the_qa_company.qendpoint.core.options.ControlInfo.Type; +import com.the_qa_company.qendpoint.core.options.ControlInformation; +import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.util.concurrent.ExceptionThread; +import com.the_qa_company.qendpoint.core.util.io.CountInputStream; +import com.the_qa_company.qendpoint.core.util.io.IOUtil; +import com.the_qa_company.qendpoint.core.util.listener.IntermediateListener; + +public class 
FourQuadSectionDictionary extends FourSectionDictionary { + + public FourQuadSectionDictionary(HDTOptions spec, DictionarySectionPrivate s, DictionarySectionPrivate p, + DictionarySectionPrivate o, DictionarySectionPrivate sh, DictionarySectionPrivate g) { + super(spec); + this.subjects = s; + this.predicates = p; + this.objects = o; + this.shared = sh; + this.graphs = g; + } + + public FourQuadSectionDictionary(HDTOptions spec) { + super(spec); + // FIXME: Read type from spec. + subjects = new PFCDictionarySection(spec); + predicates = new PFCDictionarySection(spec); + objects = new PFCDictionarySection(spec); + shared = new PFCDictionarySection(spec); + graphs = new PFCDictionarySection(spec); + } + + /* + * (non-Javadoc) + * @see hdt.dictionary.Dictionary#load(hdt.dictionary.Dictionary) + */ + @Override + public void load(TempDictionary other, ProgressListener listener) { + IntermediateListener iListener = new IntermediateListener(listener); + subjects.load(other.getSubjects(), iListener); + predicates.load(other.getPredicates(), iListener); + objects.load(other.getObjects(), iListener); + shared.load(other.getShared(), iListener); + graphs.load(other.getGraphs(), iListener); + } + + @Override + public void loadAsync(TempDictionary other, ProgressListener listener) throws InterruptedException { + IntermediateListener iListener = new IntermediateListener(null); + new ExceptionThread(() -> predicates.load(other.getPredicates(), iListener), "FourSecSAsyncReaderP") + .attach(new ExceptionThread(() -> subjects.load(other.getSubjects(), iListener), + "FourSecSAsyncReaderS"), + new ExceptionThread(() -> shared.load(other.getShared(), iListener), "FourSecSAsyncReaderSh"), + new ExceptionThread(() -> objects.load(other.getObjects(), iListener), "FourSecSAsyncReaderO"), + new ExceptionThread(() -> graphs.load(other.getGraphs(), iListener), "FourSecSAsyncReaderG")) + .startAll().joinAndCrashIfRequired(); + } + + /* + * (non-Javadoc) + * @see 
hdt.dictionary.Dictionary#save(java.io.OutputStream, + * hdt.ControlInformation, hdt.ProgressListener) + */ + @Override + public void save(OutputStream output, ControlInfo ci, ProgressListener listener) throws IOException { + ci.setType(Type.DICTIONARY); + ci.setFormat(getType()); + ci.setInt("elements", this.getNumberOfElements()); + ci.save(output); + + IntermediateListener iListener = new IntermediateListener(listener); + shared.save(output, iListener); + subjects.save(output, iListener); + predicates.save(output, iListener); + objects.save(output, iListener); + graphs.save(output, iListener); + + } + + /* + * (non-Javadoc) + * @see hdt.dictionary.Dictionary#load(java.io.InputStream) + */ + @Override + public void load(InputStream input, ControlInfo ci, ProgressListener listener) throws IOException { + if (ci.getType() != ControlInfo.Type.DICTIONARY) { + throw new IllegalFormatException("Trying to read a dictionary section, but was not dictionary."); + } + + IntermediateListener iListener = new IntermediateListener(listener); + + shared = DictionarySectionFactory.loadFrom(input, iListener); + subjects = DictionarySectionFactory.loadFrom(input, iListener); + predicates = DictionarySectionFactory.loadFrom(input, iListener); + objects = DictionarySectionFactory.loadFrom(input, iListener); + graphs = DictionarySectionFactory.loadFrom(input, iListener); + } + + @Override + public void mapFromFile(CountInputStream in, File f, ProgressListener listener) throws IOException { + ControlInformation ci = new ControlInformation(); + ci.load(in); + if (ci.getType() != ControlInfo.Type.DICTIONARY) { + throw new IllegalFormatException("Trying to read a dictionary section, but was not dictionary."); + } + + IntermediateListener iListener = new IntermediateListener(listener); + shared = DictionarySectionFactory.loadFrom(in, f, iListener); + subjects = DictionarySectionFactory.loadFrom(in, f, iListener); + predicates = DictionarySectionFactory.loadFrom(in, f, iListener); + objects 
= DictionarySectionFactory.loadFrom(in, f, iListener); + graphs = DictionarySectionFactory.loadFrom(in, f, iListener); + + // Use cache only for predicates. Preload only up to 100K predicates. + // FIXME: DISABLED +// predicates = new DictionarySectionCacheAll(predicates, predicates.getNumberOfElements()<100000); + } + + /* + * (non-Javadoc) + * @see hdt.dictionary.Dictionary#populateHeader(hdt.header.Header, + * java.lang.String) + */ + @Override + public void populateHeader(Header header, String rootNode) { + header.insert(rootNode, HDTVocabulary.DICTIONARY_TYPE, getType()); +// header.insert(rootNode, HDTVocabulary.DICTIONARY_NUMSUBJECTS, getNsubjects()); +// header.insert(rootNode, HDTVocabulary.DICTIONARY_NUMPREDICATES, getNpredicates()); +// header.insert(rootNode, HDTVocabulary.DICTIONARY_NUMOBJECTS, getNobjects()); + header.insert(rootNode, HDTVocabulary.DICTIONARY_NUMSHARED, getNshared()); +// header.insert(rootNode, HDTVocabulary.DICTIONARY_MAXSUBJECTID, getMaxSubjectID()); +// header.insert(rootNode, HDTVocabulary.DICTIONARY_MAXPREDICATEID, getMaxPredicateID()); +// header.insert(rootNode, HDTVocabulary.DICTIONARY_MAXOBJECTTID, getMaxObjectID()); + header.insert(rootNode, HDTVocabulary.DICTIONARY_SIZE_STRINGS, size()); + } + + /* + * (non-Javadoc) + * @see hdt.dictionary.Dictionary#getType() + */ + @Override + public String getType() { + return HDTVocabulary.DICTIONARY_TYPE_FOUR_QUAD_SECTION; + } + + @Override + public void close() throws IOException { + IOUtil.closeAll(shared, subjects, predicates, objects, graphs); + } + + @Override + public boolean supportGraphs() { + return true; + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/HashDictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/HashDictionary.java index 884de29d..27f62d90 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/HashDictionary.java +++ 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/HashDictionary.java @@ -120,6 +120,11 @@ public void reorganize(TempTriples triples) { isOrganized = true; } + @Override + public boolean supportGraphs() { + return false; + } + @Override public void startProcessing() { // Do nothing. diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/HashQuadDictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/HashQuadDictionary.java new file mode 100644 index 00000000..38e2255b --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/HashQuadDictionary.java @@ -0,0 +1,159 @@ +/* + * File: $HeadURL: + * https://hdt-java.googlecode.com/svn/trunk/hdt-java/src/org/rdfhdt/hdt/ + * dictionary/impl/HashDictionary.java $ Revision: $Rev: 191 $ Last modified: + * $Date: 2013-03-03 11:41:43 +0000 (dom, 03 mar 2013) $ Last modified by: + * $Author: mario.arias $ This library is free software; you can redistribute it + * and/or modify it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; version 3.0 of the License. This + * library is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR + * A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * Contacting the authors: Mario Arias: mario.arias@deri.org Javier D. + * Fernandez: jfergar@infor.uva.es Miguel A. 
Martinez-Prieto: + * migumar2@infor.uva.es Alejandro Andres: fuzzy.alej@gmail.com + */ + +package com.the_qa_company.qendpoint.core.dictionary.impl; + +import java.io.IOException; +import java.util.Iterator; + +import com.the_qa_company.qendpoint.core.dictionary.DictionaryType; +import com.the_qa_company.qendpoint.core.dictionary.impl.section.HashDictionarySection; +import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; +import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.triples.TempTriples; +import com.the_qa_company.qendpoint.core.util.StopWatch; + +public class HashQuadDictionary extends QuadTempDictionary { + + public HashQuadDictionary(HDTOptions spec) { + super(spec); + // FIXME: Read types from spec + subjects = new HashDictionarySection(); + predicates = new HashDictionarySection(); + objects = new HashDictionarySection(DictionaryType.fromDictionaryType(spec)); + shared = new HashDictionarySection(); + graphs = new HashDictionarySection(); + } + + /* + * (non-Javadoc) + * @see hdt.dictionary.Dictionary#reorganize(hdt.triples.TempTriples) + */ + @Override + public void reorganize(TempTriples triples) { + DictionaryIDMapping mapSubj = new DictionaryIDMapping(subjects.getNumberOfElements()); + DictionaryIDMapping mapPred = new DictionaryIDMapping(predicates.getNumberOfElements()); + DictionaryIDMapping mapObj = new DictionaryIDMapping(objects.getNumberOfElements()); + DictionaryIDMapping mapGraph = new DictionaryIDMapping(graphs.getNumberOfElements()); + + StopWatch st = new StopWatch(); + + // Generate old subject mapping + Iterator itSubj = subjects.getEntries(); + while (itSubj.hasNext()) { + CharSequence str = itSubj.next(); + mapSubj.add(str); + + // GENERATE SHARED at the same time + if (str.length() > 0 && str.charAt(0) != '"' && objects.locate(str) != 0) { + shared.add(str); + } + } + + // Generate old predicate mapping + st.reset(); + Iterator itPred = predicates.getEntries(); + while 
(itPred.hasNext()) { + CharSequence str = itPred.next(); + mapPred.add(str); + } + + // Generate old graph mapping + Iterator itGraph = graphs.getEntries(); + while (itGraph.hasNext()) { + CharSequence str = itGraph.next(); + mapGraph.add(str); + } + + // Generate old object mapping + Iterator itObj = objects.getEntries(); + while (itObj.hasNext()) { + CharSequence str = itObj.next(); + mapObj.add(str); + } + + // Remove shared from subjects and objects + Iterator itShared = shared.getEntries(); + while (itShared.hasNext()) { + CharSequence sharedStr = itShared.next(); + subjects.remove(sharedStr); + objects.remove(sharedStr); + } + + // Sort sections individually + st.reset(); + subjects.sort(); + predicates.sort(); + graphs.sort(); + objects.sort(); + shared.sort(); + // System.out.println("Sections sorted in "+ st.stopAndShow()); + + // Update mappings with new IDs + st.reset(); + for (long j = 0; j < mapSubj.size(); j++) { + mapSubj.setNewID(j, this.stringToId(mapSubj.getString(j), TripleComponentRole.SUBJECT)); + } + + for (long j = 0; j < mapPred.size(); j++) { + mapPred.setNewID(j, this.stringToId(mapPred.getString(j), TripleComponentRole.PREDICATE)); + } + + for (long j = 0; j < mapObj.size(); j++) { + mapObj.setNewID(j, this.stringToId(mapObj.getString(j), TripleComponentRole.OBJECT)); + } + + for (long j = 0; j < mapGraph.size(); j++) { + CharSequence str = mapGraph.getString(j); + // check that because stringToId returns 0 for empty strings and if + // a string is empty its id is + // 0 no matter what + if (str.length() == 0) { + mapGraph.setNewID(j, 1); + } else { + mapGraph.setNewID(j, this.stringToId(str, TripleComponentRole.GRAPH)); + } + } + + // Replace old IDs with news + triples.replaceAllIds(mapSubj, mapPred, mapObj, mapGraph); + + isOrganized = true; + } + + @Override + public void startProcessing() { + // Do nothing. + } + + @Override + public void endProcessing() { + // Do nothing. 
+ } + + @Override + public void close() throws IOException { + // Do nothing. + } + + @Override + public boolean supportGraphs() { + return true; + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionary.java index 0fe2b62e..c9ad2210 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionary.java @@ -7,6 +7,7 @@ import com.the_qa_company.qendpoint.core.dictionary.impl.section.DictionarySectionFactory; import com.the_qa_company.qendpoint.core.dictionary.impl.section.PFCDictionarySection; import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary; import com.the_qa_company.qendpoint.core.header.Header; import com.the_qa_company.qendpoint.core.iterator.utils.MapIterator; @@ -36,7 +37,6 @@ import java.util.List; import java.util.Map; import java.util.TreeMap; -import java.util.function.Predicate; public class MultipleSectionDictionary extends MultipleBaseDictionary { @@ -225,6 +225,16 @@ public void mapFromFile(CountInputStream in, File f, ProgressListener listener) // predicates = new DictionarySectionCacheAll(predicates, predicates.getNumberOfElements()<100000); } + @Override + public long getNgraphs() { + return 0; + } + + @Override + public DictionarySection getGraphs() { + throw new NotImplementedException(); + } + @Override public long getNAllObjects() { return objects.values().stream().mapToLong(DictionarySectionPrivate::getNumberOfElements).sum(); diff --git 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryBig.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryBig.java index a5d8574f..71dab65d 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryBig.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryBig.java @@ -1,12 +1,13 @@ package com.the_qa_company.qendpoint.core.dictionary.impl; +import com.the_qa_company.qendpoint.core.compact.integer.VByte; import com.the_qa_company.qendpoint.core.dictionary.DictionarySection; import com.the_qa_company.qendpoint.core.dictionary.TempDictionary; import com.the_qa_company.qendpoint.core.dictionary.impl.section.DictionarySectionFactory; -import com.the_qa_company.qendpoint.core.dictionary.impl.section.HashDictionarySection; import com.the_qa_company.qendpoint.core.dictionary.impl.section.PFCDictionarySection; import com.the_qa_company.qendpoint.core.dictionary.impl.section.PFCDictionarySectionBig; import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary; import com.the_qa_company.qendpoint.core.header.Header; import com.the_qa_company.qendpoint.core.iterator.utils.MapIterator; @@ -18,7 +19,6 @@ import com.the_qa_company.qendpoint.core.options.HDTOptions; import com.the_qa_company.qendpoint.core.util.CustomIterator; import com.the_qa_company.qendpoint.core.util.LiteralsUtils; -import com.the_qa_company.qendpoint.core.compact.integer.VByte; import com.the_qa_company.qendpoint.core.util.concurrent.ExceptionThread; import com.the_qa_company.qendpoint.core.util.io.CountInputStream; import com.the_qa_company.qendpoint.core.util.io.IOUtil; @@ -32,7 +32,6 @@ import java.io.InputStream; import 
java.io.OutputStream; import java.util.ArrayList; -import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -244,6 +243,16 @@ public String getType() { return HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION; } + @Override + public long getNgraphs() { + return 0; + } + + @Override + public DictionarySection getGraphs() { + throw new NotImplementedException(); + } + @Override public void close() throws IOException { try { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryLang.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryLang.java index 4e4cabea..d07827bc 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryLang.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryLang.java @@ -1,12 +1,14 @@ package com.the_qa_company.qendpoint.core.dictionary.impl; import com.the_qa_company.qendpoint.core.compact.integer.VByte; +import com.the_qa_company.qendpoint.core.dictionary.DictionarySection; import com.the_qa_company.qendpoint.core.dictionary.DictionarySectionPrivate; import com.the_qa_company.qendpoint.core.dictionary.TempDictionary; import com.the_qa_company.qendpoint.core.dictionary.impl.section.DictionarySectionFactory; import com.the_qa_company.qendpoint.core.dictionary.impl.section.PFCDictionarySection; import com.the_qa_company.qendpoint.core.dictionary.impl.section.PFCDictionarySectionBig; import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; import com.the_qa_company.qendpoint.core.iterator.charsequence.StopIterator; import com.the_qa_company.qendpoint.core.iterator.utils.MapIterator; import com.the_qa_company.qendpoint.core.iterator.utils.PeekIterator; @@ -263,4 +265,14 @@ 
private void mapLiteralsMaps(CountInputStream input, File f, ProgressListener li syncLocations(); } + + @Override + public long getNgraphs() { + return 0; + } + + @Override + public DictionarySection getGraphs() { + throw new NotImplementedException(); + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/PSFCTempDictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/PSFCTempDictionary.java index d8590320..a8e098bd 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/PSFCTempDictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/PSFCTempDictionary.java @@ -3,6 +3,7 @@ import com.the_qa_company.qendpoint.core.dictionary.TempDictionary; import com.the_qa_company.qendpoint.core.dictionary.TempDictionarySection; import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; import com.the_qa_company.qendpoint.core.triples.TempTriples; import java.io.IOException; @@ -44,6 +45,16 @@ public TempDictionarySection getShared() { return delegate.getShared(); } + @Override + public TempDictionarySection getGraphs() { + throw new NotImplementedException(); + } + + @Override + public boolean supportGraphs() { + return false; + } + @Override public void startProcessing() { delegate.startProcessing(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/QuadTempDictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/QuadTempDictionary.java new file mode 100644 index 00000000..e8f8bb2a --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/QuadTempDictionary.java @@ -0,0 +1,228 @@ +/* + * File: $HeadURL: + * https://hdt-java.googlecode.com/svn/trunk/hdt-java/src/org/rdfhdt/hdt/ + * 
dictionary/impl/QuadTempDictionary.java $ Revision: $Rev: 191 $ Last + * modified: $Date: 2023-05-24 11:41:43 +0000 (dom, 03 mar 2013) $ Last modified + * by: $Author: dappermink $ This library is free software; you can redistribute + * it and/or modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; version 3.0 of the License. + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * Contacting the authors: Mario Arias: mario.arias@deri.org Javier D. + * Fernandez: jfergar@infor.uva.es Miguel A. Martinez-Prieto: + * migumar2@infor.uva.es Alejandro Andres: fuzzy.alej@gmail.com + */ + +package com.the_qa_company.qendpoint.core.dictionary.impl; + +import java.util.Iterator; + +import com.the_qa_company.qendpoint.core.dictionary.TempDictionary; +import com.the_qa_company.qendpoint.core.dictionary.TempDictionarySection; +import com.the_qa_company.qendpoint.core.enums.DictionarySectionRole; +import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; +import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.triples.TempTriples; + +/** + * This abstract class implements all methods that have implementation common to + * all modifiable quad dictionaries (or could apply to) + */ +public abstract class QuadTempDictionary implements TempDictionary { + + final HDTOptions spec; + protected boolean isOrganized; + + protected TempDictionarySection subjects; + protected 
TempDictionarySection predicates; + protected TempDictionarySection objects; + protected TempDictionarySection shared; + protected TempDictionarySection graphs; + + public QuadTempDictionary(HDTOptions spec) { + this.spec = spec; + } + + /* + * (non-Javadoc) + * @see hdt.dictionary.Dictionary#insert(java.lang.String, + * datatypes.TripleComponentRole) + */ + @Override + public long insert(CharSequence str, TripleComponentRole position) { + switch (position) { + case SUBJECT: + isOrganized = false; + return subjects.add(str); + case PREDICATE: + isOrganized = false; + return predicates.add(str); + case OBJECT: + isOrganized = false; + return objects.add(str); + case GRAPH: + isOrganized = false; + return graphs.add(str); + default: + throw new IllegalArgumentException(); + } + } + + @Override + public void reorganize() { + + // Generate shared + Iterator itSubj = subjects.getEntries(); + while (itSubj.hasNext()) { + CharSequence str = itSubj.next(); + + // FIXME: These checks really needed? + if (str.length() > 0 && str.charAt(0) != '"' && objects.locate(str) != 0) { + shared.add(str); + } + } + + // Remove shared from subjects and objects + Iterator itShared = shared.getEntries(); + while (itShared.hasNext()) { + CharSequence sharedStr = itShared.next(); + subjects.remove(sharedStr); + objects.remove(sharedStr); + } + + // Sort sections individually + shared.sort(); + subjects.sort(); + objects.sort(); + predicates.sort(); + graphs.sort(); + + isOrganized = true; + + } + + /** + * This method is used in the one-pass way of working in which case it + * should not be used with a disk-backed dictionary because remapping + * requires practically a copy of the dictionary which is very bad... 
(it is + * ok for in-memory and they should override and write implementation) + */ + @Override + public void reorganize(TempTriples triples) { + throw new NotImplementedException(); + } + + @Override + public boolean isOrganized() { + return isOrganized; + } + + @Override + public void clear() { + subjects.clear(); + predicates.clear(); + shared.clear(); + objects.clear(); + graphs.clear(); + } + + @Override + public TempDictionarySection getSubjects() { + return subjects; + } + + @Override + public TempDictionarySection getPredicates() { + return predicates; + } + + @Override + public TempDictionarySection getObjects() { + return objects; + } + + @Override + public TempDictionarySection getShared() { + return shared; + } + + public TempDictionarySection getGraphs() { + return graphs; + } + + protected long getGlobalId(long id, DictionarySectionRole position) { + switch (position) { + case SUBJECT: + case OBJECT: + return shared.getNumberOfElements() + id; + + case PREDICATE: + case SHARED: + case GRAPH: + return id; + default: + throw new IllegalArgumentException(); + } + } + + /* + * (non-Javadoc) + * @see hdt.dictionary.Dictionary#stringToId(java.lang.CharSequence, + * datatypes.TripleComponentRole) + */ + @Override + public long stringToId(CharSequence str, TripleComponentRole position) { + + if (str == null || str.length() == 0) { + return 0; + } + + long ret = 0; + switch (position) { + case SUBJECT: + ret = shared.locate(str); + if (ret != 0) { + return getGlobalId(ret, DictionarySectionRole.SHARED); + } + ret = subjects.locate(str); + if (ret != 0) { + return getGlobalId(ret, DictionarySectionRole.SUBJECT); + } + return -1; + case PREDICATE: + ret = predicates.locate(str); + if (ret != 0) { + return getGlobalId(ret, DictionarySectionRole.PREDICATE); + } + return -1; + case OBJECT: + ret = shared.locate(str); + if (ret != 0) { + return getGlobalId(ret, DictionarySectionRole.SHARED); + } + ret = objects.locate(str); + if (ret != 0) { + return 
getGlobalId(ret, DictionarySectionRole.OBJECT); + } + return -1; + case GRAPH: + ret = graphs.locate(str); + if (ret != 0) { + return getGlobalId(ret, DictionarySectionRole.GRAPH); + } + return -1; + default: + throw new IllegalArgumentException(); + } + } + + @Override + public boolean supportGraphs() { + return false; + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteMultipleSectionDictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteMultipleSectionDictionary.java index 74383ce2..85c78c41 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteMultipleSectionDictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteMultipleSectionDictionary.java @@ -1,5 +1,6 @@ package com.the_qa_company.qendpoint.core.dictionary.impl; +import com.the_qa_company.qendpoint.core.dictionary.DictionarySection; import com.the_qa_company.qendpoint.core.dictionary.DictionarySectionPrivate; import com.the_qa_company.qendpoint.core.dictionary.TempDictionary; import com.the_qa_company.qendpoint.core.dictionary.impl.section.WriteDictionarySection; @@ -224,6 +225,16 @@ public void mapFromFile(CountInputStream in, File f, ProgressListener listener) throw new NotImplementedException(); } + @Override + public long getNgraphs() { + return 0; + } + + @Override + public DictionarySection getGraphs() { + throw new NotImplementedException(); + } + @Override public void load(TempDictionary other, ProgressListener listener) { throw new NotImplementedException(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteMultipleSectionDictionaryLang.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteMultipleSectionDictionaryLang.java index 52e42b48..c0b21808 100644 --- 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteMultipleSectionDictionaryLang.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteMultipleSectionDictionaryLang.java @@ -1,6 +1,7 @@ package com.the_qa_company.qendpoint.core.dictionary.impl; import com.the_qa_company.qendpoint.core.compact.integer.VByte; +import com.the_qa_company.qendpoint.core.dictionary.DictionarySection; import com.the_qa_company.qendpoint.core.dictionary.DictionarySectionPrivate; import com.the_qa_company.qendpoint.core.dictionary.TempDictionary; import com.the_qa_company.qendpoint.core.dictionary.impl.section.WriteDictionarySection; @@ -266,5 +267,15 @@ public void load(TempDictionary other, ProgressListener listener) { throw new NotImplementedException(); } + @Override + public long getNgraphs() { + return 0; + } + + @Override + public DictionarySection getGraphs() { + throw new NotImplementedException(); + } + private record TypedByteString(ByteString type, ByteString node, boolean lang) {} } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/DictionarySectionRole.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/DictionarySectionRole.java index 8b4ee605..1386cb08 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/DictionarySectionRole.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/DictionarySectionRole.java @@ -10,7 +10,8 @@ public enum DictionarySectionRole { SUBJECT(() -> TripleComponentRole.SUBJECT, Dictionary::getSubjects), PREDICATE(() -> TripleComponentRole.PREDICATE, Dictionary::getPredicates), OBJECT(() -> TripleComponentRole.OBJECT, Dictionary::getObjects), - SHARED(() -> TripleComponentRole.SUBJECT, Dictionary::getShared); + SHARED(() -> TripleComponentRole.SUBJECT, Dictionary::getShared), + GRAPH(() -> TripleComponentRole.GRAPH, Dictionary::getGraphs); private final Supplier 
roleSupplier; private final Function dictionarySectionFunction; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentRole.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentRole.java index 7dd6a6ef..a7d06923 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentRole.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentRole.java @@ -19,8 +19,6 @@ package com.the_qa_company.qendpoint.core.enums; -import com.the_qa_company.qendpoint.core.dictionary.DictionarySection; - import java.util.function.Supplier; /** @@ -32,7 +30,30 @@ public enum TripleComponentRole { /** The triple is a predicate */ PREDICATE(() -> DictionarySectionRole.PREDICATE, "p", "predicate", false), /** The triple is an object */ - OBJECT(() -> DictionarySectionRole.OBJECT, "o", "object", true); + OBJECT(() -> DictionarySectionRole.OBJECT, "o", "object", true), + /** The triple is an object */ + GRAPH(() -> DictionarySectionRole.GRAPH, "g", "graph", false); + + private static final TripleComponentRole[] NO_GRAPH_ROLES; + + static { + TripleComponentRole[] values = values(); + NO_GRAPH_ROLES = new TripleComponentRole[values.length - 1]; + int j = 0; + for (TripleComponentRole value : values) { + if (value == GRAPH) { + continue; + } + NO_GRAPH_ROLES[j++] = value; + } + } + + /** + * @return same as {@link #values()} without the graph role + */ + public static TripleComponentRole[] valuesNoGraph() { + return NO_GRAPH_ROLES; + } private DictionarySectionRole dictionarySectionRole; private final Supplier dictionarySectionRoleSupplier; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java index ec23cdad..32209122 100644 --- 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java @@ -150,6 +150,16 @@ public HDT doGenerateHDT(String rdfFileName, String baseURI, RDFNotation rdfNota // choose the importer String loaderType = spec.get(HDTOptionsKeys.LOADER_TYPE_KEY); TempHDTImporter loader; + boolean isQuad = rdfNotation == RDFNotation.NQUAD; + if (isQuad) { + if (!spec.contains(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY)) { + spec.set(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD); + } + if (!spec.contains(HDTOptionsKeys.DICTIONARY_TYPE_KEY)) { + spec.set(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION); + } + } + if (HDTOptionsKeys.LOADER_TYPE_VALUE_DISK.equals(loaderType)) { return doGenerateHDTDisk(rdfFileName, baseURI, rdfNotation, CompressionType.guess(rdfFileName), spec, listener); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTVocabulary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTVocabulary.java index 66331799..704884ae 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTVocabulary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTVocabulary.java @@ -75,6 +75,7 @@ public class HDTVocabulary { public static final String DICTIONARY_TYPE_PLAIN = HDT_DICTIONARY_BASE + "Plain>"; public static final String DICTIONARY_TYPE_FOUR_SECTION = HDT_DICTIONARY_BASE + "Four>"; public static final String DICTIONARY_TYPE_MULT_SECTION = HDT_DICTIONARY_BASE + "Mult>"; + public static final String DICTIONARY_TYPE_FOUR_QUAD_SECTION = HDT_DICTIONARY_BASE + "FourQuad>"; public static final String DICTIONARY_TYPE_MULT_SECTION_LANG = HDT_DICTIONARY_BASE + "MultLang>"; public static final String DICTIONARY_TYPE_FOUR_PSFC_SECTION = HDT_DICTIONARY_BASE + "FourPsfc>"; @@ 
-98,6 +99,7 @@ public class HDTVocabulary { public static final String TRIPLES_TYPE_PLAIN = HDT_TRIPLES_BASE + "Plain>"; public static final String TRIPLES_TYPE_COMPACT = HDT_TRIPLES_BASE + "Compact>"; public static final String TRIPLES_TYPE_BITMAP = HDT_TRIPLES_BASE + "Bitmap>"; + public static final String TRIPLES_TYPE_BITMAP_QUAD = HDT_TRIPLES_BASE + "BitmapQuad>"; // Index type public static final String INDEX_TYPE_FOQ = HDT_BASE + "indexFoQ>"; @@ -111,7 +113,7 @@ public class HDTVocabulary { // Bitmaps public static final String BITMAP_TYPE_PLAIN = HDT_BITMAP_BASE + "Plain>"; - public static final String BITMAP_TYPE_ROAR = HDT_BITMAP_BASE + "Roar>"; + public static final String BITMAP_TYPE_ROARING = HDT_BITMAP_BASE + "Roaring>"; // Misc public static final String ORIGINAL_SIZE = HDT_BASE + "originalSize>"; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java index e9437bd1..cb26ab02 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java @@ -323,6 +323,77 @@ public long getLastTriplePosition() { } } + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph) throws NotFoundException { + if (isClosed) { + throw new IllegalStateException("Cannot search an already closed HDT"); + } + + if (!dictionary.supportGraphs()) { + if (graph != null && !graph.isEmpty()) { + throw new IllegalArgumentException("This dictionary doesn't support graph"); + } + // fallback to the default implementation + return search(subject, predicate, object); + } + + // Conversion from TripleString to TripleID + TripleID triple = new TripleID(dictionary.stringToId(subject, TripleComponentRole.SUBJECT), + dictionary.stringToId(predicate, 
TripleComponentRole.PREDICATE), + dictionary.stringToId(object, TripleComponentRole.OBJECT), + dictionary.stringToId(graph, TripleComponentRole.GRAPH)); + + if (triple.isNoMatch()) { + // throw new NotFoundException("String not found in dictionary"); + return new IteratorTripleString() { + @Override + public TripleString next() { + return null; + } + + @Override + public boolean hasNext() { + return false; + } + + @Override + public ResultEstimationType numResultEstimation() { + return ResultEstimationType.EXACT; + } + + @Override + public void goToStart() { + } + + @Override + public long estimatedNumResults() { + return 0; + } + + @Override + public long getLastTriplePosition() { + throw new NotImplementedException(); + } + }; + } + + if (isMapped) { + try { + return new DictionaryTranslateIteratorBuffer(triples.search(triple), dictionary, subject, predicate, + object, graph); + } catch (NullPointerException e) { + e.printStackTrace(); + // FIXME: find why this can happen + return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object, + graph); + } + } else { + return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object, + graph); + } + } + public void loadFromParts(HeaderPrivate h, DictionaryPrivate d, TriplesPrivate t) { this.header = h; this.dictionary = d; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/TempHDTImporterOnePass.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/TempHDTImporterOnePass.java index b5fe15df..9991d1d2 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/TempHDTImporterOnePass.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/TempHDTImporterOnePass.java @@ -54,14 +54,20 @@ public TripleAppender(TempDictionary dict, TempTriples triples, ProgressListener @Override public void processTriple(TripleString triple, long pos) { - 
triples.insert(dict.insert(triple.getSubject(), TripleComponentRole.SUBJECT), - dict.insert(triple.getPredicate(), TripleComponentRole.PREDICATE), - dict.insert(triple.getObject(), TripleComponentRole.OBJECT)); - num++; - size += triple.getSubject().length() + triple.getPredicate().length() + triple.getObject().length() + 4; // Spaces - // and - // final - // dot + long s = dict.insert(triple.getSubject(), TripleComponentRole.SUBJECT); + long p = dict.insert(triple.getPredicate(), TripleComponentRole.PREDICATE); + long o = dict.insert(triple.getObject(), TripleComponentRole.OBJECT); + if (dict.supportGraphs()) { + long g = dict.insert(triple.getGraph(), TripleComponentRole.GRAPH); + triples.insert(s, p, o, g); + size += triple.getSubject().length() + triple.getPredicate().length() + triple.getObject().length() + + triple.getGraph().length() + 5; + } else { + triples.insert(s, p, o); + // Spaces and final dot + size += triple.getSubject().length() + triple.getPredicate().length() + triple.getObject().length() + + triple.getGraph().length() + 4; + } ListenerUtil.notifyCond(listener, "Loaded " + num + " triples", num, 0, 100); } } @@ -112,14 +118,21 @@ public TempHDT loadFromTriples(HDTOptions specs, Iterator iterator long size = 0; while (iterator.hasNext()) { TripleString triple = iterator.next(); - triples.insert(dictionary.insert(triple.getSubject(), TripleComponentRole.SUBJECT), - dictionary.insert(triple.getPredicate(), TripleComponentRole.PREDICATE), - dictionary.insert(triple.getObject(), TripleComponentRole.OBJECT)); - num++; - size += triple.getSubject().length() + triple.getPredicate().length() + triple.getObject().length() + 4; // Spaces - // and - // final - // dot + if (dictionary.supportGraphs()) { + triples.insert(dictionary.insert(triple.getSubject(), TripleComponentRole.SUBJECT), + dictionary.insert(triple.getPredicate(), TripleComponentRole.PREDICATE), + dictionary.insert(triple.getObject(), TripleComponentRole.OBJECT), + 
dictionary.insert(triple.getGraph(), TripleComponentRole.GRAPH)); + // Spaces and final dot + size += triple.getSubject().length() + triple.getPredicate().length() + triple.getObject().length() + + triple.getGraph().length() + 5; + } else { + triples.insert(dictionary.insert(triple.getSubject(), TripleComponentRole.SUBJECT), + dictionary.insert(triple.getPredicate(), TripleComponentRole.PREDICATE), + dictionary.insert(triple.getObject(), TripleComponentRole.OBJECT)); + // Spaces and final dot + size += triple.getSubject().length() + triple.getPredicate().length() + triple.getObject().length() + 4; + } ListenerUtil.notifyCond(listener, "Loaded " + num + " triples", num, 0, 100); } dictionary.endProcessing(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MapOnCallHDT.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MapOnCallHDT.java index b3ede39b..3bbec86e 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MapOnCallHDT.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MapOnCallHDT.java @@ -101,6 +101,12 @@ public IteratorTripleString search(CharSequence subject, CharSequence predicate, return mapOrGetHDT().search(subject, predicate, object); } + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph) throws NotFoundException { + return mapOrGetHDT().search(subject, predicate, object, graph); + } + @Override public void loadFromHDT(InputStream input, ProgressListener listener) throws IOException { ((HDTPrivate) mapOrGetHDT()).loadFromHDT(input, listener); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/writer/TripleWriterHDT.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/writer/TripleWriterHDT.java index de617b72..b8260f78 100644 --- 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/writer/TripleWriterHDT.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/writer/TripleWriterHDT.java @@ -29,7 +29,6 @@ public class TripleWriterHDT implements TripleWriter { HDTOptions spec; String baseUri; - StopWatch st = new StopWatch(); TempHDT modHDT; TempDictionary dictionary; TempTriples triples; @@ -67,15 +66,27 @@ private void init() { } @Override - public void addTriple(TripleString triple) throws IOException { - triples.insert(dictionary.insert(triple.getSubject(), TripleComponentRole.SUBJECT), - dictionary.insert(triple.getPredicate(), TripleComponentRole.PREDICATE), - dictionary.insert(triple.getObject(), TripleComponentRole.OBJECT)); + public void addTriple(TripleString triple) { + boolean isQuad = triple.getGraph().length() > 0; + if (isQuad) { + triples.insert(dictionary.insert(triple.getSubject(), TripleComponentRole.SUBJECT), + dictionary.insert(triple.getPredicate(), TripleComponentRole.PREDICATE), + dictionary.insert(triple.getObject(), TripleComponentRole.OBJECT), + dictionary.insert(triple.getGraph(), TripleComponentRole.GRAPH)); + } else { + triples.insert(dictionary.insert(triple.getSubject(), TripleComponentRole.SUBJECT), + dictionary.insert(triple.getPredicate(), TripleComponentRole.PREDICATE), + dictionary.insert(triple.getObject(), TripleComponentRole.OBJECT)); + } num++; - size += triple.getSubject().length() + triple.getPredicate().length() + triple.getObject().length() + 4; // Spaces - // and - // final - // dot + size += triple.getSubject().length() + triple.getPredicate().length() + triple.getObject().length() + 4 // Spaces + // and + // final + // dot + ; + if (isQuad) { + size += triple.getGraph().length() + 1; // Space + } } @Override @@ -100,7 +111,7 @@ public void close() throws IOException { long originalSize = HeaderUtil.getPropertyLong(modHDT.getHeader(), "_:statistics", HDTVocabulary.ORIGINAL_SIZE); 
hdt.getHeader().insert("_:statistics", HDTVocabulary.ORIGINAL_SIZE, originalSize); - } catch (NotFoundException e) { + } catch (NotFoundException ignore) { } modHDT.close(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java index 2b9009b0..055b0ab5 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java @@ -32,6 +32,7 @@ import com.the_qa_company.qendpoint.core.options.ControlInfo; import com.the_qa_company.qendpoint.core.options.HDTOptions; import com.the_qa_company.qendpoint.core.options.HDTSpecification; +import com.the_qa_company.qendpoint.core.quad.QuadString; import com.the_qa_company.qendpoint.core.rdf.parsers.RDFParserSimple; import com.the_qa_company.qendpoint.core.triples.IteratorTripleString; import com.the_qa_company.qendpoint.core.triples.TripleString; @@ -171,6 +172,22 @@ public IteratorTripleString search(CharSequence subject, CharSequence predicate, return new PlainHeaderIterator(this, pattern); } + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph) { + TripleString pattern; + String objStr = object.toString(); + if (objStr.isEmpty() || objStr.charAt(0) == '<' || objStr.charAt(0) == '"' || objStr.startsWith("http://") + || objStr.startsWith("file://")) { + pattern = new QuadString(HeaderUtil.cleanURI(subject), HeaderUtil.cleanURI(predicate), + HeaderUtil.cleanURI(object), HeaderUtil.cleanURI(graph)); + } else { + pattern = new QuadString(HeaderUtil.cleanURI(subject), HeaderUtil.cleanURI(predicate), '"' + objStr + '"', + HeaderUtil.cleanURI(graph)); + } + return new PlainHeaderIterator(this, pattern); + } + @Override public void processTriple(TripleString triple, long pos) { triples.add(new 
TripleString(triple)); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java index 81a21197..91897e0e 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java @@ -37,7 +37,7 @@ public class DictionaryTranslateIterator implements IteratorTripleString { /** The dictionary */ final Dictionary dictionary; - CharSequence s, p, o; + CharSequence s, p, o, g; long lastSid, lastPid, lastOid; CharSequence lastSstr, lastPstr, lastOstr; @@ -51,7 +51,7 @@ public class DictionaryTranslateIterator implements IteratorTripleString { public DictionaryTranslateIterator(IteratorTripleID iteratorTripleID, Dictionary dictionary) { this.iterator = iteratorTripleID; this.dictionary = dictionary; - this.s = this.p = this.o = ""; + this.s = this.p = this.o = this.g = ""; } /** @@ -67,6 +67,23 @@ public DictionaryTranslateIterator(IteratorTripleID iteratorTripleID, Dictionary this.s = s == null ? "" : s; this.p = p == null ? "" : p; this.o = o == null ? "" : o; + this.g = ""; + } + + /** + * Basic constructor + * + * @param iteratorTripleID Iterator of TripleID to be used + * @param dictionary The dictionary to be used + */ + public DictionaryTranslateIterator(IteratorTripleID iteratorTripleID, Dictionary dictionary, CharSequence s, + CharSequence p, CharSequence o, CharSequence g) { + this.iterator = iteratorTripleID; + this.dictionary = dictionary; + this.s = s == null ? "" : s; + this.p = p == null ? "" : p; + this.o = o == null ? "" : o; + this.g = g == null ? 
"" : g; } /* diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java index e1e13729..978a7eea 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java @@ -18,22 +18,23 @@ package com.the_qa_company.qendpoint.core.iterator; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - import com.the_qa_company.qendpoint.core.dictionary.DictionaryPrivate; import com.the_qa_company.qendpoint.core.dictionary.impl.OptimizedExtractor; import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; +import com.the_qa_company.qendpoint.core.quad.QuadString; import com.the_qa_company.qendpoint.core.triples.IteratorTripleString; import com.the_qa_company.qendpoint.core.triples.TripleID; import com.the_qa_company.qendpoint.core.triples.TripleString; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + /** * Iterator of TripleStrings based on IteratorTripleID */ @@ -54,23 +55,40 @@ private static class TripleIdWithIndex { SuppliableIteratorTripleID iterator; OptimizedExtractor dictionary; - CharSequence s, p, o; + CharSequence s, p, o, g; List triples; Iterator child = Collections.emptyIterator(); - Map mapSubject, mapPredicate, mapObject; + Map mapSubject, mapPredicate, mapObject, mapGraph; + + long lastSid, lastPid, lastOid, lastGid; + CharSequence lastSstr, lastPstr, lastOstr, 
lastGstr; - long lastSid, lastPid, lastOid; - CharSequence lastSstr, lastPstr, lastOstr; + boolean isHDTQ; public DictionaryTranslateIteratorBuffer(SuppliableIteratorTripleID iteratorTripleID, DictionaryPrivate dictionary, CharSequence s, CharSequence p, CharSequence o) { - this(iteratorTripleID, dictionary, s, p, o, DEFAULT_BLOCK_SIZE); + this(iteratorTripleID, dictionary, s, p, o, null, DEFAULT_BLOCK_SIZE, false); } public DictionaryTranslateIteratorBuffer(SuppliableIteratorTripleID iteratorTripleID, DictionaryPrivate dictionary, CharSequence s, CharSequence p, CharSequence o, int blockSize) { + this(iteratorTripleID, dictionary, s, p, o, null, blockSize, false); + } + + public DictionaryTranslateIteratorBuffer(SuppliableIteratorTripleID iteratorTripleID, DictionaryPrivate dictionary, + CharSequence s, CharSequence p, CharSequence o, CharSequence g) { + this(iteratorTripleID, dictionary, s, p, o, g, DEFAULT_BLOCK_SIZE, true); + } + + public DictionaryTranslateIteratorBuffer(SuppliableIteratorTripleID iteratorTripleID, DictionaryPrivate dictionary, + CharSequence s, CharSequence p, CharSequence o, CharSequence g, int blockSize) { + this(iteratorTripleID, dictionary, s, p, o, g, blockSize, true); + } + + private DictionaryTranslateIteratorBuffer(SuppliableIteratorTripleID iteratorTripleID, DictionaryPrivate dictionary, + CharSequence s, CharSequence p, CharSequence o, CharSequence g, int blockSize, boolean isHDTQ) { this.blockSize = blockSize; this.iterator = iteratorTripleID; this.dictionary = dictionary.createOptimizedMapExtractor(); @@ -78,6 +96,8 @@ public DictionaryTranslateIteratorBuffer(SuppliableIteratorTripleID iteratorTrip this.s = s == null ? "" : s; this.p = p == null ? "" : p; this.o = o == null ? "" : o; + this.g = g == null ? 
"" : g; + this.isHDTQ = isHDTQ; } private void reset() { @@ -94,6 +114,10 @@ private void reset() { if (o.length() == 0) { mapObject = new HashMap<>(blockSize); } + + if (g.length() == 0) { + mapGraph = new HashMap<>(blockSize); + } } private void fill(long[] arr, int count, Map map, TripleComponentRole role) { @@ -119,6 +143,7 @@ private void fetchBlock() { long[] arrSubjects = new long[blockSize]; long[] arrPredicates = new long[blockSize]; long[] arrObjects = new long[blockSize]; + long[] arrGraphs = new long[blockSize]; int count = 0; for (int i = 0; i < blockSize && iterator.hasNext(); i++) { @@ -135,15 +160,23 @@ private void fetchBlock() { arrPredicates[count] = t.getPredicate(); if (o.length() == 0) arrObjects[count] = t.getObject(); + if (g.length() == 0) + arrGraphs[count] = t.getGraph(); count++; } - if (s.length() == 0) + if (s.length() == 0) { fill(arrSubjects, count, mapSubject, TripleComponentRole.SUBJECT); - if (p.length() == 0) + } + if (p.length() == 0) { fill(arrPredicates, count, mapPredicate, TripleComponentRole.PREDICATE); - if (o.length() == 0) + } + if (o.length() == 0) { fill(arrObjects, count, mapObject, TripleComponentRole.OBJECT); + } + if (g.length() == 0 && isHDTQ) { + fill(arrGraphs, count, mapGraph, TripleComponentRole.GRAPH); + } this.child = triples.iterator(); } @@ -156,7 +189,7 @@ private void fetchBlock() { public boolean hasNext() { boolean more = child.hasNext() || iterator.hasNext(); if (!more) { - mapSubject = mapPredicate = mapObject = null; + mapSubject = mapPredicate = mapObject = mapGraph = null; triples = null; } return more; @@ -197,6 +230,17 @@ public TripleString next() { lastOstr = mapObject.get(lastOid); } + if (g.length() != 0) { + lastGstr = g; + } else if (triple.getGraph() != lastGid) { + lastGid = triple.getGraph(); + lastGstr = mapGraph.get(lastGid); + } + + if (isHDTQ) { + return new QuadString(lastSstr, lastPstr, lastOstr, lastGstr); + } + return new TripleString(lastSstr, lastPstr, lastOstr); } diff --git 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptions.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptions.java index ea9e7d1f..58c22deb 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptions.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptions.java @@ -196,6 +196,17 @@ static HDTOptions readFromFile(String filename) throws IOException { */ String get(String key); + /** + * check if an option is valid + * + * @param key key + * @return true if null or empty, false otherwise + */ + default boolean contains(String key) { + String s = get(key); + return s != null && !s.isEmpty(); + } + /** * Get a path value * diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java index e4396179..18e8491c 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java @@ -301,6 +301,10 @@ public class HDTOptionsKeys { */ @Value(key = TEMP_DICTIONARY_IMPL_KEY, desc = "hash dictionary") public static final String TEMP_DICTIONARY_IMPL_VALUE_HASH = "hash"; + /** + * use Hash quad to create the HDTQ + */ + public static final String TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD = "hashQuad"; /** * use Hash map to create the HDT and store the multisection dictionary, * mandatory to create MSC @@ -324,6 +328,11 @@ public class HDTOptionsKeys { */ @Value(key = DICTIONARY_TYPE_KEY, desc = "Four section dictionary") public static final String DICTIONARY_TYPE_VALUE_FOUR_SECTION = HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION; + /* + * 4 Quad Section dictionary + */ + @Value(key = DICTIONARY_TYPE_KEY, desc = "Four quad section dictionary") + public static final String 
DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION = HDTVocabulary.DICTIONARY_TYPE_FOUR_QUAD_SECTION; /** * Prefix AND Suffix front-coded (PSFC) 4 Section dictionary */ diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quads/QuadString.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/QuadString.java similarity index 67% rename from qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quads/QuadString.java rename to qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/QuadString.java index 1b1ad8d4..35919309 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quads/QuadString.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/QuadString.java @@ -1,4 +1,4 @@ -package com.the_qa_company.qendpoint.core.quads; +package com.the_qa_company.qendpoint.core.quad; import com.the_qa_company.qendpoint.core.exceptions.ParserException; import com.the_qa_company.qendpoint.core.triples.TripleString; @@ -18,7 +18,7 @@ public QuadString(CharSequence subject, CharSequence predicate, CharSequence obj public QuadString(TripleString other) { super(other); - this.context = other.getObject(); + this.context = other.getGraph(); } @Override @@ -29,12 +29,15 @@ public void clear() { @Override public boolean equals(Object other) { - if (context.isEmpty()) { - return super.equals(other); + if (!(other instanceof QuadString qs)) { + if (context.length() == 0) { + // not a quad string, maybe it is a TripleString + return super.equals(other); + } + return false; } - return other instanceof QuadString qs && equalsCharSequence(subject, qs.subject) - && equalsCharSequence(predicate, qs.predicate) && equalsCharSequence(object, qs.object) - && equalsCharSequence(context, qs.context); + return equalsCharSequence(subject, qs.subject) && equalsCharSequence(predicate, qs.predicate) + && equalsCharSequence(object, qs.object) && equalsCharSequence(context, qs.context); } @Override @@ -68,13 
+71,14 @@ public void setAll(CharSequence subject, CharSequence predicate, CharSequence ob @Override public boolean match(TripleString pattern) { - if (!context.isEmpty() && !(pattern instanceof QuadString qs && equalsCharSequence(qs.context, context))) { + if (context.length() != 0 + && !(pattern instanceof QuadString && equalsCharSequence(((QuadString) pattern).context, context))) { // if a context is defined, we don't match return false; } - if (pattern.getSubject().isEmpty() || equalsCharSequence(pattern.getSubject(), this.subject)) { - if (pattern.getPredicate().isEmpty() || equalsCharSequence(pattern.getPredicate(), this.predicate)) { - return pattern.getObject().isEmpty() || equalsCharSequence(pattern.getObject(), this.object); + if (pattern.getSubject().length() == 0 || equalsCharSequence(pattern.getSubject(), this.subject)) { + if (pattern.getPredicate().length() == 0 || equalsCharSequence(pattern.getPredicate(), this.predicate)) { + return pattern.getObject().length() == 0 || equalsCharSequence(pattern.getObject(), this.object); } } return false; @@ -82,7 +86,7 @@ public boolean match(TripleString pattern) { @Override public boolean isEmpty() { - return super.isEmpty() && context.isEmpty(); + return super.isEmpty() && context.length() == 0; } @Override @@ -110,4 +114,12 @@ public int hashCode() { public QuadString tripleToString() { return new QuadString(subject.toString(), predicate.toString(), object.toString(), context.toString()); } + + @Override + public String toString() { + if (context.length() == 0) { + return super.toString(); + } + return super.toString() + " " + context; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIterator.java new file mode 100644 index 00000000..eca2acd6 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIterator.java 
@@ -0,0 +1,139 @@ +package com.the_qa_company.qendpoint.core.quad.impl; + +import java.util.List; + +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriples; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIterator; +import com.the_qa_company.qendpoint.core.triples.impl.TripleOrderConvert; + +public class BitmapQuadsIterator extends BitmapTriplesIterator { + + // resolves ????, S???, SP??, SPO? queries + + private final List bitmapsGraph; // one bitmap per graph + private final long numberOfGraphs; + private long posG; // the current graph bitmap + private long g; // g is variable + + public BitmapQuadsIterator(BitmapTriples triples, TripleID pattern) { + super(triples, pattern, false); + this.bitmapsGraph = triples.getQuadInfoAG(); + this.numberOfGraphs = bitmapsGraph.size(); + newSearch(pattern); + } + + @Override + public void goToStart() { + super.goToStart(); + posG = 0; + while (!bitmapsGraph.get((int) posG).access(posZ)) { + posG++; + } + g = posG + 1; + } + + @Override + public long estimatedNumResults() { + long results = 0; + for (int i = 0; i < numberOfGraphs; i++) { + results += bitmapsGraph.get(i).rank1(maxZ - 1) - bitmapsGraph.get(i).rank1(minZ - 1); + } + return results; + } + + /* + * Get the next solution + */ + @Override + public TripleID next() { + z = adjZ.get(posZ); // get the next object (Z). 
We just retrieve it from + // the list of objects (AdjZ) from current position + // posZ + if (posZ >= nextZ) { // if, with the current position of the object + // (posZ), we have reached the next list of + // objects (starting in nexZ), then we should + // update the associated predicate (Y) and, + // potentially, also the associated subject (X) + posY = triples.getBitmapZ().rank1(posZ - 1); // move to the next + // position of + // predicates + y = adjY.get(posY); // get the next predicate (Y). We just retrieve + // it from the list of predicates(AdjY) from + // current position posY + nextZ = adjZ.findNext(posZ) + 1; // update nextZ, storing in which + // position (in adjZ) ends the + // list of objects associated + // with the current + // subject,predicate + if (posY >= nextY) { // if we have reached the next list of objects + // (starting in nexZ) we should update the + // associated predicate (Y) and, + // potentially, also the associated subject + // (X) + x = triples.getBitmapY().rank1(posY - 1) + 1; // get the next + // subject (X) + nextY = adjY.findNext(posY) + 1; // update nextY, storing in + // which position (in AdjY) + // ends the list of + // predicates associated + // with the current subject + } + } + + g = posG + 1; + + // set posG to the next graph of this triple + do { + posG++; + } while (posG + 1 <= numberOfGraphs && !bitmapsGraph.get((int) posG).access(posZ)); + + if (posG == numberOfGraphs) { // there are no further graphs for this + // triple + posZ++; + if (posZ < maxZ) { + posG = 0; + while (!bitmapsGraph.get((int) posG).access(posZ)) { + posG++; + } + } + } + + updateOutput(); // set the components (subject,predicate,object,graph) + // of the returned triple + return returnTriple; // return the triple as solution + } + + /* + * Set the components (subject,predicate,object) of the returned triple + */ + @Override + protected void updateOutput() { + returnTriple.setAll(x, y, z, g); + TripleOrderConvert.swapComponentOrder(returnTriple, 
triples.getOrder(), TripleComponentOrder.SPO); + } + + @Override + public boolean hasPrevious() { + throw new NotImplementedException(); + } + + @Override + public TripleID previous() { + throw new NotImplementedException(); + } + + @Override + public boolean canGoTo() { + throw new NotImplementedException(); + } + + @Override + public void goTo(long pos) { + throw new NotImplementedException(); + } + +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorG.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorG.java new file mode 100644 index 00000000..ec0dce53 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorG.java @@ -0,0 +1,116 @@ +package com.the_qa_company.qendpoint.core.quad.impl; + +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriples; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIterator; + +public class BitmapQuadsIteratorG extends BitmapTriplesIterator { + + // resolves ???G, S??G, SP?G, SPOG queries + + private final Bitmap bitmapGraph; // the graph bitmap for the search + + public BitmapQuadsIteratorG(BitmapTriples triples, TripleID pattern) { + super(triples, pattern, false); + this.bitmapGraph = triples.getQuadInfoAG().get((int) pattern.getGraph() - 1); + newSearch(pattern); + } + + @Override + public void goToStart() { + if (minZ >= maxZ || minZ == -1) { // no results + posZ = maxZ; + } else { + super.goToStart(); + } + if (!bitmapGraph.access(posZ)) { + posZ = bitmapGraph.selectNext1(posZ + 1); + } + } + + @Override + public long estimatedNumResults() { + if (minZ == -1) { + return 0; + } + return bitmapGraph.rank1(maxZ - 1) - bitmapGraph.rank1(minZ - 
1); + } + + /* + * Check if there are more solution + */ + @Override + public boolean hasNext() { + return posZ < maxZ && posZ != -1; // Just check if we have arrived to + // the maximum position of the + // objects that resolve the query + } + + /* + * Get the next solution + */ + @Override + public TripleID next() { + z = adjZ.get(posZ); // get the next object (Z). We just retrieve it from + // the list of objects (AdjZ) from current position + // posZ + if (posZ >= nextZ) { // if, with the current position of the object + // (posZ), we have reached the next list of + // objects (starting in nexZ), then we should + // update the associated predicate (Y) and, + // potentially, also the associated subject (X) + posY = triples.getBitmapZ().rank1(posZ - 1); // move to the next + // position of + // predicates + y = adjY.get(posY); // get the next predicate (Y). We just retrieve + // it from the list of predicates(AdjY) from + // current position posY + nextZ = adjZ.findNext(posZ) + 1; // update nextZ, storing in which + // position (in adjZ) ends the + // list of objects associated + // with the current + // subject,predicate + if (posY >= nextY) { // if we have reached the next list of objects + // (starting in nexZ) we should update the + // associated predicate (Y) and, + // potentially, also the associated subject + // (X) + x = triples.getBitmapY().rank1(posY - 1) + 1; // get the next + // subject (X) + nextY = adjY.findNext(posY) + 1; // update nextY, storing in + // which position (in AdjY) + // ends the list of + // predicates associated + // with the current subject + } + } + posZ = bitmapGraph.selectNext1(posZ + 1); + + updateOutput(); // set the components (subject,predicate,object,graph) + // of the returned triple + return returnTriple; // return the triple as solution + } + + @Override + public boolean hasPrevious() { + throw new NotImplementedException(); + } + + @Override + public TripleID previous() { + throw new NotImplementedException(); + } + + 
@Override + public boolean canGoTo() { + throw new NotImplementedException(); + } + + @Override + public void goTo(long pos) { + throw new NotImplementedException(); + } + +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYFOQ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYFOQ.java new file mode 100644 index 00000000..f3c19e98 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYFOQ.java @@ -0,0 +1,97 @@ +package com.the_qa_company.qendpoint.core.quad.impl; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriples; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIteratorYFOQ; +import com.the_qa_company.qendpoint.core.iterator.SuppliableIteratorTripleID; + +public class BitmapQuadsIteratorYFOQ implements SuppliableIteratorTripleID { + + private final BitmapTriplesIteratorYFOQ inIt; + private final List bitmapGraphs; + private List graphs; + private TripleID curTriple; + + public BitmapQuadsIteratorYFOQ(BitmapTriples triples, TripleID pattern) { + this.inIt = new BitmapTriplesIteratorYFOQ(triples, pattern); + this.bitmapGraphs = triples.getQuadInfoAG(); + this.graphs = new ArrayList<>(); + } + + private void updateNextTriple() { + if (!this.inIt.hasNext()) + throw new RuntimeException("inIt should have next"); + this.curTriple = this.inIt.next(); + this.graphs = bitmapGraphs.stream().parallel().filter(graph -> graph.access((int) this.inIt.getPosZ() - 1)) + .map(graph -> bitmapGraphs.indexOf(graph) + 
1L).collect(Collectors.toList()); + } + + @Override + public boolean hasNext() { + return this.graphs.size() > 0 || this.inIt.hasNext(); + } + + @Override + public TripleID next() { + if (graphs.isEmpty()) { + this.updateNextTriple(); + return this.next(); + } + long curGraph = graphs.remove(0); + TripleID curTriple = this.curTriple.clone(); + curTriple.setGraph(curGraph); + return curTriple; + } + + @Override + public void goToStart() { + this.inIt.goToStart(); + } + + @Override + public boolean hasPrevious() { + return this.inIt.hasPrevious(); + } + + @Override + public TripleID previous() { + return this.inIt.previous(); + } + + @Override + public boolean canGoTo() { + return this.inIt.canGoTo(); + } + + @Override + public void goTo(long pos) { + this.inIt.goTo(pos); + } + + @Override + public long estimatedNumResults() { + return this.inIt.estimatedNumResults(); + } + + @Override + public ResultEstimationType numResultEstimation() { + return this.inIt.numResultEstimation(); + } + + @Override + public TripleComponentOrder getOrder() { + return this.inIt.getOrder(); + } + + @Override + public long getLastTriplePosition() { + return this.inIt.getLastTriplePosition(); + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYGFOQ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYGFOQ.java new file mode 100644 index 00000000..9020a808 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYGFOQ.java @@ -0,0 +1,96 @@ +package com.the_qa_company.qendpoint.core.quad.impl; + +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.iterator.SuppliableIteratorTripleID; +import 
com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriples; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIteratorYFOQ; + +public class BitmapQuadsIteratorYGFOQ implements SuppliableIteratorTripleID { + + // resolves ?P?G queries + private final Bitmap bitmapGraph; // the bitmap of the requested graph + private final BitmapTriplesIteratorYFOQ inIt; + private TripleID nextRes = null; + + public BitmapQuadsIteratorYGFOQ(BitmapTriples triples, TripleID pattern) { + this.inIt = new BitmapTriplesIteratorYFOQ(triples, pattern); + this.bitmapGraph = triples.getQuadInfoAG().get((int) pattern.getGraph() - 1); + this.goToStart(); + this.calculateNext(); + } + + private boolean isValidZ() { + return this.inIt.getPosZ() != -1 && this.bitmapGraph.access(this.inIt.getPosZ() - 1); + } + + @Override + public void goToStart() { + this.inIt.goToStart(); + } + + @Override + public boolean hasNext() { + return this.nextRes != null; + } + + private void calculateNext() { + this.nextRes = null; + while (this.inIt.hasNext()) { + TripleID next = this.inIt.next().clone(); + if (!this.isValidZ()) + continue; + this.nextRes = next; + break; + } + } + + @Override + public TripleID next() { + TripleID res = this.nextRes.clone(); + this.calculateNext(); + return res; + } + + @Override + public boolean hasPrevious() { + return this.inIt.hasPrevious(); + } + + @Override + public TripleID previous() { + return this.inIt.previous(); + } + + @Override + public boolean canGoTo() { + return this.inIt.canGoTo(); + } + + @Override + public void goTo(long pos) { + this.inIt.goTo(pos); + } + + @Override + public long estimatedNumResults() { + return this.inIt.estimatedNumResults(); + } + + @Override + public ResultEstimationType numResultEstimation() { + return this.inIt.numResultEstimation(); + } + + @Override + public TripleComponentOrder getOrder() { + return this.inIt.getOrder(); + } + + @Override + public long 
getLastTriplePosition() { + return this.inIt.getLastTriplePosition(); + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZFOQ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZFOQ.java new file mode 100644 index 00000000..348d6726 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZFOQ.java @@ -0,0 +1,223 @@ +package com.the_qa_company.qendpoint.core.quad.impl; + +import java.util.List; + +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriples; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIteratorZFOQ; +import com.the_qa_company.qendpoint.core.triples.impl.TripleOrderConvert; + +public class BitmapQuadsIteratorZFOQ extends BitmapTriplesIteratorZFOQ { + + // resolves ?PO?, ??O? 
queries + + private final List bitmapsGraph; // one bitmap per graph + private final int numberOfGraphs; + private long posG; // the current graph bitmap + private int g; // g is variable + private boolean updateXYZ = true; + + public BitmapQuadsIteratorZFOQ(BitmapTriples triples, TripleID pattern) { + super(triples, pattern); + this.bitmapsGraph = triples.getQuadInfoAG(); + this.numberOfGraphs = bitmapsGraph.size(); + newSearch(pattern); + } + + @Override + public void goToStart() { + super.goToStart(); + if (hasNext()) { + posG = getNextGraphPosition(0); + } + } + + /* + * Get the next solution + */ + @Override + public TripleID next() { + if (updateXYZ) { + long posY = adjIndex.get(posIndex); // get the position of the next + // occurrence of the predicate + // in AdjY + + z = patZ != 0 ? patZ : (int) adjIndex.findListIndex(posIndex) + 1; // get + // the + // next + // object + // (z) + // as + // the + // number + // of + // list + // in + // adjIndex + // corresponding + // to + // posIndex + y = patY != 0 ? 
patY : (int) adjY.get(posY); // get the next + // predicate (y) as + // the element in + // adjY stores in + // position posY + x = (int) adjY.findListIndex(posY) + 1; // get the next subject (X) + // as the number of list in + // adjY corresponding to + // posY + + updateXYZ = false; + } + g = (int) posG + 1; + + posG = getNextGraphPosition((int) posG + 1); // get the next graph + // position for the + // current triple + if (posG == numberOfGraphs) { // there are no further graphs for this + // triple + posIndex++; // increase the position of the next occurrence of the + // predicate + if (hasNext()) { + updateXYZ = true; + posG = getNextGraphPosition(0); + } + } + + updateOutput(); // set the components (subject,predicate,object) of the + // returned triple + return returnTriple; // return the triple as solution + } + + private int getNextGraphPosition(int pos) { + int nextTriplePos = (int) getNextTriplePosition(); + while (pos < numberOfGraphs && !bitmapsGraph.get(pos).access(nextTriplePos)) { + pos++; + } + return pos; + } + + private long getNextTriplePosition() { + try { + return triples.getAdjacencyListZ().find(adjIndex.get(posIndex), patZ); + } catch (Exception ignore) { + return 0; + } + } + + @Override + protected void updateOutput() { + returnTriple.setAll(x, y, z, g); + TripleOrderConvert.swapComponentOrder(returnTriple, triples.getOrder(), TripleComponentOrder.SPO); + } + + @Override + public ResultEstimationType numResultEstimation() { + return ResultEstimationType.MORE_THAN; + } + + private void newSearch(TripleID pattern) { + this.pattern.assign(pattern); + + TripleOrderConvert.swapComponentOrder(this.pattern, TripleComponentOrder.SPO, triples.getOrder()); + patZ = this.pattern.getObject(); + if (patZ == 0 && (patY != 0 || this.pattern.getSubject() != 0)) { + throw new IllegalArgumentException("This structure is not meant to process this pattern"); + } + + patY = this.pattern.getPredicate(); + + adjY = triples.getAdjacencyListY(); + adjIndex = 
triples.getAdjacencyListIndex(); // adjIndex has the list of + // positions in adjY + + findRange(); // get the boundaries where the solution for the given + // object can be found + goToStart(); // load the first solution and position the next pointers + } + + private void findRange() { + if (patZ == 0) { // if the object is not provided (usually it is in this + // iterator) + minIndex = 0; + maxIndex = adjIndex.getNumberOfElements(); + return; + } + minIndex = adjIndex.find(patZ - 1); // find the position of the first + // occurrence of the object + maxIndex = adjIndex.last(patZ - 1); // find the position of the last + // occurrence of the object + + if (patY != 0) { // if the predicate is provided then we do a binary + // search to search for such predicate + while (minIndex <= maxIndex) { + long mid = (minIndex + maxIndex) / 2; + long predicate = getY(mid); // get predicate at mid position in + // the object index + + if (patY > predicate) { + minIndex = mid + 1; + } else if (patY < predicate) { + maxIndex = mid - 1; + } else { // the predicate has been found, now we have to find + // the min and max limits (the predicate P is + // repeated for each PO occurrence in the triples) + // Binary Search to find left boundary + long left = minIndex; + long right = mid; + long pos = 0; + + while (left <= right) { + pos = (left + right) / 2; + + predicate = getY(pos); + + if (predicate != patY) { + left = pos + 1; + } else { + right = pos - 1; + } + } + minIndex = predicate == patY ? pos : pos + 1; + + // Binary Search to find right boundary + left = mid; + right = maxIndex; + + while (left <= right) { + pos = (left + right) / 2; + predicate = getY(pos); + + if (predicate != patY) { + right = pos - 1; + } else { + left = pos + 1; + } + } + maxIndex = predicate == patY ? 
pos : pos - 1; + + break; + } + } + } + } + + @Override + public boolean hasPrevious() { + throw new NotImplementedException(); + } + + @Override + public TripleID previous() { + throw new NotImplementedException(); + } + + @Override + public void goTo(long pos) { + throw new NotImplementedException(); + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZGFOQ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZGFOQ.java new file mode 100644 index 00000000..18b3979f --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZGFOQ.java @@ -0,0 +1,202 @@ +package com.the_qa_company.qendpoint.core.quad.impl; + +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriples; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIteratorZFOQ; +import com.the_qa_company.qendpoint.core.triples.impl.TripleOrderConvert; + +public class BitmapQuadsIteratorZGFOQ extends BitmapTriplesIteratorZFOQ { + // resolves ?POG, ??OG queries + + private final Bitmap bitmapGraph; // the bitmap of the requested graph + + public BitmapQuadsIteratorZGFOQ(BitmapTriples triples, TripleID pattern) { + super(triples, pattern); + this.bitmapGraph = triples.getQuadInfoAG().get((int) pattern.getGraph() - 1); + newSearch(pattern); + } + + protected void findRange() { + findRange2(); + while (maxIndex >= minIndex && !bitmapGraph.access(getTriplePosition(maxIndex))) { + maxIndex--; + } + + while (maxIndex >= minIndex && !bitmapGraph.access(getTriplePosition(minIndex))) { + minIndex++; + } 
+ } + + /* + * Check if there are more solutions + */ + @Override + public boolean hasNext() { + return posIndex <= maxIndex && maxIndex >= minIndex; + } + + /* + * Get the next solution + */ + @Override + public TripleID next() { + long posY = adjIndex.get(posIndex); // get the position of the next + // occurrence of the predicate in + // AdjY + + z = patZ != 0 ? patZ : (int) adjIndex.findListIndex(posIndex) + 1; // get + // the + // next + // object + // (z) + // as + // the + // number + // of + // list + // in + // adjIndex + // corresponding + // to + // posIndex + y = patY != 0 ? patY : (int) adjY.get(posY); // get the next predicate + // (y) as the element in + // adjY stored at + // position posY + x = (int) adjY.findListIndex(posY) + 1; // get the next subject (X) as + // the number of list in adjY + // corresponding to posY + + do { + posIndex++; // increase the position of the next occurrence of the + // predicate + } while (posIndex < maxIndex && !bitmapGraph.access(getNextTriplePosition())); + + updateOutput(); // set the components (subject,predicate,object) of the + // returned triple + return returnTriple; // return the triple as solution + } + + public long getTriplePosition(long index) { + try { + return triples.getAdjacencyListZ().find(adjIndex.get(index), patZ); + } catch (Exception ignore) { + return 0; + } + } + + protected void newSearch(TripleID pattern) { + this.pattern.assign(pattern); + + TripleOrderConvert.swapComponentOrder(this.pattern, TripleComponentOrder.SPO, triples.getOrder()); + patZ = this.pattern.getObject(); + if (patZ == 0 && (patY != 0 || this.pattern.getSubject() != 0)) { + throw new IllegalArgumentException("This structure is not meant to process this pattern"); + } + + patY = this.pattern.getPredicate(); + + adjY = triples.getAdjacencyListY(); + adjIndex = triples.getAdjacencyListIndex(); // adjIndex has the list of + // positions in adjY + + findRange(); // get the boundaries where the solution for the given + // object can 
be found + goToStart(); // load the first solution and position the next pointers + } + + protected void findRange2() { + if (patZ == 0) { // if the object is not provided (usually it is in this + // iterator) + minIndex = 0; + maxIndex = adjIndex.getNumberOfElements(); + return; + } + minIndex = adjIndex.find(patZ - 1); // find the position of the first + // occurrence of the object + maxIndex = adjIndex.last(patZ - 1); // find the position of the last + // occurrence of the object + + if (patY != 0) { // if the predicate is provided then we do a binary + // search to search for such predicate + while (minIndex <= maxIndex) { + long mid = (minIndex + maxIndex) / 2; + long predicate = getY(mid); // get predicate at mid position in + // the object index + if (patY > predicate) { + minIndex = mid + 1; + } else if (patY < predicate) { + maxIndex = mid - 1; + } else { // the predicate has been found, now we have to find + // the min and max limits (the predicate P is + // repeated for each PO occurrence in the triples) + // Binary Search to find left boundary + long left = minIndex; + long right = mid; + long pos = 0; + + while (left <= right) { + pos = (left + right) / 2; + + predicate = getY(pos); + + if (predicate != patY) { + left = pos + 1; + } else { + right = pos - 1; + } + } + minIndex = predicate == patY ? pos : pos + 1; + // Binary Search to find right boundary + left = mid; + right = maxIndex; + + while (left <= right) { + pos = (left + right) / 2; + predicate = getY(pos); + + if (predicate != patY) { + right = pos - 1; + } else { + left = pos + 1; + } + } + maxIndex = predicate == patY ? 
pos : pos - 1; + break; + } + } + } + } + + public long getNextTriplePosition() { + try { + return triples.getAdjacencyListZ().find(adjIndex.get(posIndex), patZ); + } catch (Exception ignore) { + return 0; + } + } + + @Override + public ResultEstimationType numResultEstimation() { + return ResultEstimationType.UP_TO; + } + + @Override + public boolean hasPrevious() { + throw new NotImplementedException(); + } + + @Override + public TripleID previous() { + throw new NotImplementedException(); + } + + @Override + public void goTo(long pos) { + throw new NotImplementedException(); + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFAccess.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFAccess.java index 6bf64a03..6b753100 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFAccess.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFAccess.java @@ -45,17 +45,36 @@ public interface RDFAccess extends Iterable { IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object) throws NotFoundException; + /** + * Iterate over the triples of an RDF Set that match the specified pattern. + * null and empty strings act as a wildcard. Default implementation ignore + * the graph (e.g. 
search(null, null, null, null) iterates over all + * elements) + * + * @param subject The subject to search + * @param predicate The predicate to search + * @param object The object to search + * @param graph The graph to search + * @return Iterator of TripleStrings + * @throws NotFoundException when the triple cannot be found + */ + default IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph) throws NotFoundException { + return search(subject, predicate, object); + } + default IteratorTripleString search(TripleString triple) throws NotFoundException { - return search(triple.getSubject(), triple.getPredicate(), triple.getObject()); + return search(triple.getSubject(), triple.getPredicate(), triple.getObject(), triple.getGraph()); } default IteratorTripleString searchAll() throws NotFoundException { - return search("", "", ""); + return search("", "", "", ""); } + @Override default Iterator iterator() { try { - return search("", "", ""); + return searchAll(); } catch (NotFoundException e) { return EmptyIterator.of(); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/JenaNodeFormatter.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/JenaNodeFormatter.java index 8e4f2612..dc4a725d 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/JenaNodeFormatter.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/JenaNodeFormatter.java @@ -19,6 +19,7 @@ import org.apache.jena.datatypes.xsd.impl.RDFLangString; import org.apache.jena.graph.Node; import org.apache.jena.rdf.model.RDFNode; +import org.apache.jena.sparql.core.Quad; /** * Converts a Jena {@link Node} to a String format that will round trip back to @@ -37,7 +38,7 @@ public static String format(RDFNode n) { } public static String format(Node node) { - if (node == null) { + if (node == null || Quad.isDefaultGraph(node)) 
{ return ""; } if (node.isURI()) { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserRIOT.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserRIOT.java index 7773737d..0cd7bc54 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserRIOT.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserRIOT.java @@ -21,7 +21,7 @@ import java.io.FileNotFoundException; import java.io.InputStream; -import com.the_qa_company.qendpoint.core.quads.QuadString; +import com.the_qa_company.qendpoint.core.quad.QuadString; import org.apache.jena.graph.Triple; import org.apache.jena.riot.Lang; import org.apache.jena.riot.RDFParser; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserSimple.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserSimple.java index 178ec9fa..e6bf1203 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserSimple.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserSimple.java @@ -20,7 +20,7 @@ import com.the_qa_company.qendpoint.core.enums.RDFNotation; import com.the_qa_company.qendpoint.core.exceptions.ParserException; -import com.the_qa_company.qendpoint.core.quads.QuadString; +import com.the_qa_company.qendpoint.core.quad.QuadString; import com.the_qa_company.qendpoint.core.rdf.RDFParserCallback; import com.the_qa_company.qendpoint.core.triples.TripleString; import com.the_qa_company.qendpoint.core.util.io.IOUtil; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/storage/QEPCore.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/storage/QEPCore.java index 70fbee13..d1c142df 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/storage/QEPCore.java +++ 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/storage/QEPCore.java @@ -278,7 +278,7 @@ public long triplesCount() { private QEPDataset openDataset(String id, Path path) throws IOException { HDT dataset = null; ModifiableBitmap bitmap = null; - ModifiableBitmap[] deltaBitmaps = new ModifiableBitmap[TripleComponentRole.values().length]; + ModifiableBitmap[] deltaBitmaps = new ModifiableBitmap[TripleComponentRole.valuesNoGraph().length]; try { // avoid loading collisions Path workDir = options.getPath(BITMAPTRIPLES_SEQUENCE_DISK_LOCATION, () -> location.resolve("work")) @@ -307,7 +307,7 @@ private QEPDataset openDataset(String id, Path path) throws IOException { bitmap = Bitmap64Big.disk(deleteBitmapPath, dataset.getTriples().getNumberOfElements()); } - for (TripleComponentRole role : TripleComponentRole.values()) { + for (TripleComponentRole role : TripleComponentRole.valuesNoGraph()) { Path deltaBitmapPath = path.resolveSibling(path.getFileName() + ".delta-" + role.getTitle() + ".bm"); long size = dataset.getDictionary().getNSection(role, role == TripleComponentRole.SUBJECT); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/storage/QEPMap.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/storage/QEPMap.java index b69398d8..c42bbf21 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/storage/QEPMap.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/storage/QEPMap.java @@ -73,9 +73,9 @@ public class QEPMap implements Closeable { headerSize += Long.BYTES * 4 * 2; // id size - headerSize += Long.BYTES * TripleComponentRole.values().length; + headerSize += Long.BYTES * TripleComponentRole.valuesNoGraph().length; // map size - headerSize += Long.BYTES * TripleComponentRole.values().length; + headerSize += Long.BYTES * TripleComponentRole.valuesNoGraph().length; HEADER_SIZE = headerSize; } @@ -144,7 +144,7 @@ public static long getIdOfMapped(long mappedId, long 
countShared) { final Path path; final Uid uid; final QEPCore core; - final SectionMap[] maps = new SectionMap[TripleComponentRole.values().length]; + final SectionMap[] maps = new SectionMap[TripleComponentRole.valuesNoGraph().length]; final boolean useDataset1; final DatasetNodeConverter[] nodeConverters = new DatasetNodeConverter[maps.length]; @@ -173,9 +173,9 @@ public static long getIdOfMapped(long mappedId, long countShared) { uid = Uid.of(this.dataset1.uid(), this.dataset2.uid()); // compute the useDataset1 var - long sizeDataset1 = Arrays.stream(TripleComponentRole.values()) + long sizeDataset1 = Arrays.stream(TripleComponentRole.valuesNoGraph()) .mapToLong(r -> dataset1.dataset().getDictionary().getNSection(r, r == SUBJECT)).sum(); - long sizeDataset2 = Arrays.stream(TripleComponentRole.values()) + long sizeDataset2 = Arrays.stream(TripleComponentRole.valuesNoGraph()) .mapToLong(r -> dataset2.dataset().getDictionary().getNSection(r, r == SUBJECT)).sum(); useDataset1 = sizeDataset1 < sizeDataset2; } @@ -205,9 +205,9 @@ public void sync() throws IOException { CloseMappedByteBuffer crcBuffer = IOUtil.mapChannel(mapHeaderPath, channel, FileChannel.MapMode.READ_WRITE, HEADER_SIZE, crc.sizeof())) { // store the id and the location to write it after creation - long[] index1Size = new long[TripleComponentRole.values().length]; + long[] index1Size = new long[TripleComponentRole.valuesNoGraph().length]; int[] index1Location = new int[index1Size.length]; - long[] index2Size = new long[TripleComponentRole.values().length]; + long[] index2Size = new long[TripleComponentRole.valuesNoGraph().length]; int[] index2Location = new int[index2Size.length]; // header creation { @@ -645,7 +645,7 @@ public void sync() throws IOException { } // compute the converters for all the roles - for (TripleComponentRole role : TripleComponentRole.values()) { + for (TripleComponentRole role : TripleComponentRole.valuesNoGraph()) { int roleId = role.ordinal(); if (dataset1Base) { 
nodeConverters[roleId] = new DatasetNodeConverter( @@ -899,7 +899,7 @@ public void deleteLink() throws IOException { paths.add(CloseSuppressPath.of(getMapHeaderPath())); - for (TripleComponentRole role : TripleComponentRole.values()) { + for (TripleComponentRole role : TripleComponentRole.valuesNoGraph()) { paths.add(CloseSuppressPath.of(getMap1DestinationPath(role))); paths.add(CloseSuppressPath.of(getMap1OriginPath(role))); paths.add(CloseSuppressPath.of(getMap2DestinationPath(role))); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/tools/RDF2HDT.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/tools/RDF2HDT.java index 827e0ffc..d952139a 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/tools/RDF2HDT.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/tools/RDF2HDT.java @@ -146,6 +146,17 @@ public void execute() throws ParserException, IOException { } } + boolean isQuad = notation == RDFNotation.NQUAD; + + if (isQuad) { + if (!spec.contains(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY)) { + spec.set(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD); + } + if (!spec.contains(HDTOptionsKeys.DICTIONARY_TYPE_KEY)) { + spec.set(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION); + } + } + colorTool.log("Converting " + rdfInput + " to " + hdtOutput + " as " + notation.name()); if (ntSimpleLoading) { @@ -207,11 +218,13 @@ public void execute() throws ParserException, IOException { try { // Show Basic stats if (!quiet) { - colorTool.logValue("Total Triples ......... ", "" + hdt.getTriples().getNumberOfElements()); - colorTool.logValue("Different subjects .... ", "" + hdt.getDictionary().getNsubjects()); - colorTool.logValue("Different predicates .. ", "" + hdt.getDictionary().getNpredicates()); - colorTool.logValue("Different objects ..... 
", "" + hdt.getDictionary().getNobjects()); - colorTool.logValue("Common Subject/Object . ", "" + hdt.getDictionary().getNshared()); + colorTool.logValue("Total Triples ......... ", String.valueOf(hdt.getTriples().getNumberOfElements())); + colorTool.logValue("Different subjects .... ", String.valueOf(hdt.getDictionary().getNsubjects())); + colorTool.logValue("Different predicates .. ", String.valueOf(hdt.getDictionary().getNpredicates())); + colorTool.logValue("Different objects ..... ", String.valueOf(hdt.getDictionary().getNobjects())); + if (isQuad) + colorTool.logValue("Different graphs ...... ", String.valueOf(hdt.getDictionary().getNgraphs())); + colorTool.logValue("Common Subject/Object . ", String.valueOf(hdt.getDictionary().getNshared())); } // Dump to HDT file diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TempTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TempTriples.java index 2addbd0d..13906cad 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TempTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TempTriples.java @@ -38,6 +38,16 @@ public interface TempTriples extends TriplesPrivate, Closeable { */ boolean insert(long subject, long predicate, long object); + /** + * Add one triple with a graph + * + * @param subject subject + * @param predicate predicate + * @param object object + * @param graph graph + */ + boolean insert(long subject, long predicate, long object, long graph); + /** * Adds one or more triples * @@ -80,4 +90,7 @@ public interface TempTriples extends TriplesPrivate, Closeable { void load(Triples triples, ProgressListener listener); void replaceAllIds(DictionaryIDMapping mapSubj, DictionaryIDMapping mapPred, DictionaryIDMapping mapObj); + + void replaceAllIds(DictionaryIDMapping mapSubj, DictionaryIDMapping mapPred, DictionaryIDMapping mapObj, + DictionaryIDMapping mapGraph); } diff --git 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java index 606aa67a..0df5db8f 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java @@ -19,6 +19,7 @@ package com.the_qa_company.qendpoint.core.triples; +import java.io.Serial; import java.io.Serializable; import com.the_qa_company.qendpoint.core.util.LongCompare; @@ -27,11 +28,14 @@ * TripleID holds a triple using Long IDs */ public final class TripleID implements Comparable, Serializable, Cloneable { + @Serial private static final long serialVersionUID = -4685524566493494912L; private long subject; private long predicate; private long object; + private long graph; + private boolean isQuad = false; /** * Basic constructor @@ -54,6 +58,23 @@ public TripleID(long subject, long predicate, long object) { this.object = object; } + /** + * Constructor + * + * @param subject The subject + * @param predicate The predicate + * @param object The object + * @param graph The graph + */ + public TripleID(long subject, long predicate, long object, long graph) { + super(); + this.subject = subject; + this.predicate = predicate; + this.object = object; + this.graph = graph; + this.isQuad = true; + } + /** * Build a TripleID as a copy of another one. 
* @@ -64,6 +85,12 @@ public TripleID(TripleID other) { this.subject = other.subject; this.predicate = other.predicate; this.object = other.object; + this.graph = other.graph; + this.isQuad = other.isQuad; + } + + public boolean isQuad() { + return isQuad; } /** @@ -108,6 +135,21 @@ public void setPredicate(long predicate) { this.predicate = predicate; } + /** + * @return long the graph + */ + public long getGraph() { + return graph; + } + + /** + * @param graph the graph to set + */ + public void setGraph(long graph) { + this.graph = graph; + this.isQuad = true; + } + /** * Replace all components of a TripleID at once. Useful to reuse existing * objects. @@ -122,17 +164,37 @@ public void setAll(long subject, long predicate, long object) { this.object = object; } + /** + * Replace all components of a TripleID at once. Useful to reuse existing + * objects. + * + * @param subject subject ID + * @param predicate predicate ID + * @param object object ID + * @param graph graph ID + */ + public void setAll(long subject, long predicate, long object, long graph) { + this.subject = subject; + this.predicate = predicate; + this.object = object; + this.graph = graph; + this.isQuad = true; + } + public void assign(TripleID replacement) { subject = replacement.getSubject(); object = replacement.getObject(); predicate = replacement.getPredicate(); + graph = replacement.getGraph(); + isQuad = replacement.isQuad(); } /** * Set all components to zero. 
*/ public void clear() { - subject = predicate = object = 0; + subject = predicate = object = graph = 0; + isQuad = false; } /* @@ -141,6 +203,9 @@ public void clear() { */ @Override public String toString() { + if (isQuad) { + return subject + " " + predicate + " " + object + " " + graph; + } return subject + " " + predicate + " " + object; } @@ -178,11 +243,14 @@ public boolean match(TripleID pattern) { long subjectPattern = pattern.getSubject(); long predicatePattern = pattern.getPredicate(); long objectPattern = pattern.getObject(); + long graphPattern = pattern.getGraph(); /* Remember that 0 acts as a wildcard */ if (subjectPattern == 0 || this.subject == subjectPattern) { if (predicatePattern == 0 || this.predicate == predicatePattern) { - return objectPattern == 0 || this.object == objectPattern; + if (objectPattern == 0 || this.object == objectPattern) { + return graphPattern == 0 || this.graph == graphPattern; + } } } return false; @@ -194,7 +262,7 @@ public boolean match(TripleID pattern) { * @return boolean */ public boolean isEmpty() { - return !(subject != 0 || predicate != 0 || object != 0); + return !(subject != 0 || predicate != 0 || object != 0 || graph != 0); } /** @@ -203,7 +271,7 @@ public boolean isEmpty() { * @return boolean */ public boolean isValid() { - return subject > 0 && predicate > 0 && object > 0; + return subject > 0 && predicate > 0 && object > 0 && (!isQuad || graph > 0); } /** @@ -212,7 +280,7 @@ public boolean isValid() { * @return boolean */ public boolean isNoMatch() { - return subject == -1 || predicate == -1 || object == -1; + return subject == -1 || predicate == -1 || object == -1 || (isQuad && graph == -1); } /** @@ -221,7 +289,8 @@ public boolean isNoMatch() { * @return String */ public String getPatternString() { - return "" + (subject == 0 ? '?' : 'S') + (predicate == 0 ? '?' : 'P') + (object == 0 ? '?' : 'O'); + return String.valueOf(subject == 0 ? '?' : 'S') + (predicate == 0 ? '?' : 'P') + (object == 0 ? '?' 
: 'O') + + (isQuad ? (graph == 0 ? '?' : 'G') : ""); } /** @@ -237,11 +306,11 @@ public static int size() { public boolean equals(Object o) { if (o == this) return true; - if (!(o instanceof TripleID)) { + if (!(o instanceof TripleID other)) { return false; } - TripleID other = (TripleID) o; - return !(subject != other.subject || predicate != other.predicate || object != other.object); + return !(subject != other.subject || predicate != other.predicate || object != other.object + || graph != other.graph); } @Override @@ -255,6 +324,9 @@ public TripleID clone() { @Override public int hashCode() { + if (isQuad) { + return (int) (subject * 13 + predicate * 17 + object * 31 + graph * 37); + } return (int) (subject * 13 + predicate * 17 + object * 31); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleIDComparatorInt.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleIDComparatorInt.java index de2faf7c..020ae69b 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleIDComparatorInt.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleIDComparatorInt.java @@ -59,7 +59,7 @@ public int compare(TripleIDInt o1, TripleIDInt o2) { * Components of the triple. 
Meaning will be given based on the order * variable, see below */ - int x1 = 0, y1 = 0, z1 = 0, x2 = 0, y2 = 0, z2 = 0; + int x1 = 0, y1 = 0, z1 = 0, x2 = 0, y2 = 0, z2 = 0, g1 = 0, g2 = 0; switch (this.order) { case SPO: @@ -72,6 +72,9 @@ public int compare(TripleIDInt o1, TripleIDInt o2) { // Objects z1 = o1.getObject(); z2 = o2.getObject(); + // Graphs + g1 = o1.getGraph(); + g2 = o2.getGraph(); break; case SOP: // Subjects @@ -83,6 +86,9 @@ public int compare(TripleIDInt o1, TripleIDInt o2) { // Predicates z1 = o1.getPredicate(); z2 = o2.getPredicate(); + // Graphs + g1 = o1.getGraph(); + g2 = o2.getGraph(); break; case PSO: // Predicates @@ -94,6 +100,9 @@ public int compare(TripleIDInt o1, TripleIDInt o2) { // Objects z1 = o1.getObject(); z2 = o2.getObject(); + // Graphs + g1 = o1.getGraph(); + g2 = o2.getGraph(); break; case POS: // Predicates @@ -105,6 +114,9 @@ public int compare(TripleIDInt o1, TripleIDInt o2) { // Subjects z1 = o1.getSubject(); z2 = o2.getSubject(); + // Graphs + g1 = o1.getGraph(); + g2 = o2.getGraph(); break; case OSP: // Objects @@ -116,6 +128,9 @@ public int compare(TripleIDInt o1, TripleIDInt o2) { // Predicates z1 = o1.getPredicate(); z2 = o2.getPredicate(); + // Graphs + g1 = o1.getGraph(); + g2 = o2.getGraph(); break; case OPS: // Objects @@ -127,6 +142,9 @@ public int compare(TripleIDInt o1, TripleIDInt o2) { // Subjects z1 = o1.getSubject(); z2 = o2.getSubject(); + // Graphs + g1 = o1.getGraph(); + g2 = o2.getGraph(); break; } @@ -136,7 +154,14 @@ public int compare(TripleIDInt o1, TripleIDInt o2) { result = y1 - y2; if (result == 0) { // The third component is different? - return z1 - z2; + result = z1 - z2; + if (result == 0) { + // The fourth component is different? 
+ return g1 - g2; + } else { + // the third component is different + return result; + } } else { // the second component is different return result; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesFactory.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesFactory.java index c87711ac..33947acf 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesFactory.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesFactory.java @@ -21,7 +21,9 @@ import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary; import com.the_qa_company.qendpoint.core.options.ControlInfo; import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys; import com.the_qa_company.qendpoint.core.options.HDTSpecification; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapQuadTriples; import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriples; import com.the_qa_company.qendpoint.core.triples.impl.TriplesList; @@ -53,12 +55,20 @@ static public TempTriples createTempTriples(HDTOptions spec) { static public TriplesPrivate createTriples(HDTOptions spec) throws IOException { String type = spec.get("triples.format"); + boolean isQuad = spec.get(HDTOptionsKeys.DICTIONARY_TYPE_KEY, "") + .equals(HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION); + if (type == null) { + if (isQuad) { + return new BitmapQuadTriples(spec); + } return new BitmapTriples(spec); } else if (HDTVocabulary.TRIPLES_TYPE_TRIPLESLIST.equals(type)) { return new TriplesList(spec); } else if (HDTVocabulary.TRIPLES_TYPE_BITMAP.equals(type)) { return new BitmapTriples(spec); + } else if (HDTVocabulary.TRIPLES_TYPE_BITMAP_QUAD.equals(type)) { + return new BitmapQuadTriples(spec); } else { return new BitmapTriples(spec); } @@ -77,6 +87,8 @@ public static TriplesPrivate createTriples(ControlInfo ci) throws 
IOException { return new TriplesList(new HDTSpecification()); } else if (HDTVocabulary.TRIPLES_TYPE_BITMAP.equals(format)) { return new BitmapTriples(); + } else if (HDTVocabulary.TRIPLES_TYPE_BITMAP_QUAD.equals(format)) { + return new BitmapQuadTriples(); } else { throw new IllegalArgumentException("No implementation for Triples type: " + format); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java new file mode 100644 index 00000000..89021271 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java @@ -0,0 +1,349 @@ +/** + * File: $HeadURL: + * https://hdt-java.googlecode.com/svn/trunk/hdt-java/src/org/rdfhdt/hdt/triples/impl/BitmapTriples.java + * $ Revision: $Rev: 203 $ Last modified: $Date: 2013-05-24 10:48:53 +0100 (vie, + * 24 may 2013) $ Last modified by: $Author: mario.arias $ This library is free + * software; you can redistribute it and/or modify it under the terms of the GNU + * Lesser General Public License as published by the Free Software Foundation; + * version 3.0 of the License. This library is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + * General Public License for more details. You should have received a copy of + * the GNU Lesser General Public License along with this library; if not, write + * to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA Contacting the authors: Mario Arias: mario.arias@deri.org + * Javier D. Fernandez: jfergar@infor.uva.es Miguel A. 
Martinez-Prieto: + * migumar2@infor.uva.es Alejandro Andres: fuzzy.alej@gmail.com + */ + +package com.the_qa_company.qendpoint.core.triples.impl; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +import com.the_qa_company.qendpoint.core.compact.bitmap.AdjacencyList; +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap375Big; +import com.the_qa_company.qendpoint.core.compact.bitmap.RoaringBitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.BitmapFactory; +import com.the_qa_company.qendpoint.core.compact.bitmap.ModifiableBitmap; +import com.the_qa_company.qendpoint.core.compact.sequence.DynamicSequence; +import com.the_qa_company.qendpoint.core.compact.sequence.Sequence; +import com.the_qa_company.qendpoint.core.compact.sequence.SequenceFactory; +import com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64Big; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; +import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary; +import com.the_qa_company.qendpoint.core.iterator.SequentialSearchIteratorTripleID; +import com.the_qa_company.qendpoint.core.iterator.SuppliableIteratorTripleID; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; +import com.the_qa_company.qendpoint.core.options.ControlInfo; +import com.the_qa_company.qendpoint.core.options.ControlInformation; +import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.quad.impl.BitmapQuadsIterator; +import com.the_qa_company.qendpoint.core.quad.impl.BitmapQuadsIteratorG; +import com.the_qa_company.qendpoint.core.quad.impl.BitmapQuadsIteratorYFOQ; +import com.the_qa_company.qendpoint.core.quad.impl.BitmapQuadsIteratorYGFOQ; +import 
com.the_qa_company.qendpoint.core.quad.impl.BitmapQuadsIteratorZFOQ; +import com.the_qa_company.qendpoint.core.quad.impl.BitmapQuadsIteratorZGFOQ; +import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; +import com.the_qa_company.qendpoint.core.triples.TempTriples; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.util.BitUtil; +import com.the_qa_company.qendpoint.core.util.io.CountInputStream; +import com.the_qa_company.qendpoint.core.util.listener.IntermediateListener; +import com.the_qa_company.qendpoint.core.util.listener.ListenerUtil; + +import com.github.andrewoma.dexx.collection.Pair; + +import java.io.File; + +/** + * @author mario.arias + */ +public class BitmapQuadTriples extends BitmapTriples { + + protected List quadInfoAG = new ArrayList<>(); + + private static ModifiableBitmap createQuadBitmap() { + return new RoaringBitmap(); + } + + public BitmapQuadTriples() throws IOException { + super(); + } + + public BitmapQuadTriples(HDTOptions spec) throws IOException { + super(spec); + } + + public BitmapQuadTriples(HDTOptions spec, Sequence seqY, Sequence seqZ, Bitmap bitY, Bitmap bitZ, + TripleComponentOrder order) throws IOException { + super(spec, seqY, seqZ, bitY, bitZ, order); + } + + /* + * (non-Javadoc) + * @see hdt.triples.Triples#getType() + */ + @Override + public String getType() { + return HDTVocabulary.TRIPLES_TYPE_BITMAP_QUAD; + } + + @Override + public void load(IteratorTripleID it, ProgressListener listener) { + long number = it.estimatedNumResults(); + + DynamicSequence vectorY = new SequenceLog64Big(BitUtil.log2(number), number + 1); + DynamicSequence vectorZ = new SequenceLog64Big(BitUtil.log2(number), number + 1); + + ModifiableBitmap bitY = Bitmap375Big.memory(number); + ModifiableBitmap bitZ = Bitmap375Big.memory(number); + + long lastX = 0, lastY = 0, lastZ = 0; + long x, y, z, g; + long numTriples = 0; + long numGraphs = 0; + + long tripleIndex = -1; + + List> 
triplesInGraph = new ArrayList<>(); + + while (it.hasNext()) { + TripleID triple = it.next(); + TripleOrderConvert.swapComponentOrder(triple, TripleComponentOrder.SPO, order); + + x = triple.getSubject(); + y = triple.getPredicate(); + z = triple.getObject(); + g = triple.getGraph(); + if (x == 0 || y == 0 || z == 0 || g == 0) { + throw new IllegalFormatException("None of the components of a quad can be null"); + } + numGraphs = Math.max(numGraphs, g); + long graphIndex = g - 1; + boolean sameAsLast = x == lastX && y == lastY && z == lastZ; + if (!sameAsLast) { + tripleIndex += 1; + } + triplesInGraph.add(new Pair<>(tripleIndex, graphIndex)); + if (sameAsLast) { + continue; + } + + if (numTriples == 0) { + // First triple + vectorY.append(y); + vectorZ.append(z); + } else if (x != lastX) { + if (x != lastX + 1) { + throw new IllegalFormatException("Upper level must be increasing and correlative."); + } + // X changed + bitY.append(true); + vectorY.append(y); + + bitZ.append(true); + vectorZ.append(z); + } else if (y != lastY) { + if (y < lastY) { + throw new IllegalFormatException("Middle level must be increasing for each parent."); + } + + // Y changed + bitY.append(false); + vectorY.append(y); + + bitZ.append(true); + vectorZ.append(z); + } else { + if (z < lastZ) { + throw new IllegalFormatException("Lower level must be increasing for each parent."); + } + + // Z changed + bitZ.append(false); + vectorZ.append(z); + } + + lastX = x; + lastY = y; + lastZ = z; + + ListenerUtil.notifyCond(listener, "Converting to BitmapTriples", numTriples, numTriples, number); + numTriples++; + } + + for (int i = 0; i < numGraphs; i++) { + quadInfoAG.add(createQuadBitmap()); + } + for (Pair tripleInGraph : triplesInGraph) { + long iTriple = tripleInGraph.component1(); + long iGraph = tripleInGraph.component2(); + quadInfoAG.get((int) iGraph).set(iTriple, true); + } + + if (numTriples > 0) { + bitY.append(true); + bitZ.append(true); + } + + vectorY.aggressiveTrimToSize(); + 
vectorZ.trimToSize(); + + // Assign local variables to BitmapTriples Object + seqY = vectorY; + seqZ = vectorZ; + bitmapY = bitY; + bitmapZ = bitZ; + + adjY = new AdjacencyList(seqY, bitmapY); + adjZ = new AdjacencyList(seqZ, bitmapZ); + + isClosed = false; + } + + @Override + public void load(TempTriples triples, ProgressListener listener) { + super.load(triples, listener); + } + + @Override + public long getNumberOfElements() { + return super.getNumberOfElements(); + } + + /* + * (non-Javadoc) + * @see hdt.triples.Triples#size() + */ + @Override + public long size() { + if (isClosed) + return 0; + long graphs = quadInfoAG.stream().map(b -> b.getSizeBytes()).reduce(0L, (a, b) -> a + b); + return seqY.size() + seqZ.size() + bitmapY.getSizeBytes() + bitmapZ.getSizeBytes() + graphs; + } + + @Override + public void save(OutputStream output, ControlInfo ci, ProgressListener listener) throws IOException { + ci.clear(); + ci.setFormat(getType()); + ci.setInt("order", order.ordinal()); + ci.setType(ControlInfo.Type.TRIPLES); + ci.save(output); + + IntermediateListener iListener = new IntermediateListener(listener); + bitmapY.save(output, iListener); + bitmapZ.save(output, iListener); + seqY.save(output, iListener); + seqZ.save(output, iListener); + ByteBuffer numGraphs = ByteBuffer.allocate(Integer.BYTES); + numGraphs.putInt(quadInfoAG.size()); + output.write(numGraphs.array()); + for (ModifiableBitmap b : quadInfoAG) { + b.save(output, iListener); + } + } + + @Override + public SuppliableIteratorTripleID search(TripleID pattern) { + if (isClosed) { + throw new IllegalStateException("Cannot search on BitmapTriples if it's already closed"); + } + + if (getNumberOfElements() == 0 || pattern.isNoMatch()) { + return new EmptyTriplesIterator(order); + } + + TripleID reorderedPat = new TripleID(pattern); + TripleOrderConvert.swapComponentOrder(reorderedPat, TripleComponentOrder.SPO, order); + String patternString = reorderedPat.getPatternString(); + + if 
(patternString.equals("?P??")) + return new BitmapQuadsIteratorYFOQ(this, pattern); + + if (patternString.equals("?P?G")) + return new BitmapQuadsIteratorYGFOQ(this, pattern); + + if (patternString.equals("?PO?") || patternString.equals("??O?")) + return new BitmapQuadsIteratorZFOQ(this, pattern); + + if (patternString.equals("?POG") || patternString.equals("??OG")) + return new BitmapQuadsIteratorZGFOQ(this, pattern); + + SuppliableIteratorTripleID bitIt; + if (patternString.endsWith("G")) + bitIt = new BitmapQuadsIteratorG(this, pattern); + else + bitIt = new BitmapQuadsIterator(this, pattern); + if (patternString.equals("????") || patternString.equals("???G") || patternString.equals("S???") + || patternString.equals("S??G") || patternString.equals("SP??") || patternString.equals("SP?G") + || patternString.equals("SPO?") || patternString.equals("SPOG")) { + return bitIt; + } + return new SequentialSearchIteratorTripleID(pattern, bitIt); + } + + @Override + public void mapFromFile(CountInputStream input, File f, ProgressListener listener) throws IOException { + ControlInformation ci = new ControlInformation(); + ci.load(input); + if (ci.getType() != ControlInfo.Type.TRIPLES) { + throw new IllegalFormatException("Trying to read a triples section, but was not triples."); + } + + if (!ci.getFormat().equals(getType())) { + throw new IllegalFormatException( + "Trying to read BitmapTriples, but the data does not seem to be BitmapTriples"); + } + + order = TripleComponentOrder.values()[(int) ci.getInt("order")]; + + IntermediateListener iListener = new IntermediateListener(listener); + + bitmapY = BitmapFactory.createBitmap(input); + bitmapY.load(input, iListener); + + bitmapZ = BitmapFactory.createBitmap(input); + bitmapZ.load(input, iListener); + + seqY = SequenceFactory.createStream(input, f); + seqZ = SequenceFactory.createStream(input, f); + + adjY = new AdjacencyList(seqY, bitmapY); + adjZ = new AdjacencyList(seqZ, bitmapZ); + + quadInfoAG = new ArrayList<>(); + + 
ByteBuffer numGraphsB = ByteBuffer.allocate(Integer.BYTES); + input.read(numGraphsB.array()); + int numGraphs = numGraphsB.getInt(); + for (int i = 0; i < numGraphs; i++) { + ModifiableBitmap b = createQuadBitmap(); + b.load(input, iListener); + quadInfoAG.add(b); + } + + isClosed = false; + } + + // Fast but dangerous covariant cast + @Override + public List getQuadInfoAG() { + return quadInfoAG; + } + + // Slower but safer + // @Override + // public List getQuadInfoAG() { + // return quadInfoAG + // .stream() + // .map(b -> (Bitmap) b) + // .collect(java.util.stream.Collectors.toList()); + // } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java index e629e68c..5805cb20 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java @@ -23,6 +23,12 @@ import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap375Big; import com.the_qa_company.qendpoint.core.compact.bitmap.BitmapFactory; import com.the_qa_company.qendpoint.core.compact.bitmap.ModifiableBitmap; +import com.the_qa_company.qendpoint.core.compact.sequence.DynamicSequence; +import com.the_qa_company.qendpoint.core.compact.sequence.Sequence; +import com.the_qa_company.qendpoint.core.compact.sequence.SequenceFactory; +import com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64; +import com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64Big; +import com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64BigDisk; import com.the_qa_company.qendpoint.core.dictionary.Dictionary; import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; @@ -48,27 +54,23 @@ import 
com.the_qa_company.qendpoint.core.triples.TriplesPrivate; import com.the_qa_company.qendpoint.core.util.BitUtil; import com.the_qa_company.qendpoint.core.util.StopWatch; -import com.the_qa_company.qendpoint.core.util.io.compress.Pair; -import org.apache.commons.io.file.PathUtils; -import com.the_qa_company.qendpoint.core.compact.bitmap.*; -import com.the_qa_company.qendpoint.core.compact.sequence.DynamicSequence; -import com.the_qa_company.qendpoint.core.compact.sequence.Sequence; -import com.the_qa_company.qendpoint.core.compact.sequence.SequenceFactory; -import com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64; -import com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64Big; -import com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64BigDisk; -import com.the_qa_company.qendpoint.core.options.*; import com.the_qa_company.qendpoint.core.util.concurrent.KWayMerger; import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath; import com.the_qa_company.qendpoint.core.util.io.Closer; import com.the_qa_company.qendpoint.core.util.io.CountInputStream; import com.the_qa_company.qendpoint.core.util.io.IOUtil; +import com.the_qa_company.qendpoint.core.util.io.compress.Pair; import com.the_qa_company.qendpoint.core.util.listener.IntermediateListener; import com.the_qa_company.qendpoint.core.util.listener.ListenerUtil; +import org.apache.commons.io.file.PathUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.*; +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -95,7 +97,7 @@ public class BitmapTriples implements TriplesPrivate { boolean diskSubIndex; CreateOnUsePath diskSequenceLocation; - private boolean isClosed; + protected boolean isClosed; public BitmapTriples() throws IOException { this(new HDTSpecification()); @@ -952,18 
+954,13 @@ public void generateIndex(ProgressListener listener, HDTOptions specIndex, Dicti String indexMethod = specIndex.get(HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_KEY, HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_RECOMMENDED); switch (indexMethod) { - case HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_RECOMMENDED: - case HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_OPTIMIZED: + case HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_RECOMMENDED, + HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_OPTIMIZED -> createIndexObjectMemoryEfficient(); - break; - case HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_DISK: + case HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_DISK -> createIndexObjectDisk(specIndex, dictionary, listener); - break; - case HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_LEGACY: - createIndexObjects(); - break; - default: - throw new IllegalArgumentException("Unknown INDEXING METHOD: " + indexMethod); + case HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_LEGACY -> createIndexObjects(); + default -> throw new IllegalArgumentException("Unknown INDEXING METHOD: " + indexMethod); } predicateIndex = new PredicateIndexArray(this); @@ -1239,6 +1236,18 @@ public Sequence getSeqZ() { return seqZ; } + public AdjacencyList getAdjacencyListY() { + return adjY; + } + + public AdjacencyList getAdjacencyListZ() { + return adjZ; + } + + public AdjacencyList getAdjacencyListIndex() { + return adjIndex; + } + public Bitmap getBitmapY() { return bitmapY; } @@ -1279,4 +1288,8 @@ public void close() throws IOException { } } } + + public List getQuadInfoAG() { + throw new UnsupportedOperationException("Cannot get quad info from a BitmapTriples"); + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java index 0321e04d..57ba0a4a 100644 --- 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java @@ -30,21 +30,27 @@ */ public class BitmapTriplesIterator implements SuppliableIteratorTripleID { - private final BitmapTriples triples; - private final TripleID pattern, returnTriple; - private long lastPosition; - private long patX, patY, patZ; + protected final BitmapTriples triples; + protected final TripleID pattern, returnTriple; + protected long lastPosition; + protected long patX, patY, patZ; - private AdjacencyList adjY, adjZ; - long posY, posZ, minY, minZ, maxY, maxZ; - private long nextY, nextZ; - private long x, y, z; + protected AdjacencyList adjY, adjZ; + protected long posY, posZ, minY, minZ, maxY, maxZ; + protected long nextY, nextZ; + protected long x, y, z; - public BitmapTriplesIterator(BitmapTriples triples, TripleID pattern) { + protected BitmapTriplesIterator(BitmapTriples triples, TripleID pattern, boolean search) { this.triples = triples; this.returnTriple = new TripleID(); this.pattern = new TripleID(); - newSearch(pattern); + if (search) { + newSearch(pattern); + } + } + + public BitmapTriplesIterator(BitmapTriples triples, TripleID pattern) { + this(triples, pattern, true); } public BitmapTriplesIterator(BitmapTriples triples, long minZ, long maxZ) { @@ -78,7 +84,7 @@ public void newSearch(TripleID pattern) { goToStart(); } - private void updateOutput() { + protected void updateOutput() { lastPosition = posZ; returnTriple.setAll(x, y, z); TripleOrderConvert.swapComponentOrder(returnTriple, triples.order, TripleComponentOrder.SPO); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java index 8520b01c..b762ab86 100644 --- 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java @@ -224,4 +224,12 @@ public void remove() { public long getLastTriplePosition() { return lastPosition; } + + public long getPosY() { + return posY; + } + + public long getPosZ() { + return posZ; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java index e1f4c647..1d18bcfe 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java @@ -31,16 +31,16 @@ */ public class BitmapTriplesIteratorZFOQ implements SuppliableIteratorTripleID { private long lastPosIndex; - final BitmapTriples triples; - final TripleID pattern; - final TripleID returnTriple; + protected final BitmapTriples triples; + protected final TripleID pattern; + protected final TripleID returnTriple; - AdjacencyList adjY, adjIndex; - long posIndex, minIndex, maxIndex; - long x, y, z; + protected AdjacencyList adjY, adjIndex; + protected long posIndex, minIndex, maxIndex; + protected long x, y, z; - long patY; - final long patZ; + protected long patY; + protected long patZ; public BitmapTriplesIteratorZFOQ(BitmapTriples triples, TripleID pattern) { this.triples = triples; @@ -62,11 +62,11 @@ public BitmapTriplesIteratorZFOQ(BitmapTriples triples, TripleID pattern) { goToStart(); } - private long getY(long index) { + protected long getY(long index) { return adjY.get(adjIndex.get(index)); } - private void calculateRange() { + protected void calculateRange() { if (patZ == 0) { minIndex = 0; maxIndex = adjIndex.getNumberOfElements(); @@ -125,7 
+125,7 @@ private void calculateRange() { } } - private void updateOutput() { + protected void updateOutput() { lastPosIndex = posIndex; returnTriple.setAll(x, y, z); TripleOrderConvert.swapComponentOrder(returnTriple, triples.order, TripleComponentOrder.SPO); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java index 4dc5cf71..b07c64de 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java @@ -45,6 +45,11 @@ public boolean insert(long subject, long predicate, long object) { throw new NotImplementedException(); } + @Override + public boolean insert(long subject, long predicate, long object, long graph) { + throw new NotImplementedException(); + } + @Override public boolean insert(TripleID... 
triples) { throw new NotImplementedException(); @@ -87,6 +92,12 @@ public void replaceAllIds(DictionaryIDMapping mapSubj, DictionaryIDMapping mapPr throw new NotImplementedException(); } + @Override + public void replaceAllIds(DictionaryIDMapping mapSubj, DictionaryIDMapping mapPred, DictionaryIDMapping mapObj, + DictionaryIDMapping mapGraph) { + + } + @Override public void save(OutputStream output, ControlInfo ci, ProgressListener listener) throws IOException { throw new NotImplementedException(); @@ -129,8 +140,7 @@ public void saveIndex(OutputStream output, ControlInfo ci, ProgressListener list @Override public void load(TempTriples input, ProgressListener listener) { - if (input instanceof OneReadTempTriples) { - OneReadTempTriples input2 = (OneReadTempTriples) input; + if (input instanceof OneReadTempTriples input2) { this.iterator = input2.iterator; this.order = input2.order; } else { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TripleIDInt.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TripleIDInt.java index 3756bbfb..2160fd28 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TripleIDInt.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TripleIDInt.java @@ -28,12 +28,13 @@ public final class TripleIDInt implements Comparable { private int subject; private int predicate; private int object; + private int graph; + private boolean isQuad = false; /** * Basic constructor */ public TripleIDInt() { - super(); } /** @@ -44,35 +45,66 @@ public TripleIDInt() { * @param object The object */ public TripleIDInt(int subject, int predicate, int object) { - super(); + this(); this.subject = subject; this.predicate = predicate; this.object = object; } public TripleIDInt(long subject, long predicate, long object) { - super(); + this(); this.subject = (int) subject; this.predicate = (int) predicate; this.object = (int) 
object; } + /** + * Constructor + * + * @param subject The subject + * @param predicate The predicate + * @param object The object + * @param graph The graph + */ + public TripleIDInt(int subject, int predicate, int object, int graph) { + this(subject, predicate, object); + this.graph = graph; + this.isQuad = true; + } + + /** + * Constructor + * + * @param subject The subject + * @param predicate The predicate + * @param object The object + * @param graph The graph + */ + public TripleIDInt(long subject, long predicate, long object, long graph) { + this(subject, predicate, object); + this.graph = (int) graph; + this.isQuad = true; + } + /** * Build a TripleID as a copy of another one. * * @param other other */ public TripleIDInt(TripleIDInt other) { - super(); this.subject = other.subject; this.predicate = other.predicate; this.object = other.object; + this.graph = other.graph; + this.isQuad = other.isQuad; } public TripleIDInt(TripleID other) { this.subject = (int) other.getSubject(); this.predicate = (int) other.getPredicate(); this.object = (int) other.getObject(); + this.graph = (int) other.getGraph(); + this.isQuad = other.isQuad(); } /** @@ -117,6 +149,20 @@ public void setPredicate(int predicate) { this.predicate = predicate; } + /** + * @return the graph + */ + public int getGraph() { + return graph; + } + + /** + * @param graph the graph to set + */ + public void setGraph(int graph) { + this.graph = graph; + } + /** * Replace all components of a TripleID at once. Useful to reuse existing * objects. @@ -129,19 +175,43 @@ public void setAll(int subject, int predicate, int object) { this.subject = subject; this.predicate = predicate; this.object = object; + this.isQuad = false; + } + + /** + * Replace all components of a TripleID at once. Useful to reuse existing + * objects. 
+ * + * @param subject subject + * @param predicate predicate + * @param object object + * @param graph graph + */ + public void setAll(int subject, int predicate, int object, int graph) { + this.subject = subject; + this.predicate = predicate; + this.object = object; + this.graph = graph; + this.isQuad = true; } public void assign(TripleIDInt replacement) { subject = replacement.getSubject(); object = replacement.getObject(); predicate = replacement.getPredicate(); + graph = replacement.getGraph(); + isQuad = replacement.isQuad(); + } + + public boolean isQuad() { + return isQuad; } /** * Set all components to zero. */ public void clear() { - subject = predicate = object = 0; + subject = predicate = object = graph = 0; } /* @@ -150,11 +220,15 @@ public void clear() { */ @Override public String toString() { - return "" + subject + " " + predicate + " " + object; + if (isQuad) { + return subject + " " + predicate + " " + object + " " + graph; + } + return subject + " " + predicate + " " + object; } public boolean equals(TripleIDInt other) { - return !(subject != other.subject || predicate != other.predicate || object != other.object); + return !(subject != other.subject || predicate != other.predicate || object != other.object + || graph != other.graph); } /** @@ -168,7 +242,12 @@ public int compareTo(TripleIDInt other) { if (result == 0) { result = this.predicate - other.predicate; if (result == 0) { - return this.object - other.object; + result = this.object - other.object; + if (result == 0) { + return this.graph - other.graph; + } else { + return result; + } } else { return result; } @@ -190,11 +269,14 @@ public boolean match(TripleID pattern) { long subjectPattern = pattern.getSubject(); long predicatePattern = pattern.getPredicate(); long objectPattern = pattern.getObject(); + long graphPattern = pattern.getGraph(); - /* Remember that 0 acts as a wildcard */ + // Remember that 0 acts as a wildcard if (subjectPattern == 0 || this.subject == subjectPattern) { if 
(predicatePattern == 0 || this.predicate == predicatePattern) { - return objectPattern == 0 || this.object == objectPattern; + if (objectPattern == 0 || this.object == objectPattern) { + return graphPattern == 0 || this.graph == graphPattern; + } } } return false; @@ -204,30 +286,36 @@ public boolean match(TripleID pattern) { * Check whether all the components of the triple are empty (zero). */ public boolean isEmpty() { - return !(subject != 0 || predicate != 0 || object != 0); + return !(subject != 0 || predicate != 0 || object != 0 || graph != 0); } /** * Check whether none of the components of the triple are empty. */ public boolean isValid() { - return subject > 0 && predicate > 0 && object > 0; + return subject > 0 && predicate > 0 && object > 0 && (!isQuad || graph > 0); } /** * Get the pattern of the triple as String, such as "SP?". */ public String getPatternString() { - return "" + (subject == 0 ? '?' : 'S') + (predicate == 0 ? '?' : 'P') + (object == 0 ? '?' : 'O'); + return String.valueOf(subject == 0 ? '?' : 'S') + (predicate == 0 ? '?' : 'P') + (object == 0 ? '?' : 'O') + + (isQuad ? (graph == 0 ? '?' 
: 'G') : ""); } public TripleID asTripleID() { + if (isQuad) { + return new TripleID(subject, predicate, object, graph); + } return new TripleID(subject, predicate, object); } - /** size of one TripleID in memory */ + /** + * size of one TripleID in memory + */ public static int size() { - return 24; + return 33; } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java index 4aa6e60a..445999ad 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java @@ -19,10 +19,6 @@ package com.the_qa_company.qendpoint.core.triples.impl; -import java.io.*; -import java.util.ArrayList; -import java.util.Collections; - import com.the_qa_company.qendpoint.core.dictionary.Dictionary; import com.the_qa_company.qendpoint.core.dictionary.impl.DictionaryIDMapping; import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; @@ -42,20 +38,30 @@ import com.the_qa_company.qendpoint.core.triples.TripleIDComparatorInt; import com.the_qa_company.qendpoint.core.triples.Triples; import com.the_qa_company.qendpoint.core.util.RDFInfo; -import com.the_qa_company.qendpoint.core.triples.*; import com.the_qa_company.qendpoint.core.util.io.CountInputStream; import com.the_qa_company.qendpoint.core.util.io.IOUtil; import com.the_qa_company.qendpoint.core.util.listener.ListenerUtil; +import java.io.DataOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; + /** * Implementation of TempTriples using a List of TripleID. 
*/ public class TriplesList implements TempTriples { - /** The array to hold the triples */ + /** + * The array to hold the triples + */ private ArrayList arrayOfTriples; - /** The order of the triples */ + /** + * The order of the triples + */ private TripleComponentOrder order; private long numValidTriples; @@ -71,7 +77,7 @@ public TriplesList(HDTOptions specification) { // precise allocation of the array (minimal memory wasting) long numTriples = RDFInfo.getTriples(specification); numTriples = (numTriples > 0) ? numTriples : 100; - this.arrayOfTriples = new ArrayList((int) numTriples); + this.arrayOfTriples = new ArrayList<>((int) numTriples); // choosing starting(or default) component order String orderStr = specification.get(HDTOptionsKeys.TRIPLE_ORDER_KEY); @@ -90,7 +96,7 @@ public TriplesList(HDTOptions specification) { */ public boolean reallocateIfEmpty(int numTriples) { if (arrayOfTriples.isEmpty()) { - arrayOfTriples = new ArrayList(numTriples); + arrayOfTriples = new ArrayList<>(numTriples); return true; } else { return false; @@ -243,6 +249,14 @@ public boolean insert(long subject, long predicate, long object) { return true; } + @Override + public boolean insert(long subject, long predicate, long object, long graph) { + arrayOfTriples.add(new TripleIDInt(subject, predicate, object, graph)); + numValidTriples++; + sorted = false; + return true; + } + /* * (non-Javadoc) * @see hdt.triples.TempTriples#delete(hdt.triples.TripleID[]) @@ -271,7 +285,7 @@ public boolean remove(TripleID... 
patterns) { @Override public void sort(ProgressListener listener) { if (!sorted) { - Collections.sort(arrayOfTriples, TripleIDComparatorInt.getComparator(order)); + arrayOfTriples.sort(TripleIDComparatorInt.getComparator(order)); } sorted = true; } @@ -285,7 +299,7 @@ public void removeDuplicates(ProgressListener listener) { return; } - if (order == TripleComponentOrder.Unknown || !sorted) { + if (order == TripleComponentOrder.Unknown) { throw new IllegalArgumentException("Cannot remove duplicates unless sorted"); } @@ -339,20 +353,14 @@ public TripleID findTriple(long position) { @Override public void generateIndex(ProgressListener listener, HDTOptions specIndex, Dictionary dictionary) { - // TODO Auto-generated method stub - } @Override - public void loadIndex(InputStream input, ControlInfo ci, ProgressListener listener) throws IOException { - // TODO Auto-generated method stub - + public void loadIndex(InputStream input, ControlInfo ci, ProgressListener listener) { } @Override - public void saveIndex(OutputStream output, ControlInfo ci, ProgressListener listener) throws IOException { - // TODO Auto-generated method stub - + public void saveIndex(OutputStream output, ControlInfo ci, ProgressListener listener) { } @Override @@ -504,15 +512,37 @@ public void mapFromFile(CountInputStream in, File f, ProgressListener listener) } @Override - public void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressListener listener) throws IOException { + public void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressListener listener) { + } + + @Override + public void replaceAllIds(DictionaryIDMapping mapSubj, DictionaryIDMapping mapPred, DictionaryIDMapping mapObj, + DictionaryIDMapping mapGraph) { + sorted = false; + for (TripleIDInt triple : arrayOfTriples) { + if (triple.isQuad()) { + triple.setAll((int) mapSubj.getNewID(triple.getSubject() - 1), + (int) mapPred.getNewID(triple.getPredicate() - 1), + (int) mapObj.getNewID(triple.getObject() - 1), 
(int) mapGraph.getNewID(triple.getGraph() - 1)); + } else { + throw new IllegalArgumentException( + "You must call the replaceAllIds method without a DictionaryIDMapping for graphs if the triples are not quads."); + } + } } @Override public void replaceAllIds(DictionaryIDMapping mapSubj, DictionaryIDMapping mapPred, DictionaryIDMapping mapObj) { sorted = false; for (TripleIDInt triple : arrayOfTriples) { - triple.setAll((int) mapSubj.getNewID(triple.getSubject() - 1), - (int) mapPred.getNewID(triple.getPredicate() - 1), (int) mapObj.getNewID(triple.getObject() - 1)); + if (triple.isQuad()) { + throw new IllegalArgumentException( + "You must call the replaceAllIds method with a DictionaryIDMapping for graphs if the triples are quads."); + } else { + triple.setAll((int) mapSubj.getNewID(triple.getSubject() - 1), + (int) mapPred.getNewID(triple.getPredicate() - 1), + (int) mapObj.getNewID(triple.getObject() - 1)); + } } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java index 0b478cd7..e39ae31d 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java @@ -25,7 +25,6 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; -import java.util.Collections; import com.the_qa_company.qendpoint.core.dictionary.Dictionary; import com.the_qa_company.qendpoint.core.dictionary.impl.DictionaryIDMapping; @@ -74,7 +73,7 @@ public TriplesListLong(HDTOptions specification) { // precise allocation of the array (minimal memory wasting) long numTriples = RDFInfo.getTriples(specification); numTriples = (numTriples > 0) ? 
numTriples : 100; - this.arrayOfTriples = new ArrayList((int) numTriples); + this.arrayOfTriples = new ArrayList<>((int) numTriples); // choosing starting(or default) component order String orderStr = specification.get(HDTOptionsKeys.TRIPLE_ORDER_KEY); @@ -93,7 +92,7 @@ public TriplesListLong(HDTOptions specification) { */ public boolean reallocateIfEmpty(int numTriples) { if (arrayOfTriples.isEmpty()) { - arrayOfTriples = new ArrayList(numTriples); + arrayOfTriples = new ArrayList<>(numTriples); return true; } else { return false; @@ -246,6 +245,11 @@ public boolean insert(long subject, long predicate, long object) { return true; } + @Override + public boolean insert(long subject, long predicate, long object, long graph) { + return this.insert(subject, predicate, object); + } + /* * (non-Javadoc) * @see hdt.triples.TempTriples#delete(hdt.triples.TripleID[]) @@ -274,7 +278,7 @@ public boolean remove(TripleID... patterns) { @Override public void sort(ProgressListener listener) { if (!sorted) { - Collections.sort(arrayOfTriples, TripleIDComparator.getComparator(order)); + arrayOfTriples.sort(TripleIDComparator.getComparator(order)); } sorted = true; } @@ -342,20 +346,14 @@ public TripleID findTriple(long position) { @Override public void generateIndex(ProgressListener listener, HDTOptions specIndex, Dictionary dictionary) { - // TODO Auto-generated method stub - } @Override - public void loadIndex(InputStream input, ControlInfo ci, ProgressListener listener) throws IOException { - // TODO Auto-generated method stub - + public void loadIndex(InputStream input, ControlInfo ci, ProgressListener listener) { } @Override - public void saveIndex(OutputStream output, ControlInfo ci, ProgressListener listener) throws IOException { - // TODO Auto-generated method stub - + public void saveIndex(OutputStream output, ControlInfo ci, ProgressListener listener) { } @Override @@ -387,7 +385,7 @@ public void close() throws IOException { * * @author mario.arias */ - public class 
TriplesListIterator implements SuppliableIteratorTripleID { + public static class TriplesListIterator implements SuppliableIteratorTripleID { private long lastPosition; private final TriplesListLong triplesList; private int pos; @@ -506,7 +504,7 @@ public void mapFromFile(CountInputStream in, File f, ProgressListener listener) } @Override - public void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressListener listener) throws IOException { + public void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressListener listener) { } @Override @@ -518,4 +516,13 @@ public void replaceAllIds(DictionaryIDMapping mapSubj, DictionaryIDMapping mapPr } } + @Override + public void replaceAllIds(DictionaryIDMapping mapSubj, DictionaryIDMapping mapPred, DictionaryIDMapping mapObj, + DictionaryIDMapping mapGraph) { + sorted = false; + for (TripleID triple : arrayOfTriples) { + triple.setAll(mapSubj.getNewID(triple.getSubject() - 1), mapPred.getNewID(triple.getPredicate() - 1), + mapObj.getNewID(triple.getObject() - 1), mapGraph.getNewID(triple.getGraph() - 1)); + } + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/LargeFakeDataSetStreamSupplier.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/LargeFakeDataSetStreamSupplier.java index 24f31739..38e62ff7 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/LargeFakeDataSetStreamSupplier.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/LargeFakeDataSetStreamSupplier.java @@ -6,7 +6,7 @@ import com.the_qa_company.qendpoint.core.hdt.HDTManager; import com.the_qa_company.qendpoint.core.iterator.utils.MapIterator; import com.the_qa_company.qendpoint.core.options.HDTOptions; -import com.the_qa_company.qendpoint.core.quads.QuadString; +import com.the_qa_company.qendpoint.core.quad.QuadString; import com.the_qa_company.qendpoint.core.triples.TripleString; import 
com.the_qa_company.qendpoint.core.util.concurrent.ExceptionThread; import com.the_qa_company.qendpoint.core.util.string.ByteStringUtil; @@ -132,6 +132,7 @@ public static LargeFakeDataSetStreamSupplier createInfinite(long seed) { private TripleString buffer; private TripleString next; private boolean nquad; + private boolean noDefaultGraph; private boolean useBlankNode = true; private boolean useIRI = true; private boolean useLiteral = true; @@ -276,7 +277,7 @@ private CharSequence createGraph() { return ""; } int rnd = random.nextInt(10); - if (rnd < 4) { + if (rnd < 4 && !noDefaultGraph) { return ""; // no graph } if (rnd == 4) { @@ -575,6 +576,17 @@ public LargeFakeDataSetStreamSupplier withMaxGraph(int maxGraph) { return this; } + /** + * do not use default graph with quad generation + * + * @param noDefaultGraph no default graph + * @return this + */ + public LargeFakeDataSetStreamSupplier withNoDefaultGraph(boolean noDefaultGraph) { + this.noDefaultGraph = noDefaultGraph; + return this; + } + /** * Stream connected to a thread to interrupt in case of Exception */ diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/crc/CRC32.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/crc/CRC32.java index 05899e53..9c79360e 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/crc/CRC32.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/crc/CRC32.java @@ -85,30 +85,30 @@ public void update(byte data) { @Override public void writeCRC(OutputStream out) throws IOException { - IOUtil.writeInt(out, crc32 ^ 0xFFFFFFFF); + IOUtil.writeInt(out, ~crc32); } @Override public int writeCRC(CloseMappedByteBuffer channel, int offset) throws IOException { - IOUtil.writeInt(channel, offset, crc32 ^ 0xFFFFFFFF); + IOUtil.writeInt(channel, offset, ~crc32); return 4; } @Override public boolean readAndCheck(InputStream in) throws IOException { int readCRC = IOUtil.readInt(in); - return 
readCRC == (crc32 ^ 0xFFFFFFFF); + return readCRC == (~crc32); } @Override public boolean readAndCheck(CloseMappedByteBuffer channel, int offset) { int readCRC = IOUtil.readInt(channel, offset); - return readCRC == (crc32 ^ 0xFFFFFFFF); + return readCRC == (~crc32); } @Override public long getValue() { - return (crc32 ^ 0xFFFFFFFF) & 0xFFFFFFFFL; + return (~crc32) & 0xFFFFFFFFL; } @Override diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java index ab530ebe..d72fba96 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java @@ -15,6 +15,7 @@ import com.the_qa_company.qendpoint.core.exceptions.NotFoundException; import com.the_qa_company.qendpoint.core.exceptions.ParserException; import com.the_qa_company.qendpoint.core.hdt.impl.diskimport.CompressionResult; +import com.the_qa_company.qendpoint.core.iterator.utils.FetcherIterator; import com.the_qa_company.qendpoint.core.iterator.utils.PipedCopyIterator; import com.the_qa_company.qendpoint.core.listener.ProgressListener; import com.the_qa_company.qendpoint.core.options.HDTOptions; @@ -26,6 +27,7 @@ import com.the_qa_company.qendpoint.core.triples.IteratorTripleString; import com.the_qa_company.qendpoint.core.triples.TripleID; import com.the_qa_company.qendpoint.core.triples.TripleString; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIteratorPositionTest; import com.the_qa_company.qendpoint.core.triples.impl.utils.HDTTestUtils; import com.the_qa_company.qendpoint.core.util.LargeFakeDataSetStreamSupplier; import com.the_qa_company.qendpoint.core.util.LiteralsUtils; @@ -50,12 +52,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.BufferedWriter; import java.io.File; import java.io.IOException; import 
java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Comparator; import java.util.HashMap; @@ -81,7 +85,7 @@ @RunWith(Suite.class) @Suite.SuiteClasses({ HDTManagerTest.DynamicDiskTest.class, HDTManagerTest.DynamicCatTreeTest.class, HDTManagerTest.FileDynamicTest.class, HDTManagerTest.StaticTest.class, HDTManagerTest.MSDLangTest.class, - HDTManagerTest.DictionaryLangTypeTest.class }) + HDTManagerTest.HDTQTest.class, HDTManagerTest.DictionaryLangTypeTest.class }) public class HDTManagerTest { public static class HDTManagerTestBase extends AbstractMapMemoryTest implements ProgressListener { protected final Logger logger; @@ -342,13 +346,8 @@ public static Collection params() { // async, large thread count 8 }) { List modes; - if (threads > 1) { - // async, no need for partial - modes = List.of(HDTOptionsKeys.LOADER_DISK_COMPRESSION_MODE_VALUE_COMPLETE); - } else { - modes = List.of(// HDTOptionsKeys.LOADER_DISK_COMPRESSION_MODE_VALUE_PARTIAL, - HDTOptionsKeys.LOADER_DISK_COMPRESSION_MODE_VALUE_COMPLETE); - } + // HDTOptionsKeys.LOADER_DISK_COMPRESSION_MODE_VALUE_PARTIAL, + modes = List.of(HDTOptionsKeys.LOADER_DISK_COMPRESSION_MODE_VALUE_COMPLETE); for (String mode : modes) { params.addAll(List.of( new Object[] { "base-w" + threads + "-" + mode, SIZE_VALUE * 8, 20, 50, threads, mode, @@ -988,6 +987,223 @@ public void bigGenCatTreeDiskTest() throws ParserException, IOException { } } + @RunWith(Parameterized.class) + public static class HDTQTest extends HDTManagerTestBase { + @Parameterized.Parameters(name = "default graph:{0}") + public static Collection params() { + return List.of(true, false); + } + + @Parameterized.Parameter + public boolean useDefaultGraph; + + private LargeFakeDataSetStreamSupplier createSupplier() { + // fake data generation + return LargeFakeDataSetStreamSupplier.createSupplierWithMaxTriples(10000, 42) + 
.withNoDefaultGraph(!useDefaultGraph).withQuads(true); + } + + private void hdtqTesd(LargeFakeDataSetStreamSupplier supplier, Path d) throws NotFoundException, IOException { + // run test + Comparator csc = CharSequenceComparator.getInstance(); + try (HDT h = HDTManager.mapIndexedHDT(d)) { + Path indexFile = d.resolveSibling(d.getFileName() + HDTVersion.get_index_suffix("-")); + assertTrue("can't find " + indexFile, Files.exists(indexFile)); + supplier.reset(); + Iterator it = supplier.createTripleStringStream(); + Set dataset = new HashSet<>(); + while (it.hasNext()) { + dataset.add(it.next().tripleToString()); + } + + supplier.reset(); + long count = 0; + for (TripleString ts : (Iterable) supplier::createTripleStringStream) { + count++; + TripleString tsstr = ts.tripleToString(); + assertTrue("can't find " + tsstr, dataset.contains(tsstr)); + CharSequence graph = ts.getGraph(); + if (graph.length() == 0) { + IteratorTripleString it2 = h.search(ts.getSubject(), ts.getPredicate(), ts.getObject()); + + // search until we have no graph + while (true) { + assertTrue(it2.hasNext()); + TripleString ts2 = it2.next(); + if (ts2.getGraph().length() == 0) { + assertEquals(ts, ts2); + break; + } + } + } else { + IteratorTripleString it2 = h.search(ts.getSubject(), ts.getPredicate(), ts.getObject(), graph); + if (!it2.hasNext()) { + BitmapTriplesIteratorPositionTest.printIterator(it2); + fail(); + } + TripleString ts2 = it2.next(); + assertEquals(ts, ts2); + assertFalse(it2.hasNext()); + + // empty search to check wildcard + IteratorTripleString it3 = h.search(ts.getSubject(), ts.getPredicate(), ts.getObject(), ""); + while (true) { + assertTrue(it3.hasNext()); + TripleString ts3 = it3.next(); + if (csc.compare(ts3.getGraph(), graph) == 0) { + assertEquals(ts, ts3); + break; + } + } + } + } + + assertEquals(dataset.size(), count); + + { + IteratorTripleString itSearch = h.search("", "", "", ""); + long count2 = 0; + while (itSearch.hasNext()) { + count2++; + TripleString ts = 
itSearch.next(); + TripleString tsstr = ts.tripleToString(); + assertTrue("can't find " + tsstr, dataset.contains(tsstr)); + + } + assertEquals(dataset.size(), count2); + } + + // FOQ INDEX TEST + + StringBuilder roleDesc = new StringBuilder(); + for (TripleComponentRole role : TripleComponentRole.values()) { + Set dataset2 = new HashSet<>(dataset); + roleDesc.append(",").append(role); + + Iterator roleIt; + switch (role) { + case OBJECT -> { + Iterator sh = h.getDictionary().getShared().getSortedEntries(); + Iterator ob = h.getDictionary().getObjects().getSortedEntries(); + roleIt = new FetcherIterator<>() { + @Override + protected CharSequence getNext() { + if (sh.hasNext()) { + return sh.next(); + } + if (ob.hasNext()) { + return ob.next(); + } + return null; + } + }; + } + case SUBJECT -> { + Iterator sh = h.getDictionary().getShared().getSortedEntries(); + Iterator su = h.getDictionary().getSubjects().getSortedEntries(); + roleIt = new FetcherIterator<>() { + @Override + protected CharSequence getNext() { + if (sh.hasNext()) { + return sh.next(); + } + if (su.hasNext()) { + return su.next(); + } + return null; + } + }; + } + case PREDICATE -> roleIt = h.getDictionary().getPredicates().getSortedEntries(); + case GRAPH -> roleIt = h.getDictionary().getGraphs().getSortedEntries(); + default -> throw new AssertionError(); + } + + long componentId = 0; + Set components = new HashSet<>(); + while (roleIt.hasNext()) { + CharSequence component = roleIt.next(); + String str = component.toString(); + components.add(component.toString()); + long cid = componentId++; + + Iterator eid = switch (role) { + case OBJECT -> h.search("", "", component, ""); + case SUBJECT -> h.search(component, "", "", ""); + case PREDICATE -> h.search("", component, "", ""); + case GRAPH -> h.search("", "", "", component); + }; + + while (eid.hasNext()) { + TripleString tsstr = eid.next().tripleToString(); + if (role == TripleComponentRole.GRAPH && !tsstr.getGraph().equals(str)) { + // the 
default graph "" is searching all the + // graphs, so we need + // to check that we are using the right one. + continue; + } + if (!dataset2.remove(tsstr)) { + BitmapTriplesIteratorPositionTest.printIterator(eid); + fail("can't remove " + tsstr + "\nfor " + role + "=" + component + "(" + cid + ")" + + "\ndone: " + roleDesc.substring(1) + "\n" + String.join(",", components)); + } + } + } + assertTrue(dataset2.isEmpty()); + } + + } + } + + @Test + public void iteratorStreamGenerationTest() throws IOException, ParserException, NotFoundException { + LargeFakeDataSetStreamSupplier supplier = createSupplier(); + Iterator it = supplier.createTripleStringStream(); + + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, + HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD, HDTOptionsKeys.DICTIONARY_TYPE_KEY, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION); + Path root = tempDir.newFolder().toPath(); + try { + Path d = root.resolve("d.hdt"); + try (HDT hdt = HDTManager.generateHDT(it, HDTTestUtils.BASE_URI, spec, ProgressListener.ignore())) { + hdt.saveToHDT(d.toAbsolutePath().toString(), ProgressListener.ignore()); + } + hdtqTesd(supplier, d); + } finally { + PathUtils.deleteDirectory(root); + } + } + + @Test + public void fileReadGenerationTest() throws IOException, ParserException, NotFoundException { + LargeFakeDataSetStreamSupplier supplier = createSupplier(); + Iterator it = supplier.createTripleStringStream(); + + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, + HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD, HDTOptionsKeys.DICTIONARY_TYPE_KEY, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION); + Path root = tempDir.newFolder().toPath(); + try { + Path nq = root.resolve("d.nq"); + try (BufferedWriter writer = Files.newBufferedWriter(nq)) { + while (it.hasNext()) { + it.next().dumpNtriple(writer); + } + writer.flush(); + } + Path d = root.resolve("d.hdt"); + try (HDT hdt = 
HDTManager.generateHDT(nq.toAbsolutePath().toString(), HDTTestUtils.BASE_URI, + RDFNotation.NQUAD, spec, ProgressListener.ignore())) { + hdt.saveToHDT(d.toAbsolutePath().toString(), ProgressListener.ignore()); + } + hdtqTesd(supplier, d); + } finally { + PathUtils.deleteDirectory(root); + } + } + } + public static class MSDLangTest extends HDTManagerTestBase { @Test public void msdLangTest() throws IOException, ParserException, NotFoundException { @@ -1282,7 +1498,6 @@ public void msdLangCatTest() throws IOException, ParserException, NotFoundExcept for (int i = 0; i < sub + 1; i++) { Path in; Path out; - long size; if (i == 0) { in = ng; out = hdtg; @@ -1349,7 +1564,7 @@ public void idFromIteratorTest() throws IOException, ParserException { assertTrue(dict.supportsLanguageOfId()); assertTrue(dict.supportsNodeTypeOfId()); - for (TripleComponentRole role : TripleComponentRole.values()) { + for (TripleComponentRole role : TripleComponentRole.valuesNoGraph()) { long idc = 1; Iterator it = dict.stringIterator(role, true); @@ -1426,8 +1641,8 @@ public void idFromIteratorTest() throws IOException, ParserException { public static class DictionaryLangTypeTest extends HDTManagerTestBase { @Parameterized.Parameters(name = "dict:{0}") - public static Collection params() { - return List.of(diskDict()); + public static Collection params() { + return Arrays.asList(diskDict()); } @Parameterized.Parameter diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/storage/QEPMapTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/storage/QEPMapTest.java index 1922cd2b..e9b4f972 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/storage/QEPMapTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/storage/QEPMapTest.java @@ -52,7 +52,7 @@ public static Collection params() { private static final ModifiableBitmap[] EMPTY_DELTA; static { - EMPTY_DELTA = new 
ModifiableBitmap[TripleComponentRole.values().length]; + EMPTY_DELTA = new ModifiableBitmap[TripleComponentRole.valuesNoGraph().length]; Arrays.fill(EMPTY_DELTA, EmptyBitmap.of(0)); } diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorPositionTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorPositionTest.java index a2db9479..280d7f4b 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorPositionTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorPositionTest.java @@ -36,6 +36,29 @@ public class BitmapTriplesIteratorPositionTest extends AbstractMapMemoryTest { HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION_BIG, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_PSFC_SECTION); + /** + * Print an iterator and (if found) sub iterators + * + * @param it Iterator + */ + public static void printIterator(Object it) { + for (int depth = 0;; depth++) { + System.out.println("[" + depth + "] Used iterator: " + it.getClass()); + try { + if (it instanceof DictionaryTranslateIterator) { + it = ITERATOR_SUB.get(it); + } else if (it instanceof DictionaryTranslateIteratorBuffer) { + it = ITERATOR_SUB_BUFFER.get(it); + } else if (it instanceof SequentialSearchIteratorTripleID) { + it = ITERATOR_SUB_SEQ.get(it); + } else + break; + } catch (IllegalAccessException e) { + break; + } + } + } + private static final Field ITERATOR_SUB; private static final Field ITERATOR_SUB_BUFFER; private static final Field ITERATOR_SUB_SEQ; diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java index a890f855..f5d1a95f 100644 --- 
a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java @@ -1,6 +1,7 @@ package com.the_qa_company.qendpoint.model; import com.the_qa_company.qendpoint.core.enums.DictionarySectionRole; +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; import com.the_qa_company.qendpoint.store.exception.EndpointStoreException; import org.eclipse.rdf4j.model.base.AbstractIRI; import org.eclipse.rdf4j.model.util.URIUtil; @@ -17,6 +18,7 @@ public class SimpleIRIHDT extends AbstractIRI implements HDTValue { public static final byte PREDICATE_POS = 2; public static final byte OBJECT_POS = 3; public static final byte SHARED_POS = 4; + public static final byte GRAPH_POS = 5; public static byte getPos(DictionarySectionRole role) { return switch (role) { @@ -24,6 +26,7 @@ public static byte getPos(DictionarySectionRole role) { case SUBJECT -> SUBJECT_POS; case PREDICATE -> PREDICATE_POS; case OBJECT -> OBJECT_POS; + case GRAPH -> throw new NotImplementedException("TODO: GRAPH"); }; } diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java index 8a024860..537efe30 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java @@ -2,6 +2,7 @@ import com.the_qa_company.qendpoint.core.dictionary.Dictionary; import com.the_qa_company.qendpoint.core.enums.RDFNodeType; +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; import com.the_qa_company.qendpoint.model.HDTValue; import com.the_qa_company.qendpoint.model.SimpleBNodeHDT; import com.the_qa_company.qendpoint.model.SimpleIRIHDT; @@ -218,6 +219,7 @@ public Value idToValue(TripleComponentRole role, long id) { case SUBJECT -> 
idToSubjectHDTResource0(id); case PREDICATE -> idToPredicateHDTResource(id); case OBJECT -> idToObjectHDTResource0(id); + case GRAPH -> throw new NotImplementedException("TODO: GRAPH"); }; } From a37f661196952288d712ee28766c7c230f878fc5 Mon Sep 17 00:00:00 2001 From: qaate47 Date: Thu, 28 Sep 2023 15:26:53 +0200 Subject: [PATCH 2/5] GH-422 fix dir loader --- .../qendpoint/core/hdt/HDTManagerImpl.java | 15 ++- .../qendpoint/core/rdf/RDFParserCallback.java | 6 + .../qendpoint/core/rdf/RDFParserFactory.java | 15 +++ .../core/rdf/parsers/RDFParserDir.java | 122 +++++++++--------- .../util/LargeFakeDataSetStreamSupplier.java | 5 +- .../qendpoint/core/hdt/HDTManagerTest.java | 48 +++++++ 6 files changed, 143 insertions(+), 68 deletions(-) diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java index 32209122..fcdb2a81 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerImpl.java @@ -276,8 +276,13 @@ public HDT doGenerateHDT(Iterator triples, String baseURI, HDTOpti public HDT doGenerateHDTDisk(String rdfFileName, String baseURI, RDFNotation rdfNotation, CompressionType compressionType, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { - // read this file as stream, do not compress to allow the - // compressionType to be different from the file extension + if (compressionType == CompressionType.NONE) { + RDFParserCallback parser = RDFParserFactory.getParserCallback(rdfNotation, hdtFormat); + try (PipedCopyIterator iterator = RDFParserFactory.readAsIterator(parser, rdfFileName, + baseURI, true, rdfNotation)) { + return doGenerateHDTDisk0(iterator, true, baseURI, hdtFormat, listener); + } + } try (InputStream stream = IOUtil.getFileInputStream(rdfFileName, false)) { return 
doGenerateHDTDisk(stream, baseURI, rdfNotation, compressionType, hdtFormat, listener); } @@ -420,8 +425,10 @@ protected HDT doHDTDiffBit(String location, String hdtFileName, Bitmap deleteBit protected HDT doHDTCatTree(RDFFluxStop fluxStop, HDTSupplier supplier, String filename, String baseURI, RDFNotation rdfNotation, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { - try (InputStream is = IOUtil.getFileInputStream(filename)) { - return doHDTCatTree(fluxStop, supplier, is, baseURI, rdfNotation, hdtFormat, listener); + RDFParserCallback parser = RDFParserFactory.getParserCallback(rdfNotation, hdtFormat); + try (PipedCopyIterator iterator = RDFParserFactory.readAsIterator(parser, filename, baseURI, true, + rdfNotation)) { + return doHDTCatTree(fluxStop, supplier, iterator, baseURI, hdtFormat, listener); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFParserCallback.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFParserCallback.java index 5bfdf81c..1cc6caf0 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFParserCallback.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFParserCallback.java @@ -20,6 +20,7 @@ package com.the_qa_company.qendpoint.core.rdf; import java.io.InputStream; +import java.nio.file.Path; import com.the_qa_company.qendpoint.core.enums.RDFNotation; import com.the_qa_company.qendpoint.core.exceptions.ParserException; @@ -50,6 +51,11 @@ default RDFCallback async() { void doParse(String fileName, String baseUri, RDFNotation notation, boolean keepBNode, RDFCallback callback) throws ParserException; + default void doParse(Path file, String baseUri, RDFNotation notation, boolean keepBNode, RDFCallback callback) + throws ParserException { + doParse(file.toAbsolutePath().toString(), baseUri, notation, keepBNode, callback); + } + void doParse(InputStream in, String baseUri, RDFNotation 
notation, boolean keepBNode, RDFCallback callback) throws ParserException; } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFParserFactory.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFParserFactory.java index 6ca25e24..810a9578 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFParserFactory.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFParserFactory.java @@ -95,4 +95,19 @@ public static PipedCopyIterator readAsIterator(RDFParserCallback p (triple, pos) -> pipe.addElement(triple.tripleToString()))); } + /** + * convert a stream to a triple iterator + * + * @param parser the parser to convert the stream + * @param file path to the file to parse + * @param baseUri the base uri to parse + * @param notation the rdf notation to parse + * @return iterator + */ + public static PipedCopyIterator readAsIterator(RDFParserCallback parser, String file, String baseUri, + boolean keepBNode, RDFNotation notation) { + return PipedCopyIterator.createOfCallback(pipe -> parser.doParse(file, baseUri, notation, keepBNode, + (triple, pos) -> pipe.addElement(triple.tripleToString()))); + } + } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserDir.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserDir.java index 70329684..0e2043ae 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserDir.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserDir.java @@ -44,81 +44,79 @@ public RDFParserDir(HDTOptions spec) { } - public RDFParserDir() { - this(HDTOptions.EMPTY); - } - @Override public void doParse(String fileName, String baseUri, RDFNotation notation, boolean keepBNode, RDFCallback callback) throws ParserException { - try { - doParse(Path.of(fileName), baseUri, notation, keepBNode, 
callback); - } catch (InvalidPathException e) { - throw new ParserException(e); - } + doParse(Path.of(fileName), baseUri, notation, keepBNode, callback); } - private void doParse(Path path, String baseUri, RDFNotation notation, boolean keepBNode, RDFCallback callback) + @Override + public void doParse(Path path, String baseUri, RDFNotation notation, boolean keepBNode, RDFCallback callback) throws ParserException { if (notation != RDFNotation.DIR) { throw new IllegalArgumentException("Can't parse notation different than " + RDFNotation.DIR + "!"); } - if (async == 1) { - // no async parser, faster to use recursion - try (Stream subFiles = Files.list(path)) { - subFiles.forEach(child -> { - try { - if (Files.isDirectory(child)) { - doParse(child, baseUri, RDFNotation.DIR, keepBNode, callback); - return; - } - RDFParserCallback rdfParserCallback; - RDFNotation childNotation; + try { + if (async == 1) { + // no async parser, faster to use recursion + try (Stream subFiles = Files.list(path)) { + subFiles.forEach(child -> { try { - // get the notation of the file - childNotation = RDFNotation.guess(child.toFile()); - rdfParserCallback = RDFParserFactory.getParserCallback(childNotation, spec); - } catch (IllegalArgumentException e) { - log.warn("Ignore file {}", child, e); - return; + if (Files.isDirectory(child)) { + doParse(child, baseUri, RDFNotation.DIR, keepBNode, callback); + return; + } + RDFParserCallback rdfParserCallback; + RDFNotation childNotation; + try { + // get the notation of the file + childNotation = RDFNotation.guess(child.toFile()); + rdfParserCallback = RDFParserFactory.getParserCallback(childNotation, spec); + } catch (IllegalArgumentException e) { + log.warn("Ignore file {}", child, e); + return; + } + log.debug("parse {}", child); + // we can parse it, parsing it + rdfParserCallback.doParse(child.toAbsolutePath().toString(), baseUri, childNotation, + keepBNode, callback); + } catch (ParserException e) { + throw new ContainerException(e); } - 
log.debug("parse {}", child); - // we can parse it, parsing it - rdfParserCallback.doParse(child.toAbsolutePath().toString(), baseUri, childNotation, keepBNode, - callback); - } catch (ParserException e) { - throw new ContainerException(e); - } - }); - } catch (IOException | SecurityException e) { - throw new ParserException(e); - } catch (ContainerException e) { - throw (ParserException) e.getCause(); - } - } else { - // use async parser because we will need to call it from multiple - // threads - RDFCallback asyncRdfCallback = callback.async(); - // create the pool - ExecutorService executorService = Executors.newFixedThreadPool(async); - // list of all the future loaded by the parser - FutureList list = new FutureList(); - // send the first task with the root directory - list.add(executorService.submit( - new LoadTask(executorService, path, baseUri, RDFNotation.DIR, keepBNode, asyncRdfCallback))); - - // wait for end of all the futures - try { - list.await(); - } catch (ExecutionException e) { - throw new ParserException(e.getCause()); - } catch (InterruptedException e) { - throw new ParserException(e); - } finally { - // close the service - executorService.shutdown(); + }); + } catch (IOException | SecurityException e) { + throw new ParserException(e); + } catch (ContainerException e) { + throw (ParserException) e.getCause(); + } + } else { + // use async parser because we will need to call it from + // multiple + // threads + RDFCallback asyncRdfCallback = callback.async(); + // create the pool + ExecutorService executorService = Executors.newFixedThreadPool(async); + // list of all the future loaded by the parser + FutureList list = new FutureList(); + // send the first task with the root directory + list.add(executorService.submit( + new LoadTask(executorService, path, baseUri, RDFNotation.DIR, keepBNode, asyncRdfCallback))); + + // wait for end of all the futures + try { + list.await(); + } catch (ExecutionException e) { + throw new 
ParserException(e.getCause()); + } catch (InterruptedException e) { + throw new ParserException(e); + } finally { + // close the service + executorService.shutdown(); + } } + } catch (InvalidPathException e) { + throw new ParserException(e); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/LargeFakeDataSetStreamSupplier.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/LargeFakeDataSetStreamSupplier.java index 38e62ff7..1a9b4dd0 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/LargeFakeDataSetStreamSupplier.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/LargeFakeDataSetStreamSupplier.java @@ -34,6 +34,7 @@ * @author Antoine Willerval */ public class LargeFakeDataSetStreamSupplier { + public static final String BASE_URI = "http://w"; private static final Charset DEFAULT_CHARSET = ByteStringUtil.STRING_ENCODING; @@ -243,7 +244,7 @@ public ThreadedStream createNTInputStream(CompressionType compressionType) throw * @throws IOException io exception */ public HDT createFakeHDT(HDTOptions spec) throws ParserException, IOException { - return HDTManager.generateHDT(createTripleStringStream(), "http://w", spec, null); + return HDTManager.generateHDT(createTripleStringStream(), BASE_URI, spec, null); } /** @@ -294,7 +295,7 @@ private CharSequence createResource() { } private CharSequence createIRI() { - return "http://w" + random.nextInt(maxElementSplit) + "i.test.org/#Obj" + random.nextInt(maxElementSplit); + return BASE_URI + random.nextInt(maxElementSplit) + "i.test.org/#Obj" + random.nextInt(maxElementSplit); } private CharSequence createType() { diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java index d72fba96..fc3d7831 100644 --- 
a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java @@ -790,6 +790,54 @@ public void generateTest() throws IOException, ParserException, NotFoundExceptio } public static class StaticTest extends HDTManagerTestBase { + @Test + public void dirInjectionTest() throws Exception { + Path root = tempDir.newFolder().toPath(); + + int seed = 345678; + int split = 10; + long size = 500; + + try { + + LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier + .createSupplierWithMaxTriples(size, seed); + + HDTOptions gen = HDTOptions.of(HDTOptionsKeys.DICTIONARY_TYPE_KEY, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG, HDTOptionsKeys.LOADER_TYPE_KEY, + HDTOptionsKeys.LOADER_CATTREE_LOADERTYPE_KEY, HDTOptionsKeys.LOADER_CATTREE_LOCATION_KEY, + root.resolve("work"), HDTOptionsKeys.LOADER_CATTREE_FUTURE_HDT_LOCATION_KEY, + root.resolve("work.hdt"), HDTOptionsKeys.LOADER_CATTREE_LOADERTYPE_KEY, "disk", + HDTOptionsKeys.LOADER_DISK_LOCATION_KEY, root.resolve("workd"), + HDTOptionsKeys.LOADER_DISK_FUTURE_HDT_LOCATION_KEY, root.resolve("workd.hdt")); + + Path didr = root.resolve("test"); + + Files.createDirectories(didr); + + for (int i = 0; i < split; i++) { + supplier.createNTFile(didr.resolve("d" + i + ".nt")); + } + + LargeFakeDataSetStreamSupplier supplier2 = LargeFakeDataSetStreamSupplier + .createSupplierWithMaxTriples(size * split, seed); + Path exc = root.resolve("expected.hdt"); + supplier2.createAndSaveFakeHDT(gen, exc); + + Path actual = root.resolve("actual.hdt"); + try (HDT hdt = HDTManager.generateHDT(didr, LargeFakeDataSetStreamSupplier.BASE_URI, RDFNotation.DIR, + gen, ProgressListener.ignore())) { + hdt.saveToHDT(actual); + } + + try (HDT actHDT = HDTManager.mapHDT(actual); HDT excHDT = HDTManager.mapHDT(exc)) { + assertEqualsHDT(excHDT, actHDT); + } + } finally { + PathUtils.deleteDirectory(root); + } + } + 
@Test public void multiSectionTest() throws ParserException, IOException, NotFoundException { Path root = tempDir.newFolder().toPath(); From 31e13e04bdec6d56a601cd7ddfe3453645c12c97 Mon Sep 17 00:00:00 2001 From: Antoine Willerval Date: Tue, 3 Oct 2023 11:02:28 +0200 Subject: [PATCH 3/5] GH-420 quads (#425) HDTq MSDL, DiffCat, GenDisk implementation and add tests --- .../compact/bitmap/AddSnapshotBitmap.java | 6 +- .../core/compact/bitmap/Bitmap64Big.java | 18 +- .../compact/bitmap/BitmapFactoryImpl.java | 2 +- .../compact/bitmap/GraphDeleteBitmap.java | 124 ++ .../compact/bitmap/MappedRoaringBitmap.java | 101 ++ .../compact/bitmap/MultiRoaringBitmap.java | 451 +++++++ .../core/compact/bitmap/RoaringBitmap32.java | 122 ++ ...oaringBitmap.java => RoaringBitmap64.java} | 24 +- .../qendpoint/core/dictionary/Dictionary.java | 12 + .../core/dictionary/DictionaryFactory.java | 97 +- .../core/dictionary/DictionaryKCat.java | 11 + .../core/dictionary/DictionaryType.java | 4 +- .../core/dictionary/impl/BaseDictionary.java | 7 + .../impl/CompressFourSectionDictionary.java | 35 +- .../impl/FourQuadSectionDictionary.java | 16 +- .../impl/MultipleLangBaseDictionary.java | 47 +- .../impl/MultipleSectionDictionaryLang.java | 46 +- ...onDictionaryLangPFCOptimizedExtractor.java | 13 +- .../impl/WriteFourSectionDictionary.java | 48 +- .../WriteMultipleSectionDictionaryLang.java | 52 +- .../dictionary/impl/kcat/BitmapTriple.java | 11 +- .../impl/kcat/FourSectionDictionaryKCat.java | 10 + .../impl/kcat/GroupBySubjectMapIterator.java | 37 +- .../core/dictionary/impl/kcat/KCatImpl.java | 26 +- .../core/dictionary/impl/kcat/KCatMerger.java | 106 +- .../kcat/MultipleSectionDictionaryKCat.java | 10 + .../MultipleSectionLangDictionaryKCat.java | 10 + .../qendpoint/core/hdt/HDTVocabulary.java | 2 + .../core/hdt/impl/HDTDiskImporter.java | 9 +- .../qendpoint/core/hdt/impl/HDTImpl.java | 23 +- .../qendpoint/core/hdt/impl/WriteHDTImpl.java | 3 +- .../impl/diskimport/CompressTripleMapper.java | 
49 +- .../impl/diskimport/CompressionResult.java | 15 + .../diskimport/CompressionResultEmpty.java | 15 + .../diskimport/CompressionResultFile.java | 29 +- .../diskimport/CompressionResultPartial.java | 42 +- .../MultiSectionLangSectionCompressor.java | 6 +- .../MultiSectionSectionCompressor.java | 5 +- .../impl/diskimport/SectionCompressor.java | 117 +- .../qendpoint/core/header/PlainHeader.java | 3 +- .../iterator/DictionaryTranslateIterator.java | 33 +- .../DictionaryTranslateIteratorBuffer.java | 2 +- .../core/options/HDTOptionsKeys.java | 7 +- .../qendpoint/core/quad/QuadString.java | 13 + .../core/quad/impl/BitmapQuadsIterator.java | 139 --- .../core/quad/impl/BitmapQuadsIteratorG.java | 116 -- .../quad/impl/BitmapQuadsIteratorYFOQ.java | 97 -- .../quad/impl/BitmapQuadsIteratorYGFOQ.java | 96 -- .../quad/impl/BitmapQuadsIteratorZFOQ.java | 223 ---- .../quad/impl/BitmapQuadsIteratorZGFOQ.java | 202 ---- .../quad/impl/BitmapTriplesIteratorGraph.java | 112 ++ .../impl/BitmapTriplesIteratorGraphG.java | 93 ++ .../qendpoint/core/triples/IndexedTriple.java | 14 +- .../qendpoint/core/triples/TripleID.java | 7 + .../core/triples/TripleIDComparator.java | 6 +- .../core/triples/TripleIDComparatorSPO.java | 25 +- .../qendpoint/core/triples/Triples.java | 15 +- .../core/triples/TriplesFactory.java | 6 +- .../core/triples/impl/BitmapQuadTriples.java | 174 +-- .../core/triples/impl/BitmapTriples.java | 17 +- .../core/triples/impl/OneReadTempTriples.java | 2 +- .../core/triples/impl/TriplesList.java | 5 + .../core/triples/impl/TriplesListLong.java | 5 + .../core/triples/impl/WriteBitmapTriples.java | 84 +- .../core/util/io/CloseMappedByteBuffer.java | 9 + .../qendpoint/core/util/io/Closer.java | 10 +- .../qendpoint/core/util/io/IOUtil.java | 8 + .../io/compress/CompressTripleReader.java | 44 +- .../io/compress/CompressTripleWriter.java | 22 +- .../core/util/io/compress/CompressUtil.java | 14 +- .../io/compress/MapCompressTripleMerger.java | 45 +- 
.../compress/NoDuplicateTripleIDIterator.java | 6 +- .../util/io/compress/TripleGenerator.java | 43 +- .../bitmap/MultiRoaringBitmapTest.java | 184 +++ .../CompressFourSectionDictionaryTest.java | 37 +- .../dictionary/impl/kcat/KCatMergerTest.java | 1040 ++++++++++------- .../qendpoint/core/hdt/HDTManagerTest.java | 422 +++++-- .../qendpoint/core/storage/QEPCoreTest.java | 4 +- .../triples/impl/BitmapQuadTriplesTest.java | 188 +++ .../core/triples/impl/BitmapTriplesTest.java | 24 +- .../util/io/compress/CompressTripleTest.java | 4 +- 81 files changed, 3610 insertions(+), 1751 deletions(-) create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/GraphDeleteBitmap.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MappedRoaringBitmap.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmap.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/RoaringBitmap32.java rename qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/{RoaringBitmap.java => RoaringBitmap64.java} (80%) delete mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIterator.java delete mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorG.java delete mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYFOQ.java delete mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYGFOQ.java delete mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZFOQ.java delete mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZGFOQ.java create mode 100644 
qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraph.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraphG.java create mode 100644 qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmapTest.java create mode 100644 qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriplesTest.java diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/AddSnapshotBitmap.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/AddSnapshotBitmap.java index 0a80d941..aa27b2f3 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/AddSnapshotBitmap.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/AddSnapshotBitmap.java @@ -64,7 +64,7 @@ public interface AddSnapshotDeltaBitmap extends Bitmap, AutoCloseable { * @return the roaring bitmap associated with this snapshot, can be null * for empty value */ - RoaringBitmap roaringBitmap(); + RoaringBitmap64 roaringBitmap(); } private class DeltaBitmap implements SimpleBitmap, AddSnapshotDeltaBitmap { @@ -72,7 +72,7 @@ private class DeltaBitmap implements SimpleBitmap, AddSnapshotDeltaBitmap { /** * compressed memory bitmap storing the delta */ - final RoaringBitmap snapshot = new RoaringBitmap(); + final RoaringBitmap64 snapshot = new RoaringBitmap64(); /** * next snapshot created after this one */ @@ -94,7 +94,7 @@ private class DeltaBitmap implements SimpleBitmap, AddSnapshotDeltaBitmap { } @Override - public RoaringBitmap roaringBitmap() { + public RoaringBitmap64 roaringBitmap() { return snapshot; } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/Bitmap64Big.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/Bitmap64Big.java index 
a4e933f4..27559c0d 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/Bitmap64Big.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/Bitmap64Big.java @@ -162,12 +162,24 @@ public boolean access(long bitIndex) { @Override public long rank1(long pos) { - throw new NotImplementedException(); + long c = 0; + for (int i = 0; i < pos; i++) { + if (access(i)) { + c++; + } + } + return c; } @Override public long rank0(long pos) { - throw new NotImplementedException(); + long c = 0; + for (int i = 0; i < pos; i++) { + if (!access(i)) { + c++; + } + } + return c; } @Override @@ -264,7 +276,7 @@ public String getType() { } public long selectPrev1(long start) { - throw new NotImplementedException(); + return select1(rank1(start)); } public long getNumBits() { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/BitmapFactoryImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/BitmapFactoryImpl.java index da3cf5eb..145935dc 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/BitmapFactoryImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/BitmapFactoryImpl.java @@ -36,7 +36,7 @@ public class BitmapFactoryImpl extends BitmapFactory { protected ModifiableBitmap doCreateModifiableBitmap(String type) { return switch (Objects.requireNonNullElse(type, HDTVocabulary.BITMAP_TYPE_PLAIN)) { case HDTVocabulary.BITMAP_TYPE_PLAIN -> Bitmap375Big.memory(0); - case HDTVocabulary.BITMAP_TYPE_ROARING -> new RoaringBitmap(); + case HDTVocabulary.BITMAP_TYPE_ROARING -> new RoaringBitmap64(); default -> throw new IllegalArgumentException("Implementation not found for Bitmap with type " + type); }; } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/GraphDeleteBitmap.java 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/GraphDeleteBitmap.java new file mode 100644 index 00000000..4290c57b --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/GraphDeleteBitmap.java @@ -0,0 +1,124 @@ +package com.the_qa_company.qendpoint.core.compact.bitmap; + +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; +import com.the_qa_company.qendpoint.core.util.io.Closer; + +import java.io.Closeable; +import java.io.IOException; + +/** + * Bitmap to delete inside a graph, all the ids are mapped on a bitmap with the + * formula + * + *
+ * (id, graph) -> id * graphs + graph
+ * 
+ * + * @author Antoine Willerval + */ +public class GraphDeleteBitmap implements SimpleModifiableBitmap, Closeable { + /** + * create empty graph delete bitmap + * + * @param graphs graphs count + * @param size triples count + * @return gdb + */ + public static GraphDeleteBitmap empty(long graphs, long size) { + return new GraphDeleteBitmap(EmptyBitmap.of(size * graphs), graphs); + } + + /** + * create memory graph delete bitmap + * + * @param graphs graphs count + * @param size triples count + * @return gdb + */ + public static GraphDeleteBitmap memory(long graphs, long size) { + return new GraphDeleteBitmap(MultiRoaringBitmap.memory(size * graphs), graphs); + } + + /** + * wrap a bitmap to create a {@link GraphDeleteBitmap} + * + * @param bitmap bitmap + * @param graphs graphs count + * @return bitmap if already instanceof graph delete bitmap and contains the + * right graphs number or wrap into GraphDeleteBitmap + */ + public static GraphDeleteBitmap wrap(Bitmap bitmap, long graphs) { + if (bitmap instanceof GraphDeleteBitmap gdb && gdb.graphs == graphs) { + // use directly the bitmap + return gdb; + } + return new GraphDeleteBitmap(bitmap, graphs); + } + + private final Bitmap store; + private final long graphs; + + private GraphDeleteBitmap(Bitmap store, long graphs) { + this.store = store; + this.graphs = graphs; + } + + /** + * access a bit in a graph + * + * @param graph graph + * @param position position + * @return bit value + */ + public boolean access(long graph, long position) { + return access(position * graphs + graph); + } + + /** + * set a bit in a graph + * + * @param graph graph + * @param position position + * @param value value + * @throws ClassCastException if the wrapped bitmap isn't a modifiable + * bitmap + */ + public void set(int graph, long position, boolean value) { + set(position * graphs + graph, value); + } + + @Override + public boolean access(long position) { + return store.access(position); + } + + @Override + public void 
set(long position, boolean value) { + ((ModifiableBitmap) store).set(position, value); + } + + @Override + public long getNumBits() { + return store.getNumBits(); + } + + @Override + public long getSizeBytes() { + return store.getSizeBytes(); + } + + @Override + public String getType() { + return store.getType(); + } + + @Override + public void append(boolean value) { + throw new NotImplementedException(); + } + + @Override + public void close() throws IOException { + Closer.closeSingle(store); + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MappedRoaringBitmap.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MappedRoaringBitmap.java new file mode 100644 index 00000000..69d9ef6f --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MappedRoaringBitmap.java @@ -0,0 +1,101 @@ +package com.the_qa_company.qendpoint.core.compact.bitmap; + +import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; +import com.the_qa_company.qendpoint.core.util.io.CloseMappedByteBuffer; +import com.the_qa_company.qendpoint.core.util.io.IOUtil; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; + +import java.io.Closeable; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +/** + * Mapped {@link Bitmap} wrapper for the {@link ImmutableRoaringBitmap}, not + * compatible with {@link RoaringBitmap64} + * + * @author Antoine Willerval + */ +public class MappedRoaringBitmap implements SimpleBitmap, Closeable { + private final CloseMappedByteBuffer buffer; + private final ImmutableRoaringBitmap rbm; + + public MappedRoaringBitmap(CloseMappedByteBuffer buffer) { + this.buffer = buffer; + this.rbm = new ImmutableRoaringBitmap(buffer.getInternalBuffer()); + } + + public ImmutableRoaringBitmap getHandle() { + return rbm; + } + + @Override + public 
boolean access(long position) { + return rbm.contains((int) position); + } + + @Override + public long getNumBits() { + return rbm.last(); + } + + @Override + public long getSizeBytes() { + return rbm.serializedSizeInBytes() + 8; + } + + @Override + public void save(OutputStream output, ProgressListener listener) throws IOException { + long size = rbm.serializedSizeInBytes(); + IOUtil.writeLong(output, size); + rbm.serialize(new DataOutputStream(output)); + } + + @Override + public String getType() { + return HDTVocabulary.BITMAP_TYPE_ROARING; + } + + @Override + public long select1(long n) { + long position = n - 1; + if (position == -1) + return -1; + if (position < rbm.getLongCardinality()) { + return rbm.select((int) position); + } else { + return rbm.select((int) rbm.getLongCardinality() - 1) + 1; + } + } + + @Override + public long rank1(long position) { + if (position >= 0) + return rbm.rankLong((int) position); + return 0; + } + + @Override + public long countOnes() { + return rbm.getLongCardinality(); + } + + @Override + public long selectPrev1(long start) { + return select1(rank1(start)); + } + + @Override + public long selectNext1(long start) { + long pos = rank1(start - 1); + if (pos < rbm.getLongCardinality()) + return select1(pos + 1); + return -1; + } + + @Override + public void close() throws IOException { + buffer.close(); + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmap.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmap.java new file mode 100644 index 00000000..90298a29 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmap.java @@ -0,0 +1,451 @@ +package com.the_qa_company.qendpoint.core.compact.bitmap; + +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; +import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary; +import 
com.the_qa_company.qendpoint.core.listener.ProgressListener; +import com.the_qa_company.qendpoint.core.util.io.CloseMappedByteBuffer; +import com.the_qa_company.qendpoint.core.util.io.Closer; +import com.the_qa_company.qendpoint.core.util.io.IOUtil; +import org.roaringbitmap.RoaringBitmap; + +import java.io.Closeable; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.IntStream; + +import static java.lang.String.format; + +/** + * {@link ModifiableBitmap} using multiple roaring bitmap to create a 64bits + * version, can't be loaded. + * + * @author Antoine Willerval + */ +public class MultiRoaringBitmap implements SimpleModifiableBitmap, Closeable { + // cookie + maps_nb + chunk_size + numbits + private static final int HEADER_SIZE = 8 + 4 + 4 + 8; + public static final long COOKIE = 0x6347008534687531L; + + /** + * load mapped multi roaring bitmap stream + * + * @param input stream + * @return bitmap + * @throws IOException io exception when loading + */ + public static MultiRoaringBitmap load(InputStream input) throws IOException { + return new MultiRoaringBitmap(input); + } + + /** + * load mapped multi roaring bitmap file + * + * @param path file + * @return bitmap + * @throws IOException io exception when loading + */ + public static MultiRoaringBitmap mapped(Path path) throws IOException { + return mapped(path, 0); + } + + /** + * load mapped multi roaring bitmap file + * + * @param path file + * @return bitmap + * @throws IOException io exception when loading + */ + public static MultiRoaringBitmap mapped(Path path, long start) throws IOException { + try (FileChannel channel = FileChannel.open(path, 
StandardOpenOption.READ)) { + return mapped(path, start, channel); + } + } + + /** + * load mapped multi roaring bitmap file. + * + * @param fileName file name for leak tracking + * @param start channel start + * @param channel channel to read + * @return bitmap + * @throws IOException io exception when loading + */ + public static MultiRoaringBitmap mapped(Path fileName, long start, FileChannel channel) throws IOException { + return new MultiRoaringBitmap(fileName, channel, start); + } + + /** + * create a multi roaring bitmap with a size with the default chunk size. + * + * @param size size + * @return bitmap + */ + public static MultiRoaringBitmap memory(long size) { + return memory(size, defaultChunkSize); + } + + /** + * create a multi roaring bitmap with a size. + * + * @param size size + * @param chunkSize chunk size + * @return bitmap + */ + public static MultiRoaringBitmap memory(long size, int chunkSize) { + try { + return new MultiRoaringBitmap(size, chunkSize, null); + } catch (IOException e) { + throw new AssertionError(e); + } + } + + /** + * create a multi roaring bitmap with a size with the default chunk size for + * stream writing. + * + * @param size size + * @param streamOutput stream output + * @return bitmap + */ + public static MultiRoaringBitmap memoryStream(long size, Path streamOutput) throws IOException { + return memoryStream(size, defaultChunkSize, streamOutput); + } + + /** + * create a multi roaring bitmap with a size for stream writing. 
+ * + * @param size size + * @param chunkSize chunk size + * @param streamOutput stream output + * @return bitmap + */ + public static MultiRoaringBitmap memoryStream(long size, int chunkSize, Path streamOutput) throws IOException { + return new MultiRoaringBitmap(size, chunkSize, streamOutput); + } + + static int defaultChunkSize = 1 << 29; + final List maps = new ArrayList<>(); + final int chunkSize; + private final long numbits; + private final boolean writable; + private final FileChannel output; + private final Path outputPath; + private long outputMax; + + private MultiRoaringBitmap(InputStream input) throws IOException { + ByteBuffer buffer = ByteBuffer.wrap(IOUtil.readBuffer(input, HEADER_SIZE, ProgressListener.ignore())) + .order(ByteOrder.LITTLE_ENDIAN); + + long cookie = buffer.getLong(0); + if (cookie != COOKIE) { + throw new IOException(format("found bad cookie %x != %x", cookie, COOKIE)); + } + + int chunks = buffer.getInt(8); + chunkSize = buffer.getInt(12); + numbits = buffer.getLong(16); + writable = true; + output = null; + outputPath = null; + + for (int i = 0; i < chunks; i++) { + input.skipNBytes(8); // skip size used for mapping + + RoaringBitmap32 bitmap32 = new RoaringBitmap32(); + bitmap32.getHandle().deserialize(new DataInputStream(input)); + maps.add(bitmap32); + } + + } + + private MultiRoaringBitmap(long size, int chunkSize, Path output) throws IOException { + writable = true; + if (size < 0) { + throw new IllegalArgumentException("Negative size: " + size); + } + this.chunkSize = chunkSize; + this.numbits = size; + + int chunks = (int) ((size - 1) / chunkSize + 1); + + try { + if (output != null) { + this.outputPath = output; + this.output = FileChannel.open(output, StandardOpenOption.READ, StandardOpenOption.WRITE, + StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.CREATE); + + try (CloseMappedByteBuffer map = IOUtil.mapChannel(output, this.output, FileChannel.MapMode.READ_WRITE, + 0, HEADER_SIZE)) { + 
map.order(ByteOrder.LITTLE_ENDIAN); + map.putLong(0, COOKIE); + map.putInt(8, chunks); + map.putInt(12, chunkSize); + map.putLong(16, size); + } + + outputMax = HEADER_SIZE; + } else { + this.output = null; + this.outputPath = null; + } + + for (int i = 0; i < chunks; i++) { + maps.add(new RoaringBitmap32()); // TODO: allocate lazily on first use? + } + } catch (Throwable t) { + try { + close(); + } catch (Exception e) { + t.addSuppressed(e); + } catch (Throwable t2) { + t2.addSuppressed(t); + throw t2; + } + throw t; + } + } + + private MultiRoaringBitmap(Path fileName, FileChannel channel, long start) throws IOException { + writable = false; + output = null; + this.outputPath = null; + try (CloseMappedByteBuffer header = IOUtil.mapChannel(fileName, channel, FileChannel.MapMode.READ_ONLY, start, + HEADER_SIZE)) { + header.order(ByteOrder.LITTLE_ENDIAN); + + long cookie = header.getLong(0); + if (cookie != COOKIE) { + throw new IOException(format("Bad cookie for multi roaring bitmap %x != %x", cookie, COOKIE)); + } + int bitmapCount = header.getInt(8); + chunkSize = header.getInt(12); + numbits = header.getLong(16); + + long shift = HEADER_SIZE + start; + for (int i = 0; i < bitmapCount; i++) { + long sizeBytes = IOUtil.readLong(shift, channel, ByteOrder.LITTLE_ENDIAN); + maps.add(new MappedRoaringBitmap( + IOUtil.mapChannel(fileName, channel, FileChannel.MapMode.READ_ONLY, shift += 8, sizeBytes))); + shift += sizeBytes; + } + } catch (Throwable t) { + try { + close(); + } catch (Exception e) { + t.addSuppressed(e); + } catch (Throwable t2) { + t2.addSuppressed(t); + throw t2; + } + throw t; + } + } + + private void closeStreamBitmap(int index) throws IOException { + Bitmap map = maps.get(index); + if (map == null) { + return; + } + + if (!(map instanceof RoaringBitmap32 rbm)) { + throw new AssertionError(); + } + + RoaringBitmap handle = rbm.getHandle(); + + long loc = outputMax; + int sizeInBytes = handle.serializedSizeInBytes(); + outputMax += sizeInBytes + 8; + + try 
(CloseMappedByteBuffer buffer = IOUtil.mapChannel(outputPath, output, FileChannel.MapMode.READ_WRITE, loc, + sizeInBytes + 8)) { + ByteBuffer internalBuffer = buffer.getInternalBuffer().order(ByteOrder.LITTLE_ENDIAN); + internalBuffer.putLong(0, sizeInBytes); + handle.serialize(internalBuffer.slice(8, sizeInBytes)); + } + + try { + Closer.closeSingle(map); + } finally { + maps.set(index, null); + System.gc(); + } + } + + @Override + public void save(OutputStream output, ProgressListener listener) throws IOException { + if (this.output != null) { + throw new IllegalArgumentException("Can't save a streamed bitmap"); + } + if (!writable) { + throw new IllegalArgumentException("Can't save mapped bitmap"); + } + + // compute headers + byte[] bytes = new byte[HEADER_SIZE]; + ByteBuffer buffer = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN); + + buffer.putLong(0, COOKIE); + buffer.putInt(8, maps.size()); + buffer.putInt(12, chunkSize); + buffer.putLong(16, numbits); + + output.write(bytes); + + for (Bitmap map : maps) { + RoaringBitmap handle = ((RoaringBitmap32) map).getHandle(); + + int sizeInBytes = handle.serializedSizeInBytes(); + byte[] array = new byte[8]; + ByteBuffer.wrap(array).order(ByteOrder.LITTLE_ENDIAN).putLong(0, sizeInBytes); + output.write(array); + + handle.serialize(new DataOutputStream(output)); + } + } + + @Override + public boolean access(long position) { + int location = (int) (position / chunkSize); + if (location >= maps.size() || position < 0) { + return false; + } + int localLocation = (int) (position % chunkSize); + return maps.get(location).access(localLocation); + } + + @Override + public long getNumBits() { + return numbits; + } + + @Override + public long getSizeBytes() { + return HEADER_SIZE + maps.stream().mapToLong(Bitmap::getSizeBytes).sum(); + } + + @Override + public String getType() { + return HDTVocabulary.BITMAP_TYPE_ROARING_MULTI; + } + + @Override + public long countOnes() { + return 
maps.stream().mapToLong(Bitmap::countOnes).sum(); + } + + @Override + public long select1(long n) { + long count = n; + long delta = 0; + int idx = 0; + + while (idx < maps.size()) { + long countOnes = maps.get(idx).countOnes(); + if (count <= countOnes) { + break; + } + count -= countOnes; + delta += idx != maps.size() - 1 ? chunkSize : maps.get(idx).getNumBits(); + idx++; + } + + if (idx == maps.size()) { + if (maps.isEmpty()) { + return 0; + } + return delta; + } + + return delta + maps.get(idx).select1(count); + } + + @Override + public long rank1(long position) { + int location = (int) (position / chunkSize); + + if (location >= maps.size() || position < 0) { + return 0; + } + + int localLocation = (int) (position % chunkSize); + + long delta = 0; + for (int i = 0; i < location; i++) { + delta += maps.get(i).getNumBits(); + } + + return delta + maps.get(location).rank1(localLocation); + } + + @Override + public long selectPrev1(long start) { + return select1(rank1(start)); + } + + @Override + public long selectNext1(long start) { + long pos = rank1(start - 1); + if (pos < getNumBits()) + return select1(pos + 1); + return -1; + } + + @Override + public void close() throws IOException { + try { + if (output != null) { + // write remaining + Closer.closeAll(IntStream.range(0, maps.size()) + .mapToObj(index -> (Closeable) (() -> closeStreamBitmap(index)))); + } + } finally { + Closer.closeAll(maps, output); + } + } + + @Override + public void set(long position, boolean value) { + if (!writable) { + throw new IllegalArgumentException("not writable"); + } + + int location = (int) (position / chunkSize); + if (location >= maps.size() || position < 0) { + throw new IllegalArgumentException(format("bit outside of range %d < 0 || map(%d)=%d >= %d", position, + position, location, maps.size())); + } + int localLocation = (int) (position % chunkSize); + + if (output != null) { // streaming + if (maps.get(location) == null) { + throw new IllegalArgumentException("Passing 
unsorted values in streaming mode"); + } + // clear previous + try { + Closer.closeAll( + IntStream.range(0, location).mapToObj(index -> (Closeable) (() -> closeStreamBitmap(index)))); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + // set the bit + ((ModifiableBitmap) maps.get(location)).set(localLocation, value); + } + + @Override + public void append(boolean value) { + throw new NotImplementedException(); + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/RoaringBitmap32.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/RoaringBitmap32.java new file mode 100644 index 00000000..3babe51c --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/RoaringBitmap32.java @@ -0,0 +1,122 @@ +package com.the_qa_company.qendpoint.core.compact.bitmap; + +import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; +import com.the_qa_company.qendpoint.core.util.io.IOUtil; +import org.roaringbitmap.RoaringBitmap; +import org.roaringbitmap.longlong.Roaring64Bitmap; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * {@link ModifiableBitmap} wrapper of the {@link RoaringBitmap} class, it + * supports the {@link #set(long, boolean)}, {@link #access(long)}, + * {@link #append(boolean)}, {@link #select1(long)}, {@link #rank1(long)}, + * {@link #rank0(long)} and {@link #countOnes()} methods. 
+ * + * @author Antoine Willerval + */ +public class RoaringBitmap32 implements SimpleModifiableBitmap { + private final RoaringBitmap rbm; + + public RoaringBitmap32() { + this.rbm = new RoaringBitmap(); + } + + public RoaringBitmap getHandle() { + return rbm; + } + + @Override + public boolean access(long position) { + if (position < 0 || position > Integer.MAX_VALUE) { + return false; + } + return rbm.contains((int) position); + } + + @Override + public long getNumBits() { + return rbm.getLongCardinality(); + } + + @Override + public long getSizeBytes() { + return rbm.serializedSizeInBytes() + 8; + } + + @Override + public void save(OutputStream output, ProgressListener listener) throws IOException { + long size = rbm.serializedSizeInBytes(); + IOUtil.writeLong(output, size); + rbm.serialize(new DataOutputStream(output)); + } + + @Override + public void load(InputStream input, ProgressListener listener) throws IOException { + IOUtil.readLong(input); // ignored + rbm.deserialize(new DataInputStream(input)); + } + + @Override + public String getType() { + return HDTVocabulary.BITMAP_TYPE_ROARING; + } + + @Override + public void set(long position, boolean value) { + assert position >= 0 && position < Integer.MAX_VALUE; + if (value) { + rbm.add((int) position); + } else { + rbm.remove((int) position); + } + } + + @Override + public long select1(long n) { + assert n >= 0 && n <= Integer.MAX_VALUE; + int position = (int) (n - 1); + if (position == -1) + return -1; + if (position < rbm.getLongCardinality()) { + return rbm.select(position); + } else { + return rbm.select(rbm.getCardinality() - 1) + 1; + } + } + + @Override + public long rank1(long position) { + if (position >= 0 && position <= Integer.MAX_VALUE) + return rbm.rankLong((int) position); + return 0; + } + + @Override + public long countOnes() { + return rbm.getLongCardinality(); + } + + @Override + public void append(boolean value) { + set(rbm.last() + 1, value); + } + + @Override + public long 
selectPrev1(long start) { + return select1(rank1(start)); + } + + @Override + public long selectNext1(long start) { + long pos = rank1(start - 1); + if (pos < rbm.getLongCardinality()) + return select1(pos + 1); + return -1; + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/RoaringBitmap.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/RoaringBitmap64.java similarity index 80% rename from qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/RoaringBitmap.java rename to qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/RoaringBitmap64.java index 0371cd78..2b136f9a 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/RoaringBitmap.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/RoaringBitmap64.java @@ -1,15 +1,15 @@ package com.the_qa_company.qendpoint.core.compact.bitmap; -import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary; import com.the_qa_company.qendpoint.core.listener.ProgressListener; import com.the_qa_company.qendpoint.core.util.io.IOUtil; import org.roaringbitmap.longlong.Roaring64Bitmap; +import java.io.DataInputStream; +import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.nio.ByteBuffer; /** * {@link ModifiableBitmap} wrapper of the {@link Roaring64Bitmap} class, it @@ -19,10 +19,10 @@ * * @author Antoine Willerval */ -public class RoaringBitmap implements SimpleModifiableBitmap { +public class RoaringBitmap64 implements SimpleModifiableBitmap { private final Roaring64Bitmap rbm; - public RoaringBitmap() { + public RoaringBitmap64() { this.rbm = Roaring64Bitmap.bitmapOf(); } @@ -37,29 +37,25 @@ public boolean access(long position) { @Override public long getNumBits() { - throw new 
NotImplementedException(); + return rbm.getLongCardinality(); } @Override public long getSizeBytes() { - return rbm.serializedSizeInBytes(); + return rbm.serializedSizeInBytes() + 8; } @Override public void save(OutputStream output, ProgressListener listener) throws IOException { - long size = getSizeBytes(); + long size = rbm.serializedSizeInBytes(); IOUtil.writeLong(output, size); - ByteBuffer b2 = ByteBuffer.allocate((int) size); - rbm.serialize(b2); - output.write(b2.array()); + rbm.serialize(new DataOutputStream(output)); } @Override public void load(InputStream input, ProgressListener listener) throws IOException { - long size = IOUtil.readLong(input); - ByteBuffer b2 = ByteBuffer.allocate((int) size); - input.read(b2.array()); - rbm.deserialize(b2); + IOUtil.readLong(input); // ignored + rbm.deserialize(new DataInputStream(input)); } @Override diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java index aafb65c5..17fcb6e7 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/Dictionary.java @@ -21,6 +21,8 @@ import com.the_qa_company.qendpoint.core.enums.RDFNodeType; import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; import com.the_qa_company.qendpoint.core.header.Header; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.triples.TripleString; import java.io.Closeable; import java.util.Iterator; @@ -245,4 +247,14 @@ default long getNSection(TripleComponentRole role, boolean includeShared) { * @return type */ String getType(); + + default TripleID toTripleId(TripleString tsstr) { + TripleID tid = new TripleID(stringToId(tsstr.getSubject(), TripleComponentRole.SUBJECT), + stringToId(tsstr.getPredicate(), 
TripleComponentRole.PREDICATE), + stringToId(tsstr.getObject(), TripleComponentRole.OBJECT)); + if (tsstr.getGraph() != null && !tsstr.getGraph().isEmpty()) { + tid.setGraph(stringToId(tsstr.getGraph(), TripleComponentRole.GRAPH)); + } + return tid; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryFactory.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryFactory.java index c9f0da84..23db1522 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryFactory.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryFactory.java @@ -46,7 +46,6 @@ import com.the_qa_company.qendpoint.core.options.ControlInfo; import com.the_qa_company.qendpoint.core.options.HDTOptions; import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys; -import com.the_qa_company.qendpoint.core.options.HDTSpecification; import com.the_qa_company.qendpoint.core.triples.TripleString; import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath; import com.the_qa_company.qendpoint.core.util.string.ByteString; @@ -97,6 +96,16 @@ public class DictionaryFactory { private DictionaryFactory() { } + public static boolean isQuadDictionary(String name) { + return switch (name) { + case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG_QUAD, + HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG_QUAD -> + true; + default -> false; + }; + } + /** * Creates a temp dictionary (allow insert) * @@ -108,7 +117,17 @@ public static TempDictionary createTempDictionary(HDTOptions spec) { // Implementations available in the Core return switch (name) { - case "", HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH, HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_MULT_HASH -> + case "" -> { + String dicttype = spec.get(HDTOptionsKeys.DICTIONARY_TYPE_KEY, ""); + if 
(dicttype.equals(HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_PSFC_SECTION)) { + yield new PSFCTempDictionary(new HashDictionary(spec)); + } + if (isQuadDictionary(dicttype)) { + yield new HashQuadDictionary(spec); + } + yield new HashDictionary(spec); + } + case HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH, HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_MULT_HASH -> new HashDictionary(spec); case HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD -> new HashQuadDictionary(spec); case HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_PSFC -> new PSFCTempDictionary(new HashDictionary(spec)); @@ -131,6 +150,8 @@ public static DictionaryPrivate createDictionary(HDTOptions spec) { case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION_BIG -> new FourSectionDictionaryBig(spec); case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS -> new MultipleSectionDictionary(spec); case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG -> new MultipleSectionDictionaryLang(spec); + case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG_QUAD -> + new MultipleSectionDictionaryLang(spec, true); default -> throw new IllegalFormatException("Implementation of dictionary not found for " + name); }; } @@ -160,11 +181,16 @@ public static DictionaryPrivate createWriteDictionary(HDTOptions spec, Path loca public static DictionaryPrivate createWriteDictionary(String name, HDTOptions spec, Path location, int bufferSize) { return switch (name) { case "", HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION_BIG -> - new WriteFourSectionDictionary(spec, location, bufferSize); + new WriteFourSectionDictionary(spec, location, bufferSize, false); + case HDTVocabulary.DICTIONARY_TYPE_FOUR_QUAD_SECTION -> + new WriteFourSectionDictionary(spec, location, bufferSize, true); case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS -> new WriteMultipleSectionDictionary(spec, location, bufferSize); case 
HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG -> new WriteMultipleSectionDictionaryLang(spec, location, bufferSize); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG_QUAD, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG_QUAD -> + new WriteMultipleSectionDictionaryLang(spec, location, bufferSize, true); default -> throw new IllegalFormatException("Implementation of write dictionary not found for " + name); }; } @@ -174,14 +200,20 @@ public static SectionCompressor createSectionCompressor(HDTOptions spec, CloseSu int k, boolean debugSleepKwayDict) { String name = spec.get(HDTOptionsKeys.DICTIONARY_TYPE_KEY, ""); + // use the same compressor for quad/triple dict types + boolean quad = isQuadDictionary(name); + return switch (name) { case "", HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION, - HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION_BIG -> - new SectionCompressor(baseFileName, source, listener, bufferSize, chunkSize, k, debugSleepKwayDict); + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION_BIG, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION -> + new SectionCompressor(baseFileName, source, listener, bufferSize, chunkSize, k, debugSleepKwayDict, quad); case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS -> new MultiSectionSectionCompressor(baseFileName, - source, listener, bufferSize, chunkSize, k, debugSleepKwayDict); - case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG -> new MultiSectionLangSectionCompressor( - baseFileName, source, listener, bufferSize, chunkSize, k, debugSleepKwayDict); + source, listener, bufferSize, chunkSize, k, debugSleepKwayDict, quad); + case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG_QUAD -> + new MultiSectionLangSectionCompressor(baseFileName, source, listener, bufferSize, chunkSize, k, + debugSleepKwayDict, quad); default -> throw new 
IllegalFormatException("Implementation of section compressor not found for " + name); }; } @@ -195,12 +227,13 @@ public static SectionCompressor createSectionCompressor(HDTOptions spec, CloseSu public static DictionaryPrivate createDictionary(ControlInfo ci) { String name = ci.getFormat(); return switch (name) { - case HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION -> new FourSectionDictionary(new HDTSpecification()); - case HDTVocabulary.DICTIONARY_TYPE_FOUR_QUAD_SECTION -> new FourQuadSectionDictionary(new HDTSpecification()); - case HDTVocabulary.DICTIONARY_TYPE_FOUR_PSFC_SECTION -> new PSFCFourSectionDictionary(new HDTSpecification()); - case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION -> new MultipleSectionDictionary(new HDTSpecification()); - case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG -> - new MultipleSectionDictionaryLang(new HDTSpecification()); + case HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION -> new FourSectionDictionary(HDTOptions.of()); + case HDTVocabulary.DICTIONARY_TYPE_FOUR_QUAD_SECTION -> new FourQuadSectionDictionary(HDTOptions.of()); + case HDTVocabulary.DICTIONARY_TYPE_FOUR_PSFC_SECTION -> new PSFCFourSectionDictionary(HDTOptions.of()); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION -> new MultipleSectionDictionary(HDTOptions.of()); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG -> new MultipleSectionDictionaryLang(HDTOptions.of()); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG_QUAD -> + new MultipleSectionDictionaryLang(HDTOptions.of(), true); default -> throw new IllegalFormatException("Implementation of dictionary not found for " + name); }; } @@ -231,9 +264,11 @@ public static DictionaryDiff createDictionaryDiff(Dictionary dictionary, String public static DictionaryKCat createDictionaryKCat(Dictionary dictionary) { String type = dictionary.getType(); return switch (type) { - case HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION -> new FourSectionDictionaryKCat(dictionary); + case HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION, 
HDTVocabulary.DICTIONARY_TYPE_FOUR_QUAD_SECTION -> + new FourSectionDictionaryKCat(dictionary); case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION -> new MultipleSectionDictionaryKCat(dictionary); - case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG -> new MultipleSectionLangDictionaryKCat(dictionary); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG, HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG_QUAD -> + new MultipleSectionLangDictionaryKCat(dictionary); default -> throw new IllegalArgumentException("Implementation of DictionaryKCat not found for " + type); }; } @@ -241,14 +276,30 @@ public static DictionaryKCat createDictionaryKCat(Dictionary dictionary) { public static DictionaryPrivate createWriteDictionary(String type, HDTOptions spec, DictionarySectionPrivate subject, DictionarySectionPrivate predicate, DictionarySectionPrivate object, DictionarySectionPrivate shared, TreeMap sub) { + return createWriteDictionary(type, spec, subject, predicate, object, shared, sub, null); + } + + public static DictionaryPrivate createWriteDictionary(String type, HDTOptions spec, + DictionarySectionPrivate subject, DictionarySectionPrivate predicate, DictionarySectionPrivate object, + DictionarySectionPrivate shared, TreeMap sub, + DictionarySectionPrivate graph) { + if (graph == null) { + return switch (type) { + case HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION, HDTVocabulary.DICTIONARY_TYPE_FOUR_PSFC_SECTION -> + new WriteFourSectionDictionary(spec, subject, predicate, object, shared); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION -> + new WriteMultipleSectionDictionary(spec, subject, predicate, shared, sub); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG -> + new WriteMultipleSectionDictionaryLang(spec, subject, predicate, shared, sub); + default -> throw new IllegalArgumentException("Unknown dictionary type " + type); + }; + } return switch (type) { - case HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION, HDTVocabulary.DICTIONARY_TYPE_FOUR_PSFC_SECTION -> - 
new WriteFourSectionDictionary(spec, subject, predicate, object, shared); - case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION -> - new WriteMultipleSectionDictionary(spec, subject, predicate, shared, sub); - case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG -> - new WriteMultipleSectionDictionaryLang(spec, subject, predicate, shared, sub); - default -> throw new IllegalArgumentException("Unknown dictionary type " + type); + case HDTVocabulary.DICTIONARY_TYPE_FOUR_QUAD_SECTION -> + new WriteFourSectionDictionary(spec, subject, predicate, object, shared, graph); + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG_QUAD -> + new WriteMultipleSectionDictionaryLang(spec, subject, predicate, shared, sub, graph); + default -> throw new IllegalArgumentException("Unknown quad dictionary type " + type); }; } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryKCat.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryKCat.java index 8f3442a2..9f8309d1 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryKCat.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryKCat.java @@ -29,6 +29,11 @@ public interface DictionaryKCat { */ DictionarySection getSharedSection(); + /** + * @return the graph section + */ + DictionarySection getGraphSection(); + /** * @return the number of subjects */ @@ -49,6 +54,12 @@ public interface DictionaryKCat { */ long countObjects(); + /** + * @return the number of graphs, returns 0 if the dictionary doesn't support + * graphs + */ + long countGraphs(); + /** * @return the object shift in the dictionary IDs */ diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryType.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryType.java index 307b52b9..2f3fc83f 100644 --- 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryType.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/DictionaryType.java @@ -37,7 +37,9 @@ public static DictionaryType fromDictionaryType(String dictType) { HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_PSFC_SECTION -> FSD; case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS, HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION -> MSD; - case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG -> + case HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG, + HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG_QUAD, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG_QUAD -> MSDL; default -> throw new NotImplementedException("Can't find type for name: " + dictType); }; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/BaseDictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/BaseDictionary.java index 56b9dff3..1a881b80 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/BaseDictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/BaseDictionary.java @@ -100,6 +100,13 @@ public Iterator stringIterator(TripleComponentRole role, return CatIterator.of(getShared().getSortedEntries(), getObjects().getSortedEntries()); } + case GRAPH -> { + if (!supportGraphs()) { + throw new IllegalArgumentException("This dictionary doesn't support graphs!"); + } + + return getGraphs().getSortedEntries(); + } default -> throw new IllegalArgumentException("Unknown role: " + role); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/CompressFourSectionDictionary.java 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/CompressFourSectionDictionary.java index 876f556c..c6de4c24 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/CompressFourSectionDictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/CompressFourSectionDictionary.java @@ -36,6 +36,7 @@ public class CompressFourSectionDictionary implements TempDictionary { private final TempDictionarySection predicate; private final TempDictionarySection object; private final TempDictionarySection shared; + private final TempDictionarySection graph; private static void sendPiped(IndexedNode node, long index, PipedCopyIterator pipe, CompressUtil.DuplicatedIterator it, NodeConsumerMethod method) { @@ -45,7 +46,7 @@ private static void sendPiped(IndexedNode node, long index, PipedCopyIterator debugOrderCheckerS = DebugOrderNodeIterator.of(debugOrder, "Subject"); Consumer debugOrderCheckerO = DebugOrderNodeIterator.of(debugOrder, "Object"); @@ -62,10 +63,20 @@ public CompressFourSectionDictionary(CompressionResult compressionResult, NodeCo new NotificationExceptionIterator<>(compressionResult.getObjects(), compressionResult.getTripleCount(), splits, "Object section filling", listener), (originalIndex, duplicatedIndex, lastHeader) -> nodeConsumer.onObject(duplicatedIndex, lastHeader)); + CompressUtil.DuplicatedIterator sortedGraph; + if (quad) { + sortedGraph = CompressUtil.asNoDupeCharSequenceIterator( + new NotificationExceptionIterator<>(compressionResult.getGraph(), + compressionResult.getTripleCount(), splits, "Graph section filling", listener), + (originalIndex, duplicatedIndex, lastHeader) -> nodeConsumer.onGraph(duplicatedIndex, lastHeader)); + } else { + sortedGraph = null; + } long subjects = compressionResult.getSubjectsCount(); long predicates = compressionResult.getPredicatesCount(); long objects = compressionResult.getObjectsCount(); long shareds = 
compressionResult.getSharedCount(); + long graphs = quad ? compressionResult.getGraphCount() : 0; // iterator to pipe to the s p o sh PipedCopyIterator subject = new PipedCopyIterator<>(); @@ -158,6 +169,19 @@ public CompressFourSectionDictionary(CompressionResult compressionResult, NodeCo }), predicates); this.object = new OneReadDictionarySection(object, objects); this.shared = new OneReadDictionarySection(shared, shareds); + if (quad) { + this.graph = new OneReadDictionarySection(new MapIterator<>(sortedGraph, (node, index) -> { + long header = CompressUtil.getHeaderId(index + 1); + sortedGraph.setLastHeader(header); + nodeConsumer.onGraph(node.getIndex(), header); + // force duplication because it's not made in a pipe like with + // the + // others + return new CompactString(node.getNode()); + }), graphs); + } else { + this.graph = null; + } } @Override @@ -182,12 +206,15 @@ public TempDictionarySection getShared() { @Override public TempDictionarySection getGraphs() { - throw new NotImplementedException(); + if (supportGraphs()) { + return graph; + } + throw new IllegalArgumentException("This dictionary doesn't support graph!"); } @Override public boolean supportGraphs() { - return false; + return graph != null; } @Override @@ -243,6 +270,8 @@ public interface NodeConsumer { void onPredicate(long preMapId, long newMapId); void onObject(long preMapId, long newMapId); + + void onGraph(long preMapId, long newMapId); } private interface NodeConsumerMethod { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/FourQuadSectionDictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/FourQuadSectionDictionary.java index e194be45..52dc2bf2 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/FourQuadSectionDictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/FourQuadSectionDictionary.java @@ -28,6 +28,7 @@ 
import com.the_qa_company.qendpoint.core.dictionary.TempDictionary; import com.the_qa_company.qendpoint.core.dictionary.impl.section.DictionarySectionFactory; import com.the_qa_company.qendpoint.core.dictionary.impl.section.PFCDictionarySection; +import com.the_qa_company.qendpoint.core.dictionary.impl.section.PFCDictionarySectionBig; import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary; import com.the_qa_company.qendpoint.core.header.Header; @@ -41,26 +42,17 @@ import com.the_qa_company.qendpoint.core.util.io.IOUtil; import com.the_qa_company.qendpoint.core.util.listener.IntermediateListener; -public class FourQuadSectionDictionary extends FourSectionDictionary { +public class FourQuadSectionDictionary extends FourSectionDictionaryBig { public FourQuadSectionDictionary(HDTOptions spec, DictionarySectionPrivate s, DictionarySectionPrivate p, DictionarySectionPrivate o, DictionarySectionPrivate sh, DictionarySectionPrivate g) { - super(spec); - this.subjects = s; - this.predicates = p; - this.objects = o; - this.shared = sh; + super(spec, s, p, o, sh); this.graphs = g; } public FourQuadSectionDictionary(HDTOptions spec) { super(spec); - // FIXME: Read type from spec. 
- subjects = new PFCDictionarySection(spec); - predicates = new PFCDictionarySection(spec); - objects = new PFCDictionarySection(spec); - shared = new PFCDictionarySection(spec); - graphs = new PFCDictionarySection(spec); + graphs = new PFCDictionarySectionBig(spec); } /* diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleLangBaseDictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleLangBaseDictionary.java index 45f7be72..ded6c74e 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleLangBaseDictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleLangBaseDictionary.java @@ -54,11 +54,13 @@ public record ObjectIdLocationData(int uid, ByteString name, ByteString suffix, protected SortedDictionarySectionIndex nonTypedIndex; protected SortedDictionarySectionIndex subjectResIndex; protected SortedDictionarySectionIndex sharedIndex; + protected SortedDictionarySectionIndex graphIndex; protected TreeMap languages; protected TreeMap typed; protected TreeMap objectsLocations; protected TreeMap languagesLocations; protected DictionarySectionPrivate shared; + protected DictionarySectionPrivate graph; // locations protected LongArray objectIdLocations = LongArray.of(0); @@ -86,6 +88,9 @@ public void populateHeader(Header header, String rootNode) { @Override public String getType() { + if (supportGraphs()) { + return HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG_QUAD; + } return HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG; } @@ -126,6 +131,11 @@ public long getNshared() { return shared.getNumberOfElements(); } + @Override + public long getNgraphs() { + return graph.getNumberOfElements(); + } + @Override public DictionarySection getSubjects() { return subjects; @@ -141,6 +151,11 @@ public DictionarySection getObjects() { throw new NotImplementedException(); } + @Override + 
public DictionarySectionPrivate getGraphs() { + return graph; + } + @Override public Map getAllObjects() { TreeMap m = new TreeMap<>(CharSequenceComparator.getInstance()); @@ -238,6 +253,9 @@ protected void syncLocations() { nonTypedIndex = new SortedDictionarySectionIndex(nonTyped); subjectResIndex = new SortedDictionarySectionIndex(subjects); sharedIndex = new SortedDictionarySectionIndex(shared); + if (supportGraphs()) { + graphIndex = new SortedDictionarySectionIndex(graph); + } } } @@ -275,6 +293,9 @@ public CharSequence idToString(long id, TripleComponentRole position) { return null; } + case GRAPH -> { + return graph.extract(id); + } default -> throw new NotImplementedException(); } } @@ -291,6 +312,13 @@ public long stringToId(CharSequence sstr, TripleComponentRole position) { long id = predicates.locate(str); return id > 0 ? id : -1; } + case GRAPH -> { + if (!supportGraphs()) { + throw new IllegalArgumentException("This dictionary doesn't support graphs!"); + } + long id = graph.locate(str); + return id > 0 ? 
id : -1; + } case SUBJECT -> { long sid = shared.locate(str); if (sid != 0) { @@ -367,6 +395,12 @@ public Iterator stringIterator(TripleComponentRole role, return StringSuffixIterator.of(sec.getSortedEntries(), suffix); }).toList()); } + case GRAPH -> { + if (!supportGraphs()) { + throw new IllegalArgumentException("This dictionary doesn't support graphs!"); + } + return getGraphs().getSortedEntries(); + } default -> throw new IllegalArgumentException("Unknown role: " + role); } } @@ -436,6 +470,12 @@ public RDFNodeType nodeTypeOfId(TripleComponentRole role, long id) { } return nonTypedIndex.getNodeType(id - nshared); } + case GRAPH -> { + if (!supportGraphs()) { + throw new IllegalArgumentException("This dictionary doesn't support graphs!"); + } + return graphIndex.getNodeType(id); + } } throw new IllegalArgumentException("Method is not applicable on z this dictionary"); } @@ -455,9 +495,14 @@ public boolean supportsLanguageOfId() { return true; } + @Override + public boolean supportGraphs() { + return graph != null; + } + @Override public void close() throws IOException { - Closer.closeAll(subjects, predicates, nonTyped, typed, languages, shared); + Closer.closeAll(subjects, predicates, nonTyped, typed, languages, shared, graph); } protected static class StopPredicate implements Predicate { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryLang.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryLang.java index d07827bc..f1ad248c 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryLang.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryLang.java @@ -1,14 +1,12 @@ package com.the_qa_company.qendpoint.core.dictionary.impl; import com.the_qa_company.qendpoint.core.compact.integer.VByte; -import 
com.the_qa_company.qendpoint.core.dictionary.DictionarySection; import com.the_qa_company.qendpoint.core.dictionary.DictionarySectionPrivate; import com.the_qa_company.qendpoint.core.dictionary.TempDictionary; import com.the_qa_company.qendpoint.core.dictionary.impl.section.DictionarySectionFactory; import com.the_qa_company.qendpoint.core.dictionary.impl.section.PFCDictionarySection; import com.the_qa_company.qendpoint.core.dictionary.impl.section.PFCDictionarySectionBig; import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; -import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; import com.the_qa_company.qendpoint.core.iterator.charsequence.StopIterator; import com.the_qa_company.qendpoint.core.iterator.utils.MapIterator; import com.the_qa_company.qendpoint.core.iterator.utils.PeekIterator; @@ -43,6 +41,10 @@ public class MultipleSectionDictionaryLang extends MultipleLangBaseDictionary { private static final Logger logger = LoggerFactory.getLogger(MultipleSectionDictionaryLang.class); public MultipleSectionDictionaryLang(HDTOptions spec) { + this(spec, false); + } + + public MultipleSectionDictionaryLang(HDTOptions spec, boolean quad) { super(spec); // FIXME: Read type from spec. 
subjects = new PFCDictionarySectionBig(spec); @@ -52,12 +54,23 @@ public MultipleSectionDictionaryLang(HDTOptions spec) { languages = new TreeMap<>(cmp); nonTyped = new PFCDictionarySectionBig(spec); shared = new PFCDictionarySectionBig(spec); + if (quad) { + graph = new PFCDictionarySectionBig(spec); + } } public MultipleSectionDictionaryLang(HDTOptions spec, DictionarySectionPrivate subjects, DictionarySectionPrivate predicates, DictionarySectionPrivate nonTyped, TreeMap typed, TreeMap languages, DictionarySectionPrivate shared) { + this(spec, subjects, predicates, nonTyped, typed, languages, shared, null); + } + + public MultipleSectionDictionaryLang(HDTOptions spec, DictionarySectionPrivate subjects, + DictionarySectionPrivate predicates, DictionarySectionPrivate nonTyped, + TreeMap typed, + TreeMap languages, DictionarySectionPrivate shared, + DictionarySectionPrivate graph) { super(spec); this.subjects = subjects; this.predicates = predicates; @@ -65,6 +78,7 @@ public MultipleSectionDictionaryLang(HDTOptions spec, DictionarySectionPrivate s this.languages = languages; this.nonTyped = Objects.requireNonNullElseGet(nonTyped, () -> new PFCDictionarySection(spec)); this.shared = shared; + this.graph = graph; syncLocations(); } @@ -81,6 +95,10 @@ public void load(InputStream input, ControlInfo ci, ProgressListener listener) t predicates = DictionarySectionFactory.loadFrom(input, iListener); nonTyped = DictionarySectionFactory.loadFrom(input, iListener); + if (supportGraphs()) { + graph = DictionarySectionFactory.loadFrom(input, iListener); + } + readLiteralsMaps(input, listener); } @@ -98,6 +116,10 @@ public void mapFromFile(CountInputStream in, File f, ProgressListener listener) predicates = DictionarySectionFactory.loadFrom(in, f, iListener); nonTyped = DictionarySectionFactory.loadFrom(in, f, iListener); + if (supportGraphs()) { + graph = DictionarySectionFactory.loadFrom(in, f, iListener); + } + mapLiteralsMaps(in, f, listener); } @@ -149,6 +171,9 @@ public 
void load(TempDictionary other, ProgressListener listener) { } shared.load(other.getShared(), iListener); + if (supportGraphs()) { + graph.load(other.getGraphs(), iListener); + } syncLocations(); } @@ -159,6 +184,10 @@ public void loadAsync(TempDictionary other, ProgressListener listener) throws In new ExceptionThread(() -> subjects.load(other.getSubjects(), iListener), "MultiSecSAsyncReaderS"), new ExceptionThread(() -> shared.load(other.getShared(), iListener), "MultiSecSAsyncReaderSh"), new ExceptionThread(() -> { + if (supportGraphs()) { + graph.load(other.getGraphs(), iListener); + } + }, "MultiSecSAsyncReaderG"), new ExceptionThread(() -> { StopPredicate pred = new StopPredicate<>(); PeekIterator it = new com.the_qa_company.qendpoint.core.iterator.utils.StopIterator<>( new MapIterator<>(other.getObjects().getSortedEntries(), LiteralsUtils::prefToLitLang), @@ -199,6 +228,9 @@ public void save(OutputStream output, ControlInfo ci, ProgressListener listener) subjects.save(output, iListener); predicates.save(output, iListener); nonTyped.save(output, iListener); + if (supportGraphs()) { + graph.save(output, iListener); + } writeLiteralsMaps(output, iListener); } @@ -265,14 +297,4 @@ private void mapLiteralsMaps(CountInputStream input, File f, ProgressListener li syncLocations(); } - - @Override - public long getNgraphs() { - return 0; - } - - @Override - public DictionarySection getGraphs() { - throw new NotImplementedException(); - } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryLangPFCOptimizedExtractor.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryLangPFCOptimizedExtractor.java index 3b61da66..8e53a1d3 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryLangPFCOptimizedExtractor.java +++ 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/MultipleSectionDictionaryLangPFCOptimizedExtractor.java @@ -5,7 +5,7 @@ import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; public class MultipleSectionDictionaryLangPFCOptimizedExtractor implements OptimizedExtractor { - private final PFCOptimizedExtractor shared, subjects, predicates; + private final PFCOptimizedExtractor shared, subjects, predicates, graph; private final PFCOptimizedExtractor[] objects; private final MultipleLangBaseDictionary dict; private final long nshared; @@ -20,6 +20,11 @@ public MultipleSectionDictionaryLangPFCOptimizedExtractor(MultipleLangBaseDictio for (int i = 0; i < objects.length; i++) { objects[i] = new PFCOptimizedExtractor((PFCDictionarySectionMap) dict.getObjectsSectionFromId(i).section()); } + if (dict.supportGraphs()) { + graph = new PFCOptimizedExtractor((PFCDictionarySectionMap) dict.graph); + } else { + graph = null; + } } @Override @@ -43,6 +48,12 @@ public CharSequence idToString(long id, TripleComponentRole role) { } return data.suffix().copyPreAppend(str); } + case GRAPH -> { + if (!dict.supportGraphs()) { + throw new IllegalArgumentException("This dictionary doesn't support graphs!"); + } + return graph.extract(id); + } default -> throw new IllegalArgumentException("Bad role: " + role); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteFourSectionDictionary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteFourSectionDictionary.java index fd363e81..7061cd2e 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteFourSectionDictionary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteFourSectionDictionary.java @@ -28,35 +28,48 @@ * @author Antoine Willerval */ public class WriteFourSectionDictionary extends BaseDictionary { - public 
WriteFourSectionDictionary(HDTOptions spec, Path filename, int bufferSize) { + public WriteFourSectionDictionary(HDTOptions spec, Path filename, int bufferSize, boolean quads) { super(spec); String name = filename.getFileName().toString(); subjects = new WriteDictionarySection(spec, filename.resolveSibling(name + "SU"), bufferSize); predicates = new WriteDictionarySection(spec, filename.resolveSibling(name + "PR"), bufferSize); objects = new WriteDictionarySection(spec, filename.resolveSibling(name + "OB"), bufferSize); shared = new WriteDictionarySection(spec, filename.resolveSibling(name + "SH"), bufferSize); + + if (quads) { + graphs = new WriteDictionarySection(spec, filename.resolveSibling(name + "GH"), bufferSize); + } } public WriteFourSectionDictionary(HDTOptions spec, DictionarySectionPrivate subjects, DictionarySectionPrivate predicates, DictionarySectionPrivate objects, DictionarySectionPrivate shared) { + this(spec, subjects, predicates, objects, shared, null); + } + + public WriteFourSectionDictionary(HDTOptions spec, DictionarySectionPrivate subjects, + DictionarySectionPrivate predicates, DictionarySectionPrivate objects, DictionarySectionPrivate shared, + DictionarySectionPrivate graph) { super(spec); this.subjects = subjects; this.predicates = predicates; this.objects = objects; this.shared = shared; + this.graphs = graph; } @Override public void loadAsync(TempDictionary other, ProgressListener listener) throws InterruptedException { MultiThreadListener ml = ListenerUtil.multiThreadListener(listener); ml.unregisterAllThreads(); - ExceptionThread - .async("FourSecSAsyncReader", - () -> predicates.load(other.getPredicates(), new IntermediateListener(ml, "Predicate: ")), - () -> subjects.load(other.getSubjects(), new IntermediateListener(ml, "Subjects: ")), - () -> shared.load(other.getShared(), new IntermediateListener(ml, "Shared: ")), - () -> objects.load(other.getObjects(), new IntermediateListener(ml, "Object: "))) - 
.startAll().joinAndCrashIfRequired(); + ExceptionThread.async("FourSecSAsyncReader", + () -> predicates.load(other.getPredicates(), new IntermediateListener(ml, "Predicate: ")), + () -> subjects.load(other.getSubjects(), new IntermediateListener(ml, "Subjects: ")), () -> { + if (supportGraphs()) { + graphs.load(other.getGraphs(), new IntermediateListener(ml, "Graph: ")); + } + }, () -> shared.load(other.getShared(), new IntermediateListener(ml, "Shared: ")), + () -> objects.load(other.getObjects(), new IntermediateListener(ml, "Object: "))).startAll() + .joinAndCrashIfRequired(); ml.unregisterAllThreads(); } @@ -92,9 +105,16 @@ public void save(OutputStream output, ControlInfo ci, ProgressListener listener) iListener.setRange(50, 75); iListener.setPrefix("Save predicates: "); predicates.save(output, iListener); - iListener.setRange(75, 100); + + int rangeStart = supportGraphs() ? 77 : 100; + iListener.setRange(75, rangeStart); iListener.setPrefix("Save objects: "); objects.save(output, iListener); + if (supportGraphs()) { + iListener.setRange(rangeStart, 100); + iListener.setPrefix("Save graphs: "); + graphs.save(output, listener); + } } @Override @@ -106,11 +126,17 @@ public void populateHeader(Header header, String rootNode) { @Override public String getType() { - return HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION; + return supportGraphs() ? 
HDTVocabulary.DICTIONARY_TYPE_FOUR_QUAD_SECTION + : HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION; + } + + @Override + public boolean supportGraphs() { + return graphs != null; } @Override public void close() throws IOException { - IOUtil.closeAll(shared, subjects, predicates, objects); + IOUtil.closeAll(shared, subjects, predicates, objects, graphs); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteMultipleSectionDictionaryLang.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteMultipleSectionDictionaryLang.java index c0b21808..0af445a0 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteMultipleSectionDictionaryLang.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/WriteMultipleSectionDictionaryLang.java @@ -1,7 +1,6 @@ package com.the_qa_company.qendpoint.core.dictionary.impl; import com.the_qa_company.qendpoint.core.compact.integer.VByte; -import com.the_qa_company.qendpoint.core.dictionary.DictionarySection; import com.the_qa_company.qendpoint.core.dictionary.DictionarySectionPrivate; import com.the_qa_company.qendpoint.core.dictionary.TempDictionary; import com.the_qa_company.qendpoint.core.dictionary.impl.section.WriteDictionarySection; @@ -48,6 +47,10 @@ private static HDTOptions withoutRDFType(HDTOptions opt) { } public WriteMultipleSectionDictionaryLang(HDTOptions spec, Path filename, int bufferSize) { + this(spec, filename, bufferSize, false); + } + + public WriteMultipleSectionDictionaryLang(HDTOptions spec, Path filename, int bufferSize, boolean quad) { super(withoutRDFType(spec)); this.filename = filename; this.bufferSize = bufferSize; @@ -58,11 +61,20 @@ public WriteMultipleSectionDictionaryLang(HDTOptions spec, Path filename, int bu nonTyped = new WriteDictionarySection(spec, filename.resolveSibling(name + "NT"), bufferSize); shared = new WriteDictionarySection(spec, 
filename.resolveSibling(name + "SH"), bufferSize); languages = new TreeMap<>(); + if (quad) { + graph = new WriteDictionarySection(spec, filename.resolveSibling(name + "GR"), bufferSize); + } } public WriteMultipleSectionDictionaryLang(HDTOptions spec, DictionarySectionPrivate subjects, DictionarySectionPrivate predicates, DictionarySectionPrivate shared, TreeMap objects) { + this(spec, subjects, predicates, shared, objects, null); + } + + public WriteMultipleSectionDictionaryLang(HDTOptions spec, DictionarySectionPrivate subjects, + DictionarySectionPrivate predicates, DictionarySectionPrivate shared, + TreeMap objects, DictionarySectionPrivate graph) { super(spec); // useless this.filename = null; @@ -74,6 +86,7 @@ public WriteMultipleSectionDictionaryLang(HDTOptions spec, DictionarySectionPriv this.typed = new TreeMap<>(); this.languages = new TreeMap<>(); this.shared = shared; + this.graph = graph; for (var e : objects.entrySet()) { ByteString type = e.getKey(); DictionarySectionPrivate sec = e.getValue(); @@ -93,6 +106,7 @@ public WriteMultipleSectionDictionaryLang(HDTOptions spec, DictionarySectionPriv private ExceptionThread fillSection(Iterator objects, long count, ProgressListener listener) { + @SuppressWarnings("resource") PipedCopyIterator datatypeIterator = new PipedCopyIterator<>(); String name = filename.getFileName().toString(); Map theTyped = Collections.synchronizedMap(this.typed); @@ -195,11 +209,13 @@ private ExceptionThread fillSection(Iterator objects, lo public void loadAsync(TempDictionary other, ProgressListener listener) throws InterruptedException { MultiThreadListener ml = ListenerUtil.multiThreadListener(listener); ml.unregisterAllThreads(); - ExceptionThread - .async("MultiSecSAsyncReader", - () -> predicates.load(other.getPredicates(), new IntermediateListener(ml, "Predicate: ")), - () -> subjects.load(other.getSubjects(), new IntermediateListener(ml, "Subjects: ")), - () -> shared.load(other.getShared(), new IntermediateListener(ml, 
"Shared: "))) + ExceptionThread.async("MultiSecSAsyncReader", + () -> predicates.load(other.getPredicates(), new IntermediateListener(ml, "Predicate: ")), () -> { + if (supportGraphs()) { + graph.load(other.getGraphs(), new IntermediateListener(ml, "Graph: ")); + } + }, () -> subjects.load(other.getSubjects(), new IntermediateListener(ml, "Subjects: ")), + () -> shared.load(other.getShared(), new IntermediateListener(ml, "Shared: "))) .attach(fillSection(other.getObjects().getEntries(), other.getObjects().getNumberOfElements(), new IntermediateListener(ml, "Objects: "))) .startAll().joinAndCrashIfRequired(); @@ -223,10 +239,21 @@ public void save(OutputStream output, ControlInfo ci, ProgressListener listener) iListener.setRange(40, 60); iListener.setPrefix("Save predicates: "); predicates.save(output, iListener); + iListener.setRange(60, 80); iListener.setPrefix("Save non typed objects: "); nonTyped.save(output, iListener); - iListener.setRange(80, 100); + + int rangeStart; + if (supportGraphs()) { + iListener.setRange(80, 85); + iListener.setPrefix("Save graphs: "); + graph.save(output, listener); + rangeStart = 85; + } else { + rangeStart = 80; + } + iListener.setRange(rangeStart, 100); iListener.setPrefix("Save objects: "); int count = typed.size() + languages.size(); @@ -250,6 +277,7 @@ public void save(OutputStream output, ControlInfo ci, ProgressListener listener) IOUtil.writeSizedBuffer(output, entry.getKey(), iListener); entry.getValue().save(output, iListener); } + } @Override @@ -267,15 +295,5 @@ public void load(TempDictionary other, ProgressListener listener) { throw new NotImplementedException(); } - @Override - public long getNgraphs() { - return 0; - } - - @Override - public DictionarySection getGraphs() { - throw new NotImplementedException(); - } - private record TypedByteString(ByteString type, ByteString node, boolean lang) {} } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/BitmapTriple.java 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/BitmapTriple.java index 6f7217d1..501afd9f 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/BitmapTriple.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/BitmapTriple.java @@ -10,11 +10,14 @@ public class BitmapTriple implements Closeable { private final ModifiableBitmap subjects; private final ModifiableBitmap predicates; private final ModifiableBitmap objects; + private final ModifiableBitmap graphs; - public BitmapTriple(ModifiableBitmap subjects, ModifiableBitmap predicates, ModifiableBitmap objects) { + public BitmapTriple(ModifiableBitmap subjects, ModifiableBitmap predicates, ModifiableBitmap objects, + ModifiableBitmap graphs) { this.subjects = subjects; this.predicates = predicates; this.objects = objects; + this.graphs = graphs; } public ModifiableBitmap getSubjects() { @@ -25,12 +28,16 @@ public ModifiableBitmap getPredicates() { return predicates; } + public ModifiableBitmap getGraphs() { + return graphs; + } + public ModifiableBitmap getObjects() { return objects; } @Override public void close() throws IOException { - Closer.closeAll(subjects, predicates, objects); + Closer.closeAll(subjects, predicates, objects, graphs); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/FourSectionDictionaryKCat.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/FourSectionDictionaryKCat.java index ac3a74fe..891ddb97 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/FourSectionDictionaryKCat.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/FourSectionDictionaryKCat.java @@ -34,6 +34,11 @@ public DictionarySection getPredicateSection() { return dictionary.getPredicates(); } + @Override + public DictionarySection 
getGraphSection() { + return dictionary.getGraphs(); + } + @Override public DictionarySection getSharedSection() { return dictionary.getShared(); @@ -59,6 +64,11 @@ public long countObjects() { return dictionary.getObjects().getNumberOfElements() + countShared(); } + @Override + public long countGraphs() { + return dictionary.supportGraphs() ? dictionary.getGraphs().getNumberOfElements() : 0; + } + @Override public long nonTypedShift() { return countShared(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/GroupBySubjectMapIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/GroupBySubjectMapIterator.java index 9e8dc7bc..68cf8533 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/GroupBySubjectMapIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/GroupBySubjectMapIterator.java @@ -1,5 +1,6 @@ package com.the_qa_company.qendpoint.core.dictionary.impl.kcat; +import com.the_qa_company.qendpoint.core.compact.bitmap.GraphDeleteBitmap; import com.the_qa_company.qendpoint.core.hdt.HDT; import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; import com.the_qa_company.qendpoint.core.triples.TripleID; @@ -27,9 +28,11 @@ public class GroupBySubjectMapIterator implements Iterator { private final List groupList = new ArrayList<>(); private Iterator groupListIterator; private TripleID next; + private final boolean quad; - private GroupBySubjectMapIterator(Iterator mergeIterator) { + private GroupBySubjectMapIterator(Iterator mergeIterator, boolean quad) { this.mergeIterator = new PeekIteratorImpl<>(mergeIterator); + this.quad = quad; } @Override @@ -65,7 +68,11 @@ public boolean hasNext() { } while (mergeIterator.hasNext() && mergeIterator.peek().getSubject() == subject && mergeIterator.peek().getPredicate() == predicate); - 
groupList.sort(Comparator.comparingLong(TripleID::getObject)); + if (quad) { + groupList.sort(Comparator.comparingLong(TripleID::getObject).thenComparingLong(TripleID::getGraph)); + } else { + groupList.sort(Comparator.comparingLong(TripleID::getObject)); + } groupListIterator = groupList.iterator(); @@ -118,12 +125,15 @@ private static long firstSubjectTripleId(HDT hdt) { */ public static Iterator fromHDTs(KCatMerger merger, HDT[] hdts, List deleteBitmaps) { final long shared = merger.getCountShared(); + boolean quad = merger.isQuad(); // sorted shared List> sharedSubjectIterators = IntStream.range(0, hdts.length) .mapToObj(hdtIndex -> { // extract hdt elements for this index HDT hdt = hdts[hdtIndex]; - Bitmap deleteBitmap = deleteBitmaps == null ? null : deleteBitmaps.get(hdtIndex); + GraphDeleteBitmap deleteBitmap = deleteBitmaps == null ? null + : GraphDeleteBitmap.wrap(deleteBitmaps.get(hdtIndex), + quad ? hdt.getDictionary().getNgraphs() : 1); if (hdt.getTriples().getNumberOfElements() == 0) { // no triples @@ -162,7 +172,9 @@ public static Iterator fromHDTs(KCatMerger merger, HDT[] hdts, List { // extract hdt elements for this index HDT hdt = hdts[hdtIndex]; - Bitmap deleteBitmap = deleteBitmaps == null ? null : deleteBitmaps.get(hdtIndex); + GraphDeleteBitmap deleteBitmap = deleteBitmaps == null ? null + : GraphDeleteBitmap.wrap(deleteBitmaps.get(hdtIndex), + quad ? 
hdt.getDictionary().getNgraphs() : 1); if (hdt.getTriples().getNumberOfElements() == 0) { // no triples @@ -206,25 +218,34 @@ public static Iterator fromHDTs(KCatMerger merger, HDT[] hdts, List createIdMapper(KCatMerger merger, int hdtIndex, HDT hdt, Iterator it, - long start, Bitmap deleteBitmap) { + long start, GraphDeleteBitmap deleteBitmap) { if (deleteBitmap == null) { return new MapIterator<>(it, (tid) -> { assert inHDT(tid, hdt); return merger.extractMapped(hdtIndex, tid); }); - } else { + } + if (hdt.getDictionary().supportGraphs()) { return MapFilterIterator.of(it, (tid, index) -> { - if (deleteBitmap.access(index + start)) { + if (deleteBitmap.access(tid.getGraph() - 1, index + start)) { return null; } assert inHDT(tid, hdt); return merger.extractMapped(hdtIndex, tid); }); } + return MapFilterIterator.of(it, (tid, index) -> { + if (deleteBitmap.access(index + start)) { + return null; + } + assert inHDT(tid, hdt); + return merger.extractMapped(hdtIndex, tid); + }); } private static int compareSP(TripleID t1, TripleID t2) { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatImpl.java index c121b8b2..4881711a 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatImpl.java @@ -2,6 +2,7 @@ import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap64Big; +import com.the_qa_company.qendpoint.core.compact.bitmap.GraphDeleteBitmap; import com.the_qa_company.qendpoint.core.compact.bitmap.ModifiableBitmap; import com.the_qa_company.qendpoint.core.compact.bitmap.NegBitmap; import com.the_qa_company.qendpoint.core.dictionary.DictionaryPrivate; @@ -124,6 +125,7 @@ private static TripleComponentOrder 
getOrder(HDT hdt) { private final boolean clearLocation; private final MultiThreadListener listener; private final String dictionaryType; + private final boolean quad; private final int bufferSize; private final HDTOptions hdtFormat; private final TripleComponentOrder order; @@ -173,6 +175,7 @@ private KCatImpl(List hdtFileNames, List deleteBitmaps, HDT hdts[firstIndex] = firstHDT; dictionaryType = firstHDT.getDictionary().getType(); + quad = firstHDT.getDictionary().supportGraphs(); baseURI = firstHDT.getBaseURI(); order = getOrder(firstHDT); @@ -244,15 +247,22 @@ private KCatImpl(List hdtFileNames, List deleteBitmaps, HDT hdt.getDictionary().getNpredicates() + 1)); ModifiableBitmap bo = NegBitmap.of(Bitmap64Big.disk(diffLocation.resolve("d" + index + "o"), hdt.getDictionary().getNobjects() + 1)); + ModifiableBitmap bg = quad ? NegBitmap.of(Bitmap64Big.disk(diffLocation.resolve("d" + index + "g"), + hdt.getDictionary().getNgraphs() + 1)) : null; // noinspection resource - deleteBitmapTriples[index] = new BitmapTriple(bs, bp, bo); + deleteBitmapTriples[index] = new BitmapTriple(bs, bp, bo, bg); IteratorTripleID searchAll = hdt.getTriples().searchAll(); long numberOfElements = hdt.getTriples().getNumberOfElements(); // fill the maps based on the deleted triples long c = 0; + + @SuppressWarnings("resource") + GraphDeleteBitmap bm = GraphDeleteBitmap.wrap(deleteBitmap, + quad ? hdt.getDictionary().getNgraphs() : 1); + while (searchAll.hasNext()) { TripleID tripleID = searchAll.next(); @@ -260,11 +270,18 @@ private KCatImpl(List hdtFileNames, List deleteBitmaps, HDT "building diff bitmaps " + c + "/" + numberOfElements + " (hdt " + index + "/" + hdts.length + ")"); - if (!deleteBitmap.access(searchAll.getLastTriplePosition())) { + long g = quad ? 
(tripleID.getGraph() - 1) : 0; + + assert g >= 0; + + if (!bm.access(g, searchAll.getLastTriplePosition())) { // not deleted bs.set(tripleID.getSubject(), false); bp.set(tripleID.getPredicate(), false); bo.set(tripleID.getObject(), false); + if (quad) { + bg.set(tripleID.getGraph(), false); + } } } } @@ -306,7 +323,8 @@ private KCatImpl(List hdtFileNames, List deleteBitmaps, HDT * @throws IOException io exception */ KCatMerger createMerger(ProgressListener listener) throws IOException { - return new KCatMerger(hdts, deleteBitmapTriples, location, listener, bufferSize, dictionaryType, hdtFormat); + return new KCatMerger(hdts, deleteBitmapTriples, location, listener, bufferSize, dictionaryType, quad, + hdtFormat); } /** @@ -330,7 +348,7 @@ public HDT cat() throws IOException { // stream Iterator tripleIterator = GroupBySubjectMapIterator.fromHDTs(merger, hdts, deleteBitmaps); try (WriteBitmapTriples triples = new WriteBitmapTriples(hdtFormat, location.resolve("triples"), - bufferSize)) { + bufferSize, quad)) { long count = Arrays.stream(hdts).mapToLong(h -> h.getTriples().getNumberOfElements()).sum(); il.setRange(40, 80); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatMerger.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatMerger.java index c76acd19..d467ecb9 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatMerger.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatMerger.java @@ -56,6 +56,7 @@ public class KCatMerger implements AutoCloseable { final SyncSeq[] subjectsMaps; final SyncSeq[] predicatesMaps; final SyncSeq[] objectsMaps; + final SyncSeq[] graphsMaps; private final ExceptionThread catMergerThread; final boolean typedHDT; final boolean langHDT; @@ -69,8 +70,10 @@ public class KCatMerger implements AutoCloseable { private final ExceptionIterator sortedObject; 
private final ExceptionIterator sortedPredicates; private final Map> sortedSubSections; + private final ExceptionIterator sortedGraphs; private final long estimatedSizeP; + private final long estimatedSizeG; private final AtomicLong countTyped = new AtomicLong(); private final AtomicLong countShared = new AtomicLong(); private final AtomicLong countNonTyped = new AtomicLong(); @@ -81,6 +84,7 @@ public class KCatMerger implements AutoCloseable { private final WriteDictionarySection sectionShared; private final WriteDictionarySection sectionObject; private final WriteDictionarySection sectionPredicate; + private final WriteDictionarySection sectionGraph; private final Map sectionSub; private final Map typeId = new HashMap<>(); private boolean running; @@ -93,11 +97,12 @@ public class KCatMerger implements AutoCloseable { * @param listener listener to log the state * @param bufferSize buffer size * @param dictionaryType dictionary type + * @param quad quad * @param spec spec to config the HDT * @throws java.io.IOException io exception */ public KCatMerger(HDT[] hdts, BitmapTriple[] deletedTriple, CloseSuppressPath location, ProgressListener listener, - int bufferSize, String dictionaryType, HDTOptions spec) throws IOException { + int bufferSize, String dictionaryType, boolean quad, HDTOptions spec) throws IOException { this.hdts = hdts; this.listener = listener; this.dictionaryType = dictionaryType; @@ -106,7 +111,9 @@ public KCatMerger(HDT[] hdts, BitmapTriple[] deletedTriple, CloseSuppressPath lo subjectsMaps = new SyncSeq[hdts.length]; predicatesMaps = new SyncSeq[hdts.length]; objectsMaps = new SyncSeq[hdts.length]; - locations = new CloseSuppressPath[hdts.length * 3]; + graphsMaps = quad ? new SyncSeq[hdts.length] : null; + int locationDelta = quad ? 
4 : 3; + locations = new CloseSuppressPath[hdts.length * locationDelta]; countSubject = IntStream.range(0, hdts.length).mapToObj(i -> new AtomicLong()).toArray(AtomicLong[]::new); countObject = IntStream.range(0, hdts.length).mapToObj(i -> new AtomicLong()).toArray(AtomicLong[]::new); @@ -116,6 +123,7 @@ public KCatMerger(HDT[] hdts, BitmapTriple[] deletedTriple, CloseSuppressPath lo long sizeO = 0; long sizeONoTyped = 0; long sizeShared = 0; + long sizeG = 0; // if this HDT is typed, we don't have to allocate 1 bit / node to note // a typed node @@ -138,6 +146,7 @@ public KCatMerger(HDT[] hdts, BitmapTriple[] deletedTriple, CloseSuppressPath lo sizeS += cat.countSubjects(); sizeP += cat.countPredicates(); sizeO += cat.countObjects(); + sizeG += cat.countGraphs(); DictionarySection objectSection = cat.getObjectSection(); sizeONoTyped += objectSection == null ? 0 : objectSection.getNumberOfElements(); sizeShared += cat.countShared(); @@ -158,22 +167,33 @@ public KCatMerger(HDT[] hdts, BitmapTriple[] deletedTriple, CloseSuppressPath lo } this.estimatedSizeP = sizeP; + this.estimatedSizeG = sizeG; + try { // create maps, allocate more bits for the shared part int numbitsS = BitUtil.log2(sizeS + 1 + sizeShared) + 1 + shift; int numbitsP = BitUtil.log2(sizeP + 1); int numbitsO = BitUtil.log2(sizeO + 1 + sizeShared) + 1 + shift; + int numbitsG = BitUtil.log2(sizeG + 1); for (int i = 0; i < cats.length; i++) { DictionaryKCat cat = cats[i]; - subjectsMaps[i] = new SyncSeq(new SequenceLog64BigDisk( - (locations[i * 3] = location.resolve("subjectsMap_" + i)).toAbsolutePath().toString(), numbitsS, - cat.countSubjects() + 1)); + subjectsMaps[i] = new SyncSeq( + new SequenceLog64BigDisk((locations[i * locationDelta] = location.resolve("subjectsMap_" + i)) + .toAbsolutePath().toString(), numbitsS, cat.countSubjects() + 1)); predicatesMaps[i] = new SyncSeq(new SequenceLog64BigDisk( - (locations[i * 3 + 1] = location.resolve("predicatesMap_" + i)).toAbsolutePath().toString(), + 
(locations[i * locationDelta + 1] = location.resolve("predicatesMap_" + i)).toAbsolutePath() + .toString(), numbitsP, cat.countPredicates() + 1)); objectsMaps[i] = new SyncSeq(new SequenceLog64BigDisk( - (locations[i * 3 + 2] = location.resolve("objectsMap_" + i)).toAbsolutePath().toString(), + (locations[i * locationDelta + 2] = location.resolve("objectsMap_" + i)).toAbsolutePath() + .toString(), numbitsO, cat.countObjects() + 1)); + if (quad) { + graphsMaps[i] = new SyncSeq(new SequenceLog64BigDisk( + (locations[i * locationDelta + 3] = location.resolve("graphsMap_" + i)).toAbsolutePath() + .toString(), + numbitsG, cat.countGraphs() + 1)); + } } // merge the subjects/objects/shared from all the HDTs @@ -217,6 +237,23 @@ public CharSequence next() { } }).notif(sizeP, 20, "Merge predicates", listener); + sortedGraphs = quad ? mergeSection(cats, (hdtIndex, c) -> { + ExceptionIterator of = ExceptionIterator + .of(c.getGraphSection().getSortedEntries()); + if (deletedTriple != null) { + ModifiableBitmap deleteBitmap = deletedTriple[hdtIndex].getGraphs(); + return of.mapFiltered(((element, index) -> { + if (deleteBitmap.access(index + 1)) { + return null; + } + return new LocatedIndexedNode(hdtIndex, index + 1, ByteString.of(element)); + })); + } else { + return of.map( + ((element, index) -> new LocatedIndexedNode(hdtIndex, index + 1, ByteString.of(element)))); + } + }).notif(sizeP, 20, "Merge graphs", listener) : null; + sortedSubSections = new TreeMap<>(); // create a merge section for each section subSections @@ -293,6 +330,7 @@ public CharSequence next() { sectionShared = new WriteDictionarySection(spec, location.resolve("sortedShared"), bufferSize); sectionObject = new WriteDictionarySection(spec, location.resolve("sortedObject"), bufferSize); sectionPredicate = new WriteDictionarySection(spec, location.resolve("sortedPredicate"), bufferSize); + sectionGraph = quad ? 
new WriteDictionarySection(spec, location.resolve("sortedGraph"), bufferSize) : null; sectionSub = new TreeMap<>(); sortedSubSections.keySet().forEach((key) -> sectionSub.put(key, new WriteDictionarySection(spec, location.resolve("sortedSub" + getTypeId(key)), bufferSize))); @@ -449,7 +487,7 @@ public DictionaryPrivate buildDictionary() throws InterruptedException { catMergerThread.joinAndCrashIfRequired(); return DictionaryFactory.createWriteDictionary(dictionaryType, null, getSectionSubject(), getSectionPredicate(), - getSectionObject(), getSectionShared(), getSectionSub()); + getSectionObject(), getSectionShared(), getSectionSub(), getSectionGraph()); } private void runSharedCompute() { @@ -521,6 +559,18 @@ private void runSubSectionCompute() { return db.peek(); }).asIterator(), estimatedSizeP), null); + // load graphs + if (sectionGraph != null) { + sectionGraph.load(new OneReadDictionarySection(sortedGraphs.map((db, id) -> { + db.stream().forEach(node -> { + SyncSeq map = graphsMaps[node.getHdt()]; + assert map.get(node.getIndex()) == 0 : "overwriting previous graph value"; + map.set(node.getIndex(), id + 1); + }); + return db.peek(); + }).asIterator(), estimatedSizeG), null); + } + long shift = 1L; // load data typed sections for (Map.Entry e : sectionSub.entrySet()) { @@ -561,8 +611,8 @@ public void close() throws IOException { } catch (InterruptedException e) { throw new RuntimeException(e); } finally { - Closer.closeAll(sectionSubject, sectionPredicate, sectionObject, sectionShared, sectionSub, subjectsMaps, - predicatesMaps, objectsMaps, locations); + Closer.closeAll(sectionSubject, sectionPredicate, sectionGraph, sectionObject, sectionShared, sectionSub, + subjectsMaps, predicatesMaps, graphsMaps, objectsMaps, locations); } } @@ -602,6 +652,17 @@ public long extractPredicate(int hdtIndex, long oldID) { return predicatesMaps[hdtIndex].get(oldID); } + /** + * extract the graph from an HDTq + * + * @param hdtIndex the HDT index + * @param oldID the ID in 
the HDT triples + * @return ID in the new HDT + */ + public long extractGraph(int hdtIndex, long oldID) { + return graphsMaps[hdtIndex].get(oldID); + } + /** * extract the object from an HDT * @@ -635,8 +696,15 @@ public long extractObject(int hdtIndex, long oldID) { * @return mapped tripleID */ public TripleID extractMapped(int hdtIndex, TripleID id) { - TripleID mapped = new TripleID(extractSubject(hdtIndex, id.getSubject()), - extractPredicate(hdtIndex, id.getPredicate()), extractObject(hdtIndex, id.getObject())); + TripleID mapped; + if (graphsMaps != null) { + mapped = new TripleID(extractSubject(hdtIndex, id.getSubject()), + extractPredicate(hdtIndex, id.getPredicate()), extractObject(hdtIndex, id.getObject()), + extractGraph(hdtIndex, id.getGraph())); + } else { + mapped = new TripleID(extractSubject(hdtIndex, id.getSubject()), + extractPredicate(hdtIndex, id.getPredicate()), extractObject(hdtIndex, id.getObject())); + } assert mapped.isValid() : "mapped to empty triples! " + id + " => " + mapped; return mapped; } @@ -676,6 +744,13 @@ public DictionarySectionPrivate getSectionPredicate() { return sectionPredicate; } + /** + * @return graph section + */ + public WriteDictionarySection getSectionGraph() { + return sectionGraph; + } + /** * @return sub sections */ @@ -697,6 +772,13 @@ public synchronized void startMerger() { catMergerThread.startAll(); } + /** + * @return if the merge is handling quads + */ + public boolean isQuad() { + return graphsMaps != null; + } + static class BiDuplicateBuffer implements Comparable { private final DuplicateBuffer left; private final DuplicateBuffer right; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/MultipleSectionDictionaryKCat.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/MultipleSectionDictionaryKCat.java index c8b26e9c..6199e5bf 100644 --- 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/MultipleSectionDictionaryKCat.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/MultipleSectionDictionaryKCat.java @@ -40,6 +40,11 @@ public DictionarySection getPredicateSection() { return dictionary.getPredicates(); } + @Override + public DictionarySection getGraphSection() { + return dictionary.getGraphs(); + } + @Override public DictionarySection getObjectSection() { return dictionary.getAllObjects().get("NO_DATATYPE"); @@ -74,6 +79,11 @@ public long countObjects() { return count + countShared(); } + @Override + public long countGraphs() { + return dictionary.supportGraphs() ? dictionary.getGraphs().getNumberOfElements() : 0; + } + @Override public long nonTypedShift() { DictionarySection section = getObjectSection(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/MultipleSectionLangDictionaryKCat.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/MultipleSectionLangDictionaryKCat.java index fb82d89e..c1a86725 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/MultipleSectionLangDictionaryKCat.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/MultipleSectionLangDictionaryKCat.java @@ -42,6 +42,11 @@ public DictionarySection getPredicateSection() { return dictionary.getPredicates(); } + @Override + public DictionarySection getGraphSection() { + return dictionary.getGraphs(); + } + @Override public DictionarySection getObjectSection() { return allObjects.get(LiteralsUtils.NO_DATATYPE); @@ -82,6 +87,11 @@ public long nonTypedShift() { return countShared(); } + @Override + public long countGraphs() { + return dictionary.supportGraphs() ? 
dictionary.getGraphs().getNumberOfElements() : 0; + } + @Override public long typedShift() { DictionarySection section = getObjectSection(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTVocabulary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTVocabulary.java index 704884ae..3366aea7 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTVocabulary.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTVocabulary.java @@ -77,6 +77,7 @@ public class HDTVocabulary { public static final String DICTIONARY_TYPE_MULT_SECTION = HDT_DICTIONARY_BASE + "Mult>"; public static final String DICTIONARY_TYPE_FOUR_QUAD_SECTION = HDT_DICTIONARY_BASE + "FourQuad>"; public static final String DICTIONARY_TYPE_MULT_SECTION_LANG = HDT_DICTIONARY_BASE + "MultLang>"; + public static final String DICTIONARY_TYPE_MULT_SECTION_LANG_QUAD = HDT_DICTIONARY_BASE + "MultLangQuad>"; public static final String DICTIONARY_TYPE_FOUR_PSFC_SECTION = HDT_DICTIONARY_BASE + "FourPsfc>"; @@ -114,6 +115,7 @@ public class HDTVocabulary { // Bitmaps public static final String BITMAP_TYPE_PLAIN = HDT_BITMAP_BASE + "Plain>"; public static final String BITMAP_TYPE_ROARING = HDT_BITMAP_BASE + "Roaring>"; + public static final String BITMAP_TYPE_ROARING_MULTI = HDT_BITMAP_BASE + "RoaringMulti>"; // Misc public static final String ORIGINAL_SIZE = HDT_BASE + "originalSize>"; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTDiskImporter.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTDiskImporter.java index 429a75ff..19fc6b6c 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTDiskImporter.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTDiskImporter.java @@ -201,9 +201,10 @@ public CompressTripleMapper compressDictionary(Iterator iterator) 
profiler.pushSection("dictionary write"); // create sections and triple mapping DictionaryPrivate dictionary = hdt.getDictionary(); - CompressTripleMapper mapper = new CompressTripleMapper(basePath, compressionResult.getTripleCount(), chunkSize); + CompressTripleMapper mapper = new CompressTripleMapper(basePath, compressionResult.getTripleCount(), chunkSize, + compressionResult.supportsGraph()); try (CompressFourSectionDictionary modifiableDictionary = new CompressFourSectionDictionary(compressionResult, - mapper, listener, debugHDTBuilding)) { + mapper, listener, debugHDTBuilding, compressionResult.supportsGraph())) { dictionary.loadAsync(modifiableDictionary, listener); } catch (InterruptedException e) { throw new ParserException(e); @@ -238,8 +239,8 @@ public void compressTriples(CompressTripleMapper mapper) throws ParserException, profiler.pushSection("triple compression/map"); try { MapCompressTripleMerger tripleMapper = new MapCompressTripleMerger(basePath.resolve("tripleMapper"), - new AsyncIteratorFetcher<>(new TripleGenerator(mapper.getTripleCount())), mapper, listener, order, - bufferSize, chunkSize, 1 << ways); + new AsyncIteratorFetcher<>(TripleGenerator.of(mapper.getTripleCount(), mapper.supportsGraph())), + mapper, listener, order, bufferSize, chunkSize, 1 << ways); tripleCompressionResult = tripleMapper.merge(workers, compressMode); } catch (KWayMerger.KWayMergerException | InterruptedException e) { throw new ParserException(e); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java index cb26ab02..f5bf99d3 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java @@ -271,9 +271,17 @@ public IteratorTripleString search(CharSequence subject, CharSequence predicate, } // Conversion from 
TripleString to TripleID - TripleID triple = new TripleID(dictionary.stringToId(subject, TripleComponentRole.SUBJECT), - dictionary.stringToId(predicate, TripleComponentRole.PREDICATE), - dictionary.stringToId(object, TripleComponentRole.OBJECT)); + TripleID triple; + + if (dictionary.supportGraphs()) { + triple = new TripleID(dictionary.stringToId(subject, TripleComponentRole.SUBJECT), + dictionary.stringToId(predicate, TripleComponentRole.PREDICATE), + dictionary.stringToId(object, TripleComponentRole.OBJECT), 0); + } else { + triple = new TripleID(dictionary.stringToId(subject, TripleComponentRole.SUBJECT), + dictionary.stringToId(predicate, TripleComponentRole.PREDICATE), + dictionary.stringToId(object, TripleComponentRole.OBJECT)); + } if (triple.isNoMatch()) { // throw new NotFoundException("String not found in dictionary"); @@ -309,17 +317,20 @@ public long getLastTriplePosition() { }; } + CharSequence g = dictionary.supportGraphs() ? "" : null; + if (isMapped) { try { return new DictionaryTranslateIteratorBuffer(triples.search(triple), dictionary, subject, predicate, - object); + object, g); } catch (NullPointerException e) { e.printStackTrace(); // FIXME: find why this can happen - return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object); + return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object, + g); } } else { - return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object); + return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object, g); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java index 51568f04..b606d927 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java +++ 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java @@ -39,7 +39,8 @@ public WriteHDTImpl(HDTOptions spec, CloseSuppressPath workingLocation, int buff dictionary = DictionaryFactory.createWriteDictionary(this.spec, workingLocation.resolve("section"), bufferSize); // we need to have the bitmaps in memory, so we can't bypass the // implementation - triples = new WriteBitmapTriples(this.spec, workingLocation.resolve("tripleBitmap"), bufferSize); + triples = new WriteBitmapTriples(this.spec, workingLocation.resolve("tripleBitmap"), bufferSize, + dictionary.supportGraphs()); // small, can use default implementation header = HeaderFactory.createHeader(this.spec); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressTripleMapper.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressTripleMapper.java index 5185a6ce..5edd18f8 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressTripleMapper.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressTripleMapper.java @@ -1,13 +1,13 @@ package com.the_qa_company.qendpoint.core.hdt.impl.diskimport; -import com.the_qa_company.qendpoint.core.util.BitUtil; -import com.the_qa_company.qendpoint.core.util.io.compress.CompressUtil; -import com.the_qa_company.qendpoint.core.util.io.compress.WriteLongArrayBuffer; import com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64BigDisk; import com.the_qa_company.qendpoint.core.dictionary.impl.CompressFourSectionDictionary; +import com.the_qa_company.qendpoint.core.util.BitUtil; import com.the_qa_company.qendpoint.core.util.disk.LongArray; import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath; import com.the_qa_company.qendpoint.core.util.io.IOUtil; +import com.the_qa_company.qendpoint.core.util.io.compress.CompressUtil; +import 
com.the_qa_company.qendpoint.core.util.io.compress.WriteLongArrayBuffer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -23,17 +23,23 @@ public class CompressTripleMapper implements CompressFourSectionDictionary.NodeC private final WriteLongArrayBuffer subjects; private final WriteLongArrayBuffer predicates; private final WriteLongArrayBuffer objects; + private final WriteLongArrayBuffer graph; private final CloseSuppressPath locationSubjects; private final CloseSuppressPath locationPredicates; private final CloseSuppressPath locationObjects; + private final CloseSuppressPath locationGraph; private long shared = -1; private final long tripleCount; + private final boolean quads; - public CompressTripleMapper(CloseSuppressPath location, long tripleCount, long chunkSize) { + public CompressTripleMapper(CloseSuppressPath location, long tripleCount, long chunkSize, boolean quads) { this.tripleCount = tripleCount; + this.quads = quads; + locationSubjects = location.resolve("map_subjects"); locationPredicates = location.resolve("map_predicates"); locationObjects = location.resolve("map_objects"); + locationGraph = location.resolve("map_graph"); int numbits = BitUtil.log2(tripleCount + 2) + CompressUtil.INDEX_SHIFT; int maxElement = (int) Math.min(chunkSize / Long.BYTES / 3, Integer.MAX_VALUE - 5); subjects = new WriteLongArrayBuffer( @@ -44,6 +50,13 @@ public CompressTripleMapper(CloseSuppressPath location, long tripleCount, long c objects = new WriteLongArrayBuffer( new SequenceLog64BigDisk(locationObjects.toAbsolutePath().toString(), numbits, tripleCount + 2, true), tripleCount, maxElement); + if (quads) { + graph = new WriteLongArrayBuffer( + new SequenceLog64BigDisk(locationGraph.toAbsolutePath().toString(), numbits, tripleCount + 2, true), + tripleCount, maxElement); + } else { + graph = null; + } } /** @@ -51,12 +64,12 @@ public CompressTripleMapper(CloseSuppressPath location, long tripleCount, long c */ public void delete() { try { - 
IOUtil.closeAll(subjects, predicates, objects); + IOUtil.closeAll(subjects, predicates, objects, graph); } catch (IOException e) { log.warn("Can't close triple map array", e); } try { - IOUtil.closeAll(locationSubjects, locationPredicates, locationObjects); + IOUtil.closeAll(locationSubjects, locationPredicates, locationObjects, locationGraph); } catch (IOException e) { log.warn("Can't delete triple map array files", e); } @@ -83,11 +96,21 @@ public void onObject(long preMapId, long newMapId) { objects.set(preMapId, newMapId); } + @Override + public void onGraph(long preMapId, long newMapId) { + assert preMapId > 0; + assert newMapId >= CompressUtil.getHeaderId(1) : "negative or null new grap id"; + graph.set(preMapId, newMapId); + } + public void setShared(long shared) { this.shared = shared; subjects.free(); predicates.free(); objects.free(); + if (supportsGraph()) { + graph.free(); + } } private void checkShared() { @@ -126,6 +149,16 @@ public long extractObjects(long id) { return extract(objects, id); } + /** + * extract the map id of a graph + * + * @param id id + * @return new id + */ + public long extractGraph(long id) { + return extract(graph, id) - shared; + } + private long extract(LongArray array, long id) { checkShared(); // compute shared if required @@ -135,4 +168,8 @@ private long extract(LongArray array, long id) { public long getTripleCount() { return tripleCount; } + + public boolean supportsGraph() { + return quads; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResult.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResult.java index 6bacfda9..e926e5a1 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResult.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResult.java @@ -31,6 +31,11 @@ public interface 
CompressionResult extends Closeable { */ long getTripleCount(); + /** + * @return if the result is using graph + */ + boolean supportsGraph(); + /** * @return a sorted iterator of subject */ @@ -46,6 +51,11 @@ public interface CompressionResult extends Closeable { */ ExceptionIterator getObjects(); + /** + * @return a sorted iterator of graphs + */ + ExceptionIterator getGraph(); + /** * @return the count of subjects */ @@ -61,6 +71,11 @@ public interface CompressionResult extends Closeable { */ long getObjectsCount(); + /** + * @return the count of graphs + */ + long getGraphCount(); + /** * @return the count of shared */ diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResultEmpty.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResultEmpty.java index 6baddab8..e89ad22e 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResultEmpty.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResultEmpty.java @@ -11,6 +11,11 @@ public long getTripleCount() { return 0; } + @Override + public boolean supportsGraph() { + return false; + } + @Override public ExceptionIterator getSubjects() { return ExceptionIterator.empty(); @@ -26,6 +31,11 @@ public ExceptionIterator getObjects() { return ExceptionIterator.empty(); } + @Override + public ExceptionIterator getGraph() { + return ExceptionIterator.empty(); + } + @Override public long getSubjectsCount() { return 0; @@ -41,6 +51,11 @@ public long getObjectsCount() { return 0; } + @Override + public long getGraphCount() { + return 0; + } + @Override public long getSharedCount() { return 0; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResultFile.java 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResultFile.java index 7c86703b..6df09fa3 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResultFile.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResultFile.java @@ -18,15 +18,23 @@ public class CompressionResultFile implements CompressionResult { private final CompressNodeReader subjects; private final CompressNodeReader predicates; private final CompressNodeReader objects; + private final CompressNodeReader graph; private final SectionCompressor.TripleFile sections; + private final boolean supportsGraph; - public CompressionResultFile(long tripleCount, long ntRawSize, SectionCompressor.TripleFile sections) - throws IOException { + public CompressionResultFile(long tripleCount, long ntRawSize, SectionCompressor.TripleFile sections, + boolean supportsGraph) throws IOException { this.tripleCount = tripleCount; this.ntRawSize = ntRawSize; this.subjects = new CompressNodeReader(sections.openRSubject()); this.predicates = new CompressNodeReader(sections.openRPredicate()); this.objects = new CompressNodeReader(sections.openRObject()); + this.supportsGraph = supportsGraph; + if (supportsGraph) { + this.graph = new CompressNodeReader(sections.openRGraph()); + } else { + this.graph = null; + } this.sections = sections; } @@ -35,6 +43,11 @@ public long getTripleCount() { return tripleCount; } + @Override + public boolean supportsGraph() { + return supportsGraph; + } + @Override public ExceptionIterator getSubjects() { return subjects; @@ -50,6 +63,11 @@ public ExceptionIterator getObjects() { return objects; } + @Override + public ExceptionIterator getGraph() { + return graph; + } + @Override public void delete() throws IOException { sections.delete(); @@ -70,6 +88,11 @@ public long getObjectsCount() { return objects.getSize(); } + @Override + public long 
getGraphCount() { + return graph.getSize(); + } + @Override public long getSharedCount() { return tripleCount; @@ -82,6 +105,6 @@ public long getRawSize() { @Override public void close() throws IOException { - IOUtil.closeAll(objects, predicates, subjects); + IOUtil.closeAll(objects, predicates, subjects, graph); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResultPartial.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResultPartial.java index 8cfc6019..bf4bc27b 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResultPartial.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressionResultPartial.java @@ -24,13 +24,14 @@ public class CompressionResultPartial implements CompressionResult { private final ExceptionIterator subject; private final ExceptionIterator predicate; private final ExceptionIterator object; + private final ExceptionIterator graph; - public CompressionResultPartial(List files, long triplesCount, long ntSize) - throws IOException { + public CompressionResultPartial(List files, long triplesCount, long ntSize, + boolean graph) throws IOException { this.files = new ArrayList<>(files.size()); this.ntSize = ntSize; for (SectionCompressor.TripleFile file : files) { - this.files.add(new CompressNodeReaderTriple(file)); + this.files.add(new CompressNodeReaderTriple(file, graph)); } this.triplesCount = triplesCount; @@ -38,6 +39,11 @@ public CompressionResultPartial(List files, long t this.subject = createBTree(0, files.size(), CompressNodeReaderTriple::getS); this.predicate = createBTree(0, files.size(), CompressNodeReaderTriple::getP); this.object = createBTree(0, files.size(), CompressNodeReaderTriple::getO); + if (graph) { + this.graph = createBTree(0, files.size(), CompressNodeReaderTriple::getG); + } else { + this.graph = null; + } } 
private ExceptionIterator createBTree(int start, int end, @@ -60,6 +66,11 @@ public long getTripleCount() { return triplesCount; } + @Override + public boolean supportsGraph() { + return graph != null; + } + @Override public ExceptionIterator getSubjects() { return subject; @@ -75,6 +86,11 @@ public ExceptionIterator getObjects() { return object; } + @Override + public ExceptionIterator getGraph() { + return graph; + } + @Override public void delete() throws IOException { IOUtil.closeAll(files); @@ -108,25 +124,35 @@ public long getSharedCount() { return triplesCount; } + @Override + public long getGraphCount() { + return triplesCount; + } + @Override public long getRawSize() { return ntSize; } private static class CompressNodeReaderTriple implements Closeable { - final CompressNodeReader s, p, o; + final CompressNodeReader s, p, o, g; final SectionCompressor.TripleFile file; - public CompressNodeReaderTriple(SectionCompressor.TripleFile file) throws IOException { + public CompressNodeReaderTriple(SectionCompressor.TripleFile file, boolean graph) throws IOException { this.s = new CompressNodeReader(file.openRSubject()); this.p = new CompressNodeReader(file.openRPredicate()); this.o = new CompressNodeReader(file.openRObject()); + if (graph) { + this.g = new CompressNodeReader(file.openRGraph()); + } else { + this.g = null; + } this.file = file; } @Override public void close() throws IOException { - IOUtil.closeAll(s, p, o); + IOUtil.closeAll(s, p, o, g); } public CompressNodeReader getS() { @@ -140,5 +166,9 @@ public CompressNodeReader getP() { public CompressNodeReader getO() { return o; } + + public CompressNodeReader getG() { + return g; + } } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MultiSectionLangSectionCompressor.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MultiSectionLangSectionCompressor.java index 3139c138..c928182d 100644 --- 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MultiSectionLangSectionCompressor.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MultiSectionLangSectionCompressor.java @@ -6,12 +6,12 @@ import com.the_qa_company.qendpoint.core.util.LiteralsUtils; import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath; import com.the_qa_company.qendpoint.core.util.string.ByteString; -import com.the_qa_company.qendpoint.core.util.string.CompactString; public class MultiSectionLangSectionCompressor extends SectionCompressor { public MultiSectionLangSectionCompressor(CloseSuppressPath baseFileName, AsyncIteratorFetcher source, - MultiThreadListener listener, int bufferSize, long chunkSize, int k, boolean debugSleepKwayDict) { - super(baseFileName, source, listener, bufferSize, chunkSize, k, debugSleepKwayDict); + MultiThreadListener listener, int bufferSize, long chunkSize, int k, boolean debugSleepKwayDict, + boolean quad) { + super(baseFileName, source, listener, bufferSize, chunkSize, k, debugSleepKwayDict, quad); } @Override diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MultiSectionSectionCompressor.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MultiSectionSectionCompressor.java index c8dd9e61..1eafdd7c 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MultiSectionSectionCompressor.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MultiSectionSectionCompressor.java @@ -13,8 +13,9 @@ */ public class MultiSectionSectionCompressor extends SectionCompressor { public MultiSectionSectionCompressor(CloseSuppressPath baseFileName, AsyncIteratorFetcher source, - MultiThreadListener listener, int bufferSize, long chunkSize, int k, boolean debugSleepKwayDict) { - super(baseFileName, source, listener, bufferSize, 
chunkSize, k, debugSleepKwayDict); + MultiThreadListener listener, int bufferSize, long chunkSize, int k, boolean debugSleepKwayDict, + boolean quad) { + super(baseFileName, source, listener, bufferSize, chunkSize, k, debugSleepKwayDict, quad); } @Override diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/SectionCompressor.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/SectionCompressor.java index 822e2cc0..05b47197 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/SectionCompressor.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/SectionCompressor.java @@ -50,9 +50,11 @@ public class SectionCompressor implements KWayMerger.KWayMergerImpl source, - MultiThreadListener listener, int bufferSize, long chunkSize, int k, boolean debugSleepKwayDict) { + MultiThreadListener listener, int bufferSize, long chunkSize, int k, boolean debugSleepKwayDict, + boolean quads) { this.source = source; this.listener = listener; this.baseFileName = baseFileName; @@ -60,6 +62,7 @@ public SectionCompressor(CloseSuppressPath baseFileName, AsyncIteratorFetcher compressToFile(workers); + case CompressionResult.COMPRESSION_MODE_PARTIAL -> compressPartial(); + default -> throw new IllegalArgumentException("Unknown compression mode: " + mode); + }; } @Override @@ -199,6 +210,12 @@ public void createChunk(SizeFetcher fetcher, CloseSuppressPath out ParallelSortableArrayList subjects = new ParallelSortableArrayList<>(IndexedNode[].class); ParallelSortableArrayList predicates = new ParallelSortableArrayList<>(IndexedNode[].class); ParallelSortableArrayList objects = new ParallelSortableArrayList<>(IndexedNode[].class); + ParallelSortableArrayList graph; + if (supportsGraph()) { + graph = new ParallelSortableArrayList<>(IndexedNode[].class); + } else { + graph = null; + } listener.notifyProgress(10, "reading triples " + 
triples.get()); TripleString next; @@ -227,6 +244,11 @@ public void createChunk(SizeFetcher fetcher, CloseSuppressPath out IndexedNode objectNode = new IndexedNode(convertObject(next.getObject()), tripleID); objects.add(objectNode); + if (graph != null) { + IndexedNode graphNode = new IndexedNode(convertGraph(next.getGraph()), tripleID); + graph.add(graphNode); + } + if (tripleID % 100_000 == 0) { listener.notifyProgress(10, "reading triples " + tripleID); } @@ -241,28 +263,42 @@ public void createChunk(SizeFetcher fetcher, CloseSuppressPath out try { TripleFile sections = new TripleFile(output, true); try { + float split = 40.0f / (3 + (graph != null ? 1 : 0)); + float range = 70; IntermediateListener il = new IntermediateListener(listener); - il.setRange(70, 80); + il.setRange(range, range + split); + range += split; il.setPrefix("creating subjects section " + sections.root.getFileName() + ": "); il.notifyProgress(0, "sorting"); try (OutputStream stream = sections.openWSubject()) { subjects.parallelSort(IndexedNode::compareTo); CompressUtil.writeCompressedSection(subjects, stream, il); } - il.setRange(80, 90); + il.setRange(range, range + split); + range += split; il.setPrefix("creating predicates section " + sections.root.getFileName() + ": "); il.notifyProgress(0, "sorting"); try (OutputStream stream = sections.openWPredicate()) { predicates.parallelSort(IndexedNode::compareTo); CompressUtil.writeCompressedSection(predicates, stream, il); } - il.setRange(90, 100); + il.setRange(range, range + split); + range += split; il.setPrefix("creating objects section " + sections.root.getFileName() + ": "); il.notifyProgress(0, "sorting"); try (OutputStream stream = sections.openWObject()) { objects.parallelSort(IndexedNode::compareTo); CompressUtil.writeCompressedSection(objects, stream, il); } + if (graph != null) { + il.setRange(range, range + split); + il.setPrefix("creating graph section " + sections.root.getFileName() + ": "); + il.notifyProgress(0, "sorting"); + 
try (OutputStream stream = sections.openWGraph()) { + graph.parallelSort(IndexedNode::compareTo); + CompressUtil.writeCompressedSection(graph, stream, il); + } + } } finally { subjects.clear(); predicates.clear(); @@ -298,6 +334,13 @@ public SizeFetcher newStopFlux(Supplier flux) { return SizeFetcher.ofTripleString(flux, chunkSize); } + /** + * @return if this compressor is compressing graphs + */ + protected boolean supportsGraph() { + return quads; + } + /** * A triple directory, contains 3 files, subject, predicate and object * @@ -308,12 +351,14 @@ public class TripleFile implements Closeable { private final CloseSuppressPath s; private final CloseSuppressPath p; private final CloseSuppressPath o; + private final CloseSuppressPath g; private TripleFile(CloseSuppressPath root, boolean mkdir) throws IOException { this.root = root; this.s = root.resolve("subject"); this.p = root.resolve("predicate"); this.o = root.resolve("object"); + this.g = root.resolve("graph"); root.closeWithDeleteRecurse(); if (mkdir) { @@ -354,6 +399,14 @@ public OutputStream openWObject() throws IOException { return o.openOutputStream(bufferSize); } + /** + * @return open a write stream to the graph file + * @throws IOException can't open the stream + */ + public OutputStream openWGraph() throws IOException { + return g.openOutputStream(bufferSize); + } + /** * @return open a read stream to the subject file * @throws IOException can't open the stream @@ -378,6 +431,14 @@ public InputStream openRObject() throws IOException { return o.openInputStream(bufferSize); } + /** + * @return open a read stream to the graph file + * @throws IOException can't open the stream + */ + public InputStream openRGraph() throws IOException { + return g.openInputStream(bufferSize); + } + /** * @return the path to the subject file */ @@ -399,6 +460,13 @@ public CloseSuppressPath getObjectPath() { return o; } + /** + * @return the path to the graph file + */ + public CloseSuppressPath getGraphPath() { + return g; 
+ } + /** * compute this triple file from multiple triples files * @@ -414,11 +482,17 @@ public void compute(List triples, boolean async) throws IOException, computeSubject(triples, false); computePredicate(triples, false); computeObject(triples, false); + if (supportsGraph()) { + computeGraph(triples, false); + } } else { - ExceptionThread - .async("SectionMerger" + root.getFileName(), () -> computeSubject(triples, true), - () -> computePredicate(triples, true), () -> computeObject(triples, true)) - .joinAndCrashIfRequired(); + + ExceptionThread.async("SectionMerger" + root.getFileName(), () -> computeSubject(triples, true), + () -> computePredicate(triples, true), () -> computeObject(triples, true), () -> { + if (supportsGraph()) { + computeGraph(triples, true); + } + }).joinAndCrashIfRequired(); } } @@ -437,6 +511,11 @@ private void computeObject(List triples, boolean async) throws IOExc TripleFile::getObjectPath, async); } + private void computeGraph(List triples, boolean async) throws IOException { + computeSection(triples, "graph", 66, 100, this::openWGraph, TripleFile::openRGraph, + TripleFile::getGraphPath, async); + } + private void computeSection(List triples, String section, int start, int end, ExceptionSupplier openW, ExceptionFunction openR, diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java index 055b0ab5..0df71236 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java @@ -195,7 +195,8 @@ public void processTriple(TripleString triple, long pos) { @Override public void remove(CharSequence subject, CharSequence predicate, CharSequence object) { - TripleString pattern = new TripleString(subject.toString(), predicate.toString(), object.toString()); + TripleString pattern = new 
TripleString(HeaderUtil.cleanURI(subject.toString()), + HeaderUtil.cleanURI(predicate.toString()), HeaderUtil.cleanURI(object.toString())); triples.removeIf(next -> next.match(pattern)); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java index 91897e0e..ad5e2328 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java @@ -22,6 +22,7 @@ import com.the_qa_company.qendpoint.core.dictionary.Dictionary; import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; +import com.the_qa_company.qendpoint.core.quad.QuadString; import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; import com.the_qa_company.qendpoint.core.triples.IteratorTripleString; import com.the_qa_company.qendpoint.core.triples.TripleID; @@ -39,20 +40,8 @@ public class DictionaryTranslateIterator implements IteratorTripleString { CharSequence s, p, o, g; - long lastSid, lastPid, lastOid; - CharSequence lastSstr, lastPstr, lastOstr; - - /** - * Basic constructor - * - * @param iteratorTripleID Iterator of TripleID to be used - * @param dictionary The dictionary to be used - */ - public DictionaryTranslateIterator(IteratorTripleID iteratorTripleID, Dictionary dictionary) { - this.iterator = iteratorTripleID; - this.dictionary = dictionary; - this.s = this.p = this.o = this.g = ""; - } + long lastSid, lastPid, lastOid, lastGid; + CharSequence lastSstr, lastPstr, lastOstr, lastGstr; /** * Basic constructor @@ -67,7 +56,7 @@ public DictionaryTranslateIterator(IteratorTripleID iteratorTripleID, Dictionary this.s = s == null ? "" : s; this.p = p == null ? "" : p; this.o = o == null ? 
"" : o; - this.g = ""; + this.g = null; } /** @@ -83,7 +72,7 @@ public DictionaryTranslateIterator(IteratorTripleID iteratorTripleID, Dictionary this.s = s == null ? "" : s; this.p = p == null ? "" : p; this.o = o == null ? "" : o; - this.g = g == null ? "" : g; + this.g = g; } /* @@ -125,7 +114,17 @@ public TripleString next() { lastOid = triple.getObject(); } - return new TripleString(lastSstr, lastPstr, lastOstr); + if (g == null) { + // no graph + return new TripleString(lastSstr, lastPstr, lastOstr); + } else if (!g.isEmpty()) { + return new QuadString(lastSstr, lastPstr, lastOstr, g); + } else if (triple.getGraph() != lastGid) { + lastGstr = dictionary.idToString(triple.getGraph(), TripleComponentRole.GRAPH); + lastGid = triple.getGraph(); + } + + return new QuadString(lastSstr, lastPstr, lastOstr, lastGstr); // return DictionaryUtil.tripleIDtoTripleString(dictionary, triple); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java index 978a7eea..cf46d3f7 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java @@ -79,7 +79,7 @@ public DictionaryTranslateIteratorBuffer(SuppliableIteratorTripleID iteratorTrip public DictionaryTranslateIteratorBuffer(SuppliableIteratorTripleID iteratorTripleID, DictionaryPrivate dictionary, CharSequence s, CharSequence p, CharSequence o, CharSequence g) { - this(iteratorTripleID, dictionary, s, p, o, g, DEFAULT_BLOCK_SIZE, true); + this(iteratorTripleID, dictionary, s, p, o, g, DEFAULT_BLOCK_SIZE, g != null); } public DictionaryTranslateIteratorBuffer(SuppliableIteratorTripleID iteratorTripleID, DictionaryPrivate dictionary, diff --git 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java index 18e8491c..3e85f61a 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java @@ -349,10 +349,15 @@ public class HDTOptionsKeys { @Value(key = DICTIONARY_TYPE_KEY, desc = "Multi section dictionary") public static final String DICTIONARY_TYPE_VALUE_MULTI_OBJECTS = "dictionaryMultiObj"; /** - * multi section dictionary + * multi section dictionary lang */ @Value(key = DICTIONARY_TYPE_KEY, desc = "Multi section dictionary language") public static final String DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG = "dictionaryMultiObjLang"; + /** + * multi section dictionary lang quad + */ + @Value(key = DICTIONARY_TYPE_KEY, desc = "Multi section dictionary language") + public static final String DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG_QUAD = "dictionaryMultiObjLangQuad"; /** * Ignore MSDL RDFTYPE INDEX diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/QuadString.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/QuadString.java index 35919309..56226477 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/QuadString.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/QuadString.java @@ -2,6 +2,7 @@ import com.the_qa_company.qendpoint.core.exceptions.ParserException; import com.the_qa_company.qendpoint.core.triples.TripleString; +import com.the_qa_company.qendpoint.core.util.string.ByteString; public class QuadString extends TripleString { protected CharSequence context; @@ -115,6 +116,18 @@ public QuadString tripleToString() { return new QuadString(subject.toString(), predicate.toString(), object.toString(), context.toString()); } + @Override + 
public TripleString tripleToByteString() { + return new QuadString(ByteString.copy(subject), ByteString.copy(predicate), ByteString.copy(object), + ByteString.copy(context)); + } + + @Override + public TripleString tripleToByteStringCast() { + return new QuadString(ByteString.of(subject), ByteString.of(predicate), ByteString.of(object), + ByteString.copy(context)); + } + @Override public String toString() { if (context.length() == 0) { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIterator.java deleted file mode 100644 index eca2acd6..00000000 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIterator.java +++ /dev/null @@ -1,139 +0,0 @@ -package com.the_qa_company.qendpoint.core.quad.impl; - -import java.util.List; - -import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; -import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; -import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; -import com.the_qa_company.qendpoint.core.triples.TripleID; -import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriples; -import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIterator; -import com.the_qa_company.qendpoint.core.triples.impl.TripleOrderConvert; - -public class BitmapQuadsIterator extends BitmapTriplesIterator { - - // resolves ????, S???, SP??, SPO? 
queries - - private final List bitmapsGraph; // one bitmap per graph - private final long numberOfGraphs; - private long posG; // the current graph bitmap - private long g; // g is variable - - public BitmapQuadsIterator(BitmapTriples triples, TripleID pattern) { - super(triples, pattern, false); - this.bitmapsGraph = triples.getQuadInfoAG(); - this.numberOfGraphs = bitmapsGraph.size(); - newSearch(pattern); - } - - @Override - public void goToStart() { - super.goToStart(); - posG = 0; - while (!bitmapsGraph.get((int) posG).access(posZ)) { - posG++; - } - g = posG + 1; - } - - @Override - public long estimatedNumResults() { - long results = 0; - for (int i = 0; i < numberOfGraphs; i++) { - results += bitmapsGraph.get(i).rank1(maxZ - 1) - bitmapsGraph.get(i).rank1(minZ - 1); - } - return results; - } - - /* - * Get the next solution - */ - @Override - public TripleID next() { - z = adjZ.get(posZ); // get the next object (Z). We just retrieve it from - // the list of objects (AdjZ) from current position - // posZ - if (posZ >= nextZ) { // if, with the current position of the object - // (posZ), we have reached the next list of - // objects (starting in nexZ), then we should - // update the associated predicate (Y) and, - // potentially, also the associated subject (X) - posY = triples.getBitmapZ().rank1(posZ - 1); // move to the next - // position of - // predicates - y = adjY.get(posY); // get the next predicate (Y). 
We just retrieve - // it from the list of predicates(AdjY) from - // current position posY - nextZ = adjZ.findNext(posZ) + 1; // update nextZ, storing in which - // position (in adjZ) ends the - // list of objects associated - // with the current - // subject,predicate - if (posY >= nextY) { // if we have reached the next list of objects - // (starting in nexZ) we should update the - // associated predicate (Y) and, - // potentially, also the associated subject - // (X) - x = triples.getBitmapY().rank1(posY - 1) + 1; // get the next - // subject (X) - nextY = adjY.findNext(posY) + 1; // update nextY, storing in - // which position (in AdjY) - // ends the list of - // predicates associated - // with the current subject - } - } - - g = posG + 1; - - // set posG to the next graph of this triple - do { - posG++; - } while (posG + 1 <= numberOfGraphs && !bitmapsGraph.get((int) posG).access(posZ)); - - if (posG == numberOfGraphs) { // there are no further graphs for this - // triple - posZ++; - if (posZ < maxZ) { - posG = 0; - while (!bitmapsGraph.get((int) posG).access(posZ)) { - posG++; - } - } - } - - updateOutput(); // set the components (subject,predicate,object,graph) - // of the returned triple - return returnTriple; // return the triple as solution - } - - /* - * Set the components (subject,predicate,object) of the returned triple - */ - @Override - protected void updateOutput() { - returnTriple.setAll(x, y, z, g); - TripleOrderConvert.swapComponentOrder(returnTriple, triples.getOrder(), TripleComponentOrder.SPO); - } - - @Override - public boolean hasPrevious() { - throw new NotImplementedException(); - } - - @Override - public TripleID previous() { - throw new NotImplementedException(); - } - - @Override - public boolean canGoTo() { - throw new NotImplementedException(); - } - - @Override - public void goTo(long pos) { - throw new NotImplementedException(); - } - -} diff --git 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorG.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorG.java deleted file mode 100644 index ec0dce53..00000000 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorG.java +++ /dev/null @@ -1,116 +0,0 @@ -package com.the_qa_company.qendpoint.core.quad.impl; - -import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; -import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; -import com.the_qa_company.qendpoint.core.triples.TripleID; -import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriples; -import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIterator; - -public class BitmapQuadsIteratorG extends BitmapTriplesIterator { - - // resolves ???G, S??G, SP?G, SPOG queries - - private final Bitmap bitmapGraph; // the graph bitmap for the search - - public BitmapQuadsIteratorG(BitmapTriples triples, TripleID pattern) { - super(triples, pattern, false); - this.bitmapGraph = triples.getQuadInfoAG().get((int) pattern.getGraph() - 1); - newSearch(pattern); - } - - @Override - public void goToStart() { - if (minZ >= maxZ || minZ == -1) { // no results - posZ = maxZ; - } else { - super.goToStart(); - } - if (!bitmapGraph.access(posZ)) { - posZ = bitmapGraph.selectNext1(posZ + 1); - } - } - - @Override - public long estimatedNumResults() { - if (minZ == -1) { - return 0; - } - return bitmapGraph.rank1(maxZ - 1) - bitmapGraph.rank1(minZ - 1); - } - - /* - * Check if there are more solution - */ - @Override - public boolean hasNext() { - return posZ < maxZ && posZ != -1; // Just check if we have arrived to - // the maximum position of the - // objects that resolve the query - } - - /* - * Get the next solution - */ - @Override - public TripleID next() { - z = adjZ.get(posZ); // get the next object (Z). 
We just retrieve it from - // the list of objects (AdjZ) from current position - // posZ - if (posZ >= nextZ) { // if, with the current position of the object - // (posZ), we have reached the next list of - // objects (starting in nexZ), then we should - // update the associated predicate (Y) and, - // potentially, also the associated subject (X) - posY = triples.getBitmapZ().rank1(posZ - 1); // move to the next - // position of - // predicates - y = adjY.get(posY); // get the next predicate (Y). We just retrieve - // it from the list of predicates(AdjY) from - // current position posY - nextZ = adjZ.findNext(posZ) + 1; // update nextZ, storing in which - // position (in adjZ) ends the - // list of objects associated - // with the current - // subject,predicate - if (posY >= nextY) { // if we have reached the next list of objects - // (starting in nexZ) we should update the - // associated predicate (Y) and, - // potentially, also the associated subject - // (X) - x = triples.getBitmapY().rank1(posY - 1) + 1; // get the next - // subject (X) - nextY = adjY.findNext(posY) + 1; // update nextY, storing in - // which position (in AdjY) - // ends the list of - // predicates associated - // with the current subject - } - } - posZ = bitmapGraph.selectNext1(posZ + 1); - - updateOutput(); // set the components (subject,predicate,object,graph) - // of the returned triple - return returnTriple; // return the triple as solution - } - - @Override - public boolean hasPrevious() { - throw new NotImplementedException(); - } - - @Override - public TripleID previous() { - throw new NotImplementedException(); - } - - @Override - public boolean canGoTo() { - throw new NotImplementedException(); - } - - @Override - public void goTo(long pos) { - throw new NotImplementedException(); - } - -} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYFOQ.java 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYFOQ.java deleted file mode 100644 index f3c19e98..00000000 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYFOQ.java +++ /dev/null @@ -1,97 +0,0 @@ -package com.the_qa_company.qendpoint.core.quad.impl; - -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; - -import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; -import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; -import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; -import com.the_qa_company.qendpoint.core.triples.TripleID; -import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriples; -import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIteratorYFOQ; -import com.the_qa_company.qendpoint.core.iterator.SuppliableIteratorTripleID; - -public class BitmapQuadsIteratorYFOQ implements SuppliableIteratorTripleID { - - private final BitmapTriplesIteratorYFOQ inIt; - private final List bitmapGraphs; - private List graphs; - private TripleID curTriple; - - public BitmapQuadsIteratorYFOQ(BitmapTriples triples, TripleID pattern) { - this.inIt = new BitmapTriplesIteratorYFOQ(triples, pattern); - this.bitmapGraphs = triples.getQuadInfoAG(); - this.graphs = new ArrayList<>(); - } - - private void updateNextTriple() { - if (!this.inIt.hasNext()) - throw new RuntimeException("inIt should have next"); - this.curTriple = this.inIt.next(); - this.graphs = bitmapGraphs.stream().parallel().filter(graph -> graph.access((int) this.inIt.getPosZ() - 1)) - .map(graph -> bitmapGraphs.indexOf(graph) + 1L).collect(Collectors.toList()); - } - - @Override - public boolean hasNext() { - return this.graphs.size() > 0 || this.inIt.hasNext(); - } - - @Override - public TripleID next() { - if (graphs.isEmpty()) { - this.updateNextTriple(); - return this.next(); - } - long curGraph = graphs.remove(0); - 
TripleID curTriple = this.curTriple.clone(); - curTriple.setGraph(curGraph); - return curTriple; - } - - @Override - public void goToStart() { - this.inIt.goToStart(); - } - - @Override - public boolean hasPrevious() { - return this.inIt.hasPrevious(); - } - - @Override - public TripleID previous() { - return this.inIt.previous(); - } - - @Override - public boolean canGoTo() { - return this.inIt.canGoTo(); - } - - @Override - public void goTo(long pos) { - this.inIt.goTo(pos); - } - - @Override - public long estimatedNumResults() { - return this.inIt.estimatedNumResults(); - } - - @Override - public ResultEstimationType numResultEstimation() { - return this.inIt.numResultEstimation(); - } - - @Override - public TripleComponentOrder getOrder() { - return this.inIt.getOrder(); - } - - @Override - public long getLastTriplePosition() { - return this.inIt.getLastTriplePosition(); - } -} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYGFOQ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYGFOQ.java deleted file mode 100644 index 9020a808..00000000 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorYGFOQ.java +++ /dev/null @@ -1,96 +0,0 @@ -package com.the_qa_company.qendpoint.core.quad.impl; - -import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; -import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; -import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; -import com.the_qa_company.qendpoint.core.iterator.SuppliableIteratorTripleID; -import com.the_qa_company.qendpoint.core.triples.TripleID; -import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriples; -import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIteratorYFOQ; - -public class BitmapQuadsIteratorYGFOQ implements SuppliableIteratorTripleID { - - // resolves ?P?G queries - private final Bitmap 
bitmapGraph; // the bitmap of the requested graph - private final BitmapTriplesIteratorYFOQ inIt; - private TripleID nextRes = null; - - public BitmapQuadsIteratorYGFOQ(BitmapTriples triples, TripleID pattern) { - this.inIt = new BitmapTriplesIteratorYFOQ(triples, pattern); - this.bitmapGraph = triples.getQuadInfoAG().get((int) pattern.getGraph() - 1); - this.goToStart(); - this.calculateNext(); - } - - private boolean isValidZ() { - return this.inIt.getPosZ() != -1 && this.bitmapGraph.access(this.inIt.getPosZ() - 1); - } - - @Override - public void goToStart() { - this.inIt.goToStart(); - } - - @Override - public boolean hasNext() { - return this.nextRes != null; - } - - private void calculateNext() { - this.nextRes = null; - while (this.inIt.hasNext()) { - TripleID next = this.inIt.next().clone(); - if (!this.isValidZ()) - continue; - this.nextRes = next; - break; - } - } - - @Override - public TripleID next() { - TripleID res = this.nextRes.clone(); - this.calculateNext(); - return res; - } - - @Override - public boolean hasPrevious() { - return this.inIt.hasPrevious(); - } - - @Override - public TripleID previous() { - return this.inIt.previous(); - } - - @Override - public boolean canGoTo() { - return this.inIt.canGoTo(); - } - - @Override - public void goTo(long pos) { - this.inIt.goTo(pos); - } - - @Override - public long estimatedNumResults() { - return this.inIt.estimatedNumResults(); - } - - @Override - public ResultEstimationType numResultEstimation() { - return this.inIt.numResultEstimation(); - } - - @Override - public TripleComponentOrder getOrder() { - return this.inIt.getOrder(); - } - - @Override - public long getLastTriplePosition() { - return this.inIt.getLastTriplePosition(); - } -} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZFOQ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZFOQ.java deleted file mode 100644 index 
348d6726..00000000 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZFOQ.java +++ /dev/null @@ -1,223 +0,0 @@ -package com.the_qa_company.qendpoint.core.quad.impl; - -import java.util.List; - -import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; -import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; -import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; -import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; -import com.the_qa_company.qendpoint.core.triples.TripleID; -import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriples; -import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIteratorZFOQ; -import com.the_qa_company.qendpoint.core.triples.impl.TripleOrderConvert; - -public class BitmapQuadsIteratorZFOQ extends BitmapTriplesIteratorZFOQ { - - // resolves ?PO?, ??O? queries - - private final List bitmapsGraph; // one bitmap per graph - private final int numberOfGraphs; - private long posG; // the current graph bitmap - private int g; // g is variable - private boolean updateXYZ = true; - - public BitmapQuadsIteratorZFOQ(BitmapTriples triples, TripleID pattern) { - super(triples, pattern); - this.bitmapsGraph = triples.getQuadInfoAG(); - this.numberOfGraphs = bitmapsGraph.size(); - newSearch(pattern); - } - - @Override - public void goToStart() { - super.goToStart(); - if (hasNext()) { - posG = getNextGraphPosition(0); - } - } - - /* - * Get the next solution - */ - @Override - public TripleID next() { - if (updateXYZ) { - long posY = adjIndex.get(posIndex); // get the position of the next - // occurrence of the predicate - // in AdjY - - z = patZ != 0 ? patZ : (int) adjIndex.findListIndex(posIndex) + 1; // get - // the - // next - // object - // (z) - // as - // the - // number - // of - // list - // in - // adIndex - // corresponding - // to - // posIndex - y = patY != 0 ? 
patY : (int) adjY.get(posY); // get the next - // predicate (y) as - // the element in - // adjY stores in - // position posY - x = (int) adjY.findListIndex(posY) + 1; // get the next subject (X) - // as the number of list in - // adjY corresponding to - // posY - - updateXYZ = false; - } - g = (int) posG + 1; - - posG = getNextGraphPosition((int) posG + 1); // get the next graph - // position for the - // current triple - if (posG == numberOfGraphs) { // there are no further graphs for this - // triple - posIndex++; // increase the position of the next occurrence of the - // predicate - if (hasNext()) { - updateXYZ = true; - posG = getNextGraphPosition(0); - } - } - - updateOutput(); // set the components (subject,predicate,object) of the - // returned triple - return returnTriple; // return the triple as solution - } - - private int getNextGraphPosition(int pos) { - int nextTriplePos = (int) getNextTriplePosition(); - while (pos < numberOfGraphs && !bitmapsGraph.get(pos).access(nextTriplePos)) { - pos++; - } - return pos; - } - - private long getNextTriplePosition() { - try { - return triples.getAdjacencyListZ().find(adjIndex.get(posIndex), patZ); - } catch (Exception ignore) { - return 0; - } - } - - @Override - protected void updateOutput() { - returnTriple.setAll(x, y, z, g); - TripleOrderConvert.swapComponentOrder(returnTriple, triples.getOrder(), TripleComponentOrder.SPO); - } - - @Override - public ResultEstimationType numResultEstimation() { - return ResultEstimationType.MORE_THAN; - } - - private void newSearch(TripleID pattern) { - this.pattern.assign(pattern); - - TripleOrderConvert.swapComponentOrder(this.pattern, TripleComponentOrder.SPO, triples.getOrder()); - patZ = this.pattern.getObject(); - if (patZ == 0 && (patY != 0 || this.pattern.getSubject() != 0)) { - throw new IllegalArgumentException("This structure is not meant to process this pattern"); - } - - patY = this.pattern.getPredicate(); - - adjY = triples.getAdjacencyListY(); - adjIndex = 
triples.getAdjacencyListIndex(); // adjIndex has the list of - // positions in adY - - findRange(); // get the boundaries where the solution for the given - // object can be found - goToStart(); // load the first solution and position the next pointers - } - - private void findRange() { - if (patZ == 0) { // if the object is not provided (usually it is in this - // iterator) - minIndex = 0; - maxIndex = adjIndex.getNumberOfElements(); - return; - } - minIndex = adjIndex.find(patZ - 1); // find the position of the first - // occurrence of the object - maxIndex = adjIndex.last(patZ - 1); // find the position of the last - // ocurrence of the object - - if (patY != 0) { // if the predicate is provided then we do a binary - // search to search for such predicate - while (minIndex <= maxIndex) { - long mid = (minIndex + maxIndex) / 2; - long predicate = getY(mid); // get predicate at mid position in - // the object index - - if (patY > predicate) { - minIndex = mid + 1; - } else if (patY < predicate) { - maxIndex = mid - 1; - } else { // the predicate has been found, now we have to find - // the min and max limits (the predicate P is - // repeated for each PO occurrence in the triples) - // Binary Search to find left boundary - long left = minIndex; - long right = mid; - long pos = 0; - - while (left <= right) { - pos = (left + right) / 2; - - predicate = getY(pos); - - if (predicate != patY) { - left = pos + 1; - } else { - right = pos - 1; - } - } - minIndex = predicate == patY ? pos : pos + 1; - - // Binary Search to find right boundary - left = mid; - right = maxIndex; - - while (left <= right) { - pos = (left + right) / 2; - predicate = getY(pos); - - if (predicate != patY) { - right = pos - 1; - } else { - left = pos + 1; - } - } - maxIndex = predicate == patY ? 
pos : pos - 1; - - break; - } - } - } - } - - @Override - public boolean hasPrevious() { - throw new NotImplementedException(); - } - - @Override - public TripleID previous() { - throw new NotImplementedException(); - } - - @Override - public void goTo(long pos) { - throw new NotImplementedException(); - } -} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZGFOQ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZGFOQ.java deleted file mode 100644 index 18b3979f..00000000 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapQuadsIteratorZGFOQ.java +++ /dev/null @@ -1,202 +0,0 @@ -package com.the_qa_company.qendpoint.core.quad.impl; - -import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; -import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; -import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; -import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; -import com.the_qa_company.qendpoint.core.triples.TripleID; -import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriples; -import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIteratorZFOQ; -import com.the_qa_company.qendpoint.core.triples.impl.TripleOrderConvert; - -public class BitmapQuadsIteratorZGFOQ extends BitmapTriplesIteratorZFOQ { - // resolves ?POG, ??OG queries - - private final Bitmap bitmapGraph; // the bitmap of the requested graph - - public BitmapQuadsIteratorZGFOQ(BitmapTriples triples, TripleID pattern) { - super(triples, pattern); - this.bitmapGraph = triples.getQuadInfoAG().get((int) pattern.getGraph() - 1); - newSearch(pattern); - } - - protected void findRange() { - findRange2(); - while (maxIndex >= minIndex && !bitmapGraph.access(getTriplePosition(maxIndex))) { - maxIndex--; - } - - while (maxIndex >= minIndex && !bitmapGraph.access(getTriplePosition(minIndex))) { - minIndex++; 
- } - } - - /* - * Check if there are more solution - */ - @Override - public boolean hasNext() { - return posIndex <= maxIndex && maxIndex >= minIndex; - } - - /* - * Get the next solution - */ - @Override - public TripleID next() { - long posY = adjIndex.get(posIndex); // get the position of the next - // occurrence of the predicate in - // AdjY - - z = patZ != 0 ? patZ : (int) adjIndex.findListIndex(posIndex) + 1; // get - // the - // next - // object - // (z) - // as - // the - // number - // of - // list - // in - // adIndex - // corresponding - // to - // posIndex - y = patY != 0 ? patY : (int) adjY.get(posY); // get the next predicate - // (y) as the element in - // adjY stores in - // position posY - x = (int) adjY.findListIndex(posY) + 1; // get the next subject (X) as - // the number of list in adjY - // corresponding to posY - - do { - posIndex++; // increase the position of the next occurrence of the - // predicate - } while (posIndex < maxIndex && !bitmapGraph.access(getNextTriplePosition())); - - updateOutput(); // set the components (subject,predicate,object) of the - // returned triple - return returnTriple; // return the triple as solution - } - - public long getTriplePosition(long index) { - try { - return triples.getAdjacencyListZ().find(adjIndex.get(index), patZ); - } catch (Exception ignore) { - return 0; - } - } - - protected void newSearch(TripleID pattern) { - this.pattern.assign(pattern); - - TripleOrderConvert.swapComponentOrder(this.pattern, TripleComponentOrder.SPO, triples.getOrder()); - patZ = this.pattern.getObject(); - if (patZ == 0 && (patY != 0 || this.pattern.getSubject() != 0)) { - throw new IllegalArgumentException("This structure is not meant to process this pattern"); - } - - patY = this.pattern.getPredicate(); - - adjY = triples.getAdjacencyListY(); - adjIndex = triples.getAdjacencyListIndex(); // adjIndex has the list of - // positions in adY - - findRange(); // get the boundaries where the solution for the given - // object 
can be found - goToStart(); // load the first solution and position the next pointers - } - - protected void findRange2() { - if (patZ == 0) { // if the object is not provided (usually it is in this - // iterator) - minIndex = 0; - maxIndex = adjIndex.getNumberOfElements(); - return; - } - minIndex = adjIndex.find(patZ - 1); // find the position of the first - // occurrence of the object - maxIndex = adjIndex.last(patZ - 1); // find the position of the last - // ocurrence of the object - - if (patY != 0) { // if the predicate is provided then we do a binary - // search to search for such predicate - while (minIndex <= maxIndex) { - long mid = (minIndex + maxIndex) / 2; - long predicate = getY(mid); // get predicate at mid position in - // the object index - if (patY > predicate) { - minIndex = mid + 1; - } else if (patY < predicate) { - maxIndex = mid - 1; - } else { // the predicate has been found, now we have to find - // the min and max limits (the predicate P is - // repeated for each PO occurrence in the triples) - // Binary Search to find left boundary - long left = minIndex; - long right = mid; - long pos = 0; - - while (left <= right) { - pos = (left + right) / 2; - - predicate = getY(pos); - - if (predicate != patY) { - left = pos + 1; - } else { - right = pos - 1; - } - } - minIndex = predicate == patY ? pos : pos + 1; - // Binary Search to find right boundary - left = mid; - right = maxIndex; - - while (left <= right) { - pos = (left + right) / 2; - predicate = getY(pos); - - if (predicate != patY) { - right = pos - 1; - } else { - left = pos + 1; - } - } - maxIndex = predicate == patY ? 
pos : pos - 1; - break; - } - } - } - } - - public long getNextTriplePosition() { - try { - return triples.getAdjacencyListZ().find(adjIndex.get(posIndex), patZ); - } catch (Exception ignore) { - return 0; - } - } - - @Override - public ResultEstimationType numResultEstimation() { - return ResultEstimationType.UP_TO; - } - - @Override - public boolean hasPrevious() { - throw new NotImplementedException(); - } - - @Override - public TripleID previous() { - throw new NotImplementedException(); - } - - @Override - public void goTo(long pos) { - throw new NotImplementedException(); - } -} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraph.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraph.java new file mode 100644 index 00000000..3447716d --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraph.java @@ -0,0 +1,112 @@ +package com.the_qa_company.qendpoint.core.quad.impl; + +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; +import com.the_qa_company.qendpoint.core.iterator.SuppliableIteratorTripleID; +import com.the_qa_company.qendpoint.core.iterator.utils.FetcherIterator; +import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapQuadTriples; + +import java.util.List; + +public class BitmapTriplesIteratorGraph extends FetcherIterator implements SuppliableIteratorTripleID { + private final BitmapQuadTriples quads; + private final IteratorTripleID tidIt; + private TripleID tid; + private long posZ; + private final long graph; + + public 
BitmapTriplesIteratorGraph(BitmapQuadTriples triples, IteratorTripleID tid, long graph) { + this.quads = triples; + this.tidIt = tid; + this.graph = graph; + } + + @Override + protected TripleID getNext() { + List quadInfoAG = quads.getQuadInfoAG(); + while (true) { + if (tid == null) { // we need to compute the next one + if (!tidIt.hasNext()) { + return null; + } + // get the last TID + tid = tidIt.next(); + tid.setGraph(graph); + posZ = tidIt.getLastTriplePosition(); + } + + if (graph != 0) { + // we are searching for a particular graph, we only need to + // check if this graph + // contains the current triple + if (quadInfoAG.get((int) graph - 1).access(posZ)) { + TripleID id = tid; + tid = null; // pass to the next one in the future case + return id; + } + // search another + continue; + } + + for (long i = tid.getGraph() + 1; i <= quadInfoAG.size(); i++) { + if (quadInfoAG.get((int) i - 1).access(posZ)) { + // found a graph containing it + tid.setGraph(i); + return tid; + } + } + tid = null; // pass to the next one + } + } + + @Override + public boolean hasPrevious() { + throw new NotImplementedException(); + } + + @Override + public TripleID previous() { + throw new NotImplementedException(); + } + + @Override + public void goToStart() { + tidIt.goToStart(); + tid = null; + } + + @Override + public boolean canGoTo() { + return tidIt.canGoTo(); + } + + @Override + public void goTo(long pos) { + tidIt.goTo(pos); + tid = null; + } + + @Override + public long estimatedNumResults() { + return tidIt.estimatedNumResults() * quads.size(); + } + + @Override + public ResultEstimationType numResultEstimation() { + return ResultEstimationType.UP_TO; + } + + @Override + public TripleComponentOrder getOrder() { + return tidIt.getOrder(); + } + + @Override + public long getLastTriplePosition() { + return posZ; + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraphG.java 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraphG.java new file mode 100644 index 00000000..8c3c7bf2 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraphG.java @@ -0,0 +1,93 @@ +package com.the_qa_company.qendpoint.core.quad.impl; + +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; +import com.the_qa_company.qendpoint.core.iterator.SuppliableIteratorTripleID; +import com.the_qa_company.qendpoint.core.iterator.utils.FetcherIterator; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapQuadTriples; + +public class BitmapTriplesIteratorGraphG extends FetcherIterator implements SuppliableIteratorTripleID { + private final long graph; + private final Bitmap bitmapW; + protected final long minZ, maxZ; + protected final TripleID qid = new TripleID(); + protected final BitmapQuadTriples triples; + protected long posZ; + + public BitmapTriplesIteratorGraphG(BitmapQuadTriples triples, TripleID pattern) { + this.triples = triples; + this.graph = pattern.getGraph(); + + bitmapW = triples.getQuadInfoAG().get((int) (graph - 1)); + + minZ = bitmapW.select1(1); + maxZ = bitmapW.select1(bitmapW.countOnes()); + + goToStart(); + } + + @Override + protected TripleID getNext() { + if (posZ == maxZ) { + return null; + } + + if (posZ == -1) { + posZ = minZ; // start + } else { + posZ = bitmapW.select1(bitmapW.rank1(posZ) + 1); // next + } + + TripleID tripleID = triples.findTriple(posZ, qid); + tripleID.setGraph(graph); + return tripleID; + } + + @Override + public boolean hasPrevious() { + throw new NotImplementedException(); + } + + @Override + public TripleID previous() { 
+ throw new NotImplementedException(); + } + + @Override + public void goToStart() { + posZ = -1; + } + + @Override + public boolean canGoTo() { + return false; + } + + @Override + public void goTo(long pos) { + throw new NotImplementedException(); + } + + @Override + public long estimatedNumResults() { + return bitmapW.rank1(maxZ) - bitmapW.rank1(minZ) + 1; + } + + @Override + public ResultEstimationType numResultEstimation() { + return ResultEstimationType.EXACT; + } + + @Override + public TripleComponentOrder getOrder() { + return triples.getOrder(); + } + + @Override + public long getLastTriplePosition() { + return posZ; + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IndexedTriple.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IndexedTriple.java index 0377c436..bf718565 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IndexedTriple.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IndexedTriple.java @@ -9,9 +9,14 @@ public class IndexedTriple { private IndexedNode subject; private IndexedNode predicate; private IndexedNode object; + private IndexedNode graph; + + public IndexedTriple(IndexedNode subject, IndexedNode predicate, IndexedNode object, IndexedNode graph) { + load(subject, predicate, object, graph); + } public IndexedTriple(IndexedNode subject, IndexedNode predicate, IndexedNode object) { - load(subject, predicate, object); + load(subject, predicate, object, null); } public IndexedNode getSubject() { @@ -26,6 +31,10 @@ public IndexedNode getObject() { return object; } + public IndexedNode getGraph() { + return graph; + } + /** * load a new s p o inside this triple * @@ -33,10 +42,11 @@ public IndexedNode getObject() { * @param predicate the predicate * @param object the object */ - public void load(IndexedNode subject, IndexedNode predicate, IndexedNode object) { + public void load(IndexedNode subject, 
IndexedNode predicate, IndexedNode object, IndexedNode graph) { this.subject = subject; this.predicate = predicate; this.object = object; + this.graph = graph; } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java index 0df5db8f..62f2ba4a 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java @@ -329,4 +329,11 @@ public int hashCode() { } return (int) (subject * 13 + predicate * 17 + object * 31); } + + /** + * @return a copy of this triple id without the graph part + */ + public TripleID copyNoGraph() { + return new TripleID(subject, predicate, object); + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleIDComparator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleIDComparator.java index 28869178..220996f4 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleIDComparator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleIDComparator.java @@ -147,7 +147,11 @@ public int compare(TripleID o1, TripleID o2) { result = LongCompare.compare(y1, y2); if (result == 0) { // The third component is different? 
- return LongCompare.compare(z1, z2); + result = LongCompare.compare(z1, z2); + if (result == 0) { + return LongCompare.compare(o1.getGraph(), o2.getGraph()); + } + return result; } else { // the second component is different return result; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleIDComparatorSPO.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleIDComparatorSPO.java index 8fc97500..dd056c81 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleIDComparatorSPO.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleIDComparatorSPO.java @@ -47,19 +47,24 @@ public int compare(TripleID o1, TripleID o2) { int result = LongCompare.compare(o1.getSubject(), o2.getSubject()); - if (result == 0) { - result = LongCompare.compare(o1.getPredicate(), o2.getPredicate()); - if (result == 0) { - // The third component is different? - return LongCompare.compare(o1.getObject(), o2.getObject()); - } else { - // the second component is different - return result; - } - } else { + if (result != 0) { // the first component is different return result; } + + result = LongCompare.compare(o1.getPredicate(), o2.getPredicate()); + + if (result != 0) { + // the second component is different + return result; + } + // The third component is different? 
+ result = LongCompare.compare(o1.getObject(), o2.getObject()); + if (result != 0) { + return result; + } + + return LongCompare.compare(o1.getGraph(), o2.getGraph()); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/Triples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/Triples.java index cb66045a..4a815085 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/Triples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/Triples.java @@ -80,5 +80,18 @@ public interface Triples extends Closeable { * @see IteratorTripleID#getLastTriplePosition() * @see IteratorTripleString#getLastTriplePosition() */ - TripleID findTriple(long position); + default TripleID findTriple(long position) { + return findTriple(position, new TripleID()); + } + + /** + * Find a triple with his position + * + * @param position The triple position + * @param buffer buffer to put the triple if an allocation is required + * @return triple + * @see IteratorTripleID#getLastTriplePosition() + * @see IteratorTripleString#getLastTriplePosition() + */ + TripleID findTriple(long position, TripleID buffer); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesFactory.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesFactory.java index 33947acf..d20f931e 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesFactory.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesFactory.java @@ -18,6 +18,7 @@ package com.the_qa_company.qendpoint.core.triples; +import com.the_qa_company.qendpoint.core.dictionary.DictionaryFactory; import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary; import com.the_qa_company.qendpoint.core.options.ControlInfo; import com.the_qa_company.qendpoint.core.options.HDTOptions; @@ -55,11 +56,8 @@ static public 
TempTriples createTempTriples(HDTOptions spec) { static public TriplesPrivate createTriples(HDTOptions spec) throws IOException { String type = spec.get("triples.format"); - boolean isQuad = spec.get(HDTOptionsKeys.DICTIONARY_TYPE_KEY, "") - .equals(HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION); - if (type == null) { - if (isQuad) { + if (DictionaryFactory.isQuadDictionary(spec.get(HDTOptionsKeys.DICTIONARY_TYPE_KEY, ""))) { return new BitmapQuadTriples(spec); } return new BitmapTriples(spec); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java index 89021271..79895f06 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java @@ -18,59 +18,50 @@ package com.the_qa_company.qendpoint.core.triples.impl; -import java.io.IOException; -import java.io.OutputStream; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; - import com.the_qa_company.qendpoint.core.compact.bitmap.AdjacencyList; import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap375Big; -import com.the_qa_company.qendpoint.core.compact.bitmap.RoaringBitmap; import com.the_qa_company.qendpoint.core.compact.bitmap.BitmapFactory; import com.the_qa_company.qendpoint.core.compact.bitmap.ModifiableBitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.MultiRoaringBitmap; +import com.the_qa_company.qendpoint.core.compact.integer.VByte; import com.the_qa_company.qendpoint.core.compact.sequence.DynamicSequence; -import com.the_qa_company.qendpoint.core.compact.sequence.Sequence; import com.the_qa_company.qendpoint.core.compact.sequence.SequenceFactory; import 
com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64Big; import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary; -import com.the_qa_company.qendpoint.core.iterator.SequentialSearchIteratorTripleID; import com.the_qa_company.qendpoint.core.iterator.SuppliableIteratorTripleID; import com.the_qa_company.qendpoint.core.listener.ProgressListener; import com.the_qa_company.qendpoint.core.options.ControlInfo; import com.the_qa_company.qendpoint.core.options.ControlInformation; import com.the_qa_company.qendpoint.core.options.HDTOptions; -import com.the_qa_company.qendpoint.core.quad.impl.BitmapQuadsIterator; -import com.the_qa_company.qendpoint.core.quad.impl.BitmapQuadsIteratorG; -import com.the_qa_company.qendpoint.core.quad.impl.BitmapQuadsIteratorYFOQ; -import com.the_qa_company.qendpoint.core.quad.impl.BitmapQuadsIteratorYGFOQ; -import com.the_qa_company.qendpoint.core.quad.impl.BitmapQuadsIteratorZFOQ; -import com.the_qa_company.qendpoint.core.quad.impl.BitmapQuadsIteratorZGFOQ; +import com.the_qa_company.qendpoint.core.quad.impl.BitmapTriplesIteratorGraph; +import com.the_qa_company.qendpoint.core.quad.impl.BitmapTriplesIteratorGraphG; import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; import com.the_qa_company.qendpoint.core.triples.TempTriples; import com.the_qa_company.qendpoint.core.triples.TripleID; import com.the_qa_company.qendpoint.core.util.BitUtil; +import com.the_qa_company.qendpoint.core.util.io.Closer; import com.the_qa_company.qendpoint.core.util.io.CountInputStream; import com.the_qa_company.qendpoint.core.util.listener.IntermediateListener; import com.the_qa_company.qendpoint.core.util.listener.ListenerUtil; -import com.github.andrewoma.dexx.collection.Pair; - +import java.io.Closeable; import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import 
java.io.OutputStream; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; /** * @author mario.arias */ public class BitmapQuadTriples extends BitmapTriples { - protected List quadInfoAG = new ArrayList<>(); - - private static ModifiableBitmap createQuadBitmap() { - return new RoaringBitmap(); - } + protected final List quadInfoAG = new ArrayList<>(); public BitmapQuadTriples() throws IOException { super(); @@ -80,11 +71,6 @@ public BitmapQuadTriples(HDTOptions spec) throws IOException { super(spec); } - public BitmapQuadTriples(HDTOptions spec, Sequence seqY, Sequence seqZ, Bitmap bitY, Bitmap bitZ, - TripleComponentOrder order) throws IOException { - super(spec, seqY, seqZ, bitY, bitZ, order); - } - /* * (non-Javadoc) * @see hdt.triples.Triples#getType() @@ -111,8 +97,6 @@ public void load(IteratorTripleID it, ProgressListener listener) { long tripleIndex = -1; - List> triplesInGraph = new ArrayList<>(); - while (it.hasNext()) { TripleID triple = it.next(); TripleOrderConvert.swapComponentOrder(triple, TripleComponentOrder.SPO, order); @@ -124,13 +108,20 @@ public void load(IteratorTripleID it, ProgressListener listener) { if (x == 0 || y == 0 || z == 0 || g == 0) { throw new IllegalFormatException("None of the components of a quad can be null"); } - numGraphs = Math.max(numGraphs, g); + if (g > numGraphs) { + for (long i = numGraphs; i < g; i++) { + quadInfoAG.add(MultiRoaringBitmap.memory(number)); + } + numGraphs = g; + } long graphIndex = g - 1; boolean sameAsLast = x == lastX && y == lastY && z == lastZ; if (!sameAsLast) { tripleIndex += 1; } - triplesInGraph.add(new Pair<>(tripleIndex, graphIndex)); + + quadInfoAG.get((int) graphIndex).set(tripleIndex, true); + if (sameAsLast) { continue; } @@ -178,15 +169,6 @@ public void load(IteratorTripleID it, ProgressListener listener) { numTriples++; } - for (int i = 0; i < numGraphs; i++) { - quadInfoAG.add(createQuadBitmap()); - } - for (Pair tripleInGraph : triplesInGraph) { - long 
iTriple = tripleInGraph.component1(); - long iGraph = tripleInGraph.component2(); - quadInfoAG.get((int) iGraph).set(iTriple, true); - } - if (numTriples > 0) { bitY.append(true); bitZ.append(true); @@ -212,11 +194,6 @@ public void load(TempTriples triples, ProgressListener listener) { super.load(triples, listener); } - @Override - public long getNumberOfElements() { - return super.getNumberOfElements(); - } - /* * (non-Javadoc) * @see hdt.triples.Triples#size() @@ -225,7 +202,7 @@ public long getNumberOfElements() { public long size() { if (isClosed) return 0; - long graphs = quadInfoAG.stream().map(b -> b.getSizeBytes()).reduce(0L, (a, b) -> a + b); + long graphs = quadInfoAG.stream().mapToLong(Bitmap::getSizeBytes).sum(); return seqY.size() + seqZ.size() + bitmapY.getSizeBytes() + bitmapZ.getSizeBytes() + graphs; } @@ -242,9 +219,7 @@ public void save(OutputStream output, ControlInfo ci, ProgressListener listener) bitmapZ.save(output, iListener); seqY.save(output, iListener); seqZ.save(output, iListener); - ByteBuffer numGraphs = ByteBuffer.allocate(Integer.BYTES); - numGraphs.putInt(quadInfoAG.size()); - output.write(numGraphs.array()); + VByte.encode(output, quadInfoAG.size()); for (ModifiableBitmap b : quadInfoAG) { b.save(output, iListener); } @@ -264,29 +239,12 @@ public SuppliableIteratorTripleID search(TripleID pattern) { TripleOrderConvert.swapComponentOrder(reorderedPat, TripleComponentOrder.SPO, order); String patternString = reorderedPat.getPatternString(); - if (patternString.equals("?P??")) - return new BitmapQuadsIteratorYFOQ(this, pattern); - - if (patternString.equals("?P?G")) - return new BitmapQuadsIteratorYGFOQ(this, pattern); - - if (patternString.equals("?PO?") || patternString.equals("??O?")) - return new BitmapQuadsIteratorZFOQ(this, pattern); - - if (patternString.equals("?POG") || patternString.equals("??OG")) - return new BitmapQuadsIteratorZGFOQ(this, pattern); - - SuppliableIteratorTripleID bitIt; - if (patternString.endsWith("G")) - 
bitIt = new BitmapQuadsIteratorG(this, pattern); - else - bitIt = new BitmapQuadsIterator(this, pattern); - if (patternString.equals("????") || patternString.equals("???G") || patternString.equals("S???") - || patternString.equals("S??G") || patternString.equals("SP??") || patternString.equals("SP?G") - || patternString.equals("SPO?") || patternString.equals("SPOG")) { - return bitIt; + if (hasFOQIndex() && patternString.equals("???G")) { + return new BitmapTriplesIteratorGraphG(this, pattern); } - return new SequentialSearchIteratorTripleID(pattern, bitIt); + + return new BitmapTriplesIteratorGraph(this, super.search(pattern.copyNoGraph()), + pattern.isQuad() ? pattern.getGraph() : 0); } @Override @@ -318,15 +276,61 @@ public void mapFromFile(CountInputStream input, File f, ProgressListener listene adjY = new AdjacencyList(seqY, bitmapY); adjZ = new AdjacencyList(seqZ, bitmapZ); - quadInfoAG = new ArrayList<>(); + Closer.closeSingle(quadInfoAG); + quadInfoAG.clear(); + + long numGraphs = VByte.decode(input); + + Path fPath = f.toPath(); + for (long i = 0; i < numGraphs; i++) { + // map the multi roaring bitmap and skip the bytes + long base = input.getTotalBytes(); + MultiRoaringBitmap mapped = MultiRoaringBitmap.mapped(fPath, base); + input.skipNBytes(mapped.getSizeBytes()); + quadInfoAG.add(mapped); + } + + isClosed = false; + } + + @Override + public void load(InputStream input, ControlInfo ci, ProgressListener listener) throws IOException { + + if (ci.getType() != ControlInfo.Type.TRIPLES) { + throw new IllegalFormatException("Trying to read a triples section, but was not triples."); + } + + if (!ci.getFormat().equals(getType())) { + throw new IllegalFormatException( + "Trying to read BitmapTriples, but the data does not seem to be BitmapTriples"); + } + + order = TripleComponentOrder.values()[(int) ci.getInt("order")]; + + IntermediateListener iListener = new IntermediateListener(listener); + + bitmapY = BitmapFactory.createBitmap(input); + 
bitmapY.load(input, iListener); + + bitmapZ = BitmapFactory.createBitmap(input); + bitmapZ.load(input, iListener); + + seqY = SequenceFactory.createStream(input); + seqY.load(input, iListener); + + seqZ = SequenceFactory.createStream(input); + seqZ.load(input, iListener); + + adjY = new AdjacencyList(seqY, bitmapY); + adjZ = new AdjacencyList(seqZ, bitmapZ); + + Closer.closeSingle(quadInfoAG); + quadInfoAG.clear(); + + long numGraphs = VByte.decode(input); - ByteBuffer numGraphsB = ByteBuffer.allocate(Integer.BYTES); - input.read(numGraphsB.array()); - int numGraphs = numGraphsB.getInt(); - for (int i = 0; i < numGraphs; i++) { - ModifiableBitmap b = createQuadBitmap(); - b.load(input, iListener); - quadInfoAG.add(b); + for (long i = 0; i < numGraphs; i++) { + quadInfoAG.add(MultiRoaringBitmap.load(input)); } isClosed = false; @@ -338,12 +342,8 @@ public List getQuadInfoAG() { return quadInfoAG; } - // Slower but safer - // @Override - // public List getQuadInfoAG() { - // return quadInfoAG - // .stream() - // .map(b -> (Bitmap) b) - // .collect(java.util.stream.Collectors.toList()); - // } + @Override + public void close() throws IOException { + Closer.closeAll((Closeable) super::close, quadInfoAG); + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java index 5805cb20..f90e8bb9 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java @@ -310,7 +310,7 @@ public SuppliableIteratorTripleID search(TripleID pattern) { } } - if (indexZ != null && bitmapIndexZ != null) { + if (hasFOQIndex()) { // USE FOQ if (patternString.equals("?PO") || patternString.equals("??O")) { return new BitmapTriplesIteratorZFOQ(this, pattern); @@ -1007,10 +1007,11 @@ public String getType() 
{ } @Override - public TripleID findTriple(long position) { + public TripleID findTriple(long position, TripleID tripleID) { if (position == 0) { // remove this special case so we can use position-1 - return new TripleID(1, seqY.get(0), seqZ.get(0)); + tripleID.setAll(1, seqY.get(0), seqZ.get(0)); + return tripleID; } // get the object at the given position long z = seqZ.get(position); @@ -1021,7 +1022,8 @@ public TripleID findTriple(long position) { if (posY == 0) { // remove this case to do posY - 1 - return new TripleID(1, y, z); + tripleID.setAll(1, y, z); + return tripleID; } // -1 so we don't count end of tree @@ -1029,7 +1031,8 @@ public TripleID findTriple(long position) { long x = posX + 1; // the subject ID is the position + 1, IDs start from // 1 not zero - return new TripleID(x, y, z); + tripleID.setAll(x, y, z); + return tripleID; } /* @@ -1219,6 +1222,10 @@ public void close() throws IOException { } } + public boolean hasFOQIndex() { + return indexZ != null && bitmapIndexZ != null; + } + @Override public TripleComponentOrder getOrder() { return this.order; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java index b07c64de..ab2d1d0c 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java @@ -179,7 +179,7 @@ public String getType() { } @Override - public TripleID findTriple(long position) { + public TripleID findTriple(long position, TripleID buffer) { throw new NotImplementedException(); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java index 445999ad..d1a2b614 100644 --- 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java @@ -346,6 +346,11 @@ public String getType() { return HDTVocabulary.TRIPLES_TYPE_TRIPLESLIST; } + @Override + public TripleID findTriple(long position, TripleID buffer) { + return arrayOfTriples.get((int) position).asTripleID(); + } + @Override public TripleID findTriple(long position) { return arrayOfTriples.get((int) position).asTripleID(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java index e39ae31d..0bab30fb 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java @@ -339,6 +339,11 @@ public String getType() { return HDTVocabulary.TRIPLES_TYPE_TRIPLESLIST; } + @Override + public TripleID findTriple(long position, TripleID buffer) { + return arrayOfTriples.get((int) position); + } + @Override public TripleID findTriple(long position) { return arrayOfTriples.get((int) position); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java index 4aa79a58..778cb535 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java @@ -1,5 +1,7 @@ package com.the_qa_company.qendpoint.core.triples.impl; +import com.the_qa_company.qendpoint.core.compact.bitmap.MultiRoaringBitmap; +import com.the_qa_company.qendpoint.core.compact.integer.VByte; import 
com.the_qa_company.qendpoint.core.dictionary.Dictionary; import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; @@ -19,8 +21,8 @@ import com.the_qa_company.qendpoint.core.compact.bitmap.AppendableWriteBitmap; import com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64BigDisk; import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath; +import com.the_qa_company.qendpoint.core.util.io.Closer; import com.the_qa_company.qendpoint.core.util.io.CountInputStream; -import com.the_qa_company.qendpoint.core.util.io.IOUtil; import com.the_qa_company.qendpoint.core.util.listener.IntermediateListener; import com.the_qa_company.qendpoint.core.util.listener.ListenerUtil; @@ -28,6 +30,9 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; /** * Appendable write {@link BitmapTriples} version @@ -40,8 +45,14 @@ public class WriteBitmapTriples implements TriplesPrivate { private final AppendableWriteBitmap bitY, bitZ; private final CloseSuppressPath seqY, seqZ, triples; private SequenceLog64BigDisk vectorY, vectorZ; + private final List quadInfoAG; public WriteBitmapTriples(HDTOptions spec, CloseSuppressPath triples, int bufferSize) throws IOException { + this(spec, triples, bufferSize, false); + } + + public WriteBitmapTriples(HDTOptions spec, CloseSuppressPath triples, int bufferSize, boolean quads) + throws IOException { String orderStr = spec.get(HDTOptionsKeys.TRIPLE_ORDER_KEY); if (orderStr == null) { this.order = TripleComponentOrder.SPO; @@ -55,6 +66,12 @@ public WriteBitmapTriples(HDTOptions spec, CloseSuppressPath triples, int buffer bitZ = new AppendableWriteBitmap(triples.resolve("bitmapZ"), bufferSize); seqY = triples.resolve("seqY"); seqZ = triples.resolve("seqZ"); + + if (quads) { + quadInfoAG = new ArrayList<>(); + } else { + quadInfoAG = null; 
+ } } @Override @@ -70,6 +87,22 @@ public void save(OutputStream output, ControlInfo ci, ProgressListener listener) bitZ.save(output, iListener); vectorY.save(output, iListener); vectorZ.save(output, iListener); + + if (quadInfoAG != null) { + // quads + int numGraphs = quadInfoAG.size(); + VByte.encode(output, numGraphs); + + try { + Closer.closeAll(quadInfoAG); + } finally { + quadInfoAG.clear(); + } + + for (int i = 0; i < numGraphs; i++) { + Files.copy(this.triples.resolve("g-" + i + ".bin"), output); + } + } } @Override @@ -115,11 +148,11 @@ public void populateHeader(Header header, String rootNode) { @Override public String getType() { - return HDTVocabulary.TRIPLES_TYPE_BITMAP; + return quadInfoAG != null ? HDTVocabulary.TRIPLES_TYPE_BITMAP_QUAD : HDTVocabulary.TRIPLES_TYPE_BITMAP; } @Override - public TripleID findTriple(long position) { + public TripleID findTriple(long position, TripleID tripleID) { throw new NotImplementedException(); } @@ -153,13 +186,6 @@ public void saveIndex(OutputStream output, ControlInfo ci, ProgressListener list throw new NotImplementedException(); } - public BitmapTriplesAppender createAppender(long numElements, ProgressListener listener) { - vectorY = new SequenceLog64BigDisk(seqY.toAbsolutePath().toString(), BitUtil.log2(numElements)); - vectorZ = new SequenceLog64BigDisk(seqZ.toAbsolutePath().toString(), BitUtil.log2(numElements)); - numTriples = 0; - return new BitmapTriplesAppender(numElements, listener); - } - @Override public void load(TempTriples triples, ProgressListener listener) { triples.setOrder(order); @@ -173,8 +199,9 @@ public void load(TempTriples triples, ProgressListener listener) { vectorZ = new SequenceLog64BigDisk(seqZ.toAbsolutePath().toString(), BitUtil.log2(number)); long lastX = 0, lastY = 0, lastZ = 0; - long x, y, z; + long x, y, z, g; numTriples = 0; + long numGraphs = 0; while (it.hasNext()) { TripleID triple = it.next(); @@ -183,11 +210,39 @@ public void load(TempTriples triples, ProgressListener 
listener) { x = triple.getSubject(); y = triple.getPredicate(); z = triple.getObject(); - if (x == 0 || y == 0 || z == 0) { + g = triple.isQuad() ? triple.getGraph() : -1; + if (x == 0 || y == 0 || z == 0 || g == 0) { throw new IllegalFormatException("None of the components of a triple can be null"); } - if (numTriples == 0) { + if (quadInfoAG != null) { + if (g > numGraphs) { + for (long i = numGraphs; i < g; i++) { + try { + quadInfoAG.add( + MultiRoaringBitmap.memoryStream(number, this.triples.resolve("g-" + i + ".bin"))); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + numGraphs = g; + } + long graphIndex = g - 1; + boolean sameAsLast = x == lastX && y == lastY && z == lastZ; + if (!sameAsLast) { + numTriples += 1; + } + + quadInfoAG.get((int) graphIndex).set(numTriples - 1, true); + + if (sameAsLast) { + continue; + } + } else { + numTriples++; + } + + if (numTriples == 1) { // First triple vectorY.append(y); vectorZ.append(z); @@ -230,7 +285,6 @@ public void load(TempTriples triples, ProgressListener listener) { lastZ = z; ListenerUtil.notifyCond(listener, "Converting to BitmapTriples", numTriples, numTriples, number); - numTriples++; } if (numTriples > 0) { @@ -249,7 +303,7 @@ public TripleComponentOrder getOrder() { @Override public void close() throws IOException { - IOUtil.closeAll(bitY, bitZ, vectorY, seqY, vectorZ, seqZ, triples); + Closer.closeAll(bitY, bitZ, vectorY, seqY, vectorZ, seqZ, triples, quadInfoAG); } public class BitmapTriplesAppender { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/CloseMappedByteBuffer.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/CloseMappedByteBuffer.java index f95f483a..00a846cb 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/CloseMappedByteBuffer.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/CloseMappedByteBuffer.java @@ -8,6 +8,10 @@ import 
java.util.concurrent.atomic.AtomicLong; public class CloseMappedByteBuffer implements Closeable { + public static CloseMappedByteBuffer wrap(ByteBuffer buffer) { + return new CloseMappedByteBuffer(buffer); + } + private static final AtomicLong ID_GEN = new AtomicLong(); private static final Map MAP_TEST_MAP = new HashMap<>(); private static boolean mapTest = false; @@ -33,6 +37,11 @@ static void crashMapTest() { private final ByteBuffer buffer; private final boolean duplicated; + CloseMappedByteBuffer(ByteBuffer buffer) { + this.buffer = buffer; + this.duplicated = true; + } + CloseMappedByteBuffer(String filename, ByteBuffer buffer, boolean duplicated) { this.duplicated = duplicated; this.buffer = buffer; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/Closer.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/Closer.java index 6e5ed05e..1a459150 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/Closer.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/Closer.java @@ -16,8 +16,8 @@ /** * Class to close many {@link java.io.Closeable} objects at once without having * to do a large try-finally tree, handle {@link Closeable}, {@link Iterable}, - * array, record, {@link Map}, the {@link Throwable} are also rethrown, it can - * be useful to close and throw at the same time. + * array, record, {@link Map}, {@link Stream}, the {@link Throwable} are also + * rethrown, it can be useful to close and throw at the same time. *

* It's using a deep search over the elements. * @@ -130,6 +130,12 @@ private Stream explore(Object obj) { throw new HighValueException(new IOException(t)); }); } + + // a stream + if (obj instanceof Stream ss) { + return ss.flatMap(this::explore); + } + // nothing known return Stream.of(); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java index 4c112165..1f7ddb37 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java @@ -46,6 +46,7 @@ import java.net.URL; import java.net.URLConnection; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.channels.FileChannel; import java.nio.file.FileVisitResult; import java.nio.file.FileVisitor; @@ -188,6 +189,13 @@ public static CloseMappedByteBuffer mapChannel(String filename, FileChannel ch, return new CloseMappedByteBuffer(filename, ch.map(mode, position, size), false); } + public static long readLong(long location, FileChannel channel, ByteOrder order) throws IOException { + try (CloseMappedByteBuffer buffer = new CloseMappedByteBuffer("readLong", + channel.map(FileChannel.MapMode.READ_ONLY, location, 8), false)) { + return buffer.order(order).getLong(0); + } + } + /** * create a large array filled with 0 * diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/CompressTripleReader.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/CompressTripleReader.java index 0746cfee..af611814 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/CompressTripleReader.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/CompressTripleReader.java @@ -20,9 +20,12 @@ public class CompressTripleReader implements 
ExceptionIterator { private final Iterator it; private final ReplazableString prev = new ReplazableString(); + private boolean prevRead = false; private IndexedNode next; private long id; private final DuplicatedNodeConsumer duplicatedNodeConsumer; @@ -194,10 +196,14 @@ public boolean hasNext() { int cmp = prev.compareTo(next); assert cmp <= 0 : "bad order : " + prev + " > " + next; if (cmp == 0) { - // same as previous, ignore - assert this.id != node.getIndex() : "same index and prevIndex"; - duplicatedNodeConsumer.onDuplicated(this.id, node.getIndex(), lastHeader); - continue; + if (!prev.isEmpty() || prevRead) { + // same as previous, ignore + assert this.id != node.getIndex() : "same index and prevIndex"; + duplicatedNodeConsumer.onDuplicated(this.id, node.getIndex(), lastHeader); + continue; + } else { + prevRead = true; + } } this.next = node; prev.replace(next); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/MapCompressTripleMerger.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/MapCompressTripleMerger.java index 695cf008..6b50f892 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/MapCompressTripleMerger.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/MapCompressTripleMerger.java @@ -19,10 +19,10 @@ import com.the_qa_company.qendpoint.core.util.io.IOUtil; import com.the_qa_company.qendpoint.core.util.listener.IntermediateListener; -import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Supplier; @@ -133,18 +133,11 @@ public void close() throws IOException { */ public TripleCompressionResult merge(int workers, String mode) throws KWayMerger.KWayMergerException, InterruptedException, IOException { - if (mode == null) 
{ - mode = ""; - } - switch (mode) { - case "": - case CompressionResult.COMPRESSION_MODE_COMPLETE: - return mergeToFile(workers); - case CompressionResult.COMPRESSION_MODE_PARTIAL: - return mergeToPartial(); - default: - throw new IllegalArgumentException("Unknown compression mode: " + mode); - } + return switch (Objects.requireNonNullElse(mode, "")) { + case "", CompressionResult.COMPRESSION_MODE_COMPLETE -> mergeToFile(workers); + case CompressionResult.COMPRESSION_MODE_PARTIAL -> mergeToPartial(); + default -> throw new IllegalArgumentException("Unknown compression mode: " + mode); + }; } @Override @@ -154,9 +147,18 @@ public void createChunk(SizeFetcher flux, CloseSuppressPath output) ParallelSortableArrayList tripleIDS = buffer.triples; listener.notifyProgress(10, "reading triples part2 " + triplesCount); TripleID next; + boolean quad = mapper.supportsGraph(); while ((next = flux.get()) != null) { - TripleID mappedTriple = new TripleID(mapper.extractSubject(next.getSubject()), - mapper.extractPredicate(next.getPredicate()), mapper.extractObjects(next.getObject())); + TripleID mappedTriple; + + if (quad) { + mappedTriple = new TripleID(mapper.extractSubject(next.getSubject()), + mapper.extractPredicate(next.getPredicate()), mapper.extractObjects(next.getObject()), + mapper.extractGraph(next.getGraph())); + } else { + mappedTriple = new TripleID(mapper.extractSubject(next.getSubject()), + mapper.extractPredicate(next.getPredicate()), mapper.extractObjects(next.getObject())); + } assert mappedTriple.isValid(); tripleIDS.add(mappedTriple); long count = triplesCount.incrementAndGet(); @@ -174,9 +176,9 @@ public void createChunk(SizeFetcher flux, CloseSuppressPath output) IntermediateListener il = new IntermediateListener(listener); il.setRange(70, 100); il.setPrefix("writing triples " + output.getFileName() + " "); - try (CompressTripleWriter w = new CompressTripleWriter(output.openOutputStream(bufferSize))) { + try (CompressTripleWriter w = new 
CompressTripleWriter(output.openOutputStream(bufferSize), quad)) { il.notifyProgress(0, "creating file"); - TripleID prev = new TripleID(-1, -1, -1); + TripleID prev = quad ? new TripleID(-1, -1, -1, -1) : new TripleID(-1, -1, -1); for (TripleID triple : tripleIDS) { count++; if (count % block == 0) { @@ -185,7 +187,11 @@ public void createChunk(SizeFetcher flux, CloseSuppressPath output) if (prev.match(triple)) { continue; } - prev.setAll(triple.getSubject(), triple.getPredicate(), triple.getObject()); + if (quad) { + prev.setAll(triple.getSubject(), triple.getPredicate(), triple.getObject(), triple.getGraph()); + } else { + prev.setAll(triple.getSubject(), triple.getPredicate(), triple.getObject()); + } w.appendTriple(triple); } listener.notifyProgress(100, "writing completed " + triplesCount + " " + output.getFileName()); @@ -206,7 +212,8 @@ public void mergeChunks(List inputs, CloseSuppressPath output readers[i] = new CompressTripleReader(inputs.get(i).openInputStream(bufferSize)); } - try (CompressTripleWriter w = new CompressTripleWriter(output.openOutputStream(bufferSize))) { + try (CompressTripleWriter w = new CompressTripleWriter(output.openOutputStream(bufferSize), + mapper.supportsGraph())) { ExceptionIterator it = CompressTripleMergeIterator.buildOfTree(readers, order); while (it.hasNext()) { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/NoDuplicateTripleIDIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/NoDuplicateTripleIDIterator.java index c207ac18..50c7806f 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/NoDuplicateTripleIDIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/NoDuplicateTripleIDIterator.java @@ -13,7 +13,7 @@ */ public class NoDuplicateTripleIDIterator implements IteratorTripleID { private TripleID next; - private final TripleID prev = new 
TripleID(-1, -1, -1); + private final TripleID prev = new TripleID(-1, -1, -1, -1); private final IteratorTripleID it; public NoDuplicateTripleIDIterator(IteratorTripleID it) { @@ -29,10 +29,10 @@ public boolean hasNext() { TripleID next = it.next(); - if (next.match(prev)) { + if (next.equals(prev)) { continue; } - prev.setAll(next.getSubject(), next.getPredicate(), next.getObject()); + prev.setAll(next.getSubject(), next.getPredicate(), next.getObject(), next.getGraph()); this.next = next; } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/TripleGenerator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/TripleGenerator.java index 43a81797..718c54cf 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/TripleGenerator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/TripleGenerator.java @@ -7,22 +7,37 @@ /** * Utility class to generate triples */ -public class TripleGenerator implements Iterator { - private final long triples; - private long current = 1; +public class TripleGenerator { + public static Iterator of(long triples, boolean quads) { + if (quads) { + return new Iterator<>() { + private long current = 1; - public TripleGenerator(long triples) { - this.triples = triples; - } + @Override + public boolean hasNext() { + return current <= triples; + } - @Override - public boolean hasNext() { - return current <= triples; - } + @Override + public TripleID next() { + long c = current++; + return new TripleID(c, c, c, c); + } + }; + } + return new Iterator<>() { + private long current = 1; + + @Override + public boolean hasNext() { + return current <= triples; + } - @Override - public TripleID next() { - long c = current++; - return new TripleID(c, c, c); + @Override + public TripleID next() { + long c = current++; + return new TripleID(c, c, c); + } + }; } } diff --git 
a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmapTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmapTest.java new file mode 100644 index 00000000..9d32c523 --- /dev/null +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmapTest.java @@ -0,0 +1,184 @@ +package com.the_qa_company.qendpoint.core.compact.bitmap; + +import com.the_qa_company.qendpoint.core.listener.ProgressListener; +import com.the_qa_company.qendpoint.core.util.io.AbstractMapMemoryTest; +import org.apache.commons.io.file.PathUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Random; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class MultiRoaringBitmapTest extends AbstractMapMemoryTest { + @Rule + public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build(); + + int oldDefaultChunk; + + @Before + public void saveChunkSize() { + oldDefaultChunk = MultiRoaringBitmap.defaultChunkSize; + } + + @After + public void resetChunkSize() { + MultiRoaringBitmap.defaultChunkSize = oldDefaultChunk; + } + + @Test + public void serialSyncTest() throws IOException { + Path root = tempDir.newFolder().toPath(); + + try { + Path output = root.resolve("tmp.bin"); + MultiRoaringBitmap.defaultChunkSize = 9; + try (MultiRoaringBitmap map = MultiRoaringBitmap.memoryStream(100, output)) { + assertEquals(9, map.chunkSize); + assertEquals(12, map.maps.size()); + map.set(0, true); + map.set(42, true); + map.set(80, true); + map.set(90, true); + } + + try (MultiRoaringBitmap map = 
MultiRoaringBitmap.mapped(output)) { + for (int i = 0; i < 100; i++) { + switch (i) { + case 0, 42, 80, 90 -> assertTrue(map.access(i)); + default -> assertFalse(map.access(i)); + } + } + } + + try (BufferedInputStream stream = new BufferedInputStream(Files.newInputStream(output)); + MultiRoaringBitmap map = MultiRoaringBitmap.load(stream)) { + for (int i = 0; i < 100; i++) { + switch (i) { + case 0, 42, 80, 90 -> assertTrue(map.access(i)); + default -> assertFalse(map.access(i)); + } + } + } + + } finally { + PathUtils.deleteDirectory(root); + } + } + + @Test + public void largeSerialSyncTest() throws IOException { + final int seed = 684; + final int size = 10_000; + + Random rnd = new Random(seed); + Path root = tempDir.newFolder().toPath(); + + try { + Path output = root.resolve("tmp.bin"); + + MultiRoaringBitmap.defaultChunkSize = size / 9; + + try (MultiRoaringBitmap map = MultiRoaringBitmap.memory(size)) { + assertEquals(MultiRoaringBitmap.defaultChunkSize, map.chunkSize); + assertEquals((size - 1) / map.chunkSize + 1, map.maps.size()); + + for (int i = 0; i < size / 50; i++) { + int position = rnd.nextInt(size); + map.set(position, true); + } + + try (BufferedOutputStream out = new BufferedOutputStream(Files.newOutputStream(output))) { + map.save(out, ProgressListener.ignore()); + } + } + + rnd = new Random(seed); + + try (MultiRoaringBitmap map = MultiRoaringBitmap.mapped(output)) { + for (int i = 0; i < size / 50; i++) { + assertTrue(map.access(rnd.nextInt(size))); + } + } + + rnd = new Random(seed); + + try (BufferedInputStream stream = new BufferedInputStream(Files.newInputStream(output)); + MultiRoaringBitmap map = MultiRoaringBitmap.load(stream)) { + for (int i = 0; i < size / 50; i++) { + assertTrue(map.access(rnd.nextInt(size))); + } + } + + } finally { + PathUtils.deleteDirectory(root); + } + } + + @Test + public void rankSelectTest() throws IOException { + final int seed = 684; + final int size = 10_000; + + Random rnd = new Random(seed); + 
MultiRoaringBitmap.defaultChunkSize = size / 9; + + try (MultiRoaringBitmap map = MultiRoaringBitmap.memory(size); + Bitmap375Big memmap = Bitmap375Big.memory(size)) { + assertEquals(MultiRoaringBitmap.defaultChunkSize, map.chunkSize); + assertEquals((size - 1) / map.chunkSize + 1, map.maps.size()); + + for (int i = 0; i < size / 50; i++) { + int position = rnd.nextInt(size); + map.set(position, true); + memmap.set(position, true); + } + + memmap.updateIndex(); + + long numBits = memmap.countOnes(); + + assertEquals("countOnes", numBits, map.countOnes()); + + for (int i = 0; i < size; i++) { + assertEquals("access#" + i + "/" + size, memmap.access(i), map.access(i)); + } + + for (int i = 0; i < size; i++) { + assertEquals("rank1#" + i + "/" + size, memmap.rank1(i), map.rank1(i)); + } + for (int i = 0; i < size; i++) { + assertEquals("rank0#" + i + "/" + size, memmap.rank0(i), map.rank0(i)); + } + for (int i = 0; i < numBits; i++) { + long n = i; + long j = -1; + while (n > 0) { + if (memmap.access(++j)) { + n--; + } + } + assertEquals(j, memmap.select1(i)); + assertEquals("select1#" + i + "/" + numBits, memmap.select1(i), map.select1(i)); + } + + for (int i = 0; i < numBits; i++) { + assertEquals("selectNext1#" + i + "/" + numBits, memmap.selectNext1(i), map.selectNext1(i)); + } + + for (int i = 0; i < numBits; i++) { + assertEquals("selectPrev1#" + i + "/" + numBits, memmap.selectPrev1(i), map.selectPrev1(i)); + } + } + } +} diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/dictionary/impl/CompressFourSectionDictionaryTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/dictionary/impl/CompressFourSectionDictionaryTest.java index bc71c3f5..b146265d 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/dictionary/impl/CompressFourSectionDictionaryTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/dictionary/impl/CompressFourSectionDictionaryTest.java @@ -28,7 +28,7 @@ 
public void compressDictTest() throws Exception { List exceptedObjects = Arrays.asList("1111", "3333", "6666", "8888"); List exceptedShared = Arrays.asList("4444", "7777"); CompressFourSectionDictionary dictionary = new CompressFourSectionDictionary(result, new FakeNodeConsumer(), - (p, m) -> {}, true); + (p, m) -> {}, true, false); Iterator su = dictionary.getSubjects().getSortedEntries(); Iterator pr = dictionary.getPredicates().getSortedEntries(); Iterator ob = dictionary.getObjects().getSortedEntries(); @@ -73,22 +73,33 @@ static class TestCompressionResult implements CompressionResult { private final CharSequence[] subjects; private final CharSequence[] predicates; private final CharSequence[] objects; + private final CharSequence[] graph; // used to create fake id to avoid duplicate assert error - private int sid, pid, oid; + private int sid, pid, oid, gid; private final long size; public TestCompressionResult(CharSequence[] subjects, CharSequence[] predicates, CharSequence[] objects) { + this(subjects, predicates, objects, null); + } + + public TestCompressionResult(CharSequence[] subjects, CharSequence[] predicates, CharSequence[] objects, + CharSequence[] graph) { this.subjects = subjects; this.predicates = predicates; this.objects = objects; + this.graph = graph; size = Arrays.stream(subjects).mapToLong(s -> s.toString().getBytes(ByteStringUtil.STRING_ENCODING).length) .sum() + Arrays.stream(predicates) .mapToLong(s -> s.toString().getBytes(ByteStringUtil.STRING_ENCODING).length).sum() + Arrays.stream(objects) - .mapToLong(s -> s.toString().getBytes(ByteStringUtil.STRING_ENCODING).length).sum(); + .mapToLong(s -> s.toString().getBytes(ByteStringUtil.STRING_ENCODING).length).sum() + + (graph == null ? 
0 + : Arrays.stream(graph) + .mapToLong(s -> s.toString().getBytes(ByteStringUtil.STRING_ENCODING).length) + .sum()); } @Override @@ -96,6 +107,11 @@ public long getTripleCount() { return Math.max(subjects.length, Math.max(predicates.length, objects.length)); } + @Override + public boolean supportsGraph() { + return graph != null; + } + @Override public ExceptionIterator getSubjects() { return ExceptionIterator.of(new MapIterator<>(Arrays.asList(subjects).iterator(), @@ -114,6 +130,12 @@ public ExceptionIterator getObjects() { s -> new IndexedNode(ByteString.of(s), oid++))); } + @Override + public ExceptionIterator getGraph() { + return ExceptionIterator.of( + new MapIterator<>(Arrays.asList(graph).iterator(), g -> new IndexedNode(ByteString.of(g), gid++))); + } + @Override public long getSubjectsCount() { return subjects.length; @@ -129,6 +151,11 @@ public long getObjectsCount() { return objects.length; } + @Override + public long getGraphCount() { + return graph.length; + } + @Override public long getSharedCount() { return Math.min(subjects.length, objects.length); @@ -160,5 +187,9 @@ public void onPredicate(long preMapId, long newMapId) { @Override public void onObject(long preMapId, long newMapId) { } + + @Override + public void onGraph(long preMapId, long newMapId) { + } } } diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatMergerTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatMergerTest.java index be87e0f8..50f5811f 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatMergerTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatMergerTest.java @@ -1,33 +1,41 @@ package com.the_qa_company.qendpoint.core.dictionary.impl.kcat; +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.BitmapFactory; 
+import com.the_qa_company.qendpoint.core.compact.bitmap.GraphDeleteBitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.ModifiableBitmap; import com.the_qa_company.qendpoint.core.dictionary.Dictionary; import com.the_qa_company.qendpoint.core.dictionary.DictionaryPrivate; import com.the_qa_company.qendpoint.core.dictionary.DictionarySection; import com.the_qa_company.qendpoint.core.dictionary.DictionarySectionPrivate; +import com.the_qa_company.qendpoint.core.dictionary.impl.section.PFCDictionarySection; import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; import com.the_qa_company.qendpoint.core.exceptions.NotFoundException; import com.the_qa_company.qendpoint.core.exceptions.ParserException; import com.the_qa_company.qendpoint.core.hdt.HDT; import com.the_qa_company.qendpoint.core.hdt.HDTManager; +import com.the_qa_company.qendpoint.core.hdt.HDTManagerTest; +import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; import com.the_qa_company.qendpoint.core.options.HDTOptions; import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys; +import com.the_qa_company.qendpoint.core.triples.IteratorTripleString; +import com.the_qa_company.qendpoint.core.triples.TripleString; +import com.the_qa_company.qendpoint.core.util.LargeFakeDataSetStreamSupplier; +import com.the_qa_company.qendpoint.core.util.concurrent.SyncSeq; +import com.the_qa_company.qendpoint.core.util.io.AbstractMapMemoryTest; +import com.the_qa_company.qendpoint.core.util.io.Closer; +import com.the_qa_company.qendpoint.core.util.string.ByteString; import org.apache.commons.io.file.PathUtils; +import org.junit.After; +import org.junit.Before; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; -import 
com.the_qa_company.qendpoint.core.compact.bitmap.BitmapFactory; -import com.the_qa_company.qendpoint.core.compact.bitmap.ModifiableBitmap; -import com.the_qa_company.qendpoint.core.dictionary.impl.section.PFCDictionarySection; -import com.the_qa_company.qendpoint.core.hdt.HDTManagerTest; -import com.the_qa_company.qendpoint.core.util.LargeFakeDataSetStreamSupplier; -import com.the_qa_company.qendpoint.core.util.concurrent.SyncSeq; -import com.the_qa_company.qendpoint.core.util.io.AbstractMapMemoryTest; -import com.the_qa_company.qendpoint.core.util.io.Closer; -import com.the_qa_company.qendpoint.core.util.string.ByteString; +import org.junit.runners.Suite; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; @@ -44,6 +52,7 @@ import java.util.TreeMap; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; +import java.util.stream.IntStream; import java.util.stream.Stream; import static org.junit.Assert.assertEquals; @@ -52,481 +61,684 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -@RunWith(Parameterized.class) +@Suite.SuiteClasses({ KCatMergerTest.BaseTest.class, KCatMergerTest.QuadCatDiffTests.class }) +@RunWith(Suite.class) public class KCatMergerTest extends AbstractMapMemoryTest { - @Parameterized.Parameters(name = "multi: {0}, unicode: {1}, map: {2}, count: {3}") - public static Collection params() { - return Stream.of(false, true) - .flatMap(multi -> Stream.of(false, true) - .flatMap(unicode -> Stream.of(false, true).flatMap( - map -> Stream.of(2, 10).map(kcat -> new Object[] { multi, unicode, map, kcat })))) - .collect(Collectors.toList()); - } - - @Parameterized.Parameter - public boolean multi; - @Parameterized.Parameter(1) - public boolean unicode; - @Parameterized.Parameter(2) - public boolean map; - @Parameterized.Parameter(3) - public int kcat; + @RunWith(Parameterized.class) + public static class BaseTest { + + @Parameterized.Parameters(name = "multi: {0}, unicode: 
{1}, map: {2}, count: {3}") + public static Collection params() { + return Stream.of(false, true) + .flatMap(multi -> Stream.of(false, true) + .flatMap(unicode -> Stream.of(false, true).flatMap( + map -> Stream.of(2, 10).map(kcat -> new Object[] { multi, unicode, map, kcat })))) + .collect(Collectors.toList()); + } - @Rule - public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build(); + @Parameterized.Parameter + public boolean multi; + @Parameterized.Parameter(1) + public boolean unicode; + @Parameterized.Parameter(2) + public boolean map; + @Parameterized.Parameter(3) + public int kcat; - private void writeSection(DictionarySection sec, OutputStream stream) throws IOException { - ((DictionarySectionPrivate) sec).save(stream, null); - } + @Rule + public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build(); - private DictionarySection loadSection(InputStream stream) throws IOException { - PFCDictionarySection section = new PFCDictionarySection(HDTOptions.EMPTY); - section.load(stream, null); - return section; - } + private void writeSection(DictionarySection sec, OutputStream stream) throws IOException { + ((DictionarySectionPrivate) sec).save(stream, null); + } - private Map loadMultiSection(List seq, InputStream stream) - throws IOException { - Map sectionMap = new TreeMap<>(); - for (CharSequence key : seq) { + private DictionarySection loadSection(InputStream stream) throws IOException { PFCDictionarySection section = new PFCDictionarySection(HDTOptions.EMPTY); section.load(stream, null); - sectionMap.put(ByteString.of(key), section); + return section; } - return sectionMap; - } - - @Test - public void mergerTest() throws ParserException, IOException, InterruptedException { - Path root = tempDir.newFolder().toPath(); - try { - HDTOptions spec = HDTOptions.of(); - if (multi) { - spec.set(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS); - 
spec.set(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_MULT_HASH); + private Map loadMultiSection(List seq, + InputStream stream) throws IOException { + Map sectionMap = new TreeMap<>(); + for (CharSequence key : seq) { + PFCDictionarySection section = new PFCDictionarySection(HDTOptions.EMPTY); + section.load(stream, null); + sectionMap.put(ByteString.of(key), section); } + return sectionMap; + } - // create "kcat" fake HDTs - LargeFakeDataSetStreamSupplier s = LargeFakeDataSetStreamSupplier.createSupplierWithMaxTriples(1000, 42) - .withMaxElementSplit(50).withUnicode(unicode); + @Test + public void mergerTest() throws ParserException, IOException, InterruptedException { + Path root = tempDir.newFolder().toPath(); + try { + HDTOptions spec = HDTOptions.of(); - List hdts = new ArrayList<>(); - for (int i = 0; i < kcat; i++) { - String location = root.resolve("hdt" + i + ".hdt").toAbsolutePath().toString(); - hdts.add(location); - s.createAndSaveFakeHDT(spec, location); - } + if (multi) { + spec.set(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS); + spec.set(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, + HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_MULT_HASH); + } - // create the excepted HDT from previous algorithm - Path fatcathdt = root.resolve("fatcat.hdt"); - LargeFakeDataSetStreamSupplier.createSupplierWithMaxTriples(1000L * kcat, 42).withMaxElementSplit(50) - .withUnicode(unicode).createAndSaveFakeHDT(spec, fatcathdt.toAbsolutePath().toString()); - - // create dictionary and write sections - Path dictFile = root.resolve("dict"); - List sub = new ArrayList<>(); - try (KCatImpl impl = KCatImpl.of(hdts, spec, null)) { - try (KCatMerger merger = impl.createMerger(null)) { - assertEquals(multi, merger.typedHDT); - merger.startMerger(); - // create - DictionaryPrivate dict = merger.buildDictionary(); - try (OutputStream stream = new BufferedOutputStream(Files.newOutputStream(dictFile))) { - 
writeSection(dict.getShared(), stream); - writeSection(dict.getSubjects(), stream); - writeSection(dict.getPredicates(), stream); - if (multi) { - for (Map.Entry e : dict.getAllObjects() - .entrySet()) { - CharSequence key = e.getKey(); - sub.add(key); - DictionarySection sec = e.getValue(); - writeSection(sec, stream); + // create "kcat" fake HDTs + LargeFakeDataSetStreamSupplier s = LargeFakeDataSetStreamSupplier.createSupplierWithMaxTriples(1000, 42) + .withMaxElementSplit(50).withUnicode(unicode); + + List hdts = new ArrayList<>(); + for (int i = 0; i < kcat; i++) { + String location = root.resolve("hdt" + i + ".hdt").toAbsolutePath().toString(); + hdts.add(location); + s.createAndSaveFakeHDT(spec, location); + } + + // create the excepted HDT from previous algorithm + Path fatcathdt = root.resolve("fatcat.hdt"); + LargeFakeDataSetStreamSupplier.createSupplierWithMaxTriples(1000L * kcat, 42).withMaxElementSplit(50) + .withUnicode(unicode).createAndSaveFakeHDT(spec, fatcathdt.toAbsolutePath().toString()); + + // create dictionary and write sections + Path dictFile = root.resolve("dict"); + List sub = new ArrayList<>(); + try (KCatImpl impl = KCatImpl.of(hdts, spec, null)) { + try (KCatMerger merger = impl.createMerger(null)) { + assertEquals(multi, merger.typedHDT); + merger.startMerger(); + // create + DictionaryPrivate dict = merger.buildDictionary(); + try (OutputStream stream = new BufferedOutputStream(Files.newOutputStream(dictFile))) { + writeSection(dict.getShared(), stream); + writeSection(dict.getSubjects(), stream); + writeSection(dict.getPredicates(), stream); + if (multi) { + for (Map.Entry e : dict.getAllObjects() + .entrySet()) { + CharSequence key = e.getKey(); + sub.add(key); + DictionarySection sec = e.getValue(); + writeSection(sec, stream); + } + } else { + writeSection(dict.getObjects(), stream); } - } else { - writeSection(dict.getObjects(), stream); } - } - // check if all the dynamic sequences are filled - - SyncSeq[] sms = 
merger.subjectsMaps; - SyncSeq[] pms = merger.predicatesMaps; - SyncSeq[] oms = merger.objectsMaps; - - AtomicLong[] objectCounts = merger.countObject; - AtomicLong[] subjectCounts = merger.countSubject; - - for (int hdtId = 1; hdtId <= impl.hdts.length; hdtId++) { - HDT hdt = impl.hdts[hdtId - 1]; - SyncSeq sm = sms[hdtId - 1]; - SyncSeq pm = pms[hdtId - 1]; - SyncSeq om = oms[hdtId - 1]; - - AtomicLong objectCount = objectCounts[hdtId - 1]; - AtomicLong subjectCount = subjectCounts[hdtId - 1]; - - long shared = hdt.getDictionary().getShared().getNumberOfElements(); - long subjects = hdt.getDictionary().getSubjects().getNumberOfElements(); - long predicates = hdt.getDictionary().getPredicates().getNumberOfElements(); - long objects = multi - ? hdt.getDictionary().getAllObjects().values().stream() - .mapToLong(DictionarySection::getNumberOfElements).sum() - : hdt.getDictionary().getObjects().getNumberOfElements(); - - assertEquals(shared + objects, objectCount.get()); - assertEquals(shared + subjects, subjectCount.get()); - - for (long i = 1; i <= shared; i++) { - long sv = sm.get(i); - long ov = om.get(i); - if (merger.removeHeader(sv) == 0) { - fail("HDT #" + hdtId + "/" + impl.hdts.length + " Missing shared subject #" + i + "/" - + shared + " for node: " - + hdt.getDictionary().idToString(i, TripleComponentRole.SUBJECT)); - } - if (merger.removeHeader(ov) == 0) { - fail("HDT #" + hdtId + "/" + impl.hdts.length + " Missing shared object #" + i + "/" - + shared + " for node: " - + hdt.getDictionary().idToString(i, TripleComponentRole.OBJECT)); + // check if all the dynamic sequences are filled + + SyncSeq[] sms = merger.subjectsMaps; + SyncSeq[] pms = merger.predicatesMaps; + SyncSeq[] oms = merger.objectsMaps; + + AtomicLong[] objectCounts = merger.countObject; + AtomicLong[] subjectCounts = merger.countSubject; + + for (int hdtId = 1; hdtId <= impl.hdts.length; hdtId++) { + HDT hdt = impl.hdts[hdtId - 1]; + SyncSeq sm = sms[hdtId - 1]; + SyncSeq pm = pms[hdtId - 
1]; + SyncSeq om = oms[hdtId - 1]; + + AtomicLong objectCount = objectCounts[hdtId - 1]; + AtomicLong subjectCount = subjectCounts[hdtId - 1]; + + long shared = hdt.getDictionary().getShared().getNumberOfElements(); + long subjects = hdt.getDictionary().getSubjects().getNumberOfElements(); + long predicates = hdt.getDictionary().getPredicates().getNumberOfElements(); + long objects = multi + ? hdt.getDictionary().getAllObjects().values().stream() + .mapToLong(DictionarySection::getNumberOfElements).sum() + : hdt.getDictionary().getObjects().getNumberOfElements(); + + assertEquals(shared + objects, objectCount.get()); + assertEquals(shared + subjects, subjectCount.get()); + + for (long i = 1; i <= shared; i++) { + long sv = sm.get(i); + long ov = om.get(i); + if (merger.removeHeader(sv) == 0) { + fail("HDT #" + hdtId + "/" + impl.hdts.length + " Missing shared subject #" + i + + "/" + shared + " for node: " + + hdt.getDictionary().idToString(i, TripleComponentRole.SUBJECT)); + } + if (merger.removeHeader(ov) == 0) { + fail("HDT #" + hdtId + "/" + impl.hdts.length + " Missing shared object #" + i + "/" + + shared + " for node: " + + hdt.getDictionary().idToString(i, TripleComponentRole.OBJECT)); + } + + assertEquals("shared element not mapped to the same object", ov, sv); + assertTrue("shared mapped element isn't shared", merger.isShared(ov)); } - assertEquals("shared element not mapped to the same object", ov, sv); - assertTrue("shared mapped element isn't shared", merger.isShared(ov)); - } + for (long i = 1; i <= subjects; i++) { + if (merger.removeHeader(sm.get(shared + i)) == 0) { + fail("HDT #" + hdtId + "/" + impl.hdts.length + " Missing subject #" + i + "/" + + subjects + " for node: " + + hdt.getDictionary().idToString(i + shared, TripleComponentRole.SUBJECT)); + } + } - for (long i = 1; i <= subjects; i++) { - if (merger.removeHeader(sm.get(shared + i)) == 0) { - fail("HDT #" + hdtId + "/" + impl.hdts.length + " Missing subject #" + i + "/" - + subjects + " 
for node: " - + hdt.getDictionary().idToString(i + shared, TripleComponentRole.SUBJECT)); + for (long i = 1; i <= objects; i++) { + if (merger.removeHeader(om.get(shared + i)) == 0) { + fail("HDT #" + hdtId + "/" + impl.hdts.length + " Missing object #" + i + "/" + + subjects + " for node: " + + hdt.getDictionary().idToString(i + shared, TripleComponentRole.OBJECT)); + } } - } - for (long i = 1; i <= objects; i++) { - if (merger.removeHeader(om.get(shared + i)) == 0) { - fail("HDT #" + hdtId + "/" + impl.hdts.length + " Missing object #" + i + "/" + subjects - + " for node: " - + hdt.getDictionary().idToString(i + shared, TripleComponentRole.OBJECT)); + for (long i = 1; i <= predicates; i++) { + if (pm.get(i) == 0) { + fail("HDT #" + hdtId + "/" + impl.hdts.length + " Missing predicate #" + i + "/" + + subjects + " for node: " + + hdt.getDictionary().idToString(i, TripleComponentRole.PREDICATE)); + } } - } - for (long i = 1; i <= predicates; i++) { - if (pm.get(i) == 0) { - fail("HDT #" + hdtId + "/" + impl.hdts.length + " Missing predicate #" + i + "/" - + subjects + " for node: " - + hdt.getDictionary().idToString(i, TripleComponentRole.PREDICATE)); + } + } + } + try (InputStream stream = new BufferedInputStream(Files.newInputStream(dictFile))) { + // read the sections + try (DictionarySection sh = loadSection(stream); + DictionarySection su = loadSection(stream); + DictionarySection pr = loadSection(stream)) { + Map dictionarySectionMap; + DictionarySection ob; + if (multi) { + ob = null; + dictionarySectionMap = loadMultiSection(sub, stream); + } else { + dictionarySectionMap = Map.of(); + ob = loadSection(stream); + } + try { + // map the excepted hdt + try (HDT exceptedHDT = HDTManager.mapHDT(fatcathdt.toAbsolutePath().toString())) { + Dictionary exceptedDict = exceptedHDT.getDictionary(); + assertNotEquals("Invalid test, shared section empty", 0, + exceptedHDT.getDictionary().getShared().getNumberOfElements()); + // assert equals between the dictionaries + 
HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Shared", exceptedDict.getShared(), + sh); + HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Subjects", + exceptedDict.getSubjects(), su); + HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Predicates", + exceptedDict.getPredicates(), pr); + if (multi) { + Map exceptedDictSub = exceptedDict + .getAllObjects(); + dictionarySectionMap.forEach((key, sec) -> { + DictionarySection subSec = exceptedDictSub.get(key); + assertNotNull("sub#" + key + " wasn't found", subSec); + HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Section#" + key, subSec, + sec); + }); + } else { + assert ob != null; + HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Objects", + exceptedDict.getObjects(), ob); + } } + } finally { + Closer.of(ob).with(dictionarySectionMap.values()).close(); } + } + } + } finally { + PathUtils.deleteDirectory(root); + } + } + + @Test + public void catTest() throws ParserException, IOException, NotFoundException { + Path root = tempDir.newFolder().toPath(); + try { + // number of HDTs + int countPerHDT = 1000; + Random rnd = new Random(58); + + // create the config + HDTOptions spec = HDTOptions.of(); + if (multi) { + spec.set(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS); + spec.set(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, + HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_MULT_HASH); + } + if (map) { + spec.set(HDTOptionsKeys.HDTCAT_FUTURE_LOCATION, root.resolve("futurehc.hdt").toAbsolutePath()); + } + + // create "kcat" fake HDTs + LargeFakeDataSetStreamSupplier s = LargeFakeDataSetStreamSupplier.createInfinite(42) + .withMaxElementSplit(50).withUnicode(unicode); + + long size = 0; + List hdts = new ArrayList<>(); + for (int i = 0; i < kcat; i++) { + String location = root.resolve("hdt" + i + ".hdt").toAbsolutePath().toString(); + hdts.add(location); + int hdtSize = countPerHDT / 2 + rnd.nextInt(countPerHDT); + size += hdtSize; + 
s.withMaxTriples(hdtSize).createAndSaveFakeHDT(spec, location); + } + + // create the excepted HDT from previous algorithm + Path fatcathdt = root.resolve("fatcat.hdt"); + s.reset(); + s.withMaxTriples(size).createAndSaveFakeHDT(spec, fatcathdt.toAbsolutePath().toString()); + + // create dictionary and write sections + // map the excepted hdt + try (HDT actualHDT = HDTManager.catHDT(hdts, spec, null)) { + try (HDT exceptedHDT = HDTManager.mapHDT(fatcathdt.toAbsolutePath().toString())) { + // assert equals between the dictionaries + assertNotEquals(0, actualHDT.getDictionary().getShared().getNumberOfElements()); + HDTManagerTest.HDTManagerTestBase.assertEqualsHDT(exceptedHDT, actualHDT); } } + } finally { + PathUtils.deleteDirectory(root); } - try (InputStream stream = new BufferedInputStream(Files.newInputStream(dictFile))) { - // read the sections - try (DictionarySection sh = loadSection(stream); - DictionarySection su = loadSection(stream); - DictionarySection pr = loadSection(stream)) { - Map dictionarySectionMap; - DictionarySection ob; - if (multi) { - ob = null; - dictionarySectionMap = loadMultiSection(sub, stream); - } else { - dictionarySectionMap = Map.of(); - ob = loadSection(stream); + } + + @Test + @Ignore + public void catDiffTest() throws ParserException, IOException, NotFoundException { + Path root = tempDir.newFolder().toPath(); + try { + // number of HDTs + int countPerHDT = 1000; + Random rnd = new Random(58); + + // create the config + HDTOptions spec = HDTOptions.of(); + if (multi) { + spec.set(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS); + spec.set(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, + HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_MULT_HASH); + } + + if (map) { + spec.set(HDTOptionsKeys.HDTCAT_FUTURE_LOCATION, root.resolve("futurehc.hdt").toAbsolutePath()); + } + + // create "kcat" fake HDTs + LargeFakeDataSetStreamSupplier s = LargeFakeDataSetStreamSupplier.createInfinite(42) + 
.withMaxElementSplit(50).withUnicode(unicode); + + Random rndDelete = new Random(45678); + List hdts = new ArrayList<>(); + List hdtsDiff = new ArrayList<>(); + List deleteBitmaps = new ArrayList<>(); + String diffwork = root.resolve("diffwork").toAbsolutePath().toString(); + for (int i = 0; i < kcat; i++) { + String location = root.resolve("hdt" + i + ".hdt").toAbsolutePath().toString(); + String locationPreDiff = root.resolve("hdt" + i + "pre.hdt").toAbsolutePath().toString(); + hdtsDiff.add(location); + hdts.add(locationPreDiff); + int hdtSize = countPerHDT / 2 + rnd.nextInt(countPerHDT); + s.withMaxTriples(hdtSize).createAndSaveFakeHDT(spec, locationPreDiff); + + ModifiableBitmap bitmap = BitmapFactory.createRWBitmap(hdtSize + 1); + deleteBitmaps.add(bitmap); + + int toDelete = rndDelete.nextInt(hdtSize); + for (int j = 0; j < toDelete; j++) { + int index = rndDelete.nextInt(hdtSize) + 1; + bitmap.set(index, true); + } + + try (HDT diffHDTBit = HDTManager.diffHDTBit(diffwork, locationPreDiff, bitmap, spec, null)) { + diffHDTBit.saveToHDT(location, null); } - try { - // map the excepted hdt - try (HDT exceptedHDT = HDTManager.mapHDT(fatcathdt.toAbsolutePath().toString())) { - Dictionary exceptedDict = exceptedHDT.getDictionary(); - assertNotEquals("Invalid test, shared section empty", 0, - exceptedHDT.getDictionary().getShared().getNumberOfElements()); - // assert equals between the dictionaries - HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Shared", exceptedDict.getShared(), sh); - HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Subjects", exceptedDict.getSubjects(), - su); - HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Predicates", - exceptedDict.getPredicates(), pr); + } + + // create the excepted HDT from previous algorithm + Path fatcathdt = root.resolve("fatcat.hdt"); + try (HDT hdt = HDTManager.catHDT(hdtsDiff, spec, null)) { + hdt.saveToHDT(fatcathdt.toAbsolutePath().toString(), null); + } + + // create dictionary and write sections + // 
map the excepted hdt + try (HDT actualHDT = HDTManager.diffBitCatHDT(hdts, deleteBitmaps, spec, null)) { + try (HDT exceptedHDT = HDTManager.mapHDT(fatcathdt.toAbsolutePath().toString())) { + // assert equals between the dictionaries + assertNotEquals(0, actualHDT.getDictionary().getShared().getNumberOfElements()); + HDTManagerTest.HDTManagerTestBase.assertEqualsHDT(exceptedHDT, actualHDT); + } + } + } finally { + PathUtils.deleteDirectory(root); + } + } + + @Test + public void diffMergerTest() throws ParserException, IOException, InterruptedException { + Path root = tempDir.newFolder().toPath(); + try { + // number of HDTs + int countPerHDT = 1000; + Random rnd = new Random(58); + + // create the config + HDTOptions spec = HDTOptions.of(); + if (multi) { + spec.set(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS); + spec.set(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, + HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_MULT_HASH); + } + + if (map) { + spec.set(HDTOptionsKeys.HDTCAT_FUTURE_LOCATION, root.resolve("futurehc.hdt").toAbsolutePath()); + } + + // create "kcat" fake HDTs + LargeFakeDataSetStreamSupplier s = LargeFakeDataSetStreamSupplier.createInfinite(42) + .withMaxElementSplit(50).withUnicode(unicode); + + Random rndDelete = new Random(45678); + List hdts = new ArrayList<>(); + List hdtsDiff = new ArrayList<>(); + List deleteBitmaps = new ArrayList<>(); + String diffwork = root.resolve("diffwork").toAbsolutePath().toString(); + for (int i = 0; i < kcat; i++) { + String location = root.resolve("hdt" + i + ".hdt").toAbsolutePath().toString(); + String locationPreDiff = root.resolve("hdt" + i + "pre.hdt").toAbsolutePath().toString(); + hdtsDiff.add(location); + hdts.add(locationPreDiff); + int hdtSize = countPerHDT / 2 + rnd.nextInt(countPerHDT); + s.withMaxTriples(hdtSize).createAndSaveFakeHDT(spec, locationPreDiff); + + ModifiableBitmap bitmap = BitmapFactory.createRWBitmap(hdtSize + 1); + deleteBitmaps.add(bitmap); + + int 
toDelete = rndDelete.nextInt(hdtSize); + for (int j = 0; j < toDelete; j++) { + int index = rndDelete.nextInt(hdtSize) + 1; + bitmap.set(index, true); + } + + try (HDT diffHDTBit = HDTManager.diffHDTBit(diffwork, locationPreDiff, bitmap, spec, null)) { + diffHDTBit.saveToHDT(location, null); + } + } + + // create the excepted HDT from previous algorithm + Path fatcathdt = root.resolve("fatcat.hdt"); + try (HDT hdt = HDTManager.catHDT(hdtsDiff, spec, null)) { + hdt.saveToHDT(fatcathdt.toAbsolutePath().toString(), null); + } + + // create dictionary and write sections + Path dictFile = root.resolve("dict"); + List sub = new ArrayList<>(); + try (KCatImpl impl = KCatImpl.of(hdts, deleteBitmaps, spec, null)) { + try (KCatMerger merger = impl.createMerger(null)) { + assertEquals(multi, merger.typedHDT); + merger.startMerger(); + // create + DictionaryPrivate dict = merger.buildDictionary(); + try (OutputStream stream = new BufferedOutputStream(Files.newOutputStream(dictFile))) { + writeSection(dict.getShared(), stream); + writeSection(dict.getSubjects(), stream); + writeSection(dict.getPredicates(), stream); if (multi) { - Map exceptedDictSub = exceptedDict - .getAllObjects(); - dictionarySectionMap.forEach((key, sec) -> { - DictionarySection subSec = exceptedDictSub.get(key); - assertNotNull("sub#" + key + " wasn't found", subSec); - HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Section#" + key, subSec, sec); - }); + for (Map.Entry e : dict.getAllObjects() + .entrySet()) { + CharSequence key = e.getKey(); + sub.add(key); + DictionarySection sec = e.getValue(); + writeSection(sec, stream); + } } else { - assert ob != null; - HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Objects", exceptedDict.getObjects(), - ob); + writeSection(dict.getObjects(), stream); } } - } finally { - Closer.of(ob).with(dictionarySectionMap.values()).close(); } } + try (InputStream stream = new BufferedInputStream(Files.newInputStream(dictFile))) { + // read the sections + try 
(DictionarySection sh = loadSection(stream); + DictionarySection su = loadSection(stream); + DictionarySection pr = loadSection(stream)) { + Map dictionarySectionMap; + DictionarySection ob; + if (multi) { + ob = null; + dictionarySectionMap = loadMultiSection(sub, stream); + } else { + dictionarySectionMap = Map.of(); + ob = loadSection(stream); + } + try { + // map the excepted hdt + try (HDT exceptedHDT = HDTManager.mapHDT(fatcathdt.toAbsolutePath().toString())) { + Dictionary exceptedDict = exceptedHDT.getDictionary(); + assertNotEquals("Invalid test, shared section empty", 0, + exceptedHDT.getDictionary().getShared().getNumberOfElements()); + // assert equals between the dictionaries + HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Shared", exceptedDict.getShared(), + sh); + HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Subjects", + exceptedDict.getSubjects(), su); + HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Predicates", + exceptedDict.getPredicates(), pr); + if (multi) { + Map exceptedDictSub = exceptedDict + .getAllObjects(); + dictionarySectionMap.forEach((key, sec) -> { + DictionarySection subSec = exceptedDictSub.get(key); + assertNotNull("sub#" + key + " wasn't found", subSec); + HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Section#" + key, subSec, + sec); + }); + } else { + assert ob != null; + HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Objects", + exceptedDict.getObjects(), ob); + } + } + } finally { + Closer.of(ob).with(dictionarySectionMap.values()).close(); + } + } + } + } finally { + PathUtils.deleteDirectory(root); } - } finally { - PathUtils.deleteDirectory(root); } } - @Test - public void catTest() throws ParserException, IOException, NotFoundException { - Path root = tempDir.newFolder().toPath(); - try { - // number of HDTs - int countPerHDT = 1000; - Random rnd = new Random(58); - - // create the config - HDTOptions spec = HDTOptions.of(); - if (multi) { - spec.set(HDTOptionsKeys.DICTIONARY_TYPE_KEY, 
HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS); - spec.set(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_MULT_HASH); - } + @RunWith(Parameterized.class) + public static class QuadCatDiffTests extends AbstractMapMemoryTest { + @Rule + public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build(); + + @Parameterized.Parameters(name = "{0} default:{1}") + public static Collection params() { + return Stream + .of(HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG_QUAD) + .flatMap(dict -> Stream.of(true, false).map(defaultGraph -> new Object[] { dict, defaultGraph })) + .toList(); + } - if (map) { - spec.set(HDTOptionsKeys.HDTCAT_FUTURE_LOCATION, root.resolve("futurehc.hdt").toAbsolutePath()); - } + @Parameterized.Parameter + public String dictType; + @Parameterized.Parameter(1) + public boolean defaultGraph; - // create "kcat" fake HDTs - LargeFakeDataSetStreamSupplier s = LargeFakeDataSetStreamSupplier.createInfinite(42).withMaxElementSplit(50) - .withUnicode(unicode); - - long size = 0; - List hdts = new ArrayList<>(); - for (int i = 0; i < kcat; i++) { - String location = root.resolve("hdt" + i + ".hdt").toAbsolutePath().toString(); - hdts.add(location); - int hdtSize = countPerHDT / 2 + rnd.nextInt(countPerHDT); - size += hdtSize; - s.withMaxTriples(hdtSize).createAndSaveFakeHDT(spec, location); - } + public HDTOptions spec; + public Path root; - // create the excepted HDT from previous algorithm - Path fatcathdt = root.resolve("fatcat.hdt"); - s.reset(); - s.withMaxTriples(size).createAndSaveFakeHDT(spec, fatcathdt.toAbsolutePath().toString()); - - // create dictionary and write sections - // map the excepted hdt - try (HDT actualHDT = HDTManager.catHDT(hdts, spec, null)) { - try (HDT exceptedHDT = HDTManager.mapHDT(fatcathdt.toAbsolutePath().toString())) { - // assert equals between the dictionaries - assertNotEquals(0, 
actualHDT.getDictionary().getShared().getNumberOfElements()); - HDTManagerTest.HDTManagerTestBase.assertEqualsHDT(exceptedHDT, actualHDT); - } - } - } finally { + @Before + public void setupVals() throws IOException { + spec = HDTOptions.of(HDTOptionsKeys.DICTIONARY_TYPE_KEY, dictType); + root = tempDir.newFolder().toPath(); + } + + @After + public void clearVals() throws IOException { PathUtils.deleteDirectory(root); } - } - @Test - @Ignore - public void catDiffTest() throws ParserException, IOException, NotFoundException { - Path root = tempDir.newFolder().toPath(); - try { - // number of HDTs - int countPerHDT = 1000; - Random rnd = new Random(58); - - // create the config - HDTOptions spec = HDTOptions.of(); - if (multi) { - spec.set(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS); - spec.set(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_MULT_HASH); - } + @Test + public void catTest() throws ParserException, IOException, NotFoundException { + long size = 1000; + int count = 10; - if (map) { - spec.set(HDTOptionsKeys.HDTCAT_FUTURE_LOCATION, root.resolve("futurehc.hdt").toAbsolutePath()); - } + LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier + .createSupplierWithMaxTriples(size, 6789).withMaxElementSplit((int) (size / 50)).withQuads(true) + .withNoDefaultGraph(!defaultGraph); - // create "kcat" fake HDTs - LargeFakeDataSetStreamSupplier s = LargeFakeDataSetStreamSupplier.createInfinite(42).withMaxElementSplit(50) - .withUnicode(unicode); + List files = IntStream.range(0, count).mapToObj(i -> root.resolve("sub-" + i + ".hdt")).toList(); - Random rndDelete = new Random(45678); - List hdts = new ArrayList<>(); - List hdtsDiff = new ArrayList<>(); - List deleteBitmaps = new ArrayList<>(); - String diffwork = root.resolve("diffwork").toAbsolutePath().toString(); - for (int i = 0; i < kcat; i++) { - String location = root.resolve("hdt" + i + 
".hdt").toAbsolutePath().toString(); - String locationPreDiff = root.resolve("hdt" + i + "pre.hdt").toAbsolutePath().toString(); - hdtsDiff.add(location); - hdts.add(locationPreDiff); - int hdtSize = countPerHDT / 2 + rnd.nextInt(countPerHDT); - s.withMaxTriples(hdtSize).createAndSaveFakeHDT(spec, locationPreDiff); - - ModifiableBitmap bitmap = BitmapFactory.createRWBitmap(hdtSize + 1); - deleteBitmaps.add(bitmap); - - int toDelete = rndDelete.nextInt(hdtSize); - for (int j = 0; j < toDelete; j++) { - int index = rndDelete.nextInt(hdtSize) + 1; - bitmap.set(index, true); + List deletes = new ArrayList<>(); + // create the files to cat + for (Path file : files) { + try (HDT hdt = supplier.createFakeHDT(spec)) { + hdt.saveToHDT(file); + deletes.add(GraphDeleteBitmap.empty(hdt.getDictionary().getNgraphs(), + hdt.getTriples().getNumberOfElements())); } + } - try (HDT diffHDTBit = HDTManager.diffHDTBit(diffwork, locationPreDiff, bitmap, spec, null)) { - diffHDTBit.saveToHDT(location, null); - } + supplier.withMaxTriples(count * size); + supplier.reset(); + + Path exceptedHDT = root.resolve("excepted.hdt"); + + supplier.createAndSaveFakeHDT(spec, exceptedHDT); + + Path actualHDT = root.resolve("actual.hdt"); + Path actualdiffHDT = root.resolve("actualdiff.hdt"); + + // compute using cat algorithm + try (HDT hdt = HDTManager.catHDTPath(files, spec, ProgressListener.ignore())) { + HDTManagerTest.HDTManagerTestBase.checkHDTConsistency(hdt); + assertTrue(hdt.getDictionary().supportGraphs()); + hdt.saveToHDT(actualHDT); } - // create the excepted HDT from previous algorithm - Path fatcathdt = root.resolve("fatcat.hdt"); - try (HDT hdt = HDTManager.catHDT(hdtsDiff, spec, null)) { - hdt.saveToHDT(fatcathdt.toAbsolutePath().toString(), null); + // compute using diff algorithm with empty bitmaps + try (HDT hdt = HDTManager.diffBitCatHDTPath(files, deletes, spec, ProgressListener.ignore())) { + HDTManagerTest.HDTManagerTestBase.checkHDTConsistency(hdt); + 
assertTrue(hdt.getDictionary().supportGraphs()); + hdt.saveToHDT(actualdiffHDT); } - // create dictionary and write sections - // map the excepted hdt - try (HDT actualHDT = HDTManager.diffBitCatHDT(hdts, deleteBitmaps, spec, null)) { - try (HDT exceptedHDT = HDTManager.mapHDT(fatcathdt.toAbsolutePath().toString())) { - // assert equals between the dictionaries - assertNotEquals(0, actualHDT.getDictionary().getShared().getNumberOfElements()); - HDTManagerTest.HDTManagerTestBase.assertEqualsHDT(exceptedHDT, actualHDT); - } + // test results + try (HDT excepted = HDTManager.mapHDT(exceptedHDT); + HDT actual = HDTManager.mapHDT(actualHDT); + HDT actualdiff = HDTManager.mapHDT(actualdiffHDT)) { + // clear original size because it impossible to compute it in a + // diff + excepted.getHeader().remove("_:statistics", HDTVocabulary.ORIGINAL_SIZE, ""); + actual.getHeader().remove("_:statistics", HDTVocabulary.ORIGINAL_SIZE, ""); + actualdiff.getHeader().remove("_:statistics", HDTVocabulary.ORIGINAL_SIZE, ""); + + HDTManagerTest.HDTManagerTestBase.checkHDTConsistency(actual); + HDTManagerTest.HDTManagerTestBase.assertEqualsHDT(excepted, actual); + HDTManagerTest.HDTManagerTestBase.checkHDTConsistency(actualdiff); + HDTManagerTest.HDTManagerTestBase.assertEqualsHDT(excepted, actualdiff); } - } finally { - PathUtils.deleteDirectory(root); } - } - @Test - public void diffMergerTest() throws ParserException, IOException, InterruptedException { - Path root = tempDir.newFolder().toPath(); - try { - // number of HDTs - int countPerHDT = 1000; - Random rnd = new Random(58); - - // create the config - HDTOptions spec = HDTOptions.of(); - if (multi) { - spec.set(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS); - spec.set(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_MULT_HASH); - } + @Test + public void catDiffTest() throws ParserException, IOException, NotFoundException { + long size = 1625; + int count = 
10; - if (map) { - spec.set(HDTOptionsKeys.HDTCAT_FUTURE_LOCATION, root.resolve("futurehc.hdt").toAbsolutePath()); - } + LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier + .createSupplierWithMaxTriples(size, 6789).withMaxElementSplit((int) (size / 50)).withQuads(true) + .withNoDefaultGraph(!defaultGraph); - // create "kcat" fake HDTs - LargeFakeDataSetStreamSupplier s = LargeFakeDataSetStreamSupplier.createInfinite(42).withMaxElementSplit(50) - .withUnicode(unicode); + List files = IntStream.range(0, count).mapToObj(i -> root.resolve("sub-" + i + ".hdt")).toList(); - Random rndDelete = new Random(45678); - List hdts = new ArrayList<>(); - List hdtsDiff = new ArrayList<>(); List deleteBitmaps = new ArrayList<>(); - String diffwork = root.resolve("diffwork").toAbsolutePath().toString(); - for (int i = 0; i < kcat; i++) { - String location = root.resolve("hdt" + i + ".hdt").toAbsolutePath().toString(); - String locationPreDiff = root.resolve("hdt" + i + "pre.hdt").toAbsolutePath().toString(); - hdtsDiff.add(location); - hdts.add(locationPreDiff); - int hdtSize = countPerHDT / 2 + rnd.nextInt(countPerHDT); - s.withMaxTriples(hdtSize).createAndSaveFakeHDT(spec, locationPreDiff); - - ModifiableBitmap bitmap = BitmapFactory.createRWBitmap(hdtSize + 1); - deleteBitmaps.add(bitmap); - - int toDelete = rndDelete.nextInt(hdtSize); - for (int j = 0; j < toDelete; j++) { - int index = rndDelete.nextInt(hdtSize) + 1; - bitmap.set(index, true); - } + Random deleteRnd = new Random(345678); + try { + + List dataset = new ArrayList<>(); + + // create the files to cat + for (Path file : files) { + try (HDT hdt = supplier.createFakeHDT(spec)) { + hdt.saveToHDT(file); + long graphs = hdt.getDictionary().supportGraphs() ? 
hdt.getDictionary().getNgraphs() : 1; + assert graphs > 0; + long triples = hdt.getTriples().getNumberOfElements(); + + GraphDeleteBitmap memory = GraphDeleteBitmap.memory(graphs, triples + 1); + + // create delete bitmap + int toDelete = (int) (triples / 200); + for (int i = 0; i < toDelete; i++) { + int tripleId = 1 + deleteRnd.nextInt((int) triples); + + // add +1 for a special case to delete all the nodes + int graphId = deleteRnd.nextInt((int) graphs + 1); + + if (graphId >= graphs) { + // delete the triple + for (int j = 0; j < graphs; j++) { + memory.set(j, tripleId, true); + } + } else { + // delete the quad + memory.set(graphId, tripleId, true); + } + } - try (HDT diffHDTBit = HDTManager.diffHDTBit(diffwork, locationPreDiff, bitmap, spec, null)) { - diffHDTBit.saveToHDT(location, null); - } - } + IteratorTripleString it = hdt.searchAll(); - // create the excepted HDT from previous algorithm - Path fatcathdt = root.resolve("fatcat.hdt"); - try (HDT hdt = HDTManager.catHDT(hdtsDiff, spec, null)) { - hdt.saveToHDT(fatcathdt.toAbsolutePath().toString(), null); - } + while (it.hasNext()) { + TripleString ts = it.next(); + long pos = it.getLastTriplePosition(); + long g = ts.getGraph().isEmpty() ? 
0 + : (hdt.getDictionary().stringToId(ts.getGraph(), TripleComponentRole.GRAPH) - 1); - // create dictionary and write sections - Path dictFile = root.resolve("dict"); - List sub = new ArrayList<>(); - try (KCatImpl impl = KCatImpl.of(hdts, deleteBitmaps, spec, null)) { - try (KCatMerger merger = impl.createMerger(null)) { - assertEquals(multi, merger.typedHDT); - merger.startMerger(); - // create - DictionaryPrivate dict = merger.buildDictionary(); - try (OutputStream stream = new BufferedOutputStream(Files.newOutputStream(dictFile))) { - writeSection(dict.getShared(), stream); - writeSection(dict.getSubjects(), stream); - writeSection(dict.getPredicates(), stream); - if (multi) { - for (Map.Entry e : dict.getAllObjects() - .entrySet()) { - CharSequence key = e.getKey(); - sub.add(key); - DictionarySection sec = e.getValue(); - writeSection(sec, stream); + assert g >= 0; + + if (!memory.access(g, pos)) { + // not deleted, we can add it + dataset.add(ts.tripleToString()); } - } else { - writeSection(dict.getObjects(), stream); } + + deleteBitmaps.add(memory); } } - } - try (InputStream stream = new BufferedInputStream(Files.newInputStream(dictFile))) { - // read the sections - try (DictionarySection sh = loadSection(stream); - DictionarySection su = loadSection(stream); - DictionarySection pr = loadSection(stream)) { - Map dictionarySectionMap; - DictionarySection ob; - if (multi) { - ob = null; - dictionarySectionMap = loadMultiSection(sub, stream); - } else { - dictionarySectionMap = Map.of(); - ob = loadSection(stream); - } - try { - // map the excepted hdt - try (HDT exceptedHDT = HDTManager.mapHDT(fatcathdt.toAbsolutePath().toString())) { - Dictionary exceptedDict = exceptedHDT.getDictionary(); - assertNotEquals("Invalid test, shared section empty", 0, - exceptedHDT.getDictionary().getShared().getNumberOfElements()); - // assert equals between the dictionaries - HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Shared", exceptedDict.getShared(), sh); - 
HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Subjects", exceptedDict.getSubjects(), - su); - HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Predicates", - exceptedDict.getPredicates(), pr); - if (multi) { - Map exceptedDictSub = exceptedDict - .getAllObjects(); - dictionarySectionMap.forEach((key, sec) -> { - DictionarySection subSec = exceptedDictSub.get(key); - assertNotNull("sub#" + key + " wasn't found", subSec); - HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Section#" + key, subSec, sec); - }); - } else { - assert ob != null; - HDTManagerTest.HDTManagerTestBase.assertEqualsHDT("Objects", exceptedDict.getObjects(), - ob); - } - } - } finally { - Closer.of(ob).with(dictionarySectionMap.values()).close(); - } + + supplier.withMaxTriples(count * size); + + Path exceptedHDT = root.resolve("excepted.hdt"); + + try (HDT hdt = HDTManager.generateHDT(dataset.iterator(), LargeFakeDataSetStreamSupplier.BASE_URI, spec, + ProgressListener.ignore())) { + hdt.saveToHDT(exceptedHDT); } + + Path actualHDT = root.resolve("actual.hdt"); + try (HDT hdt = HDTManager.diffBitCatHDTPath(files, deleteBitmaps, spec, ProgressListener.ignore())) { + HDTManagerTest.HDTManagerTestBase.checkHDTConsistency(hdt); + hdt.saveToHDT(actualHDT); + } + + try (HDT excepted = HDTManager.mapHDT(exceptedHDT); HDT actual = HDTManager.mapHDT(actualHDT)) { + HDTManagerTest.HDTManagerTestBase.checkHDTConsistency(actual); + + // clear original size because it impossible to compute it + // in a diff + excepted.getHeader().remove("_:statistics", HDTVocabulary.ORIGINAL_SIZE, ""); + actual.getHeader().remove("_:statistics", HDTVocabulary.ORIGINAL_SIZE, ""); + HDTManagerTest.HDTManagerTestBase.assertEqualsHDT(excepted, actual); + } + } finally { + Closer.closeAll(deleteBitmaps); } - } finally { - PathUtils.deleteDirectory(root); } } - } diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java 
b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java index fc3d7831..416b51b5 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java @@ -3,6 +3,7 @@ import com.the_qa_company.qendpoint.core.compact.bitmap.BitmapFactory; import com.the_qa_company.qendpoint.core.compact.bitmap.ModifiableBitmap; import com.the_qa_company.qendpoint.core.dictionary.Dictionary; +import com.the_qa_company.qendpoint.core.dictionary.DictionaryFactory; import com.the_qa_company.qendpoint.core.dictionary.DictionarySection; import com.the_qa_company.qendpoint.core.dictionary.impl.BaseDictionary; import com.the_qa_company.qendpoint.core.dictionary.impl.MultipleBaseDictionary; @@ -15,7 +16,6 @@ import com.the_qa_company.qendpoint.core.exceptions.NotFoundException; import com.the_qa_company.qendpoint.core.exceptions.ParserException; import com.the_qa_company.qendpoint.core.hdt.impl.diskimport.CompressionResult; -import com.the_qa_company.qendpoint.core.iterator.utils.FetcherIterator; import com.the_qa_company.qendpoint.core.iterator.utils.PipedCopyIterator; import com.the_qa_company.qendpoint.core.listener.ProgressListener; import com.the_qa_company.qendpoint.core.options.HDTOptions; @@ -41,6 +41,7 @@ import org.apache.commons.io.file.PathUtils; import org.junit.After; import org.junit.Assert; +import org.junit.Assume; import org.junit.Before; import org.junit.Ignore; import org.junit.Rule; @@ -59,7 +60,6 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Comparator; import java.util.HashMap; @@ -85,15 +85,22 @@ @RunWith(Suite.class) @Suite.SuiteClasses({ HDTManagerTest.DynamicDiskTest.class, HDTManagerTest.DynamicCatTreeTest.class, HDTManagerTest.FileDynamicTest.class, HDTManagerTest.StaticTest.class, 
HDTManagerTest.MSDLangTest.class, - HDTManagerTest.HDTQTest.class, HDTManagerTest.DictionaryLangTypeTest.class }) + HDTManagerTest.HDTQTest.class, HDTManagerTest.DictionaryLangTypeTest.class, + HDTManagerTest.MSDLangQuadTest.class }) public class HDTManagerTest { public static class HDTManagerTestBase extends AbstractMapMemoryTest implements ProgressListener { protected final Logger logger; - protected static String[] diskDict() { - return new String[] { HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS, + protected static List diskDict() { + return List.of(HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG_QUAD, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION, - HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG }; + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG); + } + + protected static List diskDictCat() { + return diskDict(); } /** @@ -101,7 +108,7 @@ protected static String[] diskDict() { * GH#177 */ protected static final boolean ALLOW_STRING_CONSISTENCY_TEST = false; - protected static final long SIZE_VALUE = 1L << 15; + protected static final long SIZE_VALUE = 1L << 10; protected static final int SEED = 67; private HDTManagerTestBase() { @@ -189,6 +196,7 @@ public static void assertEqualsHDT(HDT expected, HDT actual) throws NotFoundExce TripleID expectedTriple = expectedIt.next(); TripleID actualTriple = actualIt.next(); + long location = expectedIt.getLastTriplePosition(); assertEquals("The tripleID location doesn't match", location, actualIt.getLastTriplePosition()); assertEquals("The tripleID #" + location + " doesn't match", expectedTriple, actualTriple); @@ -223,6 +231,10 @@ public static void checkHDTConsistency(HDT hdt) { map.put("Predicates", dict.getPredicates()); map.put("Shared", dict.getShared()); + if (dict.supportGraphs()) { + map.put("Graph", dict.getGraphs()); + } + ReplazableString prev = new 
ReplazableString(); Comparator cmp = CharSequenceComparator.getInstance(); map.forEach((name, section) -> { @@ -270,6 +282,18 @@ public static void checkHDTConsistency(HDT hdt) { prev.replace(next); } }); + IteratorTripleID tripleIt = hdt.getTriples().searchAll(); + long count = 0; + TripleID last = new TripleID(-1, -1, -1); + while (tripleIt.hasNext()) { + TripleID tid = tripleIt.next(); + if (tid.match(last)) { // same graph? + continue; + } + count++; + last.setAll(tid.getSubject(), tid.getPredicate(), tid.getObject()); + } + assertEquals("tripleIt:" + tripleIt.getClass(), hdt.getTriples().getNumberOfElements(), count); } public static void assertComponentsNotNull(String message, TripleString ts) { @@ -385,6 +409,7 @@ public static Collection params() { public long size; @Parameterized.Parameter(9) public String addedSpecs; + public boolean quadDict; @Before public void setupSpecs() { @@ -393,12 +418,15 @@ public void setupSpecs() { spec.set(HDTOptionsKeys.LOADER_DISK_COMPRESSION_MODE_KEY, compressMode); spec.set(HDTOptionsKeys.DICTIONARY_TYPE_KEY, dictionaryType); spec.set(HDTOptionsKeys.LOADER_DISK_NO_COPY_ITERATOR_KEY, true); + + quadDict = DictionaryFactory.isQuadDictionary(dictionaryType); } private void generateDiskTest() throws IOException, ParserException, NotFoundException, InterruptedException { LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier .createSupplierWithMaxSize(maxSize, SEED).withMaxElementSplit(maxElementSplit) - .withMaxLiteralSize(maxLiteralSize).withSameTripleString(true).withUnicode(true); + .withMaxLiteralSize(maxLiteralSize).withSameTripleString(true).withUnicode(true) + .withQuads(quadDict); if (spec.getBoolean("debug.disk.slow.stream")) { supplier.withSlowStream(25); @@ -409,8 +437,9 @@ private void generateDiskTest() throws IOException, ParserException, NotFoundExc .createNTInputStream(CompressionType.GZIP); HDT actual = null; try { - actual = HDTManager.generateHDTDisk(genActual.getStream(), 
HDTTestUtils.BASE_URI, RDFNotation.NTRIPLES, - CompressionType.GZIP, spec, quiet ? null : this); + actual = HDTManager.generateHDTDisk(genActual.getStream(), HDTTestUtils.BASE_URI, + quadDict ? RDFNotation.NQUAD : RDFNotation.NTRIPLES, CompressionType.GZIP, spec, + quiet ? null : this); checkHDTConsistency(actual); } finally { if (actual == null) { @@ -426,8 +455,8 @@ private void generateDiskTest() throws IOException, ParserException, NotFoundExc // create MEMORY HDT HDT expected = null; try { - expected = HDTManager.generateHDT(genExpected.getStream(), HDTTestUtils.BASE_URI, RDFNotation.NTRIPLES, - CompressionType.GZIP, spec, null); + expected = HDTManager.generateHDT(genExpected.getStream(), HDTTestUtils.BASE_URI, + quadDict ? RDFNotation.NQUAD : RDFNotation.NTRIPLES, CompressionType.GZIP, spec, null); checkHDTConsistency(expected); } finally { if (expected == null) { @@ -450,7 +479,8 @@ private void generateDiskTest() throws IOException, ParserException, NotFoundExc public void generateSaveLoadMapTest() throws IOException, ParserException, NotFoundException { LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier .createSupplierWithMaxSize(maxSize, SEED).withMaxElementSplit(maxElementSplit) - .withMaxLiteralSize(maxLiteralSize).withUnicode(true); + .withMaxLiteralSize(maxLiteralSize).withUnicode(true) + .withQuads(DictionaryFactory.isQuadDictionary(dictionaryType)); // create MEMORY HDT @@ -489,9 +519,10 @@ public void generateDiskMapTest() throws IOException, ParserException, NotFoundE @Test public void catTreeTest() throws IOException, ParserException, NotFoundException, InterruptedException { + Assume.assumeTrue(diskDictCat().contains(dictionaryType)); LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier .createSupplierWithMaxSize(maxSize, SEED).withMaxElementSplit(maxElementSplit) - .withMaxLiteralSize(maxLiteralSize).withUnicode(true); + .withMaxLiteralSize(maxLiteralSize).withUnicode(true).withQuads(quadDict); // 
create DISK HDT LargeFakeDataSetStreamSupplier.ThreadedStream genActual = supplier @@ -499,7 +530,8 @@ public void catTreeTest() throws IOException, ParserException, NotFoundException HDT actual = null; try { actual = HDTManager.catTree(RDFFluxStop.sizeLimit(size), HDTSupplier.memory(), genActual.getStream(), - HDTTestUtils.BASE_URI, RDFNotation.NTRIPLES, spec, quiet ? null : this); + HDTTestUtils.BASE_URI, quadDict ? RDFNotation.NQUAD : RDFNotation.NTRIPLES, spec, + quiet ? null : this); } finally { if (actual == null) { genActual.getThread().interrupt(); @@ -526,9 +558,10 @@ public void catTreeTest() throws IOException, ParserException, NotFoundException @Test public void catTreeDiskTest() throws IOException, ParserException, NotFoundException, InterruptedException { + Assume.assumeTrue(diskDictCat().contains(dictionaryType)); LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier .createSupplierWithMaxSize(maxSize, SEED).withMaxElementSplit(maxElementSplit) - .withMaxLiteralSize(maxLiteralSize).withUnicode(true); + .withMaxLiteralSize(maxLiteralSize).withUnicode(true).withQuads(quadDict); spec.set("debug.disk.build", true); @@ -538,7 +571,8 @@ public void catTreeDiskTest() throws IOException, ParserException, NotFoundExcep HDT actual = null; try { actual = HDTManager.catTree(RDFFluxStop.sizeLimit(size), HDTSupplier.disk(), genActual.getStream(), - HDTTestUtils.BASE_URI, RDFNotation.NTRIPLES, spec, quiet ? null : this); + HDTTestUtils.BASE_URI, quadDict ? RDFNotation.NQUAD : RDFNotation.NTRIPLES, spec, + quiet ? 
null : this); } finally { if (actual == null) { genActual.getThread().interrupt(); @@ -606,9 +640,13 @@ public static Collection params() { @Parameterized.Parameter(8) public boolean async; + public boolean quadDict; + @Before public void setupSpecs() { + Assume.assumeTrue(diskDictCat().contains(dictionaryType)); spec.set(HDTOptionsKeys.DICTIONARY_TYPE_KEY, dictionaryType); + quadDict = DictionaryFactory.isQuadDictionary(dictionaryType); if (kCat != 0) { spec.set(HDTOptionsKeys.LOADER_CATTREE_KCAT, kCat); @@ -622,7 +660,7 @@ public void setupSpecs() { public void catTreeTest() throws IOException, ParserException, NotFoundException, InterruptedException { LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier .createSupplierWithMaxSize(maxSize, SEED).withMaxElementSplit(maxElementSplit) - .withMaxLiteralSize(maxLiteralSize).withUnicode(true); + .withMaxLiteralSize(maxLiteralSize).withUnicode(true).withQuads(quadDict); // create DISK HDT LargeFakeDataSetStreamSupplier.ThreadedStream genActual = supplier @@ -632,8 +670,8 @@ public void catTreeTest() throws IOException, ParserException, NotFoundException try { try { actual = HDTManager.catTree(RDFFluxStop.sizeLimit(size), HDTSupplier.memory(), - genActual.getStream(), HDTTestUtils.BASE_URI, RDFNotation.NTRIPLES, spec, - quiet ? null : this); + genActual.getStream(), HDTTestUtils.BASE_URI, + quadDict ? RDFNotation.NQUAD : RDFNotation.NTRIPLES, spec, quiet ? 
null : this); } finally { if (actual == null) { genActual.getThread().interrupt(); @@ -661,7 +699,7 @@ public void catTreeTest() throws IOException, ParserException, NotFoundException public void catTreeDiskTest() throws IOException, ParserException, NotFoundException, InterruptedException { LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier .createSupplierWithMaxSize(maxSize, SEED).withMaxElementSplit(maxElementSplit) - .withMaxLiteralSize(maxLiteralSize).withUnicode(true); + .withMaxLiteralSize(maxLiteralSize).withUnicode(true).withQuads(quadDict); // create DISK HDT LargeFakeDataSetStreamSupplier.ThreadedStream genActual = supplier @@ -669,7 +707,8 @@ public void catTreeDiskTest() throws IOException, ParserException, NotFoundExcep HDT actual = null; try { actual = HDTManager.catTree(RDFFluxStop.sizeLimit(size), HDTSupplier.disk(), genActual.getStream(), - HDTTestUtils.BASE_URI, RDFNotation.NTRIPLES, spec, quiet ? null : this); + HDTTestUtils.BASE_URI, quadDict ? RDFNotation.NQUAD : RDFNotation.NTRIPLES, spec, + quiet ? 
null : this); } finally { if (actual == null) { genActual.getThread().interrupt(); @@ -1037,13 +1076,24 @@ public void bigGenCatTreeDiskTest() throws ParserException, IOException { @RunWith(Parameterized.class) public static class HDTQTest extends HDTManagerTestBase { - @Parameterized.Parameters(name = "default graph:{0}") - public static Collection params() { - return List.of(true, false); + @Parameterized.Parameters(name = "default graph:{0} type:{1}") + public static Collection params() { + List params = new ArrayList<>(); + + for (String dict : List.of(HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG_QUAD)) { + for (boolean defaultGraph : List.of(true, false)) { + params.add(new Object[] { defaultGraph, dict }); + } + } + + return params; } @Parameterized.Parameter public boolean useDefaultGraph; + @Parameterized.Parameter(1) + public String dictType; private LargeFakeDataSetStreamSupplier createSupplier() { // fake data generation @@ -1055,6 +1105,7 @@ private void hdtqTesd(LargeFakeDataSetStreamSupplier supplier, Path d) throws No // run test Comparator csc = CharSequenceComparator.getInstance(); try (HDT h = HDTManager.mapIndexedHDT(d)) { + checkHDTConsistency(h); Path indexFile = d.resolveSibling(d.getFileName() + HDTVersion.get_index_suffix("-")); assertTrue("can't find " + indexFile, Files.exists(indexFile)); supplier.reset(); @@ -1087,11 +1138,20 @@ private void hdtqTesd(LargeFakeDataSetStreamSupplier supplier, Path d) throws No IteratorTripleString it2 = h.search(ts.getSubject(), ts.getPredicate(), ts.getObject(), graph); if (!it2.hasNext()) { BitmapTriplesIteratorPositionTest.printIterator(it2); - fail(); + fail("Can't find #" + count + " " + ts); } TripleString ts2 = it2.next(); assertEquals(ts, ts2); - assertFalse(it2.hasNext()); + if (it2.hasNext()) { + BitmapTriplesIteratorPositionTest.printIterator(it2); + System.err.println("***********"); + + for (int i = 0; i < 5 && (i == 0 || 
it2.hasNext()); i++) { + System.err.println(it2.next()); + } + + fail("Too many nodes for " + ts + " " + graph); + } // empty search to check wildcard IteratorTripleString it3 = h.search(ts.getSubject(), ts.getPredicate(), ts.getObject(), ""); @@ -1128,44 +1188,7 @@ private void hdtqTesd(LargeFakeDataSetStreamSupplier supplier, Path d) throws No Set dataset2 = new HashSet<>(dataset); roleDesc.append(",").append(role); - Iterator roleIt; - switch (role) { - case OBJECT -> { - Iterator sh = h.getDictionary().getShared().getSortedEntries(); - Iterator ob = h.getDictionary().getObjects().getSortedEntries(); - roleIt = new FetcherIterator<>() { - @Override - protected CharSequence getNext() { - if (sh.hasNext()) { - return sh.next(); - } - if (ob.hasNext()) { - return ob.next(); - } - return null; - } - }; - } - case SUBJECT -> { - Iterator sh = h.getDictionary().getShared().getSortedEntries(); - Iterator su = h.getDictionary().getSubjects().getSortedEntries(); - roleIt = new FetcherIterator<>() { - @Override - protected CharSequence getNext() { - if (sh.hasNext()) { - return sh.next(); - } - if (su.hasNext()) { - return su.next(); - } - return null; - } - }; - } - case PREDICATE -> roleIt = h.getDictionary().getPredicates().getSortedEntries(); - case GRAPH -> roleIt = h.getDictionary().getGraphs().getSortedEntries(); - default -> throw new AssertionError(); - } + Iterator roleIt = h.getDictionary().stringIterator(role, true); long componentId = 0; Set components = new HashSet<>(); @@ -1182,8 +1205,10 @@ protected CharSequence getNext() { case GRAPH -> h.search("", "", "", component); }; + long countEid = 0; while (eid.hasNext()) { TripleString tsstr = eid.next().tripleToString(); + countEid++; if (role == TripleComponentRole.GRAPH && !tsstr.getGraph().equals(str)) { // the default graph "" is searching all the // graphs, so we need @@ -1193,7 +1218,11 @@ protected CharSequence getNext() { if (!dataset2.remove(tsstr)) { 
BitmapTriplesIteratorPositionTest.printIterator(eid); fail("can't remove " + tsstr + "\nfor " + role + "=" + component + "(" + cid + ")" - + "\ndone: " + roleDesc.substring(1) + "\n" + String.join(",", components)); + + "\ndone: " + roleDesc.substring(1) + "\n" + + String.join(",", + components + "\nexists: " + dataset.contains(tsstr) + ", id: " + + countEid + "\npattern: " + + h.getDictionary().toTripleId(tsstr))); } } } @@ -1208,9 +1237,7 @@ public void iteratorStreamGenerationTest() throws IOException, ParserException, LargeFakeDataSetStreamSupplier supplier = createSupplier(); Iterator it = supplier.createTripleStringStream(); - HDTOptions spec = HDTOptions.of(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, - HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD, HDTOptionsKeys.DICTIONARY_TYPE_KEY, - HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION); + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.DICTIONARY_TYPE_KEY, dictType); Path root = tempDir.newFolder().toPath(); try { Path d = root.resolve("d.hdt"); @@ -1228,9 +1255,7 @@ public void fileReadGenerationTest() throws IOException, ParserException, NotFou LargeFakeDataSetStreamSupplier supplier = createSupplier(); Iterator it = supplier.createTripleStringStream(); - HDTOptions spec = HDTOptions.of(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, - HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD, HDTOptionsKeys.DICTIONARY_TYPE_KEY, - HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION); + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.DICTIONARY_TYPE_KEY, dictType); Path root = tempDir.newFolder().toPath(); try { Path nq = root.resolve("d.nq"); @@ -1685,12 +1710,263 @@ public void idFromIteratorTest() throws IOException, ParserException { } } + public static class MSDLangQuadTest extends HDTManagerTestBase { + @Test + public void msdLangTest() throws Exception { + LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier + .createSupplierWithMaxTriples(5000, 34).withQuads(true); + Path ntFile = 
tempDir.newFile().toPath(); + try { + + supplier.createNTFile(ntFile); + + HDTOptions spec = HDTOptions.of( + // use msdl + HDTOptionsKeys.DICTIONARY_TYPE_KEY, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG_QUAD); + + HDTOptions specFSD = HDTOptions.of(HDTOptionsKeys.DICTIONARY_TYPE_KEY, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION); + + try (HDT hdt = HDTManager.generateHDT(ntFile, HDTTestUtils.BASE_URI, RDFNotation.NQUAD, spec, + ProgressListener.ignore())) { + Dictionary msdl = hdt.getDictionary(); + assertEquals(HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG_QUAD, msdl.getType()); + assertTrue("not a msdlq", msdl instanceof MultipleLangBaseDictionary); + checkHDTConsistency(hdt); + + // the HDT is fine, does it contain all the triples? + + try (HDT hdtFSD = HDTManager.generateHDT(ntFile, HDTTestUtils.BASE_URI, RDFNotation.NQUAD, specFSD, + ProgressListener.ignore())) { + Dictionary fsd = hdtFSD.getDictionary(); + + assertTrue("not a fsd", fsd instanceof BaseDictionary); + assertEquals("not the same number of triples", hdtFSD.getTriples().getNumberOfElements(), + hdt.getTriples().getNumberOfElements()); + assertEquals("Not the same number of SHARED", fsd.getNshared(), msdl.getNshared()); + assertEquals("Not the same number of SUBJECTS", fsd.getNsubjects(), msdl.getNsubjects()); + assertEquals("Not the same number of PREDICATES", fsd.getNpredicates(), msdl.getNpredicates()); + assertEquals("Not the same number of OBJECTS", fsd.getNobjects(), msdl.getNobjects()); + + IteratorTripleString itMSDAT = hdt.search("", "", ""); + + while (itMSDAT.hasNext()) { + TripleString actual = itMSDAT.next(); + if (!hdt.search(actual).hasNext()) { + fail(format("Can't find back triple %s in", actual)); + } + } + + IteratorTripleString itMSDA = hdt.search("", "", ""); + + while (itMSDA.hasNext()) { + TripleString actual = itMSDA.next(); + + IteratorTripleString itE = hdtFSD.search(actual); + if (!itE.hasNext()) { + long sid = 
fsd.stringToId(actual.getSubject(), SUBJECT); + assertNotEquals("can't find SUB in FSD: " + actual.getSubject(), -1, sid); + long pid = fsd.stringToId(actual.getPredicate(), TripleComponentRole.PREDICATE); + assertNotEquals("can't find PRE in FSD: " + actual.getPredicate(), -1, pid); + long oid = fsd.stringToId(actual.getObject(), OBJECT); + assertNotEquals("can't find OBJ in FSD: " + actual.getObject(), -1, oid); + + assertEquals(actual.getSubject().toString(), fsd.idToString(sid, SUBJECT).toString()); + assertEquals(actual.getPredicate().toString(), + fsd.idToString(pid, TripleComponentRole.PREDICATE).toString()); + assertEquals(actual.getObject().toString(), fsd.idToString(oid, OBJECT).toString()); + + fail(format("Can't find triple %s in FSD", actual)); + } + assertEquals(actual.tripleToString(), itE.next().tripleToString()); + } + + IteratorTripleString itE = hdtFSD.search("", "", ""); + + while (itE.hasNext()) { + TripleString excepted = itE.next(); + IteratorTripleString itA = hdt.search(excepted.getSubject(), excepted.getPredicate(), + excepted.getObject()); + if (!itA.hasNext()) { + long sid = msdl.stringToId(excepted.getSubject(), SUBJECT); + assertNotEquals("can't find SUB in MSDL: " + excepted.getSubject(), -1, sid); + long pid = msdl.stringToId(excepted.getPredicate(), TripleComponentRole.PREDICATE); + assertNotEquals("can't find PRE in MSDL: " + excepted.getPredicate(), -1, pid); + long oid = msdl.stringToId(excepted.getObject(), OBJECT); + assertNotEquals("can't find OBJ in MSDL: " + excepted.getObject(), -1, oid); + + assertEquals(excepted.getSubject().toString(), + msdl.idToString(sid, SUBJECT).toString()); + assertEquals(excepted.getPredicate().toString(), + msdl.idToString(pid, TripleComponentRole.PREDICATE).toString()); + assertEquals(excepted.getObject().toString(), msdl.idToString(oid, OBJECT).toString()); + + TripleID tid = new TripleID(sid, pid, oid); + IteratorTripleID itA2 = hdt.getTriples().search(tid); + if (itA2.hasNext()) { + 
fail(format("can't find triple %s by string in MSDL HDT", excepted)); + } else { + fail(format("can't find triple %s by string or id in MSDL HDT (%s)", excepted, + tid)); + } + + } + TripleString actual = itA.next(); + assertComponentsNotNull("an element is null", actual); + assertEquals(excepted, actual); + } + } + + // try to load/map the HDT + + Path tempHDT = tempDir.newFile().toPath(); + try { + hdt.saveToHDT(tempHDT, ProgressListener.ignore()); + try (HDT hdtMap = HDTManager.mapHDT(tempHDT)) { + assertEquals(HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG_QUAD, + hdtMap.getDictionary().getType()); + assertEqualsHDT(hdt, hdtMap); + try (HDT hdtLoad = HDTManager.loadHDT(tempHDT)) { + assertEquals(HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION_LANG_QUAD, + hdtLoad.getDictionary().getType()); + assertEqualsHDT(hdt, hdtLoad); + assertEqualsHDT(hdtLoad, hdtMap); + } + } + } catch (Throwable t) { + try { + Files.deleteIfExists(tempHDT); + } catch (IOException e) { + t.addSuppressed(e); + } + throw t; + } + Files.deleteIfExists(tempHDT); + } + } catch (Throwable t) { + try { + Files.deleteIfExists(ntFile); + } catch (IOException e) { + t.addSuppressed(e); + } + throw t; + } + Files.deleteIfExists(ntFile); + } + + @Test + public void idFromIteratorTest() throws IOException, ParserException { + LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier + .createSupplierWithMaxTriples(5000, 34).withMaxLiteralSize(50).withMaxElementSplit(20) + .withQuads(true); + Path rootDir = tempDir.newFolder().toPath(); + try { + Path hdtPath = rootDir.resolve("ds.nt"); + + HDTOptions spec = HDTOptions.of( + // use msdl + HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG, + // use GD + HDTOptionsKeys.LOADER_TYPE_KEY, HDTOptionsKeys.LOADER_TYPE_VALUE_DISK, + + HDTOptionsKeys.LOADER_DISK_LOCATION_KEY, rootDir.resolve("gd"), + + HDTOptionsKeys.LOADER_DISK_FUTURE_HDT_LOCATION_KEY, rootDir.resolve("future.hdt")); + + 
supplier.createAndSaveFakeHDT(spec, hdtPath); + + try (HDT hdt = HDTManager.mapHDT(hdtPath)) { + Dictionary dictUkn = hdt.getDictionary(); + + if (!(dictUkn instanceof MultipleLangBaseDictionary dict)) { + fail("bad dict type: %s".formatted(dictUkn.getClass())); + return; + } + + assertTrue(dict.supportsDataTypeOfId()); + assertTrue(dict.supportsLanguageOfId()); + assertTrue(dict.supportsNodeTypeOfId()); + + for (TripleComponentRole role : TripleComponentRole.valuesNoGraph()) { + long idc = 1; + Iterator it = dict.stringIterator(role, true); + + while (it.hasNext()) { + CharSequence component = it.next(); + long id = idc++; + + CharSequence componentActual = dict.idToString(id, role); + + if (!component.toString().equals(componentActual.toString())) { + fail("%s != %s for id %d/%s".formatted(component, componentActual, id, role)); + } + } + } + Set loaded = new HashSet<>(); + for (TripleComponentRole role : new TripleComponentRole[] { SUBJECT, OBJECT }) { + long nshared = dict.getNshared(); + long idc = 1; + Iterator it = dict.stringIterator(role, true); + + while (it.hasNext()) { + CharSequence component = it.next(); + long id = idc++; + + if (!loaded.add(ByteString.of(component))) { + if (id > nshared) { // normal for shared + fail(format("the component %s(%s/%d) was loaded twice! ", component, role, id)); + } + } + + assertEquals("bad id mapping", id, dict.stringToId(component, role)); + + CharSequence componentActual = dict.idToString(id, role); + assertEquals("bad string mapping", component.toString(), componentActual.toString()); + + TripleComponentRole role2 = role == SUBJECT ? 
OBJECT : SUBJECT; + + if (id <= nshared) { + assertEquals("bad role logic", id, dict.stringToId(component, role2)); + } else { + assertTrue("bad role logic", dict.stringToId(component, role2) <= 0); + } + + RDFNodeType nodeType = RDFNodeType.typeof(component); + + RDFNodeType actualNodeType = dict.nodeTypeOfId(role, id); + if (nodeType != actualNodeType) { + StringBuilder bld = new StringBuilder("Sections: "); + for (int i = 0; i < dict.getObjectsSectionCount(); i++) { + MultipleLangBaseDictionary.ObjectIdLocationData sec = dict + .getObjectsSectionFromId(i); + bld.append("%d=%s(%s)\n".formatted(sec.location(), sec.name(), sec.type())); + } + fail("bad node type %s != %s for %s (%s/%d@%d)\n%s".formatted(nodeType, actualNodeType, + component, role, id, nshared, bld)); + } + if (role == OBJECT) { + CharSequence lang = LiteralsUtils.getLanguage(component).orElse(null); + assertEquals("bad lang", lang, dict.languageOfId(id)); + + CharSequence type = LiteralsUtils.getType(component); + assertEquals("bad type", type, dict.dataTypeOfId(id)); + } + } + } + } + } finally { + PathUtils.deleteDirectory(rootDir); + } + } + } + @RunWith(Parameterized.class) public static class DictionaryLangTypeTest extends HDTManagerTestBase { @Parameterized.Parameters(name = "dict:{0}") public static Collection params() { - return Arrays.asList(diskDict()); + return diskDict(); } @Parameterized.Parameter diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/storage/QEPCoreTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/storage/QEPCoreTest.java index b319fce1..b73bfc14 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/storage/QEPCoreTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/storage/QEPCoreTest.java @@ -370,8 +370,8 @@ private void assertEqualsDump(String text, QEPComponent c, Object ex, Object ac) } @Test - public void rdfNodeTypeMapTest() throws QEPCoreException, IOException { - try 
(HDT hdt = HDTManager.mapHDT(rootHDT); QEPCore core = new QEPCore(coreRoot, HDTOptions.of())) { + public void rdfNodeTypeMapTest() throws QEPCoreException { + try (QEPCore core = new QEPCore(coreRoot, HDTOptions.of())) { try (QueryCloseableIterator it = core.search()) { while (it.hasNext()) { QEPComponentTriple triple = it.next(); diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriplesTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriplesTest.java new file mode 100644 index 00000000..aeeb40c5 --- /dev/null +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriplesTest.java @@ -0,0 +1,188 @@ +package com.the_qa_company.qendpoint.core.triples.impl; + +import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; +import com.the_qa_company.qendpoint.core.options.ControlInformation; +import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.util.io.AbstractMapMemoryTest; +import com.the_qa_company.qendpoint.core.util.io.CountInputStream; +import org.apache.commons.io.file.PathUtils; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Random; + +import static org.junit.Assert.assertEquals; + +public class BitmapQuadTriplesTest extends AbstractMapMemoryTest { + private static IteratorTripleID 
fromList(List lst) { + return new IteratorTripleID() { + private int current; + private int lastLoc; + + @Override + public boolean hasPrevious() { + return false; + } + + @Override + public TripleID previous() { + return null; + } + + @Override + public void goToStart() { + current = 0; + } + + @Override + public boolean canGoTo() { + return true; + } + + @Override + public void goTo(long pos) { + current = (int) Math.min(lst.size(), Math.max(pos, 0)); + } + + @Override + public long estimatedNumResults() { + return lst.size(); + } + + @Override + public ResultEstimationType numResultEstimation() { + return ResultEstimationType.EXACT; + } + + @Override + public TripleComponentOrder getOrder() { + return TripleComponentOrder.SPO; + } + + @Override + public long getLastTriplePosition() { + return lastLoc; + } + + @Override + public boolean hasNext() { + return current < lst.size(); + } + + @Override + public TripleID next() { + if (!hasNext()) { + return null; + } + return lst.get(lastLoc = current++); + } + }; + } + + @Rule + public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build(); + + @Test + public void triplesTest() throws IOException { + Random rnd = new Random(5872); + final long size = 10_000; + + List tripleIDList = new ArrayList<>(); + + for (int i = 0; i < size; i++) { + TripleID id = new TripleID(); + + id.setSubject((i / 10) + 1); + id.setPredicate(1 + rnd.nextInt((int) size / 100)); + id.setObject(1 + rnd.nextInt((int) size / 5)); + id.setGraph(1 + rnd.nextInt((int) size / 750)); + + tripleIDList.add(id); + } + + tripleIDList.sort(Comparator.comparing(TripleID::getSubject).thenComparingLong(TripleID::getPredicate) + .thenComparingLong(TripleID::getObject).thenComparingLong(TripleID::getGraph)); + + // remove dupes + TripleID last = new TripleID(); + Iterator dupeIt = tripleIDList.iterator(); + + while (dupeIt.hasNext()) { + TripleID id = dupeIt.next(); + if (id.equals(last)) { + dupeIt.remove(); + continue; + } + + 
last.setAll(id.getSubject(), id.getPredicate(), id.getObject(), id.getGraph()); + } + + Path root = tempDir.newFolder().toPath(); + Path path = root.resolve("triples.bin"); + try { + try (BitmapQuadTriples triples = new BitmapQuadTriples()) { + triples.load(fromList(tripleIDList), ProgressListener.ignore()); + + IteratorTripleID it = triples.searchAll(); + Iterator it2 = tripleIDList.iterator(); + + while (it.hasNext()) { + assertEquals(it2.next(), it.next()); + } + + try (OutputStream stream = new BufferedOutputStream(Files.newOutputStream(path))) { + triples.save(stream, new ControlInformation(), ProgressListener.ignore()); + } + } + + // load + try (BitmapQuadTriples triples = new BitmapQuadTriples()) { + try (InputStream stream = new BufferedInputStream(Files.newInputStream(path))) { + ControlInformation ci = new ControlInformation(); + ci.load(stream); + triples.load(stream, ci, ProgressListener.ignore()); + } + + IteratorTripleID it = triples.searchAll(); + Iterator it2 = tripleIDList.iterator(); + + while (it.hasNext()) { + assertEquals(it2.next(), it.next()); + } + } + + // map + try (BitmapQuadTriples triples = new BitmapQuadTriples()) { + try (InputStream stream = new BufferedInputStream(Files.newInputStream(path))) { + CountInputStream cstream = new CountInputStream(stream); + triples.mapFromFile(cstream, path.toFile(), ProgressListener.ignore()); + } + + IteratorTripleID it = triples.searchAll(); + Iterator it2 = tripleIDList.iterator(); + + while (it.hasNext()) { + assertEquals(it2.next(), it.next()); + } + } + } finally { + PathUtils.deleteDirectory(root); + } + } + +} diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesTest.java index 7730407b..ccecea79 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesTest.java +++ 
b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesTest.java @@ -27,7 +27,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.Collection; -import java.util.List; +import java.util.stream.Stream; import static org.junit.Assert.*; @@ -180,14 +180,22 @@ public static abstract class AbstractTest extends AbstractMapMemoryTest { @RunWith(Parameterized.class) public static class DynamicTest extends AbstractTest { - @Parameterized.Parameters(name = "indexing: {0}") - public static Collection params() { - return List.of(HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_DISK, - HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_OPTIMIZED); + @Parameterized.Parameters(name = "indexing: {0}, dict {1}") + public static Collection params() { + return Stream + .of(HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_DISK, + HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_OPTIMIZED) + .flatMap(indexMethod -> Stream + .of(HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION) + .map(dict -> new Object[] { indexMethod, dict })) + .toList(); } @Parameterized.Parameter public String indexMethod; + @Parameterized.Parameter(1) + public String dict; public void diskBitmapIndexTest(boolean map, boolean disk) throws IOException, ParserException { Path root = tempDir.newFolder().toPath(); @@ -207,9 +215,11 @@ public void diskBitmapIndexTest(boolean map, boolean disk) throws IOException, P Files.copy(hdt1Path, hdt2Path); // optDisk = DISK, optDefault = OLD IMPLEMENTATION - HDTOptions optDisk = HDTOptions.of(HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_KEY, indexMethod); + HDTOptions optDisk = HDTOptions.of(HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_KEY, indexMethod, + HDTOptionsKeys.DICTIONARY_TYPE_KEY, dict); HDTOptions optDefault = HDTOptions.of(HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_KEY, - HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_LEGACY); + 
HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_LEGACY, HDTOptionsKeys.DICTIONARY_TYPE_KEY, + dict); // set config if (disk) { diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/util/io/compress/CompressTripleTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/util/io/compress/CompressTripleTest.java index c52e2882..e643b16b 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/util/io/compress/CompressTripleTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/util/io/compress/CompressTripleTest.java @@ -51,7 +51,7 @@ public void writeReadTest() throws InterruptedException, IOException { in.close(); } }, "ReadTest").attach(new ExceptionThread(() -> { - CompressTripleWriter writer = new CompressTripleWriter(out); + CompressTripleWriter writer = new CompressTripleWriter(out, false); try { for (IndexedTriple triple : triples) { writer.appendTriple(triple); @@ -92,7 +92,7 @@ public void writeReadTripleIDTest() throws InterruptedException, IOException { in.close(); } }, "ReadTest").attach(new ExceptionThread(() -> { - CompressTripleWriter writer = new CompressTripleWriter(out); + CompressTripleWriter writer = new CompressTripleWriter(out, false); try { for (TripleID triple : triples) { writer.appendTriple(triple); From 415229f9ec2ad5a0cfeca9bf462c159a54733258 Mon Sep 17 00:00:00 2001 From: qaate47 Date: Thu, 5 Oct 2023 10:32:17 +0200 Subject: [PATCH 4/5] GH-420 Fix HDTq generation file handlers --- .../core/compact/bitmap/EmptyBitmap.java | 78 ++++- .../compact/bitmap/GraphDeleteBitmap.java | 124 ------- .../bitmap/ModifiableMultiLayerBitmap.java | 12 + .../core/compact/bitmap/MultiLayerBitmap.java | 268 +++++++++++++++ .../compact/bitmap/MultiRoaringBitmap.java | 319 +++++++++++++----- .../impl/kcat/GroupBySubjectMapIterator.java | 16 +- .../core/dictionary/impl/kcat/KCatImpl.java | 10 +- .../qendpoint/core/enums/CompressionType.java | 49 ++- 
.../qendpoint/core/enums/RDFNotation.java | 20 ++ .../qendpoint/core/hdt/HDTVocabulary.java | 1 + .../qendpoint/core/hdt/impl/HDTBase.java | 3 + .../core/hdt/impl/HDTDiskImporter.java | 6 +- .../qendpoint/core/hdt/impl/WriteHDTImpl.java | 2 +- .../impl/diskimport/CompressTripleMapper.java | 9 +- .../TripleCompressionResultEmpty.java | 2 +- .../TripleCompressionResultFile.java | 6 +- .../TripleCompressionResultPartial.java | 4 +- .../quad/impl/BitmapTriplesIteratorGraph.java | 10 +- .../impl/BitmapTriplesIteratorGraphG.java | 17 +- .../qendpoint/core/rdf/RDFParserFactory.java | 2 + .../core/rdf/parsers/RDFParserRIOT.java | 20 +- .../qendpoint/core/tools/RDF2HDT.java | 3 +- .../qendpoint/core/triples/TempTriples.java | 2 + .../core/triples/impl/BitmapQuadTriples.java | 53 +-- .../core/triples/impl/BitmapTriples.java | 3 +- .../core/triples/impl/OneReadTempTriples.java | 12 + .../core/triples/impl/TriplesList.java | 14 + .../core/triples/impl/TriplesListLong.java | 15 + .../core/triples/impl/WriteBitmapTriples.java | 48 ++- .../util/LargeFakeDataSetStreamSupplier.java | 18 +- .../util/concurrent/ExceptionFunction.java | 7 + .../qendpoint/core/util/io/IOUtil.java | 59 ++++ .../io/compress/MapCompressTripleMerger.java | 8 +- .../bitmap/MultiRoaringBitmapTest.java | 126 ++++--- .../dictionary/impl/kcat/KCatMergerTest.java | 18 +- .../qendpoint/core/hdt/HDTManagerTest.java | 22 ++ 36 files changed, 980 insertions(+), 406 deletions(-) delete mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/GraphDeleteBitmap.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/ModifiableMultiLayerBitmap.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiLayerBitmap.java diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/EmptyBitmap.java 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/EmptyBitmap.java index ffd8bbf3..0e0a1601 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/EmptyBitmap.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/EmptyBitmap.java @@ -18,21 +18,34 @@ * in memory, will throw a {@link NotImplementedException} if we try to add a * non 0 value */ -public class EmptyBitmap implements ModifiableBitmap { +public class EmptyBitmap implements ModifiableBitmap, ModifiableMultiLayerBitmap { /** * create empty bitmap simulating a bitmap of a particular size * * @param size the size * @return bitmap */ - public static ModifiableBitmap of(long size) { - return new EmptyBitmap(size); + public static EmptyBitmap of(long size) { + return new EmptyBitmap(size, 0); + } + + /** + * create empty bitmap simulating a bitmap of a particular size + * + * @param size the size + * @param layers layers + * @return bitmap + */ + public static EmptyBitmap of(long size, long layers) { + return new EmptyBitmap(size, layers); } private long size; + private final long layers; - private EmptyBitmap(long size) { + private EmptyBitmap(long size, long layers) { this.size = size; + this.layers = layers; } @Override @@ -84,11 +97,56 @@ public long select1(long n) { return -1; } + @Override + public boolean access(long layer, long position) { + return false; + } + + @Override + public long rank1(long layer, long position) { + return rank1(position); + } + + @Override + public long rank0(long layer, long position) { + return rank0(position); + } + + @Override + public long selectPrev1(long layer, long start) { + return selectPrev1(start); + } + + @Override + public long selectNext1(long layer, long start) { + return selectNext1(start); + } + + @Override + public long select0(long layer, long n) { + return select0(n); + } + + @Override + public long select1(long layer, long n) { + return select1(n); + } + 
@Override public long getNumBits() { return size; } + @Override + public long countOnes(long layer) { + return countOnes(); + } + + @Override + public long countZeros(long layer) { + return countZeros(); + } + @Override public long countOnes() { return 0; @@ -129,6 +187,16 @@ public void load(InputStream input, ProgressListener listener) { @Override public String getType() { - return HDTVocabulary.BITMAP_TYPE_PLAIN; + return layers == 0 ? HDTVocabulary.BITMAP_TYPE_PLAIN : HDTVocabulary.BITMAP_TYPE_ROARING_MULTI; + } + + @Override + public long getLayersCount() { + return layers; + } + + @Override + public void set(long layer, long position, boolean value) { + set(position, value); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/GraphDeleteBitmap.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/GraphDeleteBitmap.java deleted file mode 100644 index 4290c57b..00000000 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/GraphDeleteBitmap.java +++ /dev/null @@ -1,124 +0,0 @@ -package com.the_qa_company.qendpoint.core.compact.bitmap; - -import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; -import com.the_qa_company.qendpoint.core.util.io.Closer; - -import java.io.Closeable; -import java.io.IOException; - -/** - * Bitmap to delete inside a graph, all the ids are mapped on a bitmap with the - * formula - * - *
- * (id, graph) -> id * graphs + graph
- * 
- * - * @author Antoine Willerval - */ -public class GraphDeleteBitmap implements SimpleModifiableBitmap, Closeable { - /** - * create empty graph delete bitmap - * - * @param graphs graphs count - * @param size triples count - * @return gdb - */ - public static GraphDeleteBitmap empty(long graphs, long size) { - return new GraphDeleteBitmap(EmptyBitmap.of(size * graphs), graphs); - } - - /** - * create memory graph delete bitmap - * - * @param graphs graphs count - * @param size triples count - * @return gdb - */ - public static GraphDeleteBitmap memory(long graphs, long size) { - return new GraphDeleteBitmap(MultiRoaringBitmap.memory(size * graphs), graphs); - } - - /** - * wrap a bitmap to create a {@link GraphDeleteBitmap} - * - * @param bitmap bitmap - * @param graphs graphs count - * @return bitmap if already instanceof graph delete bitmap and contains the - * right graphs number or wrap into GraphDeleteBitmap - */ - public static GraphDeleteBitmap wrap(Bitmap bitmap, long graphs) { - if (bitmap instanceof GraphDeleteBitmap gdb && gdb.graphs == graphs) { - // use directly the bitmap - return gdb; - } - return new GraphDeleteBitmap(bitmap, graphs); - } - - private final Bitmap store; - private final long graphs; - - private GraphDeleteBitmap(Bitmap store, long graphs) { - this.store = store; - this.graphs = graphs; - } - - /** - * access a bit in a graph - * - * @param graph graph - * @param position position - * @return bit value - */ - public boolean access(long graph, long position) { - return access(position * graphs + graph); - } - - /** - * set a bit in a graph - * - * @param graph graph - * @param position position - * @param value value - * @throws ClassCastException if the wrapped bitmap isn't a modifiable - * bitmap - */ - public void set(int graph, long position, boolean value) { - set(position * graphs + graph, value); - } - - @Override - public boolean access(long position) { - return store.access(position); - } - - @Override - public void 
set(long position, boolean value) { - ((ModifiableBitmap) store).set(position, value); - } - - @Override - public long getNumBits() { - return store.getNumBits(); - } - - @Override - public long getSizeBytes() { - return store.getSizeBytes(); - } - - @Override - public String getType() { - return store.getType(); - } - - @Override - public void append(boolean value) { - throw new NotImplementedException(); - } - - @Override - public void close() throws IOException { - Closer.closeSingle(store); - } -} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/ModifiableMultiLayerBitmap.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/ModifiableMultiLayerBitmap.java new file mode 100644 index 00000000..74e342eb --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/ModifiableMultiLayerBitmap.java @@ -0,0 +1,12 @@ +package com.the_qa_company.qendpoint.core.compact.bitmap; + +public interface ModifiableMultiLayerBitmap extends MultiLayerBitmap { + /** + * Set the value of the bit at position pos + * + * @param layer layer + * @param position pos + * @param value value + */ + void set(long layer, long position, boolean value); +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiLayerBitmap.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiLayerBitmap.java new file mode 100644 index 00000000..41f749fa --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiLayerBitmap.java @@ -0,0 +1,268 @@ +package com.the_qa_company.qendpoint.core.compact.bitmap; + +import com.the_qa_company.qendpoint.core.listener.ProgressListener; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +public interface MultiLayerBitmap extends Bitmap { + static MultiLayerBitmap ofBitmap(Bitmap bitmap) { + if (bitmap 
instanceof MultiLayerBitmap mlb) { + return mlb; + } + return new MultiLayerBitmap() { + + @Override + public boolean access(long layer, long position) { + assert layer == 0; + return bitmap.access(position); + } + + @Override + public long rank1(long layer, long position) { + return bitmap.rank1(position); + } + + @Override + public long rank0(long layer, long position) { + assert layer == 0; + return bitmap.rank0(position); + } + + @Override + public long selectPrev1(long layer, long start) { + assert layer == 0; + return bitmap.selectPrev1(start); + } + + @Override + public long selectNext1(long layer, long start) { + assert layer == 0; + return bitmap.selectNext1(start); + } + + @Override + public long select0(long layer, long n) { + assert layer == 0; + return bitmap.select0(n); + } + + @Override + public long select1(long layer, long n) { + assert layer == 0; + return bitmap.select1(n); + } + + @Override + public long getNumBits() { + return bitmap.getNumBits(); + } + + @Override + public long countOnes(long layer) { + assert layer == 0; + return bitmap.countOnes(); + } + + @Override + public long countZeros(long layer) { + assert layer == 0; + return bitmap.countZeros(); + } + + @Override + public long getSizeBytes() { + return bitmap.getSizeBytes(); + } + + @Override + public void save(OutputStream output, ProgressListener listener) throws IOException { + bitmap.save(output, listener); + } + + @Override + public void load(InputStream input, ProgressListener listener) throws IOException { + bitmap.load(input, listener); + } + + @Override + public String getType() { + return bitmap.getType(); + } + + @Override + public long getLayersCount() { + return 1; + } + }; + } + + /** + * Get the value of the bit at position pos + * + * @param layer layer + * @param position pos + * @return boolean + */ + boolean access(long layer, long position); + + /** + * Count the number of ones up to position pos (included) + * + * @param layer layer + * @param position pos + * 
@return long + */ + long rank1(long layer, long position); + + /** + * Count the number of zeros up to position pos (included) + * + * @param layer layer + * @param position pos + * @return long + */ + long rank0(long layer, long position); + + /** + * Return the position of the previous 1 before position start. + * + * @param layer layer + * @param start start + * @return long + */ + long selectPrev1(long layer, long start); + + /** + * Return the position of the next 1 after position start. + * + * @param layer layer + * @param start start + * @return long + */ + long selectNext1(long layer, long start); + + /** + * Find the position where n zeros have appeared up to that position. + * + * @param layer layer + * @param n n + * @return long + */ + long select0(long layer, long n); + + /** + * Find the position where n ones have appeared up to that position. + * + * @param layer layer + * @param n n + * @return long + */ + long select1(long layer, long n); + + /** + * Get number of total bits in the data structure + * + * @return long + */ + long getNumBits(); + + /** + * Count the number of total ones in the data structure. + * + * @param layer layer + * @return long + */ + long countOnes(long layer); + + /** + * Count the number of total zeros in the data structure. + * + * @param layer layer + * @return long + */ + long countZeros(long layer); + + /** + * Estimate the size in bytes of the total data structure. + * + * @return long + */ + long getSizeBytes(); + + /** + * Dump Bitmap into an {@link OutputStream} + * + * @param output The OutputStream + * @param listener Listener to get notified of loading progress. Can be null + * if no notifications needed. + * @throws IOException io exception while saving the bitmap + */ + void save(OutputStream output, ProgressListener listener) throws IOException; + + /** + * Load Bitmap from an {@link InputStream} + * + * @param input The InputStream + * @param listener Listener to get notified of loading progress. 
Can be null + * if no notifications needed. + * @throws IOException io exception while loading the bitmap + */ + void load(InputStream input, ProgressListener listener) throws IOException; + + /** + * @return the type of the data structure as defined in HDTVocabulary + */ + String getType(); + + /** + * @return layers count + */ + long getLayersCount(); + + @Override + default boolean access(long position) { + return access(0, position); + } + + @Override + default long rank1(long position) { + return rank1(0, position); + } + + @Override + default long rank0(long position) { + return rank0(0, position); + } + + @Override + default long selectPrev1(long start) { + return selectPrev1(0, start); + } + + @Override + default long selectNext1(long start) { + return selectNext1(0, start); + } + + @Override + default long select0(long n) { + return select0(0, n); + } + + @Override + default long select1(long n) { + return select1(0, n); + } + + @Override + default long countOnes() { + return countOnes(0); + } + + @Override + default long countZeros() { + return countZeros(0); + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmap.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmap.java index 90298a29..d63415a7 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmap.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmap.java @@ -8,19 +8,26 @@ import com.the_qa_company.qendpoint.core.util.io.IOUtil; import org.roaringbitmap.RoaringBitmap; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; import java.io.Closeable; import java.io.DataInputStream; import java.io.DataOutputStream; +import java.io.EOFException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; import 
java.nio.ByteOrder; +import java.nio.channels.Channels; import java.nio.channels.FileChannel; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.ArrayList; +import java.util.Collection; import java.util.List; +import java.util.function.Function; import java.util.stream.IntStream; import static java.lang.String.format; @@ -31,10 +38,15 @@ * * @author Antoine Willerval */ -public class MultiRoaringBitmap implements SimpleModifiableBitmap, Closeable { - // cookie + maps_nb + chunk_size + numbits - private static final int HEADER_SIZE = 8 + 4 + 4 + 8; - public static final long COOKIE = 0x6347008534687531L; +public class MultiRoaringBitmap implements Closeable, ModifiableMultiLayerBitmap { + // cookie + maps_nb + chunk_size + numbits + num_layers + private static final int HEADER_SIZE = 8 + 4 + 4 + 8 + 8; + public static final long COOKIE = 0x6347008534687532L; + + // End of the blocks + public static final byte BLOCK_END = 0x40; + // Bitmap block + public static final byte BLOCK_BITMAP = 0x41; /** * load mapped multi roaring bitmap stream @@ -47,6 +59,19 @@ public static MultiRoaringBitmap load(InputStream input) throws IOException { return new MultiRoaringBitmap(input); } + /** + * load mapped multi roaring bitmap stream + * + * @param input stream + * @return bitmap + * @throws IOException io exception when loading + */ + public static MultiRoaringBitmap load(Path input) throws IOException { + try (InputStream stream = new BufferedInputStream(Files.newInputStream(input))) { + return load(stream); + } + } + /** * load mapped multi roaring bitmap file * @@ -90,8 +115,8 @@ public static MultiRoaringBitmap mapped(Path fileName, long start, FileChannel c * @param size size * @return bitmap */ - public static MultiRoaringBitmap memory(long size) { - return memory(size, defaultChunkSize); + public static MultiRoaringBitmap memory(long size, long layers) { + return memory(size, layers, defaultChunkSize); } /** 
@@ -101,9 +126,9 @@ public static MultiRoaringBitmap memory(long size) { * @param chunkSize chunk size * @return bitmap */ - public static MultiRoaringBitmap memory(long size, int chunkSize) { + public static MultiRoaringBitmap memory(long size, long layers, int chunkSize) { try { - return new MultiRoaringBitmap(size, chunkSize, null); + return new MultiRoaringBitmap(size, layers, chunkSize, null); } catch (IOException e) { throw new AssertionError(e); } @@ -117,8 +142,8 @@ public static MultiRoaringBitmap memory(long size, int chunkSize) { * @param streamOutput stream output * @return bitmap */ - public static MultiRoaringBitmap memoryStream(long size, Path streamOutput) throws IOException { - return memoryStream(size, defaultChunkSize, streamOutput); + public static MultiRoaringBitmap memoryStream(long size, long layers, Path streamOutput) throws IOException { + return memoryStream(size, layers, defaultChunkSize, streamOutput); } /** @@ -129,18 +154,21 @@ public static MultiRoaringBitmap memoryStream(long size, Path streamOutput) thro * @param streamOutput stream output * @return bitmap */ - public static MultiRoaringBitmap memoryStream(long size, int chunkSize, Path streamOutput) throws IOException { - return new MultiRoaringBitmap(size, chunkSize, streamOutput); + public static MultiRoaringBitmap memoryStream(long size, long layers, int chunkSize, Path streamOutput) + throws IOException { + return new MultiRoaringBitmap(size, layers, chunkSize, streamOutput); } static int defaultChunkSize = 1 << 29; - final List maps = new ArrayList<>(); + final List> maps = new ArrayList<>(); + final int chunks; final int chunkSize; + final long layers; private final long numbits; private final boolean writable; private final FileChannel output; - private final Path outputPath; private long outputMax; + private boolean closed; private MultiRoaringBitmap(InputStream input) throws IOException { ByteBuffer buffer = ByteBuffer.wrap(IOUtil.readBuffer(input, HEADER_SIZE, 
ProgressListener.ignore())) @@ -151,36 +179,53 @@ private MultiRoaringBitmap(InputStream input) throws IOException { throw new IOException(format("found bad cookie %x != %x", cookie, COOKIE)); } - int chunks = buffer.getInt(8); + chunks = buffer.getInt(8); chunkSize = buffer.getInt(12); numbits = buffer.getLong(16); + layers = buffer.getLong(24); writable = true; output = null; - outputPath = null; - for (int i = 0; i < chunks; i++) { - input.skipNBytes(8); // skip size used for mapping + int type; + while ((type = input.read()) != BLOCK_END) { + switch (type) { + case BLOCK_BITMAP -> { + input.skipNBytes(Long.BYTES); // skip size used for mapping + long layer = IOUtil.readLong(input); - RoaringBitmap32 bitmap32 = new RoaringBitmap32(); - bitmap32.getHandle().deserialize(new DataInputStream(input)); - maps.add(bitmap32); - } + if (layer < 0) { + throw new IOException("Found negative layer!"); + } + // generate the layer + while (layer >= maps.size()) { + maps.add(new ArrayList<>()); + } + + List map = maps.get((int) layer); + RoaringBitmap32 bitmap32 = new RoaringBitmap32(); + bitmap32.getHandle().deserialize(new DataInputStream(input)); + map.add(bitmap32); + } + case -1 -> throw new EOFException(); + default -> throw new IOException(format("Found bad type format %x", type)); + } + } } - private MultiRoaringBitmap(long size, int chunkSize, Path output) throws IOException { + private MultiRoaringBitmap(long size, long layers, int chunkSize, Path output) throws IOException { writable = true; if (size < 0) { throw new IllegalArgumentException("Negative size: " + size); } this.chunkSize = chunkSize; + this.layers = layers; this.numbits = size; - int chunks = (int) ((size - 1) / chunkSize + 1); + chunks = (int) ((size - 1) / chunkSize + 1); try { if (output != null) { - this.outputPath = output; this.output = FileChannel.open(output, StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.CREATE); @@ -191,16 +236,20 
@@ private MultiRoaringBitmap(long size, int chunkSize, Path output) throws IOExcep map.putInt(8, chunks); map.putInt(12, chunkSize); map.putLong(16, size); + map.putLong(24, layers); } outputMax = HEADER_SIZE; } else { this.output = null; - this.outputPath = null; } - for (int i = 0; i < chunks; i++) { - maps.add(new RoaringBitmap32()); // to on use? + for (int j = 0; j < layers; j++) { + List map = new ArrayList<>(); + maps.add(map); + for (int i = 0; i < chunks; i++) { + map.add(new RoaringBitmap32()); // to on use? + } } } catch (Throwable t) { try { @@ -218,25 +267,48 @@ private MultiRoaringBitmap(long size, int chunkSize, Path output) throws IOExcep private MultiRoaringBitmap(Path fileName, FileChannel channel, long start) throws IOException { writable = false; output = null; - this.outputPath = null; - try (CloseMappedByteBuffer header = IOUtil.mapChannel(fileName, channel, FileChannel.MapMode.READ_ONLY, start, - HEADER_SIZE)) { - header.order(ByteOrder.LITTLE_ENDIAN); - - long cookie = header.getLong(0); - if (cookie != COOKIE) { - throw new IOException(format("Bad cookie for multi roaring bitmap %x != %x", cookie, COOKIE)); + try { + try (CloseMappedByteBuffer header = IOUtil.mapChannel(fileName, channel, FileChannel.MapMode.READ_ONLY, + start, HEADER_SIZE)) { + header.order(ByteOrder.LITTLE_ENDIAN); + + long cookie = header.getLong(0); + if (cookie != COOKIE) { + throw new IOException(format("Bad cookie for multi roaring bitmap %x != %x", cookie, COOKIE)); + } + chunks = header.getInt(8); + chunkSize = header.getInt(12); + numbits = header.getLong(16); + layers = header.getLong(24); + + for (int i = 0; i < layers; i++) { + maps.add(new ArrayList<>()); + } } - int bitmapCount = header.getInt(8); - chunkSize = header.getInt(12); - numbits = header.getLong(16); long shift = HEADER_SIZE + start; - for (int i = 0; i < bitmapCount; i++) { - long sizeBytes = IOUtil.readLong(shift, channel, ByteOrder.LITTLE_ENDIAN); - maps.add(new MappedRoaringBitmap( - 
IOUtil.mapChannel(fileName, channel, FileChannel.MapMode.READ_ONLY, shift += 8, sizeBytes))); - shift += sizeBytes; + + int type; + while (true) { + InputStream stream = Channels.newInputStream(channel.position(shift)); + if ((type = stream.read()) == BLOCK_END) { + break; + } + shift++; + + switch (type) { + case BLOCK_BITMAP -> { + long sizeBytes = IOUtil.readLong(stream); + long layer = IOUtil.readLong(stream); + shift += 8 + 8; + MappedRoaringBitmap bm = new MappedRoaringBitmap( + IOUtil.mapChannel(fileName, channel, FileChannel.MapMode.READ_ONLY, shift, sizeBytes)); + maps.get((int) layer).add(bm); + shift += sizeBytes; + } + case -1 -> throw new EOFException(); + default -> throw new IOException(format("unknown type %x", type)); + } } } catch (Throwable t) { try { @@ -251,8 +323,8 @@ private MultiRoaringBitmap(Path fileName, FileChannel channel, long start) throw } } - private void closeStreamBitmap(int index) throws IOException { - Bitmap map = maps.get(index); + private void closeStreamBitmap(int layer, int index) throws IOException { + Bitmap map = maps.get(layer).get(index); if (map == null) { return; } @@ -265,25 +337,29 @@ private void closeStreamBitmap(int index) throws IOException { long loc = outputMax; int sizeInBytes = handle.serializedSizeInBytes(); - outputMax += sizeInBytes + 8; + outputMax += sizeInBytes + 8 + 8 + 1; - try (CloseMappedByteBuffer buffer = IOUtil.mapChannel(outputPath, output, FileChannel.MapMode.READ_WRITE, loc, - sizeInBytes + 8)) { - ByteBuffer internalBuffer = buffer.getInternalBuffer().order(ByteOrder.LITTLE_ENDIAN); - internalBuffer.putLong(0, sizeInBytes); - handle.serialize(internalBuffer.slice(8, sizeInBytes)); - } + OutputStream os = new BufferedOutputStream(Channels.newOutputStream(output.position(loc))); + os.write(BLOCK_BITMAP); + IOUtil.writeLong(os, sizeInBytes); + IOUtil.writeLong(os, layer); + handle.serialize(new DataOutputStream(os)); + os.flush(); try { Closer.closeSingle(map); } finally { - maps.set(index, 
null); - System.gc(); + maps.get(layer).set(index, null); } } - @Override - public void save(OutputStream output, ProgressListener listener) throws IOException { + public void save(Path output) throws IOException { + try (OutputStream stream = new BufferedOutputStream(Files.newOutputStream(output))) { + save(stream); + } + } + + public void save(OutputStream output) throws IOException { if (this.output != null) { throw new IllegalArgumentException("Can't save a streamed bitmap"); } @@ -299,24 +375,31 @@ public void save(OutputStream output, ProgressListener listener) throws IOExcept - buffer.putInt(8, maps.size()); + buffer.putInt(8, chunks); // chunks per layer: the loaders read this field back as 'chunks' buffer.putInt(12, chunkSize); buffer.putLong(16, numbits); + buffer.putLong(24, maps.size()); output.write(bytes); - for (Bitmap map : maps) { - RoaringBitmap handle = ((RoaringBitmap32) map).getHandle(); + for (int i = 0; i < maps.size(); i++) { + // put the maps sequentially, maybe to test by putting the chunks + // closer? + for (Bitmap map : maps.get(i)) { + RoaringBitmap handle = ((RoaringBitmap32) map).getHandle(); - int sizeInBytes = handle.serializedSizeInBytes(); - byte[] array = new byte[8]; - ByteBuffer.wrap(array).order(ByteOrder.LITTLE_ENDIAN).putLong(0, sizeInBytes); - output.write(array); + output.write(BLOCK_BITMAP); + int sizeInBytes = handle.serializedSizeInBytes(); + IOUtil.writeLong(output, sizeInBytes); + IOUtil.writeLong(output, i); // layer - handle.serialize(new DataOutputStream(output)); + handle.serialize(new DataOutputStream(output)); + } } + output.write(BLOCK_END); } @Override - public boolean access(long position) { + public boolean access(long graph, long position) { int location = (int) (position / chunkSize); + List maps = this.maps.get((int) graph); if (location >= maps.size() || position < 0) { return false; } @@ -331,7 +414,17 @@ public long getNumBits() { @Override public long getSizeBytes() { - return HEADER_SIZE + maps.stream().mapToLong(Bitmap::getSizeBytes).sum(); + return HEADER_SIZE +
maps.stream().flatMap(Collection::stream).mapToLong(Bitmap::getSizeBytes).sum(); + } + + @Override + public void save(OutputStream output, ProgressListener listener) throws IOException { + save(output); + } + + @Override + public void load(InputStream input, ProgressListener listener) throws IOException { + throw new NotImplementedException(); } @Override @@ -340,41 +433,55 @@ public String getType() { } @Override - public long countOnes() { - return maps.stream().mapToLong(Bitmap::countOnes).sum(); + public long getLayersCount() { + return maps.size(); + } + + @Override + public long countOnes(long graph) { + return maps.get((int) graph).stream().mapToLong(Bitmap::countOnes).sum(); } @Override - public long select1(long n) { + public long countZeros(long layer) { + throw new NotImplementedException(); + } + + @Override + public long select1(long graph, long n) { long count = n; long delta = 0; int idx = 0; - while (idx < maps.size()) { - long countOnes = maps.get(idx).countOnes(); + List map = maps.get((int) graph); + while (true) { + if (!(idx < map.size())) + break; + long countOnes = map.get(idx).countOnes(); if (count <= countOnes) { break; } count -= countOnes; - delta += idx != maps.size() - 1 ? chunkSize : maps.get(idx).getNumBits(); + delta += idx != map.size() - 1 ? 
chunkSize : map.get(idx).getNumBits(); idx++; } - if (idx == maps.size()) { - if (maps.isEmpty()) { + if (idx == map.size()) { + if (map.isEmpty()) { return 0; } return delta; } - return delta + maps.get(idx).select1(count); + return delta + map.get(idx).select1(count); } @Override - public long rank1(long position) { + public long rank1(long graph, long position) { + List map = maps.get((int) graph); int location = (int) (position / chunkSize); - if (location >= maps.size() || position < 0) { + if (location >= map.size() || position < 0) { return 0; } @@ -382,32 +489,52 @@ public long rank1(long position) { long delta = 0; for (int i = 0; i < location; i++) { - delta += maps.get(i).getNumBits(); + delta += map.get(i).getNumBits(); } - return delta + maps.get(location).rank1(localLocation); + return delta + map.get(location).rank1(localLocation); } @Override - public long selectPrev1(long start) { - return select1(rank1(start)); + public long rank0(long layer, long position) { + return position + 1L - rank1(layer, position); } @Override - public long selectNext1(long start) { - long pos = rank1(start - 1); + public long selectPrev1(long graph, long start) { + return select1(graph, rank1(graph, start)); + } + + @Override + public long selectNext1(long graph, long start) { + long pos = rank1(graph, start - 1); if (pos < getNumBits()) - return select1(pos + 1); + return select1(graph, pos + 1); return -1; } + @Override + public long select0(long layer, long n) { + throw new NotImplementedException(); + } + @Override public void close() throws IOException { + if (closed) { + return; + } + closed = true; try { if (output != null) { // write remaining Closer.closeAll(IntStream.range(0, maps.size()) - .mapToObj(index -> (Closeable) (() -> closeStreamBitmap(index)))); + .mapToObj(layer -> IntStream.range(0, maps.get(layer) == null ? 
0 : maps.get(layer).size()) + .mapToObj(index -> (Closeable) (() -> closeStreamBitmap(layer, index)))) + .flatMap(Function.identity())); + + OutputStream os = Channels.newOutputStream(output.position(outputMax++)); + os.write(BLOCK_END); + os.flush(); } } finally { Closer.closeAll(maps, output); @@ -415,11 +542,22 @@ public void close() throws IOException { } @Override - public void set(long position, boolean value) { + public void set(long layer, long position, boolean value) { if (!writable) { throw new IllegalArgumentException("not writable"); } + if (layer >= maps.size()) { + for (long i = maps.size(); i <= layer; i++) { // only create the missing layers + List map = new ArrayList<>(); + maps.add(map); + for (int j = 0; j < chunks; j++) { + map.add(new RoaringBitmap32()); // to on use? + } + } + } + List maps = this.maps.get((int) layer); + int location = (int) (position / chunkSize); if (location >= maps.size() || position < 0) { throw new IllegalArgumentException(format("bit outside of range %d < 0 || map(%d)=%d >= %d", position, @@ -433,8 +571,8 @@ public void set(long position, boolean value) { } // clear previous try { - Closer.closeAll( - IntStream.range(0, location).mapToObj(index -> (Closeable) (() -> closeStreamBitmap(index)))); + Closer.closeAll(IntStream.range(0, location) + .mapToObj(index -> (Closeable) (() -> closeStreamBitmap((int) layer, index)))); } catch (IOException e) { throw new RuntimeException(e); } @@ -443,9 +581,4 @@ public void set(long position, boolean value) { // set the bit ((ModifiableBitmap) maps.get(location)).set(localLocation, value); } - - @Override - public void append(boolean value) { - throw new NotImplementedException(); - } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/GroupBySubjectMapIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/GroupBySubjectMapIterator.java index 68cf8533..9c799206 100644 ---
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/GroupBySubjectMapIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/GroupBySubjectMapIterator.java @@ -1,6 +1,6 @@ package com.the_qa_company.qendpoint.core.dictionary.impl.kcat; -import com.the_qa_company.qendpoint.core.compact.bitmap.GraphDeleteBitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.MultiLayerBitmap; import com.the_qa_company.qendpoint.core.hdt.HDT; import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; import com.the_qa_company.qendpoint.core.triples.TripleID; @@ -131,9 +131,8 @@ public static Iterator fromHDTs(KCatMerger merger, HDT[] hdts, List { // extract hdt elements for this index HDT hdt = hdts[hdtIndex]; - GraphDeleteBitmap deleteBitmap = deleteBitmaps == null ? null - : GraphDeleteBitmap.wrap(deleteBitmaps.get(hdtIndex), - quad ? hdt.getDictionary().getNgraphs() : 1); + MultiLayerBitmap deleteBitmap = deleteBitmaps == null ? null + : MultiLayerBitmap.ofBitmap(deleteBitmaps.get(hdtIndex)); if (hdt.getTriples().getNumberOfElements() == 0) { // no triples @@ -172,9 +171,8 @@ public static Iterator fromHDTs(KCatMerger merger, HDT[] hdts, List { // extract hdt elements for this index HDT hdt = hdts[hdtIndex]; - GraphDeleteBitmap deleteBitmap = deleteBitmaps == null ? null - : GraphDeleteBitmap.wrap(deleteBitmaps.get(hdtIndex), - quad ? hdt.getDictionary().getNgraphs() : 1); + MultiLayerBitmap deleteBitmap = deleteBitmaps == null ? 
null + : MultiLayerBitmap.ofBitmap(deleteBitmaps.get(hdtIndex)); if (hdt.getTriples().getNumberOfElements() == 0) { // no triples @@ -223,7 +221,7 @@ public static Iterator fromHDTs(KCatMerger merger, HDT[] hdts, List createIdMapper(KCatMerger merger, int hdtIndex, HDT hdt, Iterator it, - long start, GraphDeleteBitmap deleteBitmap) { + long start, MultiLayerBitmap deleteBitmap) { if (deleteBitmap == null) { return new MapIterator<>(it, (tid) -> { assert inHDT(tid, hdt); @@ -240,7 +238,7 @@ private static Iterator createIdMapper(KCatMerger merger, int hdtIndex }); } return MapFilterIterator.of(it, (tid, index) -> { - if (deleteBitmap.access(index + start)) { + if (deleteBitmap.access(0, index + start)) { return null; } assert inHDT(tid, hdt); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatImpl.java index 4881711a..338bb8c7 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatImpl.java @@ -2,8 +2,8 @@ import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap64Big; -import com.the_qa_company.qendpoint.core.compact.bitmap.GraphDeleteBitmap; import com.the_qa_company.qendpoint.core.compact.bitmap.ModifiableBitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.MultiLayerBitmap; import com.the_qa_company.qendpoint.core.compact.bitmap.NegBitmap; import com.the_qa_company.qendpoint.core.dictionary.DictionaryPrivate; import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; @@ -260,8 +260,7 @@ private KCatImpl(List hdtFileNames, List deleteBitmaps, HDT long c = 0; @SuppressWarnings("resource") - GraphDeleteBitmap bm = GraphDeleteBitmap.wrap(deleteBitmap, - quad ? 
hdt.getDictionary().getNgraphs() : 1); + MultiLayerBitmap bm = MultiLayerBitmap.ofBitmap(deleteBitmap); while (searchAll.hasNext()) { TripleID tripleID = searchAll.next(); @@ -347,14 +346,15 @@ public HDT cat() throws IOException { // create a GROUP BY subject iterator to get the new ordered // stream Iterator tripleIterator = GroupBySubjectMapIterator.fromHDTs(merger, hdts, deleteBitmaps); + long quads = quad ? dictionary.getNgraphs() : -1; try (WriteBitmapTriples triples = new WriteBitmapTriples(hdtFormat, location.resolve("triples"), - bufferSize, quad)) { + bufferSize, quads)) { long count = Arrays.stream(hdts).mapToLong(h -> h.getTriples().getNumberOfElements()).sum(); il.setRange(40, 80); il.setPrefix("Merge triples: "); il.notifyProgress(0, "start"); - triples.load(new OneReadTempTriples(tripleIterator, order, count), il); + triples.load(new OneReadTempTriples(tripleIterator, order, count, quads), il); profiler.popSection(); WriteHDTImpl writeHDT = new WriteHDTImpl(hdtFormat, location, dictionary, triples, diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/CompressionType.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/CompressionType.java index c5628d32..8fa2f3b6 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/CompressionType.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/CompressionType.java @@ -1,5 +1,17 @@ package com.the_qa_company.qendpoint.core.enums; +import com.the_qa_company.qendpoint.core.util.concurrent.ExceptionFunction; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import org.apache.commons.compress.compressors.xz.XZCompressorInputStream; +import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; 
+import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + /** * A compression type * @@ -10,19 +22,19 @@ public enum CompressionType { /** * gzip compression (.gz .tgz) */ - GZIP("gz", "tgz"), + GZIP(GZIPInputStream::new, GZIPOutputStream::new, "gz", "tgz"), /** * bzip compression (.bz2 .bz) */ - BZIP("bz2", "bz"), + BZIP(BZip2CompressorInputStream::new, BZip2CompressorOutputStream::new, "bz2", "bz"), /** * bzip compression (.xz) */ - XZ("xz"), + XZ(XZCompressorInputStream::new, XZCompressorOutputStream::new, "xz"), /** * no compression */ - NONE; + NONE(ExceptionFunction.identity(), ExceptionFunction.identity()); /** * try to guess a compression of a file with its name @@ -48,8 +60,35 @@ public static CompressionType guess(String fileName) { } private final String[] ext; + private final ExceptionFunction decompress; + private final ExceptionFunction compress; - CompressionType(String... ext) { + CompressionType(ExceptionFunction decompress, + ExceptionFunction compress, String... 
ext) { + this.decompress = decompress; + this.compress = compress; this.ext = ext; } + + /** + * decompress a stream + * + * @param stream stream + * @return decompressed stream + * @throws IOException io + */ + public InputStream decompress(InputStream stream) throws IOException { + return decompress.apply(stream); + } + + /** + * compress a stream + * + * @param stream stream + * @return compressed stream + * @throws IOException io + */ + public OutputStream compress(OutputStream stream) throws IOException { + return compress.apply(stream); + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/RDFNotation.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/RDFNotation.java index ca69fe52..648b4ecd 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/RDFNotation.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/RDFNotation.java @@ -74,6 +74,16 @@ public enum RDFNotation { */ NQUAD, + /** + * Trig + */ + TRIG, + + /** + * Trix + */ + TRIX, + /** * JSON-LD */ @@ -132,6 +142,12 @@ public static RDFNotation parse(String str) { case "hdt" -> { return HDT; } + case "trig" -> { + return TRIG; + } + case "trix" -> { + return TRIX; + } } throw new IllegalArgumentException(); } @@ -175,6 +191,10 @@ public static RDFNotation guess(String fileName) throws IllegalArgumentException return LIST; } else if (str.endsWith("hdt")) { return HDT; + } else if (str.endsWith("trig")) { + return TRIG; + } else if (str.endsWith("trix")) { + return TRIX; } throw new IllegalArgumentException("Could not guess the format for " + fileName); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTVocabulary.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTVocabulary.java index 3366aea7..a5bea304 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTVocabulary.java +++ 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/HDTVocabulary.java @@ -51,6 +51,7 @@ public class HDTVocabulary { public static final String VOID_PROPERTIES = VOID_BASE + "properties>"; public static final String VOID_DISTINCT_SUBJECTS = VOID_BASE + "distinctSubjects>"; public static final String VOID_DISTINCT_OBJECTS = VOID_BASE + "distinctObjects>"; + public static final String VOID_DISTINCT_GRAPHS = VOID_BASE + "distinctGraphs>"; // Header public static final String HEADER_NTRIPLES = "ntriples"; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTBase.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTBase.java index 66a48298..a6c478c8 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTBase.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTBase.java @@ -155,6 +155,9 @@ public void populateHeaderStructure(String baseUri) { header.insert(baseUri, HDTVocabulary.VOID_PROPERTIES, dictionary.getNpredicates()); header.insert(baseUri, HDTVocabulary.VOID_DISTINCT_SUBJECTS, dictionary.getNsubjects()); header.insert(baseUri, HDTVocabulary.VOID_DISTINCT_OBJECTS, dictionary.getNobjects()); + if (dictionary.supportGraphs()) { + header.insert(baseUri, HDTVocabulary.VOID_DISTINCT_GRAPHS, dictionary.getNgraphs()); + } // Structure String formatNode = "_:format"; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTDiskImporter.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTDiskImporter.java index 19fc6b6c..5b20e837 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTDiskImporter.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTDiskImporter.java @@ -202,7 +202,8 @@ public CompressTripleMapper compressDictionary(Iterator iterator) // create sections and triple mapping 
DictionaryPrivate dictionary = hdt.getDictionary(); CompressTripleMapper mapper = new CompressTripleMapper(basePath, compressionResult.getTripleCount(), chunkSize, - compressionResult.supportsGraph()); + compressionResult.supportsGraph(), + compressionResult.supportsGraph() ? compressionResult.getGraphCount() : 0); try (CompressFourSectionDictionary modifiableDictionary = new CompressFourSectionDictionary(compressionResult, mapper, listener, debugHDTBuilding, compressionResult.supportsGraph())) { dictionary.loadAsync(modifiableDictionary, listener); @@ -240,7 +241,8 @@ public void compressTriples(CompressTripleMapper mapper) throws ParserException, try { MapCompressTripleMerger tripleMapper = new MapCompressTripleMerger(basePath.resolve("tripleMapper"), new AsyncIteratorFetcher<>(TripleGenerator.of(mapper.getTripleCount(), mapper.supportsGraph())), - mapper, listener, order, bufferSize, chunkSize, 1 << ways); + mapper, listener, order, bufferSize, chunkSize, 1 << ways, + mapper.supportsGraph() ? mapper.getGraphsCount() : 0); tripleCompressionResult = tripleMapper.merge(workers, compressMode); } catch (KWayMerger.KWayMergerException | InterruptedException e) { throw new ParserException(e); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java index b606d927..96a6c729 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java @@ -40,7 +40,7 @@ public WriteHDTImpl(HDTOptions spec, CloseSuppressPath workingLocation, int buff // we need to have the bitmaps in memory, so we can't bypass the // implementation triples = new WriteBitmapTriples(this.spec, workingLocation.resolve("tripleBitmap"), bufferSize, - dictionary.supportGraphs()); + dictionary.supportGraphs() ? 
1 : -1); // small, can use default implementation header = HeaderFactory.createHeader(this.spec); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressTripleMapper.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressTripleMapper.java index 5edd18f8..d954b97d 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressTripleMapper.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/CompressTripleMapper.java @@ -31,8 +31,10 @@ public class CompressTripleMapper implements CompressFourSectionDictionary.NodeC private long shared = -1; private final long tripleCount; private final boolean quads; + private final long graphs; - public CompressTripleMapper(CloseSuppressPath location, long tripleCount, long chunkSize, boolean quads) { + public CompressTripleMapper(CloseSuppressPath location, long tripleCount, long chunkSize, boolean quads, + long graphs) { this.tripleCount = tripleCount; this.quads = quads; @@ -40,6 +42,7 @@ public CompressTripleMapper(CloseSuppressPath location, long tripleCount, long c locationPredicates = location.resolve("map_predicates"); locationObjects = location.resolve("map_objects"); locationGraph = location.resolve("map_graph"); + this.graphs = graphs; int numbits = BitUtil.log2(tripleCount + 2) + CompressUtil.INDEX_SHIFT; int maxElement = (int) Math.min(chunkSize / Long.BYTES / 3, Integer.MAX_VALUE - 5); subjects = new WriteLongArrayBuffer( @@ -172,4 +175,8 @@ public long getTripleCount() { public boolean supportsGraph() { return quads; } + + public long getGraphsCount() { + return graphs; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/TripleCompressionResultEmpty.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/TripleCompressionResultEmpty.java index 6407a500..bb4695a5 
100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/TripleCompressionResultEmpty.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/TripleCompressionResultEmpty.java @@ -27,7 +27,7 @@ public boolean hasNext() { public TripleID next() { return null; } - }, order, 0); + }, order, 0, 0); } @Override diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/TripleCompressionResultFile.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/TripleCompressionResultFile.java index 78c4e6fb..d1655e52 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/TripleCompressionResultFile.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/TripleCompressionResultFile.java @@ -19,10 +19,12 @@ public class TripleCompressionResultFile implements TripleCompressionResult { private final CompressTripleReader reader; private final TripleComponentOrder order; private final CloseSuppressPath triples; + private final long graphs; public TripleCompressionResultFile(long tripleCount, CloseSuppressPath triples, TripleComponentOrder order, - int bufferSize) throws IOException { + int bufferSize, long graphs) throws IOException { this.tripleCount = tripleCount; + this.graphs = graphs; this.reader = new CompressTripleReader(triples.openInputStream(bufferSize)); this.order = order; this.triples = triples; @@ -30,7 +32,7 @@ public TripleCompressionResultFile(long tripleCount, CloseSuppressPath triples, @Override public TempTriples getTriples() { - return new OneReadTempTriples(reader.asIterator(), order, tripleCount); + return new OneReadTempTriples(reader.asIterator(), order, tripleCount, graphs); } @Override diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/TripleCompressionResultPartial.java 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/TripleCompressionResultPartial.java index ea735a07..ffaebbf9 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/TripleCompressionResultPartial.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/TripleCompressionResultPartial.java @@ -26,12 +26,12 @@ public class TripleCompressionResultPartial implements TripleCompressionResult { private final TripleComponentOrder order; public TripleCompressionResultPartial(List files, long tripleCount, TripleComponentOrder order, - int bufferSize) throws IOException { + int bufferSize, long graphs) throws IOException { this.files = new ArrayList<>(files.size()); this.tripleCount = tripleCount; this.order = order; this.triples = new OneReadTempTriples(createBTree(files, 0, files.size(), bufferSize).asIterator(), order, - tripleCount); + tripleCount, graphs); } private ExceptionIterator createBTree(List files, int start, int end, diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraph.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraph.java index 3447716d..7eceb0df 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraph.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraph.java @@ -1,6 +1,6 @@ package com.the_qa_company.qendpoint.core.quad.impl; -import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.MultiLayerBitmap; import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; @@ -27,7 +27,7 @@ public 
BitmapTriplesIteratorGraph(BitmapQuadTriples triples, IteratorTripleID ti @Override protected TripleID getNext() { - List quadInfoAG = quads.getQuadInfoAG(); + MultiLayerBitmap quadInfoAG = quads.getQuadInfoAG(); while (true) { if (tid == null) { // we need to compute the next one if (!tidIt.hasNext()) { @@ -43,7 +43,7 @@ protected TripleID getNext() { // we are searching for a particular graph, we only need to // check if this graph // contains the current triple - if (quadInfoAG.get((int) graph - 1).access(posZ)) { + if (quadInfoAG.access(graph - 1, posZ)) { TripleID id = tid; tid = null; // pass to the next one in the future case return id; @@ -52,8 +52,8 @@ protected TripleID getNext() { continue; } - for (long i = tid.getGraph() + 1; i <= quadInfoAG.size(); i++) { - if (quadInfoAG.get((int) i - 1).access(posZ)) { + for (long i = tid.getGraph() + 1; i <= quadInfoAG.getLayersCount(); i++) { + if (quadInfoAG.access(i - 1, posZ)) { // found a graph containing it tid.setGraph(i); return tid; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraphG.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraphG.java index 8c3c7bf2..b6f73594 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraphG.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/quad/impl/BitmapTriplesIteratorGraphG.java @@ -1,6 +1,6 @@ package com.the_qa_company.qendpoint.core.quad.impl; -import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.MultiLayerBitmap; import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; @@ -11,7 +11,8 @@ public class BitmapTriplesIteratorGraphG extends FetcherIterator 
implements SuppliableIteratorTripleID { private final long graph; - private final Bitmap bitmapW; + private final MultiLayerBitmap mlb; + private final long posW; protected final long minZ, maxZ; protected final TripleID qid = new TripleID(); protected final BitmapQuadTriples triples; @@ -21,10 +22,12 @@ public BitmapTriplesIteratorGraphG(BitmapQuadTriples triples, TripleID pattern) this.triples = triples; this.graph = pattern.getGraph(); - bitmapW = triples.getQuadInfoAG().get((int) (graph - 1)); + mlb = triples.getQuadInfoAG(); - minZ = bitmapW.select1(1); - maxZ = bitmapW.select1(bitmapW.countOnes()); + posW = graph - 1; + + minZ = mlb.select1(posW, 1); + maxZ = mlb.select1(posW, mlb.countOnes(posW)); goToStart(); } @@ -38,7 +41,7 @@ protected TripleID getNext() { if (posZ == -1) { posZ = minZ; // start } else { - posZ = bitmapW.select1(bitmapW.rank1(posZ) + 1); // next + posZ = mlb.select1(posW, mlb.rank1(posW, posZ) + 1); // next } TripleID tripleID = triples.findTriple(posZ, qid); @@ -73,7 +76,7 @@ public void goTo(long pos) { @Override public long estimatedNumResults() { - return bitmapW.rank1(maxZ) - bitmapW.rank1(minZ) + 1; + return mlb.rank1(posW, maxZ) - mlb.rank1(posW, minZ) + 1; } @Override diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFParserFactory.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFParserFactory.java index 810a9578..f1101e77 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFParserFactory.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFParserFactory.java @@ -57,6 +57,8 @@ public static RDFParserCallback getParserCallback(RDFNotation notation, HDTOptio case TURTLE: case N3: case RDFXML: + case TRIG: + case TRIX: return new RDFParserRIOT(); case DIR: return new RDFParserDir(spec); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserRIOT.java 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserRIOT.java index 0cd7bc54..a89ec1e1 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserRIOT.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/RDFParserRIOT.java @@ -75,12 +75,14 @@ public void doParse(String fileName, String baseUri, RDFNotation notation, boole public void doParse(InputStream input, String baseUri, RDFNotation notation, boolean keepBNode, RDFCallback callback) throws ParserException { try { - ElemStringBuffer buffer = new ElemStringBuffer(notation == RDFNotation.NQUAD, callback); + ElemStringBuffer buffer = new ElemStringBuffer(callback); switch (notation) { case NTRIPLES -> parse(input, baseUri, Lang.NTRIPLES, keepBNode, buffer); case NQUAD -> parse(input, baseUri, Lang.NQUADS, keepBNode, buffer); case RDFXML -> parse(input, baseUri, Lang.RDFXML, keepBNode, buffer); case N3, TURTLE -> parse(input, baseUri, Lang.TURTLE, keepBNode, buffer); + case TRIG -> parse(input, baseUri, Lang.TRIG, keepBNode, buffer); + case TRIX -> parse(input, baseUri, Lang.TRIX, keepBNode, buffer); default -> throw new NotImplementedException("Parser not found for format " + notation); } } catch (Exception e) { @@ -90,11 +92,11 @@ public void doParse(InputStream input, String baseUri, RDFNotation notation, boo } private static class ElemStringBuffer implements StreamRDF { - private final TripleString triple; + private final TripleString triple = new TripleString(); + private final QuadString quad = new QuadString(); private final RDFCallback callback; - private ElemStringBuffer(boolean quad, RDFCallback callback) { - this.triple = quad ? 
new QuadString() : new TripleString(); + private ElemStringBuffer(RDFCallback callback) { this.callback = callback; } @@ -107,11 +109,11 @@ public void triple(Triple parsedTriple) { } @Override - public void quad(Quad quad) { - triple.setAll(JenaNodeFormatter.format(quad.getSubject()), JenaNodeFormatter.format(quad.getPredicate()), - JenaNodeFormatter.format(quad.getObject())); - triple.setGraph(JenaNodeFormatter.format(quad.getGraph())); - callback.processTriple(triple, 0); + public void quad(Quad parsedQuad) { + quad.setAll(JenaNodeFormatter.format(parsedQuad.getSubject()), + JenaNodeFormatter.format(parsedQuad.getPredicate()), + JenaNodeFormatter.format(parsedQuad.getObject()), JenaNodeFormatter.format(parsedQuad.getGraph())); + callback.processTriple(quad, 0); } @Override diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/tools/RDF2HDT.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/tools/RDF2HDT.java index d952139a..f8980fd7 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/tools/RDF2HDT.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/tools/RDF2HDT.java @@ -222,8 +222,9 @@ public void execute() throws ParserException, IOException { colorTool.logValue("Different subjects .... ", String.valueOf(hdt.getDictionary().getNsubjects())); colorTool.logValue("Different predicates .. ", String.valueOf(hdt.getDictionary().getNpredicates())); colorTool.logValue("Different objects ..... ", String.valueOf(hdt.getDictionary().getNobjects())); - if (isQuad) + if (hdt.getDictionary().supportGraphs()) { colorTool.logValue("Different graphs ...... ", String.valueOf(hdt.getDictionary().getNgraphs())); + } colorTool.logValue("Common Subject/Object . 
", String.valueOf(hdt.getDictionary().getNshared())); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TempTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TempTriples.java index 13906cad..0424cf50 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TempTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TempTriples.java @@ -93,4 +93,6 @@ public interface TempTriples extends TriplesPrivate, Closeable { void replaceAllIds(DictionaryIDMapping mapSubj, DictionaryIDMapping mapPred, DictionaryIDMapping mapObj, DictionaryIDMapping mapGraph); + + long getGraphsCount(); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java index 79895f06..381f7a94 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java @@ -19,12 +19,12 @@ package com.the_qa_company.qendpoint.core.triples.impl; import com.the_qa_company.qendpoint.core.compact.bitmap.AdjacencyList; -import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap375Big; import com.the_qa_company.qendpoint.core.compact.bitmap.BitmapFactory; import com.the_qa_company.qendpoint.core.compact.bitmap.ModifiableBitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.ModifiableMultiLayerBitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.MultiLayerBitmap; import com.the_qa_company.qendpoint.core.compact.bitmap.MultiRoaringBitmap; -import com.the_qa_company.qendpoint.core.compact.integer.VByte; import com.the_qa_company.qendpoint.core.compact.sequence.DynamicSequence; import 
com.the_qa_company.qendpoint.core.compact.sequence.SequenceFactory; import com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64Big; @@ -44,6 +44,7 @@ import com.the_qa_company.qendpoint.core.util.BitUtil; import com.the_qa_company.qendpoint.core.util.io.Closer; import com.the_qa_company.qendpoint.core.util.io.CountInputStream; +import com.the_qa_company.qendpoint.core.util.io.IOUtil; import com.the_qa_company.qendpoint.core.util.listener.IntermediateListener; import com.the_qa_company.qendpoint.core.util.listener.ListenerUtil; @@ -52,16 +53,13 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.List; /** * @author mario.arias */ public class BitmapQuadTriples extends BitmapTriples { - protected final List quadInfoAG = new ArrayList<>(); + protected ModifiableMultiLayerBitmap graphs = MultiRoaringBitmap.memory(0, 0); public BitmapQuadTriples() throws IOException { super(); @@ -82,6 +80,7 @@ public String getType() { @Override public void load(IteratorTripleID it, ProgressListener listener) { + IOUtil.closeQuietly(graphs); long number = it.estimatedNumResults(); DynamicSequence vectorY = new SequenceLog64Big(BitUtil.log2(number), number + 1); @@ -109,9 +108,6 @@ public void load(IteratorTripleID it, ProgressListener listener) { throw new IllegalFormatException("None of the components of a quad can be null"); } if (g > numGraphs) { - for (long i = numGraphs; i < g; i++) { - quadInfoAG.add(MultiRoaringBitmap.memory(number)); - } numGraphs = g; } long graphIndex = g - 1; @@ -120,7 +116,7 @@ public void load(IteratorTripleID it, ProgressListener listener) { tripleIndex += 1; } - quadInfoAG.get((int) graphIndex).set(tripleIndex, true); + graphs.set(graphIndex, tripleIndex, true); if (sameAsLast) { continue; @@ -202,8 +198,7 @@ public void load(TempTriples triples, ProgressListener listener) { public long size() { if (isClosed) return 0; - long graphs 
= quadInfoAG.stream().mapToLong(Bitmap::getSizeBytes).sum(); - return seqY.size() + seqZ.size() + bitmapY.getSizeBytes() + bitmapZ.getSizeBytes() + graphs; + return seqY.size() + seqZ.size() + bitmapY.getSizeBytes() + bitmapZ.getSizeBytes() + graphs.getSizeBytes(); } @Override @@ -219,10 +214,7 @@ public void save(OutputStream output, ControlInfo ci, ProgressListener listener) bitmapZ.save(output, iListener); seqY.save(output, iListener); seqZ.save(output, iListener); - VByte.encode(output, quadInfoAG.size()); - for (ModifiableBitmap b : quadInfoAG) { - b.save(output, iListener); - } + graphs.save(output, iListener); } @Override @@ -276,19 +268,9 @@ public void mapFromFile(CountInputStream input, File f, ProgressListener listene adjY = new AdjacencyList(seqY, bitmapY); adjZ = new AdjacencyList(seqZ, bitmapZ); - Closer.closeSingle(quadInfoAG); - quadInfoAG.clear(); - - long numGraphs = VByte.decode(input); + Closer.closeSingle(graphs); - Path fPath = f.toPath(); - for (long i = 0; i < numGraphs; i++) { - // map the multi roaring bitmap and skip the bytes - long base = input.getTotalBytes(); - MultiRoaringBitmap mapped = MultiRoaringBitmap.mapped(fPath, base); - input.skipNBytes(mapped.getSizeBytes()); - quadInfoAG.add(mapped); - } + graphs = MultiRoaringBitmap.mapped(f.toPath(), input.getTotalBytes()); isClosed = false; } @@ -324,26 +306,21 @@ public void load(InputStream input, ControlInfo ci, ProgressListener listener) t adjY = new AdjacencyList(seqY, bitmapY); adjZ = new AdjacencyList(seqZ, bitmapZ); - Closer.closeSingle(quadInfoAG); - quadInfoAG.clear(); - - long numGraphs = VByte.decode(input); + Closer.closeSingle(graphs); - for (long i = 0; i < numGraphs; i++) { - quadInfoAG.add(MultiRoaringBitmap.load(input)); - } + graphs = MultiRoaringBitmap.load(input); isClosed = false; } // Fast but dangerous covariant cast @Override - public List getQuadInfoAG() { - return quadInfoAG; + public MultiLayerBitmap getQuadInfoAG() { + return graphs; } @Override public void 
close() throws IOException { - Closer.closeAll((Closeable) super::close, quadInfoAG); + Closer.closeAll((Closeable) super::close, graphs); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java index f90e8bb9..8f417a71 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java @@ -23,6 +23,7 @@ import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap375Big; import com.the_qa_company.qendpoint.core.compact.bitmap.BitmapFactory; import com.the_qa_company.qendpoint.core.compact.bitmap.ModifiableBitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.MultiLayerBitmap; import com.the_qa_company.qendpoint.core.compact.sequence.DynamicSequence; import com.the_qa_company.qendpoint.core.compact.sequence.Sequence; import com.the_qa_company.qendpoint.core.compact.sequence.SequenceFactory; @@ -1296,7 +1297,7 @@ public void close() throws IOException { } } - public List getQuadInfoAG() { + public MultiLayerBitmap getQuadInfoAG() { throw new UnsupportedOperationException("Cannot get quad info from a BitmapTriples"); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java index ab2d1d0c..99000ed3 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java @@ -34,10 +34,16 @@ public class OneReadTempTriples implements TempTriples { private IteratorTripleID iterator; private TripleComponentOrder order; + private long graphs; public 
OneReadTempTriples(Iterator iterator, TripleComponentOrder order, long triples) { + this(iterator, order, triples, 0); + } + + public OneReadTempTriples(Iterator iterator, TripleComponentOrder order, long triples, long graphs) { this.iterator = new SimpleIteratorTripleID(iterator, order, triples); this.order = order; + this.graphs = graphs; } @Override @@ -143,6 +149,7 @@ public void load(TempTriples input, ProgressListener listener) { if (input instanceof OneReadTempTriples input2) { this.iterator = input2.iterator; this.order = input2.order; + this.graphs = input2.graphs; } else { throw new NotImplementedException(); } @@ -188,6 +195,11 @@ public void close() throws IOException { // nothing to do } + @Override + public long getGraphsCount() { + return graphs; + } + private static class SimpleIteratorTripleID implements IteratorTripleID { private final Iterator it; private final TripleComponentOrder order; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java index d1a2b614..e3aafdc5 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java @@ -64,6 +64,7 @@ public class TriplesList implements TempTriples { */ private TripleComponentOrder order; private long numValidTriples; + private long numGraphs; private boolean sorted; @@ -203,6 +204,7 @@ public void load(TempTriples input, ProgressListener listener) { arrayOfTriples.add(new TripleIDInt(iterator.next())); numValidTriples++; } + numGraphs = input.getGraphsCount(); sorted = false; } @@ -232,6 +234,11 @@ public boolean insert(TripleID... 
triples) { for (TripleID triple : triples) { arrayOfTriples.add(new TripleIDInt(triple)); numValidTriples++; + if (triple.isQuad()) { + if (numGraphs < triple.getGraph()) { + numGraphs = triple.getGraph(); + } + } } sorted = false; return true; @@ -253,6 +260,9 @@ public boolean insert(long subject, long predicate, long object) { public boolean insert(long subject, long predicate, long object, long graph) { arrayOfTriples.add(new TripleIDInt(subject, predicate, object, graph)); numValidTriples++; + if (numGraphs < graph) { + numGraphs = graph; + } sorted = false; return true; } @@ -551,4 +561,8 @@ public void replaceAllIds(DictionaryIDMapping mapSubj, DictionaryIDMapping mapPr } } + @Override + public long getGraphsCount() { + return numGraphs; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java index 0bab30fb..f2fbce58 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java @@ -62,6 +62,7 @@ public class TriplesListLong implements TempTriples { private long numValidTriples; private boolean sorted = false; + private long numGraphs; /** * Constructor, given an order to sort by @@ -200,6 +201,7 @@ public void load(TempTriples input, ProgressListener listener) { arrayOfTriples.add(iterator.next()); numValidTriples++; } + numGraphs = input.getGraphsCount(); sorted = false; } @@ -228,6 +230,11 @@ public boolean insert(TripleID... 
triples) { for (TripleID triple : triples) { arrayOfTriples.add(new TripleID(triple)); numValidTriples++; + if (triple.isQuad()) { + if (numGraphs < triple.getGraph()) { + numGraphs = triple.getGraph(); + } + } } sorted = false; return true; @@ -247,6 +254,9 @@ public boolean insert(long subject, long predicate, long object) { @Override public boolean insert(long subject, long predicate, long object, long graph) { + if (numGraphs < graph) { + numGraphs = graph; + } return this.insert(subject, predicate, object); } @@ -530,4 +540,9 @@ public void replaceAllIds(DictionaryIDMapping mapSubj, DictionaryIDMapping mapPr mapObj.getNewID(triple.getObject() - 1), mapGraph.getNewID(triple.getGraph() - 1)); } } + + @Override + public long getGraphsCount() { + return numGraphs; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java index 778cb535..0594b703 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java @@ -1,7 +1,8 @@ package com.the_qa_company.qendpoint.core.triples.impl; +import com.the_qa_company.qendpoint.core.compact.bitmap.EmptyBitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.ModifiableMultiLayerBitmap; import com.the_qa_company.qendpoint.core.compact.bitmap.MultiRoaringBitmap; -import com.the_qa_company.qendpoint.core.compact.integer.VByte; import com.the_qa_company.qendpoint.core.dictionary.Dictionary; import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; @@ -31,8 +32,6 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.file.Files; -import java.util.ArrayList; -import java.util.List; /** * Appendable write 
{@link BitmapTriples} version @@ -45,13 +44,13 @@ public class WriteBitmapTriples implements TriplesPrivate { private final AppendableWriteBitmap bitY, bitZ; private final CloseSuppressPath seqY, seqZ, triples; private SequenceLog64BigDisk vectorY, vectorZ; - private final List quadInfoAG; + private ModifiableMultiLayerBitmap quadInfoAG; public WriteBitmapTriples(HDTOptions spec, CloseSuppressPath triples, int bufferSize) throws IOException { - this(spec, triples, bufferSize, false); + this(spec, triples, bufferSize, -1); } - public WriteBitmapTriples(HDTOptions spec, CloseSuppressPath triples, int bufferSize, boolean quads) + public WriteBitmapTriples(HDTOptions spec, CloseSuppressPath triples, int bufferSize, long quads) throws IOException { String orderStr = spec.get(HDTOptionsKeys.TRIPLE_ORDER_KEY); if (orderStr == null) { @@ -67,8 +66,8 @@ public WriteBitmapTriples(HDTOptions spec, CloseSuppressPath triples, int buffer seqY = triples.resolve("seqY"); seqZ = triples.resolve("seqZ"); - if (quads) { - quadInfoAG = new ArrayList<>(); + if (quads < 0) { + quadInfoAG = EmptyBitmap.of(0, 0); } else { quadInfoAG = null; } @@ -90,18 +89,9 @@ public void save(OutputStream output, ControlInfo ci, ProgressListener listener) if (quadInfoAG != null) { // quads - int numGraphs = quadInfoAG.size(); - VByte.encode(output, numGraphs); + Closer.closeAll(quadInfoAG); - try { - Closer.closeAll(quadInfoAG); - } finally { - quadInfoAG.clear(); - } - - for (int i = 0; i < numGraphs; i++) { - Files.copy(this.triples.resolve("g-" + i + ".bin"), output); - } + Files.copy(this.triples.resolve("quads.bin"), output); } } @@ -203,6 +193,14 @@ public void load(TempTriples triples, ProgressListener listener) { numTriples = 0; long numGraphs = 0; + long graphs = triples.getGraphsCount(); + try { + quadInfoAG = graphs <= 0 ? 
null + : MultiRoaringBitmap.memoryStream(number, graphs, this.triples.resolve("quads.bin")); + } catch (IOException e) { + throw new RuntimeException(e); + } + while (it.hasNext()) { TripleID triple = it.next(); TripleOrderConvert.swapComponentOrder(triple, TripleComponentOrder.SPO, order); @@ -217,14 +215,6 @@ public void load(TempTriples triples, ProgressListener listener) { if (quadInfoAG != null) { if (g > numGraphs) { - for (long i = numGraphs; i < g; i++) { - try { - quadInfoAG.add( - MultiRoaringBitmap.memoryStream(number, this.triples.resolve("g-" + i + ".bin"))); - } catch (IOException e) { - throw new RuntimeException(e); - } - } numGraphs = g; } long graphIndex = g - 1; @@ -233,7 +223,7 @@ public void load(TempTriples triples, ProgressListener listener) { numTriples += 1; } - quadInfoAG.get((int) graphIndex).set(numTriples - 1, true); + quadInfoAG.set(graphIndex, numTriples - 1, true); if (sameAsLast) { continue; @@ -303,7 +293,7 @@ public TripleComponentOrder getOrder() { @Override public void close() throws IOException { - Closer.closeAll(bitY, bitZ, vectorY, seqY, vectorZ, seqZ, triples, quadInfoAG); + Closer.closeAll(bitY, bitZ, vectorY, seqY, vectorZ, seqZ, quadInfoAG, triples); } public class BitmapTriplesAppender { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/LargeFakeDataSetStreamSupplier.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/LargeFakeDataSetStreamSupplier.java index 1a9b4dd0..f2a88a95 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/LargeFakeDataSetStreamSupplier.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/LargeFakeDataSetStreamSupplier.java @@ -13,10 +13,11 @@ import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream; -import java.io.BufferedWriter; +import java.io.BufferedOutputStream; import 
java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.io.OutputStreamWriter; import java.io.PipedInputStream; import java.io.PipedOutputStream; import java.io.PrintStream; @@ -180,7 +181,20 @@ public void createNTFile(String file) throws IOException { * @see #createNTFile(java.lang.String) */ public void createNTFile(Path file) throws IOException { - try (BufferedWriter writer = Files.newBufferedWriter(file)) { + createNTFile(file, CompressionType.NONE); + } + + /** + * create a nt file from the stream + * + * @param file the file to write + * @param compressionType compression type + * @throws IOException io exception + * @see #createNTFile(java.lang.String) + */ + public void createNTFile(Path file, CompressionType compressionType) throws IOException { + try (Writer writer = new OutputStreamWriter( + new BufferedOutputStream(compressionType.compress(Files.newOutputStream(file))))) { createNTFile(writer); } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/concurrent/ExceptionFunction.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/concurrent/ExceptionFunction.java index 5e4784ef..d9a07136 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/concurrent/ExceptionFunction.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/concurrent/ExceptionFunction.java @@ -1,6 +1,13 @@ package com.the_qa_company.qendpoint.core.util.concurrent; +import java.io.IOException; +import java.io.InputStream; + @FunctionalInterface public interface ExceptionFunction { + static ExceptionFunction identity() { + return (a) -> a; + } + O apply(I value) throws E; } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java index 1f7ddb37..497e69ff 100644 --- 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java @@ -482,6 +482,46 @@ public static void writeLong(OutputStream output, long value) throws IOException output.write(writeBuffer, 0, 8); } + /** + * Write long, little endian + * + * @param output os + * @param value long + * @throws IOException io exception + */ + public static void writeLong(ByteBuffer output, long value) throws IOException { + byte[] writeBuffer = new byte[8]; + writeBuffer[7] = (byte) (value >>> 56); + writeBuffer[6] = (byte) (value >>> 48); + writeBuffer[5] = (byte) (value >>> 40); + writeBuffer[4] = (byte) (value >>> 32); + writeBuffer[3] = (byte) (value >>> 24); + writeBuffer[2] = (byte) (value >>> 16); + writeBuffer[1] = (byte) (value >>> 8); + writeBuffer[0] = (byte) (value); + output.put(writeBuffer, 0, 8); + } + + /** + * Write long, little endian + * + * @param output os + * @param value long + * @throws IOException io exception + */ + public static void writeLong(int idx, ByteBuffer output, long value) throws IOException { + byte[] writeBuffer = new byte[8]; + writeBuffer[7] = (byte) (value >>> 56); + writeBuffer[6] = (byte) (value >>> 48); + writeBuffer[5] = (byte) (value >>> 40); + writeBuffer[4] = (byte) (value >>> 32); + writeBuffer[3] = (byte) (value >>> 24); + writeBuffer[2] = (byte) (value >>> 16); + writeBuffer[1] = (byte) (value >>> 8); + writeBuffer[0] = (byte) (value); + output.put(idx, writeBuffer, 0, 8); + } + /** * Read long, little endian. 
* @@ -504,6 +544,19 @@ public static long readLong(InputStream input) throws IOException { + ((readBuffer[0] & 255)); } + public static long readLong(long location, FileChannel channel) throws IOException { + try (CloseMappedByteBuffer buffer = new CloseMappedByteBuffer("readLong", + channel.map(FileChannel.MapMode.READ_ONLY, location, 8), false)) { + byte[] readBuffer = new byte[8]; + buffer.get(readBuffer); + + return ((long) readBuffer[7] << 56) + ((long) (readBuffer[6] & 255) << 48) + + ((long) (readBuffer[5] & 255) << 40) + ((long) (readBuffer[4] & 255) << 32) + + ((long) (readBuffer[3] & 255) << 24) + ((readBuffer[2] & 255) << 16) + + ((readBuffer[1] & 255) << 8) + ((readBuffer[0] & 255)); + } + } + /** * Write int, little endian * @@ -659,6 +712,12 @@ public static void skip(InputStream in, long n) throws IOException { } } + public static void closeQuietly(Object output) { + if (output instanceof Closeable cl) { + closeQuietly(cl); + } + } + public static void closeQuietly(Closeable output) { if (output == null) return; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/MapCompressTripleMerger.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/MapCompressTripleMerger.java index 6b50f892..eb3df82f 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/MapCompressTripleMerger.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/compress/MapCompressTripleMerger.java @@ -43,10 +43,11 @@ public class MapCompressTripleMerger implements KWayMerger.KWayMergerImpl source, CompressTripleMapper mapper, MultiThreadListener listener, TripleComponentOrder order, int bufferSize, - long chunkSize, int k) { + long chunkSize, int k, long graphs) { this.baseFileName = baseFileName; this.source = source; this.mapper = mapper; @@ -55,6 +56,7 @@ public MapCompressTripleMerger(CloseSuppressPath baseFileName, AsyncIteratorFetc 
this.bufferSize = bufferSize; this.chunkSize = chunkSize; this.k = k; + this.graphs = graphs; } /** @@ -77,7 +79,7 @@ public TripleCompressionResult mergeToFile(int workers) if (sections.isEmpty()) { return new TripleCompressionResultEmpty(order); } - return new TripleCompressionResultFile(triplesCount.get(), sections.get(), order, bufferSize); + return new TripleCompressionResultFile(triplesCount.get(), sections.get(), order, bufferSize, graphs); } /** @@ -108,7 +110,7 @@ public TripleCompressionResult mergeToPartial() throws IOException, KWayMerger.K } } } - return new TripleCompressionResultPartial(files, triplesCount.get(), order, bufferSize) { + return new TripleCompressionResultPartial(files, triplesCount.get(), order, bufferSize, graphs) { @Override public void close() throws IOException { try { diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmapTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmapTest.java index 9d32c523..221732cf 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmapTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/compact/bitmap/MultiRoaringBitmapTest.java @@ -43,30 +43,30 @@ public void serialSyncTest() throws IOException { try { Path output = root.resolve("tmp.bin"); MultiRoaringBitmap.defaultChunkSize = 9; - try (MultiRoaringBitmap map = MultiRoaringBitmap.memoryStream(100, output)) { + try (MultiRoaringBitmap map = MultiRoaringBitmap.memoryStream(100, 1, output)) { assertEquals(9, map.chunkSize); - assertEquals(12, map.maps.size()); - map.set(0, true); - map.set(42, true); - map.set(80, true); - map.set(90, true); + assertEquals(12, map.maps.get(0).size()); + map.set(0, 0, true); + map.set(0, 42, true); + map.set(0, 80, true); + map.set(0, 90, true); } - try (MultiRoaringBitmap map = MultiRoaringBitmap.mapped(output)) { + try 
(BufferedInputStream stream = new BufferedInputStream(Files.newInputStream(output)); + MultiRoaringBitmap map = MultiRoaringBitmap.load(stream)) { for (int i = 0; i < 100; i++) { switch (i) { - case 0, 42, 80, 90 -> assertTrue(map.access(i)); - default -> assertFalse(map.access(i)); + case 0, 42, 80, 90 -> assertTrue(map.access(0, i)); + default -> assertFalse(map.access(0, i)); } } } - try (BufferedInputStream stream = new BufferedInputStream(Files.newInputStream(output)); - MultiRoaringBitmap map = MultiRoaringBitmap.load(stream)) { + try (MultiRoaringBitmap map = MultiRoaringBitmap.mapped(output)) { for (int i = 0; i < 100; i++) { switch (i) { - case 0, 42, 80, 90 -> assertTrue(map.access(i)); - default -> assertFalse(map.access(i)); + case 0, 42, 80, 90 -> assertTrue(map.access(0, i)); + default -> assertFalse(map.access(0, i)); } } } @@ -80,6 +80,7 @@ public void serialSyncTest() throws IOException { public void largeSerialSyncTest() throws IOException { final int seed = 684; final int size = 10_000; + final int layers = 21; Random rnd = new Random(seed); Path root = tempDir.newFolder().toPath(); @@ -89,13 +90,16 @@ public void largeSerialSyncTest() throws IOException { MultiRoaringBitmap.defaultChunkSize = size / 9; - try (MultiRoaringBitmap map = MultiRoaringBitmap.memory(size)) { + try (MultiRoaringBitmap map = MultiRoaringBitmap.memory(size, layers)) { assertEquals(MultiRoaringBitmap.defaultChunkSize, map.chunkSize); - assertEquals((size - 1) / map.chunkSize + 1, map.maps.size()); + assertEquals(layers, map.maps.size()); + assertEquals((size - 1) / map.chunkSize + 1, map.maps.get(0).size()); for (int i = 0; i < size / 50; i++) { + int layer = rnd.nextInt(layers); int position = rnd.nextInt(size); - map.set(position, true); + map.set(layer, position, true); + assertTrue(map.access(layer, position)); } try (BufferedOutputStream out = new BufferedOutputStream(Files.newOutputStream(output))) { @@ -105,18 +109,19 @@ public void largeSerialSyncTest() throws 
IOException { rnd = new Random(seed); - try (MultiRoaringBitmap map = MultiRoaringBitmap.mapped(output)) { + try (MultiRoaringBitmap map = MultiRoaringBitmap.load(output)) { for (int i = 0; i < size / 50; i++) { - assertTrue(map.access(rnd.nextInt(size))); + int layer = rnd.nextInt(layers); + assertTrue(map.access(layer, rnd.nextInt(size))); } } rnd = new Random(seed); - try (BufferedInputStream stream = new BufferedInputStream(Files.newInputStream(output)); - MultiRoaringBitmap map = MultiRoaringBitmap.load(stream)) { + try (MultiRoaringBitmap map = MultiRoaringBitmap.mapped(output)) { for (int i = 0; i < size / 50; i++) { - assertTrue(map.access(rnd.nextInt(size))); + int layer = rnd.nextInt(layers); + assertTrue(map.access(layer, rnd.nextInt(size))); } } @@ -126,58 +131,69 @@ public void largeSerialSyncTest() throws IOException { } @Test + @SuppressWarnings("resource") public void rankSelectTest() throws IOException { final int seed = 684; final int size = 10_000; + final int layers = 20; Random rnd = new Random(seed); MultiRoaringBitmap.defaultChunkSize = size / 9; - try (MultiRoaringBitmap map = MultiRoaringBitmap.memory(size); - Bitmap375Big memmap = Bitmap375Big.memory(size)) { - assertEquals(MultiRoaringBitmap.defaultChunkSize, map.chunkSize); - assertEquals((size - 1) / map.chunkSize + 1, map.maps.size()); + try (MultiRoaringBitmap map = MultiRoaringBitmap.memory(size, layers)) { + Bitmap375Big[] memmaps = new Bitmap375Big[layers]; - for (int i = 0; i < size / 50; i++) { - int position = rnd.nextInt(size); - map.set(position, true); - memmap.set(position, true); + for (int i = 0; i < memmaps.length; i++) { + memmaps[i] = Bitmap375Big.memory(size); } + assertEquals(MultiRoaringBitmap.defaultChunkSize, map.chunkSize); + assertEquals((size - 1) / map.chunkSize + 1, map.maps.get(0).size()); - memmap.updateIndex(); + for (int l = 0; l < layers; l++) { + Bitmap375Big memmap = memmaps[l]; + for (int i = 0; i < size / 50; i++) { + int position = rnd.nextInt(size); 
+ map.set(l, position, true); + memmap.set(position, true); + } + memmap.updateIndex(); + } - long numBits = memmap.countOnes(); + for (int l = 0; l < layers; l++) { + Bitmap375Big memmap = memmaps[l]; + long numBits = memmap.countOnes(); - assertEquals("countOnes", numBits, map.countOnes()); + assertEquals("countOnes", numBits, map.countOnes(l)); - for (int i = 0; i < size; i++) { - assertEquals("access#" + i + "/" + size, memmap.access(i), map.access(i)); - } + for (int i = 0; i < size; i++) { + assertEquals("access#" + i + "/" + size, memmap.access(i), map.access(l, i)); + } - for (int i = 0; i < size; i++) { - assertEquals("rank1#" + i + "/" + size, memmap.rank1(i), map.rank1(i)); - } - for (int i = 0; i < size; i++) { - assertEquals("rank0#" + i + "/" + size, memmap.rank0(i), map.rank0(i)); - } - for (int i = 0; i < numBits; i++) { - long n = i; - long j = -1; - while (n > 0) { - if (memmap.access(++j)) { - n--; + for (int i = 0; i < size; i++) { + assertEquals("rank1#" + i + "/" + size, memmap.rank1(i), map.rank1(l, i)); + } + for (int i = 0; i < size; i++) { + assertEquals("rank0#" + i + "/" + size, memmap.rank0(i), map.rank0(l, i)); + } + for (int i = 0; i < numBits; i++) { + long n = i; + long j = -1; + while (n > 0) { + if (memmap.access(++j)) { + n--; + } } + assertEquals(j, memmap.select1(i)); + assertEquals("select1#" + i + "/" + numBits, memmap.select1(i), map.select1(l, i)); } - assertEquals(j, memmap.select1(i)); - assertEquals("select1#" + i + "/" + numBits, memmap.select1(i), map.select1(i)); - } - for (int i = 0; i < numBits; i++) { - assertEquals("selectNext1#" + i + "/" + numBits, memmap.selectNext1(i), map.selectNext1(i)); - } + for (int i = 0; i < numBits; i++) { + assertEquals("selectNext1#" + i + "/" + numBits, memmap.selectNext1(i), map.selectNext1(l, i)); + } - for (int i = 0; i < numBits; i++) { - assertEquals("selectPrev1#" + i + "/" + numBits, memmap.selectPrev1(i), map.selectPrev1(i)); + for (int i = 0; i < numBits; i++) { + 
assertEquals("selectPrev1#" + i + "/" + numBits, memmap.selectPrev1(i), map.selectPrev1(l, i)); + } } } } diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatMergerTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatMergerTest.java index 50f5811f..b5bd98f3 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatMergerTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/dictionary/impl/kcat/KCatMergerTest.java @@ -2,8 +2,9 @@ import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; import com.the_qa_company.qendpoint.core.compact.bitmap.BitmapFactory; -import com.the_qa_company.qendpoint.core.compact.bitmap.GraphDeleteBitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.EmptyBitmap; import com.the_qa_company.qendpoint.core.compact.bitmap.ModifiableBitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.MultiRoaringBitmap; import com.the_qa_company.qendpoint.core.dictionary.Dictionary; import com.the_qa_company.qendpoint.core.dictionary.DictionaryPrivate; import com.the_qa_company.qendpoint.core.dictionary.DictionarySection; @@ -600,8 +601,8 @@ public void catTest() throws ParserException, IOException, NotFoundException { for (Path file : files) { try (HDT hdt = supplier.createFakeHDT(spec)) { hdt.saveToHDT(file); - deletes.add(GraphDeleteBitmap.empty(hdt.getDictionary().getNgraphs(), - hdt.getTriples().getNumberOfElements())); + deletes.add( + EmptyBitmap.of(hdt.getTriples().getNumberOfElements(), hdt.getDictionary().getNgraphs())); } } @@ -670,8 +671,9 @@ public void catDiffTest() throws ParserException, IOException, NotFoundException long graphs = hdt.getDictionary().supportGraphs() ? 
hdt.getDictionary().getNgraphs() : 1; assert graphs > 0; long triples = hdt.getTriples().getNumberOfElements(); + assertNotEquals(0, triples); - GraphDeleteBitmap memory = GraphDeleteBitmap.memory(graphs, triples + 1); + MultiRoaringBitmap memory = MultiRoaringBitmap.memory(triples + 1, graphs); // create delete bitmap int toDelete = (int) (triples / 200); @@ -694,7 +696,9 @@ public void catDiffTest() throws ParserException, IOException, NotFoundException IteratorTripleString it = hdt.searchAll(); - while (it.hasNext()) { + assertTrue(it.hasNext()); + + do { TripleString ts = it.next(); long pos = it.getLastTriplePosition(); long g = ts.getGraph().isEmpty() ? 0 @@ -706,7 +710,7 @@ public void catDiffTest() throws ParserException, IOException, NotFoundException // not deleted, we can add it dataset.add(ts.tripleToString()); } - } + } while (it.hasNext()); deleteBitmaps.add(memory); } @@ -716,6 +720,8 @@ public void catDiffTest() throws ParserException, IOException, NotFoundException Path exceptedHDT = root.resolve("excepted.hdt"); + assertNotEquals(0, dataset.size()); + try (HDT hdt = HDTManager.generateHDT(dataset.iterator(), LargeFakeDataSetStreamSupplier.BASE_URI, spec, ProgressListener.ignore())) { hdt.saveToHDT(exceptedHDT); diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java index 416b51b5..59113768 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/hdt/HDTManagerTest.java @@ -1072,6 +1072,28 @@ public void bigGenCatTreeDiskTest() throws ParserException, IOException { System.out.println(hdt.getTriples().getNumberOfElements()); } } + + @Test + public void quadTest() throws IOException, ParserException { + Path file = Path.of("C:\\Users\\wilat\\workspace\\hdtq\\trusty.LIDDIv1.01.trig"); + HDTOptions spec = 
HDTOptions + .readFromFile(Path.of("C:\\Users\\wilat\\workspace\\hdtq\\qendpoint-cli-1.13.7\\bin\\opt.hdtspec")); + + long size = 100_000; + int[] graph = { 10, 100, 1000, 10000, 25000, 50000 }; + + for (int g : graph) { + Path ff = file.resolveSibling("ds-" + g + ".nq.gz"); + LargeFakeDataSetStreamSupplier.createSupplierWithMaxTriples(size, (int) (Math.tan(g) * 100)) + .withMaxElementSplit((int) (size / 500)).withMaxGraph(g).withQuads(true) + .createNTFile(ff, CompressionType.GZIP); + + try (HDT hdt = HDTManager.generateHDT(ff, ff.toString().replace('\\', '/'), RDFNotation.NQUAD, spec, + ProgressListener.sout())) { + hdt.saveToHDT(ff.resolveSibling("big.hdtq")); + } + } + } } @RunWith(Parameterized.class) From 5232b01f7bb13f9a42052dba2f5a172d0d69ca1d Mon Sep 17 00:00:00 2001 From: qaate47 Date: Sun, 29 Oct 2023 20:26:55 +0100 Subject: [PATCH 5/5] GH-429 Add multi-indexes --- .../core/enums/TripleComponentOrder.java | 65 +++- .../qendpoint/core/hdt/impl/HDTImpl.java | 56 ++- .../qendpoint/core/hdt/impl/WriteHDTImpl.java | 8 + .../hdt/impl/diskimport/MapOnCallHDT.java | 39 ++ .../qendpoint/core/header/PlainHeader.java | 13 + .../core/header/PlainHeaderIterator.java | 5 + .../iterator/DictionaryTranslateIterator.java | 6 + .../DictionaryTranslateIteratorBuffer.java | 6 + .../core/options/HDTOptionsKeys.java | 6 + .../qendpoint/core/rdf/RDFAccess.java | 45 +++ .../core/rdf/parsers/JenaModelIterator.java | 6 + .../core/triples/IteratorTripleString.java | 6 + .../qendpoint/core/triples/TripleID.java | 40 +- .../qendpoint/core/triples/Triples.java | 29 +- .../core/triples/TriplesPrivate.java | 32 +- .../core/triples/impl/BitmapQuadTriples.java | 22 ++ .../core/triples/impl/BitmapTriples.java | 140 ++++++- .../core/triples/impl/BitmapTriplesIndex.java | 22 ++ .../triples/impl/BitmapTriplesIndexFile.java | 357 ++++++++++++++++++ .../triples/impl/BitmapTriplesIterator.java | 28 +- .../impl/BitmapTriplesIteratorYFOQ.java | 2 +- .../impl/BitmapTriplesIteratorZFOQ.java | 2 +- 
.../impl/DiskTriplesReorderSorter.java | 156 ++++++++ .../core/triples/impl/OneReadTempTriples.java | 16 + .../core/triples/impl/TriplesList.java | 16 + .../core/triples/impl/TriplesListLong.java | 15 + .../core/triples/impl/WriteBitmapTriples.java | 16 + .../qendpoint/core/util/io/IOUtil.java | 22 ++ .../triples/impl/BitmapTriplesOrderTest.java | 99 +++++ .../core/triples/impl/BitmapTriplesTest.java | 13 + .../qendpoint/model/HDTValue.java | 27 ++ .../qendpoint/model/SimpleBNodeHDT.java | 12 +- .../qendpoint/model/SimpleIRIHDT.java | 10 + .../qendpoint/model/SimpleLiteralHDT.java | 12 +- .../qendpoint/store/EndpointStore.java | 19 +- .../qendpoint/store/HDTConverter.java | 2 +- .../qendpoint/tools/QEPSearch.java | 4 +- .../qendpoint/utils/BitArrayDisk.java | 23 +- .../qendpoint/utils/CloseSafeHDT.java | 18 + .../qendpoint/store/EndpointStoreTest.java | 2 +- 40 files changed, 1327 insertions(+), 90 deletions(-) create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndex.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndexFile.java create mode 100644 qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/DiskTriplesReorderSorter.java create mode 100644 qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesOrderTest.java diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentOrder.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentOrder.java index 92608d1e..fb648429 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentOrder.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentOrder.java @@ -19,6 +19,8 @@ package com.the_qa_company.qendpoint.core.enums; +import java.util.Map; + /** * Indicates the order of the triples */ @@ -26,41 
+28,88 @@ public enum TripleComponentOrder { /** * Subject, predicate, object */ - Unknown(null, null, null), + Unknown(null, null, null, 0), /** * Subject, predicate, object */ - SPO(TripleComponentRole.SUBJECT, TripleComponentRole.PREDICATE, TripleComponentRole.OBJECT), + SPO(TripleComponentRole.SUBJECT, TripleComponentRole.PREDICATE, TripleComponentRole.OBJECT, 1), /** * Subject, object, predicate */ - SOP(TripleComponentRole.SUBJECT, TripleComponentRole.OBJECT, TripleComponentRole.PREDICATE), + SOP(TripleComponentRole.SUBJECT, TripleComponentRole.OBJECT, TripleComponentRole.PREDICATE, 1 << 1), /** * Predicate, subject, object */ - PSO(TripleComponentRole.PREDICATE, TripleComponentRole.SUBJECT, TripleComponentRole.OBJECT), + PSO(TripleComponentRole.PREDICATE, TripleComponentRole.SUBJECT, TripleComponentRole.OBJECT, 1 << 2), /** * Predicate, object, subject */ - POS(TripleComponentRole.PREDICATE, TripleComponentRole.OBJECT, TripleComponentRole.SUBJECT), + POS(TripleComponentRole.PREDICATE, TripleComponentRole.OBJECT, TripleComponentRole.SUBJECT, 1 << 3), /** * Object, subject, predicate */ - OSP(TripleComponentRole.OBJECT, TripleComponentRole.SUBJECT, TripleComponentRole.PREDICATE), + OSP(TripleComponentRole.OBJECT, TripleComponentRole.SUBJECT, TripleComponentRole.PREDICATE, 1 << 4), /** * Object, predicate, subject */ - OPS(TripleComponentRole.OBJECT, TripleComponentRole.PREDICATE, TripleComponentRole.SUBJECT); + OPS(TripleComponentRole.OBJECT, TripleComponentRole.PREDICATE, TripleComponentRole.SUBJECT, 1 << 5); + + public static final int ALL_MASK; + + static { + int allMask = 0; + // add all the mask to the var + for (TripleComponentOrder order : values()) { + allMask |= order.mask; + } + ALL_MASK = allMask; + } private final TripleComponentRole subjectMapping; private final TripleComponentRole predicateMapping; private final TripleComponentRole objectMapping; + public final int mask; TripleComponentOrder(TripleComponentRole subjectMapping, TripleComponentRole 
predicateMapping, - TripleComponentRole objectMapping) { + TripleComponentRole objectMapping, int mask) { this.subjectMapping = subjectMapping; this.predicateMapping = predicateMapping; this.objectMapping = objectMapping; + this.mask = mask; + } + + /** + * Search for an acceptable value in a map of orders + * + * @param flags flags to search the value + * @param map map + * @param value type + * @return find value, null for no matching value + */ + public static T fetchBestForCfg(int flags, Map map) { + for (Map.Entry e : map.entrySet()) { + if ((e.getKey().mask & flags) != 0) { + return e.getValue(); + } + } + return null; + } + + /** + * get an acceptable order for a order mask + * + * @param flags order mask + * @return order, {@link #Unknown} if nothing was found + */ + public static TripleComponentOrder getAcceptableOrder(int flags) { + if (flags != 0) { + for (TripleComponentOrder v : values()) { + if ((v.mask & flags) == 0) { + return v; + } + } + } + return Unknown; } public TripleComponentRole getSubjectMapping() { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java index f5bf99d3..11e22692 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/HDTImpl.java @@ -33,6 +33,7 @@ import com.the_qa_company.qendpoint.core.dictionary.impl.MultipleSectionDictionaryBig; import com.the_qa_company.qendpoint.core.dictionary.impl.MultipleSectionDictionaryCat; import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; import com.the_qa_company.qendpoint.core.exceptions.NotFoundException; @@ -83,6 
+84,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.Map; import java.util.zip.GZIPInputStream; @@ -265,6 +267,18 @@ public void saveToHDT(String fileName, ProgressListener listener) throws IOExcep @Override public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object) throws NotFoundException { + return search(subject, predicate, object, TripleComponentOrder.ALL_MASK); + } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph) throws NotFoundException { + return search(subject, predicate, object, graph, TripleComponentOrder.ALL_MASK); + } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + int searchOrderMask) throws NotFoundException { if (isClosed) { throw new IllegalStateException("Cannot search an already closed HDT"); @@ -314,6 +328,11 @@ public long estimatedNumResults() { public long getLastTriplePosition() { throw new NotImplementedException(); } + + @Override + public TripleComponentOrder getOrder() { + return TripleComponentOrder.getAcceptableOrder(searchOrderMask); + } }; } @@ -321,22 +340,23 @@ public long getLastTriplePosition() { if (isMapped) { try { - return new DictionaryTranslateIteratorBuffer(triples.search(triple), dictionary, subject, predicate, - object, g); + return new DictionaryTranslateIteratorBuffer(triples.search(triple, searchOrderMask), dictionary, + subject, predicate, object, g); } catch (NullPointerException e) { e.printStackTrace(); // FIXME: find why this can happen - return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object, - g); + return new DictionaryTranslateIterator(triples.search(triple, searchOrderMask), dictionary, subject, + predicate, object, g); } } else { - return new 
DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object, g); + return new DictionaryTranslateIterator(triples.search(triple, searchOrderMask), dictionary, subject, + predicate, object, g); } } @Override public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, - CharSequence graph) throws NotFoundException { + CharSequence graph, int searchOrderMask) throws NotFoundException { if (isClosed) { throw new IllegalStateException("Cannot search an already closed HDT"); } @@ -386,22 +406,27 @@ public long estimatedNumResults() { public long getLastTriplePosition() { throw new NotImplementedException(); } + + @Override + public TripleComponentOrder getOrder() { + return TripleComponentOrder.getAcceptableOrder(searchOrderMask); + } }; } if (isMapped) { try { - return new DictionaryTranslateIteratorBuffer(triples.search(triple), dictionary, subject, predicate, - object, graph); + return new DictionaryTranslateIteratorBuffer(triples.search(triple, searchOrderMask), dictionary, + subject, predicate, object, graph); } catch (NullPointerException e) { e.printStackTrace(); // FIXME: find why this can happen - return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object, - graph); + return new DictionaryTranslateIterator(triples.search(triple, searchOrderMask), dictionary, subject, + predicate, object, graph); } } else { - return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object, - graph); + return new DictionaryTranslateIterator(triples.search(triple, searchOrderMask), dictionary, subject, + predicate, object, graph); } } @@ -454,6 +479,13 @@ public void loadOrCreateIndex(ProgressListener listener, HDTOptions spec) throws // We need no index. 
return; } + triples.mapGenOtherIndexes(Path.of(String.valueOf(hdtFileName)), spec, listener); + + // disable the FOQ generation if asked + if (spec.getBoolean(HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, false)) { + return; + } + ControlInfo ci = new ControlInformation(); String indexName = hdtFileName + HDTVersion.get_index_suffix("-"); indexName = indexName.replaceAll("\\.hdt\\.gz", "hdt"); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java index 96a6c729..e9f6cf28 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/WriteHDTImpl.java @@ -2,6 +2,7 @@ import com.the_qa_company.qendpoint.core.dictionary.DictionaryFactory; import com.the_qa_company.qendpoint.core.dictionary.DictionaryPrivate; +import com.the_qa_company.qendpoint.core.exceptions.NotFoundException; import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; import com.the_qa_company.qendpoint.core.header.HeaderFactory; import com.the_qa_company.qendpoint.core.header.HeaderPrivate; @@ -122,4 +123,11 @@ public void close() throws IOException { public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object) { throw new NotImplementedException(); } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + int searchOrderMask) throws NotFoundException { + throw new NotImplementedException(); + } + } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MapOnCallHDT.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MapOnCallHDT.java index 3bbec86e..fa1a97b4 100644 --- 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MapOnCallHDT.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/hdt/impl/diskimport/MapOnCallHDT.java @@ -9,6 +9,7 @@ import com.the_qa_company.qendpoint.core.listener.ProgressListener; import com.the_qa_company.qendpoint.core.options.HDTOptions; import com.the_qa_company.qendpoint.core.triples.IteratorTripleString; +import com.the_qa_company.qendpoint.core.triples.TripleString; import com.the_qa_company.qendpoint.core.triples.Triples; import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath; import com.the_qa_company.qendpoint.core.util.io.IOUtil; @@ -20,6 +21,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; +import java.util.Iterator; /** * HDT implementation delaying the map method to avoid mapping into memory a @@ -107,6 +109,43 @@ public IteratorTripleString search(CharSequence subject, CharSequence predicate, return mapOrGetHDT().search(subject, predicate, object, graph); } + @Override + public IteratorTripleString search(TripleString triple) throws NotFoundException { + return mapOrGetHDT().search(triple); + } + + @Override + public IteratorTripleString searchAll() throws NotFoundException { + return mapOrGetHDT().searchAll(); + } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + int searchOrderMask) throws NotFoundException { + return mapOrGetHDT().search(subject, predicate, object, searchOrderMask); + } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph, int searchOrderMask) throws NotFoundException { + return mapOrGetHDT().search(subject, predicate, object, graph, searchOrderMask); + } + + @Override + public IteratorTripleString search(TripleString triple, int searchOrderMask) throws NotFoundException { + return 
mapOrGetHDT().search(triple, searchOrderMask); + } + + @Override + public IteratorTripleString searchAll(int searchOrderMask) throws NotFoundException { + return mapOrGetHDT().searchAll(searchOrderMask); + } + + @Override + public Iterator iterator() { + return mapOrGetHDT().iterator(); + } + @Override public void loadFromHDT(InputStream input, ProgressListener listener) throws IOException { ((HDTPrivate) mapOrGetHDT()).loadFromHDT(input, listener); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java index 0df71236..0c37539b 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeader.java @@ -27,6 +27,7 @@ import java.util.List; import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; +import com.the_qa_company.qendpoint.core.exceptions.NotFoundException; import com.the_qa_company.qendpoint.core.exceptions.ParserException; import com.the_qa_company.qendpoint.core.listener.ProgressListener; import com.the_qa_company.qendpoint.core.options.ControlInfo; @@ -188,6 +189,18 @@ public IteratorTripleString search(CharSequence subject, CharSequence predicate, return new PlainHeaderIterator(this, pattern); } + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + int searchOrderMask) throws NotFoundException { + return search(subject, predicate, object); + } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph, int searchOrderMask) throws NotFoundException { + return search(subject, predicate, object, graph); + } + @Override public void processTriple(TripleString triple, long pos) { triples.add(new TripleString(triple)); diff --git 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeaderIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeaderIterator.java index c0f04c4f..5c0a6b35 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeaderIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/header/PlainHeaderIterator.java @@ -19,6 +19,7 @@ package com.the_qa_company.qendpoint.core.header; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.triples.IteratorTripleString; import com.the_qa_company.qendpoint.core.triples.TripleString; import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; @@ -111,4 +112,8 @@ public long getLastTriplePosition() { throw new UnsupportedOperationException(); } + @Override + public TripleComponentOrder getOrder() { + return TripleComponentOrder.Unknown; + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java index ad5e2328..b26015b5 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIterator.java @@ -21,6 +21,7 @@ import com.the_qa_company.qendpoint.core.dictionary.Dictionary; import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; import com.the_qa_company.qendpoint.core.quad.QuadString; import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; @@ -161,4 +162,9 @@ public long getLastTriplePosition() { return iterator.getLastTriplePosition(); } + @Override + public 
TripleComponentOrder getOrder() { + return iterator.getOrder(); + } + } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java index cf46d3f7..84553d64 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/iterator/DictionaryTranslateIteratorBuffer.java @@ -21,6 +21,7 @@ import com.the_qa_company.qendpoint.core.dictionary.DictionaryPrivate; import com.the_qa_company.qendpoint.core.dictionary.impl.OptimizedExtractor; import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; import com.the_qa_company.qendpoint.core.quad.QuadString; import com.the_qa_company.qendpoint.core.triples.IteratorTripleString; @@ -278,6 +279,11 @@ public long getLastTriplePosition() { return lastPosition.compute(); } + @Override + public TripleComponentOrder getOrder() { + return iterator.getOrder(); + } + public static void setBlockSize(int size) { DEFAULT_BLOCK_SIZE = size; } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java index 3e85f61a..1d0eed3b 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/options/HDTOptionsKeys.java @@ -508,6 +508,12 @@ public class HDTOptionsKeys { @Key(type = Key.Type.BOOLEAN, desc = "Dump binary offsets, default false") public static final String DUMP_BINARY_OFFSETS = "bump.binary.offsets"; + @Key(type = Key.Type.STRING, desc = "Create 
other indexes in bitmaptriples pattern values (spo, ops, etc.), default none") + public static final String BITMAPTRIPLES_INDEX_OTHERS = "bitmaptriples.index.others"; + + @Key(type = Key.Type.BOOLEAN, desc = "No FoQ index generation default false") + public static final String BITMAPTRIPLES_INDEX_NO_FOQ = "bitmaptriples.index.noFoQ"; + // use tree-map to have a better order private static final Map OPTION_MAP = new TreeMap<>(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFAccess.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFAccess.java index 6b753100..3866cad3 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFAccess.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/RDFAccess.java @@ -71,6 +71,51 @@ default IteratorTripleString searchAll() throws NotFoundException { return search("", "", "", ""); } + /** + * Iterate over the triples of an RDF Set that match the specified pattern. + * null and empty strings act as a wildcard. (e.g. search(null, null, null) + * iterates over all elements) + * + * @param subject The subject to search + * @param predicate The predicate to search + * @param object The object to search + * @param searchOrderMask The search order mask, can be get using + * {@link com.the_qa_company.qendpoint.core.enums.TripleComponentOrder#mask} + * @return Iterator of TripleStrings + * @throws NotFoundException when the triple cannot be found + */ + IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, int searchOrderMask) + throws NotFoundException; + + /** + * Iterate over the triples of an RDF Set that match the specified pattern. + * null and empty strings act as a wildcard. Default implementation ignore + * the graph (e.g. 
search(null, null, null, null) iterates over all + * elements) + * + * @param subject The subject to search + * @param predicate The predicate to search + * @param object The object to search + * @param graph The graph to search + * @param searchOrderMask The search order mask, can be obtained using + * {@link com.the_qa_company.qendpoint.core.enums.TripleComponentOrder#mask} + * @return Iterator of TripleStrings + * @throws NotFoundException when the triple cannot be found + */ + default IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph, int searchOrderMask) throws NotFoundException { + return search(subject, predicate, object, searchOrderMask); + } + + default IteratorTripleString search(TripleString triple, int searchOrderMask) throws NotFoundException { + return search(triple.getSubject(), triple.getPredicate(), triple.getObject(), triple.getGraph(), + searchOrderMask); + } + + default IteratorTripleString searchAll(int searchOrderMask) throws NotFoundException { + return search("", "", "", "", searchOrderMask); + } + @Override default Iterator iterator() { try { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/JenaModelIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/JenaModelIterator.java index 2c524e93..1ca94353 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/JenaModelIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/JenaModelIterator.java @@ -1,5 +1,6 @@ package com.the_qa_company.qendpoint.core.rdf.parsers; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.Statement; import org.apache.jena.rdf.model.StmtIterator; @@ -54,4 +55,9 @@ public long getLastTriplePosition() { throw new UnsupportedOperationException(); } + @Override + public
TripleComponentOrder getOrder() { + return TripleComponentOrder.Unknown; + } + } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleString.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleString.java index 3a943655..f8798b96 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleString.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/IteratorTripleString.java @@ -22,6 +22,7 @@ import java.util.Iterator; import com.the_qa_company.qendpoint.core.enums.ResultEstimationType; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; /** * Iterator of TripleStrings based on IteratorTripleID @@ -57,4 +58,9 @@ public interface IteratorTripleString extends Iterator { * @see Triples#findTriple(long) */ long getLastTriplePosition(); + + /** + * @return order of the components from the iterator + */ + TripleComponentOrder getOrder(); } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java index 62f2ba4a..4e3f2de6 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TripleID.java @@ -19,10 +19,18 @@ package com.the_qa_company.qendpoint.core.triples; +import com.the_qa_company.qendpoint.core.util.LongCompare; + import java.io.Serial; import java.io.Serializable; -import com.the_qa_company.qendpoint.core.util.LongCompare; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.ALL_MASK; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.OPS; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.OSP; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.POS; +import 
static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.PSO; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.SOP; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.SPO; /** * TripleID holds a triple using Long IDs @@ -336,4 +344,34 @@ public int hashCode() { public TripleID copyNoGraph() { return new TripleID(subject, predicate, object); } + + /** + * @return the pattern order flags for this triple id + */ + public int getPatternOrderFlags() { + if (subject == 0) { + if (predicate == 0) { + if (object == 0) { + return ALL_MASK; // ??? + } + return OPS.mask | OSP.mask; // ??o + } + if (object == 0) { + return POS.mask | PSO.mask; // ?p? + } else { + return OPS.mask | POS.mask; // ?po + } + } + if (predicate == 0) { + if (object == 0) { + return SPO.mask | SOP.mask; // s?? + } + return SOP.mask | OSP.mask; // s?o + } + if (object == 0) { + return SPO.mask | PSO.mask; // sp? + } else { + return ALL_MASK; // spo + } + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/Triples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/Triples.java index 4a815085..339defa8 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/Triples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/Triples.java @@ -20,13 +20,16 @@ package com.the_qa_company.qendpoint.core.triples; import java.io.Closeable; +import java.util.Iterator; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.header.Header; +import com.the_qa_company.qendpoint.core.iterator.SuppliableIteratorTripleID; /** * Interface for Triples implementation. */ -public interface Triples extends Closeable { +public interface Triples extends Closeable, Iterable { /** * Iterates over all triples. 
Equivalent to this.search(new TripleID()); * @@ -34,6 +37,15 @@ public interface Triples extends Closeable { */ IteratorTripleID searchAll(); + /** + * Iterates over all triples. Equivalent to this.search(new TripleID()); + * + * @param searchMask search index mask, done by combining + * {@link TripleComponentOrder#mask} + * @return IteratorTripleID + */ + IteratorTripleID searchAll(int searchMask); + /** * Iterates over all triples that match the pattern. * @@ -42,6 +54,16 @@ public interface Triples extends Closeable { */ IteratorTripleID search(TripleID pattern); + /** + * Iterates over all triples that match the pattern. + * + * @param pattern The pattern to match against + * @param searchMask search index mask, done by combining + * {@link TripleComponentOrder#mask} + * @return IteratorTripleID + */ + SuppliableIteratorTripleID search(TripleID pattern, int searchMask); + /** * Returns the total number of triples * @@ -94,4 +116,9 @@ default TripleID findTriple(long position) { * @see IteratorTripleString#getLastTriplePosition() */ TripleID findTriple(long position, TripleID buffer); + + @Override + default Iterator iterator() { + return searchAll(); + } } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesPrivate.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesPrivate.java index 0c7076a6..d986d9b8 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesPrivate.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/TriplesPrivate.java @@ -1,18 +1,18 @@ package com.the_qa_company.qendpoint.core.triples; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; - import com.the_qa_company.qendpoint.core.dictionary.Dictionary; -import com.the_qa_company.qendpoint.core.iterator.SuppliableIteratorTripleID; -import com.the_qa_company.qendpoint.core.listener.ProgressListener;
import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; import com.the_qa_company.qendpoint.core.options.ControlInfo; import com.the_qa_company.qendpoint.core.options.HDTOptions; import com.the_qa_company.qendpoint.core.util.io.CountInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Path; + public interface TriplesPrivate extends Triples { /** * Serializes the triples to an OutputStream @@ -21,14 +21,6 @@ public interface TriplesPrivate extends Triples { */ void save(OutputStream output, ControlInfo ci, ProgressListener listener) throws IOException; - /** - * Iterates over all triples that match the pattern. - * - * @param pattern The pattern to match against - * @return IteratorTripleID - */ - SuppliableIteratorTripleID search(TripleID pattern); - /** * Loads the structure from an InputStream * @@ -62,6 +54,16 @@ public interface TriplesPrivate extends Triples { */ void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressListener listener) throws IOException; + /** + * Sync or create the asked other index + * + * @param file hdt file + * @param spec spec + * @param listener listener + * @throws IOException io + */ + void mapGenOtherIndexes(Path file, HDTOptions spec, ProgressListener listener) throws IOException; + /** * Saves the associated Index to an OutputStream * diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java index 381f7a94..42aeeb6e 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapQuadTriples.java @@ -239,6 +239,28 @@ public SuppliableIteratorTripleID 
search(TripleID pattern) { pattern.isQuad() ? pattern.getGraph() : 0); } + @Override + public SuppliableIteratorTripleID search(TripleID pattern, int searchMask) { + if (isClosed) { + throw new IllegalStateException("Cannot search on BitmapTriples if it's already closed"); + } + + if (getNumberOfElements() == 0 || pattern.isNoMatch()) { + return new EmptyTriplesIterator(order); + } + + TripleID reorderedPat = new TripleID(pattern); + TripleOrderConvert.swapComponentOrder(reorderedPat, TripleComponentOrder.SPO, order); + String patternString = reorderedPat.getPatternString(); + + if (hasFOQIndex() && patternString.equals("???G")) { + return new BitmapTriplesIteratorGraphG(this, pattern); + } + + return new BitmapTriplesIteratorGraph(this, super.search(pattern.copyNoGraph(), searchMask), + pattern.isQuad() ? pattern.getGraph() : 0); + } + @Override public void mapFromFile(CountInputStream input, File f, ProgressListener listener) throws IOException { ControlInformation ci = new ControlInformation(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java index 8f417a71..3e7fd912 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java @@ -72,20 +72,32 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.channels.FileChannel; import java.nio.file.Files; +import java.nio.file.NoSuchFileException; import java.nio.file.Path; +import java.nio.file.StandardOpenOption; import java.util.ArrayList; +import java.util.Arrays; import java.util.Comparator; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; /** * @author 
mario.arias */ -public class BitmapTriples implements TriplesPrivate { +public class BitmapTriples implements TriplesPrivate, BitmapTriplesIndex { private static final Logger log = LoggerFactory.getLogger(BitmapTriples.class); protected TripleComponentOrder order; + protected final Map indexes = new HashMap<>(); + protected int indexesMask = 0; + protected Sequence seqY, seqZ, indexZ, predicateCount; protected Bitmap bitmapY, bitmapZ, bitmapIndexZ; @@ -285,12 +297,13 @@ public void load(TempTriples triples, ProgressListener listener) { this.load(it, listener); } - /* - * (non-Javadoc) - * @see hdt.triples.Triples#search(hdt.triples.TripleID) - */ @Override public SuppliableIteratorTripleID search(TripleID pattern) { + return search(pattern, TripleComponentOrder.ALL_MASK); + } + + @Override + public SuppliableIteratorTripleID search(TripleID pattern, int searchMask) { if (isClosed) { throw new IllegalStateException("Cannot search on BitmapTriples if it's already closed"); } @@ -301,6 +314,32 @@ public SuppliableIteratorTripleID search(TripleID pattern) { TripleID reorderedPat = new TripleID(pattern); TripleOrderConvert.swapComponentOrder(reorderedPat, TripleComponentOrder.SPO, order); + int flags = reorderedPat.getPatternOrderFlags(); + + if ((flags & searchMask & this.order.mask) != 0) { + // we can use the default order, so we use it + return new BitmapTriplesIterator(this, pattern); + } + + if ((indexesMask & flags) != 0) { + BitmapTriplesIndex idx; + + int bestOrders = flags & searchMask; + + if ((indexesMask & bestOrders) != 0) { + // we can use the asked order + idx = TripleComponentOrder.fetchBestForCfg(bestOrders, indexes); + } else { + // no asked order found, we can still use the best index + idx = TripleComponentOrder.fetchBestForCfg(flags, indexes); + } + + assert idx != null : String.format("the tid flags were describing an unknown pattern: %x &= %x", flags, + indexesMask & flags); + + return new BitmapTriplesIterator(idx, pattern); + } + String 
patternString = reorderedPat.getPatternString(); if (patternString.equals("?P?")) { @@ -342,7 +381,12 @@ public SuppliableIteratorTripleID search(TripleID pattern) { */ @Override public IteratorTripleID searchAll() { - return this.search(new TripleID()); + return searchAll(TripleComponentOrder.ALL_MASK); + } + + @Override + public IteratorTripleID searchAll(int searchMask) { + return this.search(new TripleID(), searchMask); } /* @@ -1030,7 +1074,7 @@ public TripleID findTriple(long position, TripleID tripleID) { // -1 so we don't count end of tree long posX = bitmapY.rank1(posY - 1); long x = posX + 1; // the subject ID is the position + 1, IDs start from - // 1 not zero + // 1 not zero tripleID.setAll(x, y, z); return tripleID; @@ -1207,11 +1251,17 @@ public void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressLis this.adjIndex = new AdjacencyList(this.indexZ, this.bitmapIndexZ); } + @Override + public void mapGenOtherIndexes(Path file, HDTOptions spec, ProgressListener listener) throws IOException { + syncOtherIndexes(file, spec, listener); + } + @Override public void close() throws IOException { isClosed = true; try { - Closer.closeAll(seqY, seqZ, indexZ, predicateCount, predicateIndex, bitmapIndexZ, diskSequenceLocation); + Closer.closeAll(seqY, seqZ, indexZ, predicateCount, predicateIndex, bitmapIndexZ, diskSequenceLocation, + indexes); } finally { diskSequenceLocation = null; seqY = null; @@ -1220,6 +1270,8 @@ public void close() throws IOException { predicateCount = null; predicateIndex = null; bitmapIndexZ = null; + indexes.clear(); + indexesMask = 0; } } @@ -1227,6 +1279,72 @@ public boolean hasFOQIndex() { return indexZ != null && bitmapIndexZ != null; } + public void syncOtherIndexes(Path fileLocation, HDTOptions spec, ProgressListener listener) throws IOException { + Closer.closeAll(indexes); + indexes.clear(); + indexesMask = 0; + + if (fileLocation == null) { + return; + } + + String otherIdxs = 
spec.get(HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, ""); + + Set askedOrders = Arrays.stream(otherIdxs.toUpperCase().split(",")).map(e -> { + if (e.isEmpty() || e.equalsIgnoreCase(TripleComponentOrder.Unknown.name())) { + return null; + } + try { + return TripleComponentOrder.valueOf(e); + } catch (IllegalArgumentException ex) { + log.warn("Trying to use a bad order name {}", e, ex); + return null; + } + }).filter(Objects::nonNull).collect(Collectors.toSet()); + + MultiThreadListener mListener = MultiThreadListener.ofSingle(listener); + for (TripleComponentOrder order : TripleComponentOrder.values()) { + if (order == TripleComponentOrder.Unknown || order == this.order) { + continue; + } + + Path subIndexPath = BitmapTriplesIndexFile.getIndexPath(fileLocation, order); + + try (FileChannel channel = FileChannel.open(subIndexPath, StandardOpenOption.READ)) { + // load from the path... + + BitmapTriplesIndex idx = BitmapTriplesIndexFile.map(subIndexPath, channel); + BitmapTriplesIndex old = indexes.put(order, idx); + indexesMask |= idx.getOrder().mask; + if (old != null) { + log.warn("an index is using a bad order old:{} cur:{} new:{}", old.getOrder(), order, + idx.getOrder()); + } + IOUtil.closeQuietly(old); + } catch (NoSuchFileException ignore) { + // no index with this name + if (!askedOrders.contains(order)) { + continue; // not asked by the user, we can ignore + } + // generate the file + BitmapTriplesIndexFile.generateIndex(this, subIndexPath, order, spec, mListener); + try (FileChannel channel = FileChannel.open(subIndexPath, StandardOpenOption.READ)) { + // load from the path... + BitmapTriplesIndex idx = BitmapTriplesIndexFile.map(subIndexPath, channel); + BitmapTriplesIndex old = indexes.put(order, idx); + indexesMask |= order.mask; + if (old != null) { + log.warn("an index is using a bad order old:{} cur:{} new:{}", old.getOrder(), order, + idx.getOrder()); + } + IOUtil.closeQuietly(old); // should be null? 
+ } catch (NoSuchFileException ex2) { + throw new IOException("index not generated", ex2); + } + } + } + } + @Override public TripleComponentOrder getOrder() { return this.order; @@ -1236,18 +1354,22 @@ public Sequence getIndexZ() { return indexZ; } + @Override public Sequence getSeqY() { return seqY; } + @Override public Sequence getSeqZ() { return seqZ; } + @Override public AdjacencyList getAdjacencyListY() { return adjY; } + @Override public AdjacencyList getAdjacencyListZ() { return adjZ; } @@ -1256,10 +1378,12 @@ public AdjacencyList getAdjacencyListIndex() { return adjIndex; } + @Override public Bitmap getBitmapY() { return bitmapY; } + @Override public Bitmap getBitmapZ() { return bitmapZ; } diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndex.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndex.java new file mode 100644 index 00000000..213dc280 --- /dev/null +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndex.java @@ -0,0 +1,22 @@ +package com.the_qa_company.qendpoint.core.triples.impl; + +import com.the_qa_company.qendpoint.core.compact.bitmap.AdjacencyList; +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.compact.sequence.Sequence; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; + +public interface BitmapTriplesIndex { + Bitmap getBitmapY(); + + Bitmap getBitmapZ(); + + Sequence getSeqY(); + + Sequence getSeqZ(); + + AdjacencyList getAdjacencyListY(); + + AdjacencyList getAdjacencyListZ(); + + TripleComponentOrder getOrder(); +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndexFile.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndexFile.java new file mode 100644 index 00000000..49896901 --- /dev/null +++ 
b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndexFile.java @@ -0,0 +1,357 @@ +package com.the_qa_company.qendpoint.core.triples.impl; + +import com.the_qa_company.qendpoint.core.compact.bitmap.AdjacencyList; +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap; +import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap64Big; +import com.the_qa_company.qendpoint.core.compact.bitmap.BitmapFactory; +import com.the_qa_company.qendpoint.core.compact.bitmap.ModifiableBitmap; +import com.the_qa_company.qendpoint.core.compact.sequence.DynamicSequence; +import com.the_qa_company.qendpoint.core.compact.sequence.Sequence; +import com.the_qa_company.qendpoint.core.compact.sequence.SequenceFactory; +import com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64BigDisk; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; +import com.the_qa_company.qendpoint.core.iterator.utils.AsyncIteratorFetcher; +import com.the_qa_company.qendpoint.core.iterator.utils.ExceptionIterator; +import com.the_qa_company.qendpoint.core.iterator.utils.MapIterator; +import com.the_qa_company.qendpoint.core.listener.MultiThreadListener; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; +import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.util.BitUtil; +import com.the_qa_company.qendpoint.core.util.concurrent.KWayMerger; +import com.the_qa_company.qendpoint.core.util.io.CloseMappedByteBuffer; +import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath; +import com.the_qa_company.qendpoint.core.util.io.Closer; +import com.the_qa_company.qendpoint.core.util.io.CountInputStream; +import com.the_qa_company.qendpoint.core.util.io.IOUtil; +import 
com.the_qa_company.qendpoint.core.util.listener.ListenerUtil; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.Closeable; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.nio.channels.Channels; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; + +import static java.lang.String.format; + +/** + * File containing a BitmapTriples index + * + * @author Antoine Willerval + */ +public class BitmapTriplesIndexFile implements BitmapTriplesIndex, Closeable { + /** + * Get the path for an order for an hdt file + * + * @param hdt hdt path + * @param order order + * @return index path + */ + public static Path getIndexPath(Path hdt, TripleComponentOrder order) { + return hdt.resolveSibling(hdt.getFileName() + "." + order.name().toLowerCase() + ".idx"); + } + + public static final byte[] MAGIC = "$HDTIDX0".getBytes(StandardCharsets.US_ASCII); + + /** + * Map an index from a file + * + * @param file file + * @param channel channel + * @return index + * @throws IOException io + */ + public static BitmapTriplesIndex map(Path file, FileChannel channel) throws IOException { + try (CloseMappedByteBuffer header = IOUtil.mapChannel(file, channel, FileChannel.MapMode.READ_ONLY, 0, + MAGIC.length)) { + byte[] magicRead = new byte[MAGIC.length]; + + header.get(magicRead); + + if (!Arrays.equals(magicRead, MAGIC)) { + throw new IOException(format("Can't read %s magic", file)); + } + } + + CountInputStream stream = new CountInputStream(new BufferedInputStream(Channels.newInputStream(channel))); + stream.skipNBytes(MAGIC.length); + + String orderCfg = IOUtil.readSizedString(stream, ProgressListener.ignore()); + + TripleComponentOrder order = TripleComponentOrder.valueOf(orderCfg); + + Sequence seqY = SequenceFactory.createStream(stream, file.toFile()); + Bitmap bitY = BitmapFactory.createBitmap(stream); +
bitY.load(stream, ProgressListener.ignore()); + + Sequence seqZ = SequenceFactory.createStream(stream, file.toFile()); + Bitmap bitZ = BitmapFactory.createBitmap(stream); + bitZ.load(stream, ProgressListener.ignore()); + + return new BitmapTriplesIndexFile(seqY, seqZ, bitY, bitZ, order); + } + + /** + * Generate an index in a particular destination + * + * @param triples triples to convert + * @param destination destination path + * @param order order to build + * @param spec index spec + * @param mtlistener listener + * @throws IOException ioe + */ + public static void generateIndex(BitmapTriples triples, Path destination, TripleComponentOrder order, + HDTOptions spec, MultiThreadListener mtlistener) throws IOException { + MultiThreadListener listener = MultiThreadListener.ofNullable(mtlistener); + Path diskLocation; + if (triples.diskSequence) { + diskLocation = triples.diskSequenceLocation.createOrGetPath(); + } else { + diskLocation = Files.createTempDirectory("bitmapTriples"); + } + int workers = (int) spec.getInt(HDTOptionsKeys.BITMAPTRIPLES_DISK_WORKER_KEY, + Runtime.getRuntime()::availableProcessors); + // check and set default values if required + if (workers <= 0) { + throw new IllegalArgumentException("Number of workers should be positive!"); + } + long chunkSize = spec.getInt(HDTOptionsKeys.BITMAPTRIPLES_DISK_CHUNK_SIZE_KEY, + () -> BitmapTriples.getMaxChunkSizeDiskIndex(workers)); + if (chunkSize < 0) { + throw new IllegalArgumentException("Negative chunk size!"); + } + long maxFileOpenedLong = spec.getInt(HDTOptionsKeys.BITMAPTRIPLES_DISK_MAX_FILE_OPEN_KEY, 1024); + int maxFileOpened; + if (maxFileOpenedLong < 0 || maxFileOpenedLong > Integer.MAX_VALUE) { + throw new IllegalArgumentException("maxFileOpened should be positive!"); + } else { + maxFileOpened = (int) maxFileOpenedLong; + } + long kwayLong = spec.getInt(HDTOptionsKeys.BITMAPTRIPLES_DISK_KWAY_KEY, + () -> Math.max(1, BitUtil.log2(maxFileOpened / workers))); + int k; + if (kwayLong <= 0 ||
kwayLong > Integer.MAX_VALUE) { + throw new IllegalArgumentException("kway can't be negative!"); + } else { + k = 1 << ((int) kwayLong); + } + long bufferSizeLong = spec.getInt(HDTOptionsKeys.BITMAPTRIPLES_DISK_BUFFER_SIZE_KEY, + CloseSuppressPath.BUFFER_SIZE); + int bufferSize; + if (bufferSizeLong > Integer.MAX_VALUE - 5L || bufferSizeLong <= 0) { + throw new IllegalArgumentException("Buffer size can't be negative or bigger than the size of an array!"); + } else { + bufferSize = (int) bufferSizeLong; + } + + try (CloseSuppressPath workDir = CloseSuppressPath + .of(diskLocation.resolve("triplesort-" + order.name().toLowerCase()))) { + workDir.mkdirs(); + workDir.closeWithDeleteRecurse(); + + ExceptionIterator sortedIds = null; + ModifiableBitmap bitY = null; + ModifiableBitmap bitZ = null; + DynamicSequence seqY = null; + DynamicSequence seqZ = null; + try { + sortedIds = new DiskTriplesReorderSorter(workDir, + new AsyncIteratorFetcher<>( + new MapIterator<>(triples.searchAll(triples.getOrder().mask), TripleID::clone)), + listener, bufferSize, chunkSize, k, triples.getOrder(), order).sort(workers); + + int ss = BitUtil.log2(triples.getBitmapY().countOnes()); + int ps = triples.getSeqY().sizeOf(); + int os = triples.getSeqZ().sizeOf(); + + TripleID logTriple = new TripleID(ss, ps, os); + + // we swap the order to find the new allocation numbits + TripleComponentOrder oldOrder = triples.getOrder(); + TripleOrderConvert.swapComponentOrder(logTriple, oldOrder, order); + + int ySize = (int) logTriple.getPredicate(); + int zSize = (int) logTriple.getObject(); + + long count = triples.getNumberOfElements(); + workDir.mkdirs(); + workDir.closeWithDeleteRecurse(); + bitY = Bitmap64Big.disk(workDir.resolve("bity"), count); + bitZ = Bitmap64Big.disk(workDir.resolve("bitZ"), count); + + triples.getSeqY().sizeOf(); + + seqY = new SequenceLog64BigDisk(workDir.resolve("seqy"), ySize, count, false, true); + seqZ = new SequenceLog64BigDisk(workDir.resolve("seqz"), zSize, count, 
false, true); + + long lastX = 0; + long lastY = 0; + long lastZ = 0; + + // filling index + + long x, y, z; + long numTriples = 0; + while (sortedIds.hasNext()) { + TripleID tid = sortedIds.next(); + + x = tid.getSubject(); + y = tid.getPredicate(); + z = tid.getObject(); + + if (x == 0 || y == 0 || z == 0) { + throw new IllegalFormatException("None of the components of a triple can be null"); + } + + if (numTriples == 0) { + seqY.append(y); + seqZ.append(z); + } else if (lastX != x) { + if (x != lastX + 1) { + throw new RuntimeException("Upper level must be increasing and correlative"); + } + + // X changed + bitY.append(true); + seqY.append(y); + + bitZ.append(true); + seqZ.append(z); + } else if (y != lastY) { + if (y < lastY) { + throw new IllegalFormatException("Middle level must be increasing for each parent."); + } + + // Y changed + bitY.append(false); + seqY.append(y); + + bitZ.append(true); + seqZ.append(z); + } else { + if (z < lastZ) { + throw new IllegalFormatException("Lower level must be increasing for each parent."); + } + + // Z changed + bitZ.append(false); + seqZ.append(z); + } + + lastX = x; + lastY = y; + lastZ = z; + + ListenerUtil.notifyCond(listener, "Converting to BitmapTriples", numTriples, numTriples, count); + numTriples++; + } + + if (numTriples > 0) { + bitY.append(true); + bitZ.append(true); + } + + assert numTriples == triples.getNumberOfElements(); + + seqY.aggressiveTrimToSize(); + seqZ.trimToSize(); + + // saving the index + try (BufferedOutputStream output = new BufferedOutputStream(Files.newOutputStream(destination))) { + output.write(MAGIC); + + IOUtil.writeSizedString(output, order.name(), listener); + + seqY.save(output, listener); + bitY.save(output, listener); + + seqZ.save(output, listener); + bitZ.save(output, listener); + + // no need for CRC I guess? 
+ } + } catch (Throwable t) { + try { + Closer.closeAll(sortedIds, bitY, bitZ, seqY, seqZ); + } catch (Exception ex) { + t.addSuppressed(ex); + } catch (Throwable t2) { + t2.addSuppressed(t); + throw t2; + } + throw t; + } + Closer.closeAll(sortedIds, bitY, bitZ, seqY, seqZ); + + } catch (InterruptedException e) { + throw new InterruptedIOException(e.getMessage()); + } catch (KWayMerger.KWayMergerException e) { + throw new IOException(e); + } + } + + private final Sequence seqY, seqZ; + private final Bitmap bitY, bitZ; + private final AdjacencyList adjY, adjZ; + private final TripleComponentOrder order; + + private BitmapTriplesIndexFile(Sequence seqY, Sequence seqZ, Bitmap bitY, Bitmap bitZ, TripleComponentOrder order) { + this.seqY = seqY; + this.seqZ = seqZ; + this.bitY = bitY; + this.bitZ = bitZ; + this.order = order; + + this.adjY = new AdjacencyList(seqY, bitY); + this.adjZ = new AdjacencyList(seqZ, bitZ); + } + + @Override + public Bitmap getBitmapY() { + return bitY; + } + + @Override + public Bitmap getBitmapZ() { + return bitZ; + } + + @Override + public Sequence getSeqY() { + return seqY; + } + + @Override + public Sequence getSeqZ() { + return seqZ; + } + + @Override + public AdjacencyList getAdjacencyListY() { + return adjY; + } + + @Override + public AdjacencyList getAdjacencyListZ() { + return adjZ; + } + + @Override + public TripleComponentOrder getOrder() { + return order; + } + + @Override + public void close() throws IOException { + Closer.closeAll(bitY, bitZ, seqY, seqZ); + } +} diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java index 57ba0a4a..3118c149 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIterator.java @@ 
-30,7 +30,7 @@ */ public class BitmapTriplesIterator implements SuppliableIteratorTripleID { - protected final BitmapTriples triples; + protected final BitmapTriplesIndex idx; protected final TripleID pattern, returnTriple; protected long lastPosition; protected long patX, patY, patZ; @@ -40,8 +40,8 @@ public class BitmapTriplesIterator implements SuppliableIteratorTripleID { protected long nextY, nextZ; protected long x, y, z; - protected BitmapTriplesIterator(BitmapTriples triples, TripleID pattern, boolean search) { - this.triples = triples; + protected BitmapTriplesIterator(BitmapTriplesIndex idx, TripleID pattern, boolean search) { + this.idx = idx; this.returnTriple = new TripleID(); this.pattern = new TripleID(); if (search) { @@ -49,16 +49,16 @@ protected BitmapTriplesIterator(BitmapTriples triples, TripleID pattern, boolean } } - public BitmapTriplesIterator(BitmapTriples triples, TripleID pattern) { - this(triples, pattern, true); + public BitmapTriplesIterator(BitmapTriplesIndex idx, TripleID pattern) { + this(idx, pattern, true); } - public BitmapTriplesIterator(BitmapTriples triples, long minZ, long maxZ) { - this.triples = triples; + public BitmapTriplesIterator(BitmapTriplesIndex idx, long minZ, long maxZ) { + this.idx = idx; this.returnTriple = new TripleID(); this.pattern = new TripleID(); - adjY = triples.adjY; - adjZ = triples.adjZ; + adjY = idx.getAdjacencyListY(); + adjZ = idx.getAdjacencyListZ(); this.minZ = minZ; this.maxZ = maxZ; @@ -70,13 +70,13 @@ public BitmapTriplesIterator(BitmapTriples triples, long minZ, long maxZ) { public void newSearch(TripleID pattern) { this.pattern.assign(pattern); - TripleOrderConvert.swapComponentOrder(this.pattern, TripleComponentOrder.SPO, triples.order); + TripleOrderConvert.swapComponentOrder(this.pattern, TripleComponentOrder.SPO, idx.getOrder()); patX = this.pattern.getSubject(); patY = this.pattern.getPredicate(); patZ = this.pattern.getObject(); - adjY = triples.adjY; - adjZ = triples.adjZ; + adjY = 
idx.getAdjacencyListY(); + adjZ = idx.getAdjacencyListZ(); // ((BitSequence375)triples.bitmapZ).dump(); @@ -87,7 +87,7 @@ public void newSearch(TripleID pattern) { protected void updateOutput() { lastPosition = posZ; returnTriple.setAll(x, y, z); - TripleOrderConvert.swapComponentOrder(returnTriple, triples.order, TripleComponentOrder.SPO); + TripleOrderConvert.swapComponentOrder(returnTriple, idx.getOrder(), TripleComponentOrder.SPO); } private void findRange() { @@ -277,7 +277,7 @@ public void goTo(long pos) { */ @Override public TripleComponentOrder getOrder() { - return triples.order; + return idx.getOrder(); } /* diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java index b762ab86..72638957 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorYFOQ.java @@ -208,7 +208,7 @@ public void goTo(long pos) { */ @Override public TripleComponentOrder getOrder() { - return triples.order; + return TripleComponentOrder.Unknown; // triples.order; } /* diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java index 1d18bcfe..51d74f53 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIteratorZFOQ.java @@ -240,7 +240,7 @@ public void goTo(long pos) { */ @Override public TripleComponentOrder getOrder() { - return triples.order; + return TripleComponentOrder.Unknown;// triples.order; } /* diff --git 
a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/DiskTriplesReorderSorter.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/DiskTriplesReorderSorter.java
new file mode 100644
index 00000000..14136b5a
--- /dev/null
+++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/DiskTriplesReorderSorter.java
@@ -0,0 +1,199 @@
+package com.the_qa_company.qendpoint.core.triples.impl;
+
+import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder;
+import com.the_qa_company.qendpoint.core.iterator.utils.AsyncIteratorFetcher;
+import com.the_qa_company.qendpoint.core.iterator.utils.ExceptionIterator;
+import com.the_qa_company.qendpoint.core.iterator.utils.SizeFetcher;
+import com.the_qa_company.qendpoint.core.listener.MultiThreadListener;
+import com.the_qa_company.qendpoint.core.triples.TripleID;
+import com.the_qa_company.qendpoint.core.util.ParallelSortableArrayList;
+import com.the_qa_company.qendpoint.core.util.concurrent.KWayMerger;
+import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath;
+import com.the_qa_company.qendpoint.core.util.io.IOUtil;
+import com.the_qa_company.qendpoint.core.util.io.compress.CompressTripleMergeIterator;
+import com.the_qa_company.qendpoint.core.util.io.compress.CompressTripleReader;
+import com.the_qa_company.qendpoint.core.util.io.compress.CompressTripleWriter;
+import com.the_qa_company.qendpoint.core.util.listener.IntermediateListener;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Optional;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Supplier;
+
+/**
+ * Disk-based sorter converting a stream of {@link TripleID} from one
+ * {@link TripleComponentOrder} to another one with a {@link KWayMerger}.
+ * <p>
+ * Each chunk read from the source is swapped from {@code oldOrder} to
+ * {@code newOrder}, sorted in memory and written into a file, the chunk
+ * files are then merged together by {@link #mergeChunks(List, CloseSuppressPath)}.
+ */
+public class DiskTriplesReorderSorter implements KWayMerger.KWayMergerImpl<TripleID, SizeFetcher<TripleID>> {
+	private final CloseSuppressPath baseFileName;
+	private final AsyncIteratorFetcher<TripleID> source;
+	private final MultiThreadListener listener;
+	private final int bufferSize;
+	private final long chunkSize;
+	private final int k;
+	private final TripleComponentOrder oldOrder;
+	private final TripleComponentOrder newOrder;
+	// total number of triples read so far, across every createChunk call
+	private final AtomicLong read = new AtomicLong();
+
+	/**
+	 * @param baseFileName location given to the {@link KWayMerger}
+	 * @param source       triples to sort, expressed in {@code oldOrder}
+	 * @param listener     progress listener, wrapped with ofNullable so null is accepted
+	 * @param bufferSize   buffer size used to open the chunk streams
+	 * @param chunkSize    size limit of a chunk, a triple weighting 3 * Long.BYTES
+	 * @param k            k parameter given to the {@link KWayMerger}
+	 * @param oldOrder     order of the triples in the source
+	 * @param newOrder     order of the triples to produce
+	 */
+	public DiskTriplesReorderSorter(CloseSuppressPath baseFileName, AsyncIteratorFetcher<TripleID> source,
+			MultiThreadListener listener, int bufferSize, long chunkSize, int k, TripleComponentOrder oldOrder,
+			TripleComponentOrder newOrder) {
+		this.source = source;
+		this.listener = MultiThreadListener.ofNullable(listener);
+		this.baseFileName = baseFileName;
+		this.bufferSize = bufferSize;
+		this.chunkSize = chunkSize;
+		this.k = k;
+		this.oldOrder = oldOrder;
+		this.newOrder = newOrder;
+	}
+
+	/**
+	 * Read the triples of a chunk, convert them to the new order, sort them
+	 * and write them into the output file.
+	 */
+	@Override
+	public void createChunk(SizeFetcher<TripleID> flux, CloseSuppressPath output)
+			throws KWayMerger.KWayMergerException {
+		ParallelSortableArrayList<TripleID> pairs = new ParallelSortableArrayList<>(TripleID[].class);
+
+		TripleID tid;
+		// loading the pairs
+		listener.notifyProgress(10, "reading triple part 0");
+		while ((tid = flux.get()) != null) {
+			// the fetched triple is converted in place before being stored
+			TripleOrderConvert.swapComponentOrder(tid, oldOrder, newOrder);
+			pairs.add(tid);
+			long r = read.incrementAndGet();
+			if (r % 1_000_000 == 0) {
+				listener.notifyProgress(10, "reading triple part " + r);
+			}
+		}
+
+		// sort the pairs
+		pairs.parallelSort(TripleID::compareTo);
+
+		// write the result on disk
+		int size = pairs.size();
+		int block = size < 10 ? 1 : size / 10;
+		IntermediateListener il = new IntermediateListener(listener);
+		il.setRange(70, 100);
+		il.notifyProgress(0, "creating file");
+		try (CompressTripleWriter w = new CompressTripleWriter(output.openOutputStream(bufferSize), false)) {
+			for (int i = 0; i < size; i++) {
+				w.appendTriple(pairs.get(i));
+				if (i % block == 0) {
+					// report the index of the written triple, the previous
+					// counter was never incremented and always displayed 0
+					il.notifyProgress(i / (block / 10f), "writing triples " + i + "/" + size);
+				}
+			}
+			listener.notifyProgress(100, "writing completed " + size + " " + output.getFileName());
+		} catch (IOException e) {
+			throw new KWayMerger.KWayMergerException("Can't write chunk", e);
+		}
+	}
+
+	/**
+	 * Merge the chunk files into the output file, the readers and the inputs
+	 * are closed once the merge is done.
+	 */
+	@Override
+	public void mergeChunks(List<CloseSuppressPath> inputs, CloseSuppressPath output)
+			throws KWayMerger.KWayMergerException {
+		try {
+			listener.notifyProgress(0, "merging triples " + output.getFileName());
+			CompressTripleReader[] readers = new CompressTripleReader[inputs.size()];
+			long count = 0;
+			try {
+				for (int i = 0; i < inputs.size(); i++) {
+					readers[i] = new CompressTripleReader(inputs.get(i).openInputStream(bufferSize));
+				}
+
+				// use spo because we are writing xyz
+				ExceptionIterator<TripleID, IOException> it = CompressTripleMergeIterator.buildOfTree(readers,
+						TripleComponentOrder.SPO);
+				// at least one
+				long rSize = it.getSize();
+				long size = Math.max(rSize, 1);
+				long block = size < 10 ? 1 : size / 10;
+				try (CompressTripleWriter w = new CompressTripleWriter(output.openOutputStream(bufferSize), false)) {
+					while (it.hasNext()) {
+						w.appendTriple(it.next());
+						if (count % block == 0) {
+							listener.notifyProgress(count / (block / 10f), "merging triples " + count + "/" + size);
+						}
+						count++;
+					}
+				}
+			} finally {
+				IOUtil.closeAll(readers);
+			}
+			listener.notifyProgress(100, "triples merged " + output.getFileName() + " " + count);
+			// delete old pairs
+			IOUtil.closeAll(inputs);
+		} catch (IOException e) {
+			throw new KWayMerger.KWayMergerException(e);
+		}
+	}
+
+	/**
+	 * Build the size-limited fetcher of a chunk: every triple is weighted as
+	 * 3 longs and {@code chunkSize} is given as the limit.
+	 */
+	@Override
+	public SizeFetcher<TripleID> newStopFlux(Supplier<TripleID> flux) {
+		return SizeFetcher.of(flux, p -> 3 * Long.BYTES, chunkSize);
+	}
+
+	/**
+	 * Sort the source with a {@link KWayMerger} and open the resulting file.
+	 *
+	 * @param workers worker count, the merger uses {@code max(1, workers - 1)}
+	 * @return iterator over the sorted triples, closing it also closes the
+	 *         result path, or an empty iterator if the merger has no result
+	 * @throws InterruptedException           thrown by the wait of the result
+	 * @throws IOException                    if the result can't be opened
+	 * @throws KWayMerger.KWayMergerException thrown by the merger
+	 */
+	public ExceptionIterator<TripleID, IOException> sort(int workers)
+			throws InterruptedException, IOException, KWayMerger.KWayMergerException {
+		listener.notifyProgress(0, "Triple sort asked in " + baseFileName.toAbsolutePath());
+		// force to create the first file
+		KWayMerger<TripleID, SizeFetcher<TripleID>> merger = new KWayMerger<>(baseFileName, source, this,
+				Math.max(1, workers - 1), k);
+		merger.start();
+		// wait for the workers to merge the sections and create the triples
+		Optional<CloseSuppressPath> sections = merger.waitResult();
+		if (sections.isEmpty()) {
+			return ExceptionIterator.empty();
+		}
+		CloseSuppressPath path = sections.get();
+		return new CompressTripleReader(path.openInputStream(bufferSize)) {
+			@Override
+			public void close() throws IOException {
+				try {
+					super.close();
+				} finally {
+					IOUtil.closeObject(path);
+				}
+			}
+		};
+	}
+}
diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java
index 99000ed3..327f9e6a 100644
--- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java
+++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/OneReadTempTriples.java
@@ -21,6 +21,7 @@
 import
java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.file.Path; import java.util.Iterator; /** @@ -114,6 +115,11 @@ public SuppliableIteratorTripleID search(TripleID pattern) { throw new NotImplementedException(); } + @Override + public SuppliableIteratorTripleID search(TripleID pattern, int searchMask) { + throw new NotImplementedException(); + } + @Override public void load(InputStream input, ControlInfo ci, ProgressListener listener) throws IOException { throw new NotImplementedException(); @@ -139,6 +145,11 @@ public void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressLis throw new NotImplementedException(); } + @Override + public void mapGenOtherIndexes(Path file, HDTOptions spec, ProgressListener listener) { + throw new NotImplementedException(); + } + @Override public void saveIndex(OutputStream output, ControlInfo ci, ProgressListener listener) { throw new NotImplementedException(); @@ -165,6 +176,11 @@ public IteratorTripleID searchAll() { return new NoDuplicateTripleIDIterator(iterator); } + @Override + public IteratorTripleID searchAll(int searchMask) { + return new NoDuplicateTripleIDIterator(iterator); + } + @Override public long getNumberOfElements() { return iterator.estimatedNumResults(); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java index e3aafdc5..9f77fb77 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesList.java @@ -47,6 +47,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.file.Path; import java.util.ArrayList; /** @@ -118,12 +119,22 @@ public SuppliableIteratorTripleID search(TripleID pattern) { } } + @Override + public 
SuppliableIteratorTripleID search(TripleID pattern, int searchMask) { + return search(pattern); + } + /* * (non-Javadoc) * @see hdt.triples.Triples#searchAll() */ @Override public IteratorTripleID searchAll() { + return searchAll(TripleComponentOrder.ALL_MASK); + } + + @Override + public IteratorTripleID searchAll(int searchMask) { TripleID all = new TripleID(0, 0, 0); return this.search(all); } @@ -530,6 +541,11 @@ public void mapFromFile(CountInputStream in, File f, ProgressListener listener) public void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressListener listener) { } + @Override + public void mapGenOtherIndexes(Path file, HDTOptions spec, ProgressListener listener) { + throw new NotImplementedException(); + } + @Override public void replaceAllIds(DictionaryIDMapping mapSubj, DictionaryIDMapping mapPred, DictionaryIDMapping mapObj, DictionaryIDMapping mapGraph) { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java index f2fbce58..3897a972 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TriplesListLong.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.file.Path; import java.util.ArrayList; import com.the_qa_company.qendpoint.core.dictionary.Dictionary; @@ -114,12 +115,22 @@ public SuppliableIteratorTripleID search(TripleID pattern) { } } + @Override + public SuppliableIteratorTripleID search(TripleID pattern, int searchMask) { + return search(pattern); + } + /* * (non-Javadoc) * @see hdt.triples.Triples#searchAll() */ @Override public IteratorTripleID searchAll() { + return searchAll(TripleComponentOrder.ALL_MASK); + } + + @Override + public IteratorTripleID searchAll(int 
searchMask) { TripleID all = new TripleID(0, 0, 0); return this.search(all); } @@ -522,6 +533,10 @@ public void mapFromFile(CountInputStream in, File f, ProgressListener listener) public void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressListener listener) { } + @Override + public void mapGenOtherIndexes(Path file, HDTOptions spec, ProgressListener listener) { + } + @Override public void replaceAllIds(DictionaryIDMapping mapSubj, DictionaryIDMapping mapPred, DictionaryIDMapping mapObj) { sorted = false; diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java index 0594b703..03f31473 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/WriteBitmapTriples.java @@ -32,6 +32,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.file.Files; +import java.nio.file.Path; /** * Appendable write {@link BitmapTriples} version @@ -100,11 +101,21 @@ public IteratorTripleID searchAll() { throw new NotImplementedException(); } + @Override + public IteratorTripleID searchAll(int searchMask) { + throw new NotImplementedException(); + } + @Override public SuppliableIteratorTripleID search(TripleID pattern) { throw new NotImplementedException(); } + @Override + public SuppliableIteratorTripleID search(TripleID pattern, int searchMask) { + throw new NotImplementedException(); + } + @Override public long getNumberOfElements() { return numTriples; @@ -171,6 +182,11 @@ public void mapIndex(CountInputStream input, File f, ControlInfo ci, ProgressLis throw new NotImplementedException(); } + @Override + public void mapGenOtherIndexes(Path file, HDTOptions spec, ProgressListener listener) { + throw new NotImplementedException(); + } + @Override public 
void saveIndex(OutputStream output, ControlInfo ci, ProgressListener listener) {
 		throw new NotImplementedException();
diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java
index 497e69ff..29112c49 100644
--- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java
+++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/util/io/IOUtil.java
@@ -544,6 +544,28 @@ public static long readLong(InputStream input) throws IOException {
 				+ ((readBuffer[0] & 255));
 	}
 
+	/**
+	 * Read a long stored as 8 bytes in big endian order, most significant byte first.
+	 * @param input stream to read the 8 bytes from, read until 8 bytes are available
+	 * @return the long value read
+	 * @throws IOException io exception, an EOFException if the stream ends before 8 bytes are read
+	 */
+	public static long readLongBigEndian(InputStream input) throws IOException {
+		int n = 0;
+		byte[] readBuffer = new byte[8];
+		while (n < 8) {
+			int count = input.read(readBuffer, n, 8 - n);
+			if (count < 0)
+				throw new EOFException();
+			n += count;
+		}
+
+		return ((long) readBuffer[0] << 56) + ((long) (readBuffer[1] & 255) << 48)
+				+ ((long) (readBuffer[2] & 255) << 40) + ((long) (readBuffer[3] & 255) << 32)
+				+ ((long) (readBuffer[4] & 255) << 24) + ((readBuffer[5] & 255) << 16) + ((readBuffer[6] & 255) << 8)
+				+ ((readBuffer[7] & 255));
+	}
+
 	public static long readLong(long location, FileChannel channel) throws IOException {
 		try (CloseMappedByteBuffer buffer = new CloseMappedByteBuffer("readLong",
 				channel.map(FileChannel.MapMode.READ_ONLY, location, 8), false)) {
diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesOrderTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesOrderTest.java
new file mode 100644
index 00000000..821dba3c
--- /dev/null
+++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesOrderTest.java
@@ -0,0 +1,99 @@
+package com.the_qa_company.qendpoint.core.triples.impl;
+
+import
com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.exceptions.ParserException; +import com.the_qa_company.qendpoint.core.hdt.HDT; +import com.the_qa_company.qendpoint.core.hdt.HDTManager; +import com.the_qa_company.qendpoint.core.listener.ProgressListener; +import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys; +import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.core.util.LargeFakeDataSetStreamSupplier; +import org.apache.commons.io.file.PathUtils; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +public class BitmapTriplesOrderTest { + @Rule + public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build(); + + @Test + public void orderTest() throws IOException, ParserException { + Path root = tempDir.newFolder().toPath(); + + try { + LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier + .createSupplierWithMaxTriples(10_000, 52).withMaxElementSplit(50).withMaxLiteralSize(20); + + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true, + HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, Arrays.stream(TripleComponentOrder.values()) + .map(Object::toString).collect(Collectors.joining(","))); + + Path hdtFile = root.resolve("file.hdt"); + supplier.createAndSaveFakeHDT(spec, hdtFile); + + try (HDT hdt = HDTManager.mapIndexedHDT(hdtFile, spec, ProgressListener.ignore())) { + + // check index creations + for 
(TripleComponentOrder order : TripleComponentOrder.values()) { + if (order == TripleComponentOrder.Unknown || order == TripleComponentOrder.SPO) { + // default or unknown + continue; + } + + Path path = BitmapTriplesIndexFile.getIndexPath(hdtFile, order); + assertTrue(path + " doesn't exist! order " + order, Files.exists(path)); + } + + // all triples available? + Set dso = new HashSet<>(); + + IteratorTripleID it = hdt.getTriples().searchAll(TripleComponentOrder.SPO.mask); + + assertEquals(TripleComponentOrder.SPO, it.getOrder()); + while (it.hasNext()) { + TripleID tid = it.next().clone(); + if (!dso.add(tid)) { + fail("tid " + tid + " was read twice, dso: " + dso); + } + } + + assertEquals(hdt.getTriples().getNumberOfElements(), dso.size()); + for (TripleComponentOrder order : TripleComponentOrder.values()) { + if (order == TripleComponentOrder.Unknown) { + continue; + } + Set ds = new HashSet<>(dso); + + IteratorTripleID it2 = hdt.getTriples().searchAll(order.mask); + assertEquals(order, it2.getOrder()); + while (it2.hasNext()) { + TripleID tid = it2.next().clone(); + if (!ds.remove(tid)) { + fail("tid " + tid + " can't be find, previously here: " + dso.contains(tid)); + } + } + + assertTrue("ds not empty, " + ds.size() + " elem remaining", ds.isEmpty()); + } + } + + } finally { + PathUtils.deleteDirectory(root); + } + } +} diff --git a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesTest.java b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesTest.java index ccecea79..b8438f36 100644 --- a/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesTest.java +++ b/qendpoint-core/src/test/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesTest.java @@ -24,9 +24,12 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Path; +import 
java.nio.file.StandardOpenOption;
 import java.util.Collection;
+import java.util.UUID;
 import java.util.stream.Stream;
 
 import static org.junit.Assert.*;
@@ -298,6 +301,16 @@ public void memBitmapLoadIndexedTest() throws IOException, ParserException {
 
 	@Ignore("Hand tests")
 	public static class HandTest extends AbstractTest {
+		@Test
+		public void readBadChannelTest() throws IOException {
+			// java.nio.file.NoSuchFileException
+			try (FileChannel channel = FileChannel.open(Path.of(UUID.randomUUID().toString()),
+					StandardOpenOption.READ)) {
+				System.out.println(channel.isOpen());
+			}
+
+		}
+
 		@Test
 		public void largeTest() throws IOException {
 			/*
diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/HDTValue.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/HDTValue.java
index a469f578..7f11507f 100644
--- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/HDTValue.java
+++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/HDTValue.java
@@ -6,6 +6,25 @@
  * @author Antoine Willerval
  */
 public interface HDTValue {
+	/**
+	 * Compare two HDT values, first by their section position, then by
+	 * their id inside the section.
+	 *
+	 * @param v1 first value
+	 * @param v2 second value
+	 * @return negative, zero or positive value if v1 is respectively lower,
+	 *         equal or greater than v2
+	 */
+	static int compare(HDTValue v1, HDTValue v2) {
+		int c = Integer.compare(v1.getHDTPosition(), v2.getHDTPosition());
+
+		if (c != 0) {
+			return c;
+		}
+
+		return Long.compare(v1.getHDTId(), v2.getHDTId());
+	}
+
 	/**
 	 * @return is a delegate value, should return the exact hashcode when asked
 	 *         if true
@@ -19,4 +38,21 @@ public interface HDTValue {
 	 * @param delegate boolean
 	 */
 	void setDelegate(boolean delegate);
+
+	/**
+	 * @return the id inside the hdt section, 0 or negative for invalid ids
+	 */
+	long getHDTId();
+
+	/**
+	 * @return the section id of the hdt value
+	 */
+	int getHDTPosition();
+
+	/**
+	 * @return if the HDT id is valid
+	 */
+	default boolean isValidHDTId() {
+		return getHDTId() > 0;
+	}
 }
diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleBNodeHDT.java
b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleBNodeHDT.java index 3b5d2e3f..35bd2b74 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleBNodeHDT.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleBNodeHDT.java @@ -46,8 +46,8 @@ public boolean equals(Object o) { return false; if (this == o) { return true; - } else if (o instanceof SimpleBNodeHDT && this.id != -1 && ((SimpleBNodeHDT) o).getHdtId() != -1) { - return this.id == (((SimpleBNodeHDT) o).getHdtId()); + } else if (o instanceof HDTValue hv && this.id != -1 && hv.getHDTId() != -1) { + return this.id == hv.getHDTId(); } else { // could not compare IDs, we have to compare to string if (!(o instanceof BNode)) { return false; @@ -80,10 +80,16 @@ else if (this.position == SimpleIRIHDT.OBJECT_POS) } } - public long getHdtId() { + @Override + public long getHDTId() { return id; } + @Override + public int getHDTPosition() { + return position; + } + @Override public void setDelegate(boolean delegate) { this.delegate = delegate; diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java index f5d1a95f..4be71659 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java @@ -57,6 +57,16 @@ public SimpleIRIHDT(HDT hdt, String iriString) { this.localNameIdx = -1; } + @Override + public long getHDTId() { + return id; + } + + @Override + public int getHDTPosition() { + return postion; + } + public long getId() { return id; } diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleLiteralHDT.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleLiteralHDT.java index 241ec7c7..54056c06 100644 --- 
a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleLiteralHDT.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleLiteralHDT.java @@ -232,8 +232,8 @@ public boolean equals(Object o) { return true; } - if (o instanceof SimpleLiteralHDT) { - return ((SimpleLiteralHDT) o).getHdtID() == getHdtID(); + if (o instanceof HDTValue hv) { + return hv.getHDTId() == getHDTId(); } else if (o instanceof Literal other) { // Compare datatypes if (!getDatatype().equals(other.getDatatype())) { @@ -281,10 +281,16 @@ public String toString() { }); } - public long getHdtID() { + @Override + public long getHDTId() { return hdtID; } + @Override + public int getHDTPosition() { + return SimpleIRIHDT.OBJECT_POS; // a literal is only an object + } + @Override public String stringValue() { return getLabel(); diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStore.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStore.java index 400e2aee..7316d4c5 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStore.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStore.java @@ -14,8 +14,7 @@ import com.the_qa_company.qendpoint.core.util.StopWatch; import com.the_qa_company.qendpoint.core.util.io.Closer; import com.the_qa_company.qendpoint.model.EndpointStoreValueFactory; -import com.the_qa_company.qendpoint.model.SimpleBNodeHDT; -import com.the_qa_company.qendpoint.model.SimpleIRIHDT; +import com.the_qa_company.qendpoint.model.HDTValue; import com.the_qa_company.qendpoint.utils.BitArrayDisk; import com.the_qa_company.qendpoint.utils.CloseSafeHDT; import com.the_qa_company.qendpoint.utils.OverrideHDTOptions; @@ -803,24 +802,20 @@ public void modifyBitmaps(Resource subject, IRI predicate, Value object) { // mark in HDT the store the subject, predicate, objects that are used // in rdf4j long subjectID; - if (subject 
instanceof SimpleIRIHDT iriHDT) { - subjectID = iriHDT.getId(); - } else if (subject instanceof SimpleBNodeHDT bNodeHDT) { - subjectID = bNodeHDT.getHdtId(); + if (subject instanceof HDTValue hv) { + subjectID = hv.getHDTId(); } else { subjectID = -1; } long predicateID; - if (predicate instanceof SimpleIRIHDT iriHDT) { - predicateID = iriHDT.getId(); + if (predicate instanceof HDTValue hv) { + predicateID = hv.getHDTId(); } else { predicateID = -1; } long objectID; - if (object instanceof SimpleIRIHDT iriHDT) { - objectID = iriHDT.getId(); - } else if (object instanceof SimpleBNodeHDT bNodeHDT) { - objectID = bNodeHDT.getHdtId(); + if (object instanceof HDTValue hv) { + objectID = hv.getHDTId(); } else { objectID = -1; } diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java index 537efe30..45596a81 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java @@ -112,7 +112,7 @@ public long objectToID(Value obj) { } return hdt.getDictionary().stringToId(translate, TripleComponentRole.OBJECT); } else if (obj instanceof SimpleLiteralHDT hdtObj) { - return hdtObj.getHdtID(); + return hdtObj.getHDTId(); } else { if (QueryEvaluationUtil.isSimpleLiteral(obj)) { return this.hdt.getDictionary().stringToId('"' + obj.stringValue() + '"', diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/tools/QEPSearch.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/tools/QEPSearch.java index 1b2e84be..f1018664 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/tools/QEPSearch.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/tools/QEPSearch.java @@ -877,11 +877,11 @@ private String prettyComponent(Object object) { } if (object instanceof SimpleLiteralHDT hdtLit) { 
- return out + colorTool.yellow() + " (" + hdtLit.getHdtID() + ")"; + return out + colorTool.yellow() + " (" + hdtLit.getHDTId() + ")"; } else if (object instanceof SimpleIRIHDT hdtIri) { return out + colorTool.yellow() + " (" + hdtIri.getId() + ")"; } else if (object instanceof SimpleBNodeHDT hdtBN) { - return out + colorTool.yellow() + " (" + hdtBN.getHdtId() + ")"; + return out + colorTool.yellow() + " (" + hdtBN.getHDTId() + ")"; } else { return out + colorTool.yellow(); } diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/BitArrayDisk.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/BitArrayDisk.java index 292b0045..e17e9352 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/BitArrayDisk.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/BitArrayDisk.java @@ -6,11 +6,14 @@ import com.the_qa_company.qendpoint.core.listener.ProgressListener; import com.the_qa_company.qendpoint.core.util.io.IOUtil; +import java.io.BufferedInputStream; import java.io.Closeable; +import java.io.EOFException; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.file.Files; import java.util.Arrays; /** @@ -106,6 +109,7 @@ public void changeToInDisk(File file) throws IOException { inMemory = false; this.output = new NioFile(file); writeBits(); + for (int offset = 0; offset < words.length; offset++) { output.writeLong(words[offset], 8L * (offset + 1)); } @@ -119,8 +123,7 @@ public void changeToInDisk(File file) throws IOException { */ private void writeBits() throws IOException { // write the length of the array in the beginning - int nwords = (int) numWords(allBits); - this.output.writeLong(nwords, 0); + this.output.writeLong((int) numWords(allBits), 0); } private void initWordsArray(long nbits) throws IOException { @@ -137,12 +140,18 @@ private void initWordsArray(long nbits) throws IOException { int lastNonZero = 
-1; // read previous values - for (int i = 0; i < this.words.length; i++) { - long v = this.output.readLong((i + 1) * 8L); - if (v != 0) { - this.words[i] = v; - lastNonZero = i; + try (BufferedInputStream is = new BufferedInputStream( + Files.newInputStream(this.output.getFile().toPath()))) { + // skip header + is.skipNBytes(8); + for (int i = 0; i < this.words.length; i++) { + long v = IOUtil.readLongBigEndian(is); + if (v != 0) { + this.words[i] = v; + lastNonZero = i; + } } + } catch (EOFException ignore) { } // recompute numbits if we have at least one bit if (lastNonZero != -1) diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/CloseSafeHDT.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/CloseSafeHDT.java index 46565f1f..9a16c33e 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/CloseSafeHDT.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/utils/CloseSafeHDT.java @@ -89,4 +89,22 @@ public IteratorTripleString search(CharSequence subject, CharSequence predicate, throws NotFoundException { return hdt.search(subject, predicate, object); } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph) throws NotFoundException { + return hdt.search(subject, predicate, object, graph); + } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + int searchOrderMask) throws NotFoundException { + return hdt.search(subject, predicate, object, searchOrderMask); + } + + @Override + public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, + CharSequence graph, int searchOrderMask) throws NotFoundException { + return hdt.search(subject, predicate, object, graph, searchOrderMask); + } } diff --git a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/EndpointStoreTest.java 
b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/EndpointStoreTest.java index 941f1481..c7de1f46 100644 --- a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/EndpointStoreTest.java +++ b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/EndpointStoreTest.java @@ -795,7 +795,7 @@ public void bnodeTest() throws ParserException, IOException { Resource bnode = converter.idToSubjectHDTResource(1L); Assert.assertTrue(bnode instanceof BNode); Assert.assertTrue(bnode instanceof SimpleBNodeHDT); - Assert.assertEquals(1L, ((SimpleBNodeHDT) bnode).getHdtId()); + Assert.assertEquals(1L, ((SimpleBNodeHDT) bnode).getHDTId()); Assert.assertEquals("aaaa", ((BNode) bnode).getID()); Assert.assertEquals("_:aaaa", bnode.toString()); try (SailRepositoryConnection connection = repo.getConnection()) {