diff --git a/hdt-java-core/pom.xml b/hdt-java-core/pom.xml index 195a0de9..e457fead 100644 --- a/hdt-java-core/pom.xml +++ b/hdt-java-core/pom.xml @@ -49,8 +49,9 @@ jena-arq - pl.edu.icm + org.visnow JLargeArrays + 1.7-20220624.150242-7 pl.pragmatists @@ -63,7 +64,12 @@ 1.6 - + + + gitlab-maven + https://gitlab.com/api/v4/projects/375779/packages/maven + + diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap375Disk.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap375Disk.java index d9af9fb6..eec2e6e2 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap375Disk.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap375Disk.java @@ -111,7 +111,7 @@ public boolean access(long bitIndex) { if (bitIndex < 0) throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex); - int wordIndex = wordIndex(bitIndex); + long wordIndex = wordIndex(bitIndex); if(wordIndex>=words.length()) { return false; } @@ -415,4 +415,4 @@ public void load(InputStream input, ProgressListener listener) throws IOExceptio super.load(input, listener); updateIndex(); } -} \ No newline at end of file +} diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap64Disk.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap64Disk.java index 777423d5..6c1ccef2 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap64Disk.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap64Disk.java @@ -21,6 +21,7 @@ import org.rdfhdt.hdt.compact.integer.VByte; import org.rdfhdt.hdt.exceptions.NotImplementedException; +import org.rdfhdt.hdt.hdt.HDTVocabulary; import org.rdfhdt.hdt.listener.ProgressListener; import org.rdfhdt.hdt.util.BitUtil; import org.rdfhdt.hdt.util.crc.CRC32; @@ -37,7 +38,7 @@ /** * Version of Bitmap64 which is backed up on disk */ -public class Bitmap64Disk implements Closeable { +public class Bitmap64Disk implements Closeable, ModifiableBitmap { // Constants protected final static int LOGW = 6; @@ -59,7 +60,7 @@ public Bitmap64Disk(String location, long nbits) { /** * Given a bit index, return word index containing it. */ - protected static int wordIndex(long bitIndex) { + protected static long wordIndex(long bitIndex) { return (int) (bitIndex >>> LOGW); } @@ -78,7 +79,7 @@ protected static int lastWordNumBits(long numbits) { return (int) ((numbits-1) % W)+1; // +1 To have output in the range 1-64, -1 to compensate. } - protected final void ensureSize(int wordsRequired) { + protected final void ensureSize(long wordsRequired) { words.resize(Math.max(words.getSize()*2, wordsRequired)); } @@ -97,7 +98,7 @@ public boolean access(long bitIndex) { if (bitIndex < 0) throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex); - int wordIndex = wordIndex(bitIndex); + long wordIndex = wordIndex(bitIndex); if(wordIndex>=words.length()) { return false; } @@ -105,6 +106,65 @@ public boolean access(long bitIndex) { return (words.get(wordIndex) & (1L << bitIndex)) != 0; } + @Override + public long rank1(long pos) { + throw new NotImplementedException(); + } + + @Override + public long rank0(long pos) { + throw new NotImplementedException(); + } + + @Override + public long selectNext1(long fromIndex) { + if (fromIndex < 0) + throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex); + + long wordIndex = wordIndex(fromIndex); + if (wordIndex >= words.length()) + return -1; + + long word = words.get(wordIndex) & (~0L << fromIndex); + + while (true) { + if (word != 0) + return ((long)wordIndex * W) + Long.numberOfTrailingZeros(word); + if (++wordIndex == words.length()) + return -1; + word = words.get(wordIndex); + } + } + + @Override + public long select0(long n) { + throw new NotImplementedException(); + } + + @Override + public long select1(long n) { + throw new NotImplementedException(); + } + + @Override + public long countOnes() { + if (words.length() == 0) + return 0; + long acc = 0; + long end = wordIndex(numbits); + if (end >= words.length()) { + end = words.length() - 1; + } + for (int i = 0; i <= end; i++) + acc += Long.bitCount(words.get(i)); + return acc; + } + + @Override + public long countZeros() { + return words.length() * 64L - countOnes(); + } + /* (non-Javadoc) * @see hdt.compact.bitmap.ModifiableBitmap#append(boolean) */ @@ -116,7 +176,7 @@ public void set(long bitIndex, boolean value) { if (bitIndex < 0) throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex); - int wordIndex = wordIndex(bitIndex); + long wordIndex = wordIndex(bitIndex); ensureSize(wordIndex+1); if(value) { @@ -128,27 +188,13 @@ public void set(long bitIndex, boolean value) { this.numbits = Math.max(this.numbits, bitIndex+1); } - public long selectPrev1(long start) { - throw new NotImplementedException(); + @Override + public String getType() { + return HDTVocabulary.BITMAP_TYPE_PLAIN; } - public long selectNext1(long fromIndex) { - if (fromIndex < 0) - throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex); - - int wordIndex = wordIndex(fromIndex); - if (wordIndex >= words.length()) - return -1; - - long word = words.get(wordIndex) & (~0L << fromIndex); - - while (true) { - if (word != 0) - return ((long)wordIndex * W) + Long.numberOfTrailingZeros(word); - if (++wordIndex == words.length()) - return -1; - word = words.get(wordIndex); - } + public long selectPrev1(long start) { + throw new NotImplementedException(); } public long getWord(int word) { @@ -211,4 +257,4 @@ public long getRealSizeBytes() { public void close() throws IOException { words.close(); } -} \ No newline at end of file +} diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt32.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt32.java index 11df538d..53ee2fdb 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt32.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt32.java @@ -105,9 +105,13 @@ public void set(long position, long value) { @Override public void append(long value) { assert value>=0 && value<=Integer.MAX_VALUE; - - if(data.length Integer.MAX_VALUE - 5) { + throw new IllegalArgumentException("Needed size exceeds the maximum size of this data structure " + neededSize); + } + if(data.length < neededSize) { + resizeArray((int) Math.min(Integer.MAX_VALUE - 5L, data.length*2L)); } data[numelements++] = (int) value; } diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt64.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt64.java index 2f246de1..f374fb2e 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt64.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt64.java @@ -94,7 +94,7 @@ public long get(long position) { @Override public void set(long position, long value) { assert position>=0 && position<=Integer.MAX_VALUE; - assert value>=0 && value<=Long.MAX_VALUE; + assert value>=0; data[(int)position] = value; numelements = (int) Math.max(numelements, position+1); @@ -102,11 +102,15 @@ public void set(long position, long value) { @Override public void append(long value) { - assert value>=0 && value<=Long.MAX_VALUE; + assert value>=0; assert numelements Integer.MAX_VALUE - 5) { + throw new IllegalArgumentException("Needed size exceeds the maximum size of this data structure " + neededSize); + } + if(data.length < neededSize) { + resizeArray((int) Math.min(Integer.MAX_VALUE - 5L, data.length*2L)); } data[(int)numelements++] = value; } diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64.java index 616a5d9e..8c54bddd 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64.java @@ -241,8 +241,11 @@ public void append(long value) { } long neededSize = numWordsFor(numbits, numentries+1); - if(data.length Integer.MAX_VALUE - 5) { + throw new IllegalArgumentException("Needed size exceeds the maximum size of this data structure " + neededSize); + } + if(data.length < neededSize) { + resizeArray((int) Math.min(Integer.MAX_VALUE - 5L, data.length*2L)); } this.set((int)numentries, value); diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Big.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Big.java index 3121ea54..db536827 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Big.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Big.java @@ -44,9 +44,8 @@ import org.rdfhdt.hdt.util.crc.CRCInputStream; import org.rdfhdt.hdt.util.crc.CRCOutputStream; import org.rdfhdt.hdt.util.io.IOUtil; - -import pl.edu.icm.jlargearrays.LongLargeArray; -import pl.edu.icm.jlargearrays.LargeArrayUtils; +import org.visnow.jlargearrays.LargeArrayUtils; +import org.visnow.jlargearrays.LongLargeArray; /** * @author mario.arias,Lyudmila Balakireva @@ -55,8 +54,8 @@ public class SequenceLog64Big implements DynamicSequence { private static final byte W = 64; private static final int INDEX = 1073741824; - - LongLargeArray data; + + LongLargeArray data; private int numbits; private long numentries=0; private long maxvalue; @@ -77,7 +76,7 @@ public SequenceLog64Big(int numbits, long capacity) { long size = numWordsFor(numbits, capacity); LongLargeArray.setMaxSizeOf32bitArray(SequenceLog64Big.INDEX); - data = new LongLargeArray(Math.max((int)size,1)); + data = new LongLargeArray(Math.max(size,1)); } public SequenceLog64Big(int numbits, long capacity, boolean initialize) { @@ -158,11 +157,7 @@ private void resizeArray(long size) { //data = Arrays.copyOf(data, size); if(size > 0) { LongLargeArray a = new LongLargeArray(size); - if (size < data.length()) { - LargeArrayUtils.arraycopy(data, 0, a, 0, size); - } else { - LargeArrayUtils.arraycopy(data, 0, a, 0, data.length()); - } + LargeArrayUtils.arraycopy(data, 0, a, 0, Math.min(size, data.length())); data = a; }else{ this.numentries = 0; @@ -182,7 +177,7 @@ public void add(Iterator elements) { // Count and calculate number of bits needed per element. while(elements.hasNext()) { long val = elements.next(); - max = val>max ? val : max; + max = Math.max(val, max); numentries++; } @@ -208,7 +203,7 @@ public void addIntegers(ArrayList elements) { // Count and calculate number of bits needed per element. for (int i=0;imax ? val : max; + max = Math.max(val, max); numentries++; } diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/HDTImpl.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/HDTImpl.java index 7c5e829d..f69f1bcb 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/HDTImpl.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/HDTImpl.java @@ -786,7 +786,7 @@ public void diffBit(String location, HDT hdt, Bitmap deleteBitmap, ProgressListe log.debug("Generating Triples..."); il.notifyProgress(40, "Generating Triples..."); // map the triples based on the new dictionary - BitmapTriplesIteratorMapDiff mapIter = new BitmapTriplesIteratorMapDiff(hdt, deleteBitmap, diff, iter.getCount() + 1); + BitmapTriplesIteratorMapDiff mapIter = new BitmapTriplesIteratorMapDiff(hdt, deleteBitmap, diff); BitmapTriples triples = new BitmapTriples(spec); triples.load(mapIter, listener); diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriples.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriples.java index 957cdbb7..bfe186ac 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriples.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriples.java @@ -129,8 +129,8 @@ public void load(IteratorTripleID it, ProgressListener listener) { long number = it.estimatedNumResults(); - SequenceLog64 vectorY = new SequenceLog64(BitUtil.log2(number), number); - SequenceLog64 vectorZ = new SequenceLog64(BitUtil.log2(number), number); + DynamicSequence vectorY = new SequenceLog64Big(BitUtil.log2(number), number); + DynamicSequence vectorZ = new SequenceLog64Big(BitUtil.log2(number), number); ModifiableBitmap bitY = new Bitmap375(number); ModifiableBitmap bitZ = new Bitmap375(number); diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriplesIteratorMapDiff.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriplesIteratorMapDiff.java index 3b760248..7a86d2f4 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriplesIteratorMapDiff.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriplesIteratorMapDiff.java @@ -32,13 +32,13 @@ public class BitmapTriplesIteratorMapDiff implements IteratorTripleID { TripleIDComparator tripleIDComparator = new TripleIDComparator(TripleComponentOrder.SPO); Bitmap bitArrayDisk; - public BitmapTriplesIteratorMapDiff(HDT hdtOriginal, Bitmap deleteBitmap, DictionaryDiff dictionaryDiff, long countTriples) { + public BitmapTriplesIteratorMapDiff(HDT hdtOriginal, Bitmap deleteBitmap, DictionaryDiff dictionaryDiff) { this.subjMapping = dictionaryDiff.getAllMappings().get("subject"); this.objMapping = dictionaryDiff.getAllMappings().get("object"); this.predMapping = dictionaryDiff.getAllMappings().get("predicate"); this.sharedMapping = dictionaryDiff.getAllMappings().get("shared"); this.dictionaryDiff = dictionaryDiff; - this.countTriples = countTriples; + this.countTriples = Math.max(0, hdtOriginal.getTriples().getNumberOfElements() - deleteBitmap.countOnes()); this.triples = hdtOriginal.getTriples(); this.bitArrayDisk = deleteBitmap; list = getTripleID(0).listIterator(); diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/IOUtil.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/IOUtil.java index f4275796..bbd84658 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/IOUtil.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/IOUtil.java @@ -30,7 +30,7 @@ import org.apache.commons.compress.compressors.xz.XZCompressorInputStream; import org.rdfhdt.hdt.listener.ProgressListener; import org.rdfhdt.hdt.util.string.ByteStringUtil; -import pl.edu.icm.jlargearrays.LargeArrayUtils; +import org.visnow.jlargearrays.LargeArrayUtils; import java.io.*; import java.net.URL; diff --git a/hdt-java-core/src/test/java/org/rdfhdt/hdt/compact/sequence/LargeArrayTest.java b/hdt-java-core/src/test/java/org/rdfhdt/hdt/compact/sequence/LargeArrayTest.java new file mode 100644 index 00000000..4eeaee88 --- /dev/null +++ b/hdt-java-core/src/test/java/org/rdfhdt/hdt/compact/sequence/LargeArrayTest.java @@ -0,0 +1,20 @@ +package org.rdfhdt.hdt.compact.sequence; + +import org.junit.Test; +import org.visnow.jlargearrays.LargeArray; +import org.visnow.jlargearrays.LongLargeArray; + +public class LargeArrayTest { + + @Test + public void allocationTest() { + int old = LargeArray.getMaxSizeOf32bitArray(); + try { + LargeArray.setMaxSizeOf32bitArray(100); + long size = LargeArray.getMaxSizeOf32bitArray() + 2L; + new LongLargeArray(size); + } finally { + LargeArray.setMaxSizeOf32bitArray(old); + } + } +}