diff --git a/hdt-java-core/pom.xml b/hdt-java-core/pom.xml
index 195a0de9..e457fead 100644
--- a/hdt-java-core/pom.xml
+++ b/hdt-java-core/pom.xml
@@ -49,8 +49,9 @@
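<artifactId>jena-arq</artifactId>
</dependency>
<dependency>
- <groupId>pl.edu.icm</groupId>
+ <groupId>org.visnow</groupId>
<artifactId>JLargeArrays</artifactId>
+ <version>1.7-20220624.150242-7</version>
</dependency>
<dependency>
<groupId>pl.pragmatists</groupId>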
@@ -63,7 +64,12 @@
<version>1.6</version>
-
+ <repositories>
+ <repository>
+ <id>gitlab-maven</id>
+ <url>https://gitlab.com/api/v4/projects/375779/packages/maven</url>
+ </repository>
+ </repositories>
diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap375Disk.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap375Disk.java
index d9af9fb6..eec2e6e2 100644
--- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap375Disk.java
+++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap375Disk.java
@@ -111,7 +111,7 @@ public boolean access(long bitIndex) {
if (bitIndex < 0)
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
- int wordIndex = wordIndex(bitIndex);
+ long wordIndex = wordIndex(bitIndex);
if(wordIndex>=words.length()) {
return false;
}
@@ -415,4 +415,4 @@ public void load(InputStream input, ProgressListener listener) throws IOExceptio
super.load(input, listener);
updateIndex();
}
-}
\ No newline at end of file
+}
diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap64Disk.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap64Disk.java
index 777423d5..6c1ccef2 100644
--- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap64Disk.java
+++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap64Disk.java
@@ -21,6 +21,7 @@
import org.rdfhdt.hdt.compact.integer.VByte;
import org.rdfhdt.hdt.exceptions.NotImplementedException;
+import org.rdfhdt.hdt.hdt.HDTVocabulary;
import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.util.BitUtil;
import org.rdfhdt.hdt.util.crc.CRC32;
@@ -37,7 +38,7 @@
/**
* Version of Bitmap64 which is backed up on disk
*/
-public class Bitmap64Disk implements Closeable {
+public class Bitmap64Disk implements Closeable, ModifiableBitmap {
// Constants
protected final static int LOGW = 6;
@@ -59,7 +60,7 @@ public Bitmap64Disk(String location, long nbits) {
/**
* Given a bit index, return word index containing it.
*/
- protected static int wordIndex(long bitIndex) {
- return (int) (bitIndex >>> LOGW);
+ protected static long wordIndex(long bitIndex) {
+ return bitIndex >>> LOGW; // keep the full 64-bit word index; the old (int) cast truncated it for very large bitmaps
}
@@ -78,7 +79,7 @@ protected static int lastWordNumBits(long numbits) {
return (int) ((numbits-1) % W)+1; // +1 To have output in the range 1-64, -1 to compensate.
}
- protected final void ensureSize(int wordsRequired) {
+ protected final void ensureSize(long wordsRequired) { // NOTE(review): requests a resize to max(2 * getSize(), wordsRequired) on every call - confirm words.resize() tolerates that
words.resize(Math.max(words.getSize()*2, wordsRequired));
}
@@ -97,7 +98,7 @@ public boolean access(long bitIndex) {
if (bitIndex < 0)
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
- int wordIndex = wordIndex(bitIndex);
+ long wordIndex = wordIndex(bitIndex);
if(wordIndex>=words.length()) {
return false;
}
@@ -105,6 +106,65 @@ public boolean access(long bitIndex) {
return (words.get(wordIndex) & (1L << bitIndex)) != 0;
}
+ @Override
+ public long rank1(long pos) { // rank queries are not implemented for this disk-backed bitmap
+ throw new NotImplementedException();
+ }
+
+ @Override
+ public long rank0(long pos) { // rank queries are not implemented for this disk-backed bitmap
+ throw new NotImplementedException();
+ }
+
+ @Override
+ public long selectNext1(long fromIndex) {
+ if (fromIndex < 0)
+ throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);
+
+ final long totalWords = words.length();
+ long idx = wordIndex(fromIndex);
+ if (idx >= totalWords)
+ return -1;
+
+ // Clear the bits below fromIndex in the first word (Java takes the shift count modulo 64).
+ long current = words.get(idx) & (~0L << fromIndex);
+ while (current == 0) { // skip empty words until a set bit shows up or the words run out
+ if (++idx == totalWords)
+ return -1;
+ current = words.get(idx);
+ }
+ return (idx * W) + Long.numberOfTrailingZeros(current);
+ }
+
+ @Override
+ public long select0(long n) { // select queries by rank are not implemented for this disk-backed bitmap
+ throw new NotImplementedException();
+ }
+
+ @Override
+ public long select1(long n) { // select queries by rank are not implemented for this disk-backed bitmap
+ throw new NotImplementedException();
+ }
+
+ /**
+ * Counts the set bits: sums Long.bitCount over each word up to the one holding bit index {@code numbits}.
+ */
+ @Override
+ public long countOnes() {
+ if (words.length() == 0)
+ return 0;
+ long acc = 0;
+ long end = Math.min(wordIndex(numbits), words.length() - 1); // clamp to the last allocated word
+ for (long i = 0; i <= end; i++) // long counter: an int would overflow before reaching a long bound
+ acc += Long.bitCount(words.get(i));
+ return acc;
+ }
+
+ @Override
+ public long countZeros() {
+ return numbits - countOnes(); // zeros among the numbits logical bits, independent of the allocated capacity
+ }
+
/* (non-Javadoc)
* @see hdt.compact.bitmap.ModifiableBitmap#append(boolean)
*/
@@ -116,7 +176,7 @@ public void set(long bitIndex, boolean value) {
if (bitIndex < 0)
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
- int wordIndex = wordIndex(bitIndex);
+ long wordIndex = wordIndex(bitIndex);
ensureSize(wordIndex+1);
if(value) {
@@ -128,27 +188,13 @@ public void set(long bitIndex, boolean value) {
this.numbits = Math.max(this.numbits, bitIndex+1);
}
- public long selectPrev1(long start) {
- throw new NotImplementedException();
+ @Override
+ public String getType() { // reports the HDTVocabulary identifier for the plain bitmap type
+ return HDTVocabulary.BITMAP_TYPE_PLAIN;
}
- public long selectNext1(long fromIndex) {
- if (fromIndex < 0)
- throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);
-
- int wordIndex = wordIndex(fromIndex);
- if (wordIndex >= words.length())
- return -1;
-
- long word = words.get(wordIndex) & (~0L << fromIndex);
-
- while (true) {
- if (word != 0)
- return ((long)wordIndex * W) + Long.numberOfTrailingZeros(word);
- if (++wordIndex == words.length())
- return -1;
- word = words.get(wordIndex);
- }
+ public long selectPrev1(long start) { // backwards search is not implemented for this disk-backed bitmap
+ throw new NotImplementedException();
}
public long getWord(int word) {
@@ -211,4 +257,4 @@ public long getRealSizeBytes() {
public void close() throws IOException {
words.close();
}
-}
\ No newline at end of file
+}
diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt32.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt32.java
index 11df538d..53ee2fdb 100644
--- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt32.java
+++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt32.java
@@ -105,9 +105,13 @@ public void set(long position, long value) {
@Override
public void append(long value) {
assert value>=0 && value<=Integer.MAX_VALUE;
-
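- if(data.length<numelements+1) {
- resizeArray(data.length*2);
+
+ long neededSize = numelements+1L;
+ if(neededSize > Integer.MAX_VALUE - 5) {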
+ throw new IllegalArgumentException("Needed size exceeds the maximum size of this data structure " + neededSize);
+ }
+ if(data.length < neededSize) {
+ resizeArray((int) Math.min(Integer.MAX_VALUE - 5L, data.length*2L));
}
data[numelements++] = (int) value;
}
diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt64.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt64.java
index 2f246de1..f374fb2e 100644
--- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt64.java
+++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt64.java
@@ -94,7 +94,7 @@ public long get(long position) {
@Override
public void set(long position, long value) {
assert position>=0 && position<=Integer.MAX_VALUE;
- assert value>=0 && value<=Long.MAX_VALUE;
+ assert value>=0;
data[(int)position] = value;
numelements = (int) Math.max(numelements, position+1);
@@ -102,11 +102,15 @@ public void set(long position, long value) {
@Override
public void append(long value) {
- assert value>=0 && value<=Long.MAX_VALUE;
+ assert value>=0;
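assert numelements<Integer.MAX_VALUE;
-
- if(data.length<numelements+1) {
- resizeArray(data.length*2);
+
+ long neededSize = numelements+1L;
+ if(neededSize > Integer.MAX_VALUE - 5) {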
+ throw new IllegalArgumentException("Needed size exceeds the maximum size of this data structure " + neededSize);
+ }
+ if(data.length < neededSize) {
+ resizeArray((int) Math.min(Integer.MAX_VALUE - 5L, data.length*2L));
}
data[(int)numelements++] = value;
}
diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64.java
index 616a5d9e..8c54bddd 100644
--- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64.java
+++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64.java
@@ -241,8 +241,11 @@ public void append(long value) {
}
long neededSize = numWordsFor(numbits, numentries+1);
- if(data.length<neededSize) {
- resizeArray(data.length*2);
+ if(neededSize > Integer.MAX_VALUE - 5) {
+ throw new IllegalArgumentException("Needed size exceeds the maximum size of this data structure " + neededSize);
+ }
+ if(data.length < neededSize) {
+ resizeArray((int) Math.min(Integer.MAX_VALUE - 5L, data.length*2L));
}
this.set((int)numentries, value);
diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Big.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Big.java
index 3121ea54..db536827 100644
--- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Big.java
+++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Big.java
@@ -44,9 +44,8 @@
import org.rdfhdt.hdt.util.crc.CRCInputStream;
import org.rdfhdt.hdt.util.crc.CRCOutputStream;
import org.rdfhdt.hdt.util.io.IOUtil;
-
-import pl.edu.icm.jlargearrays.LongLargeArray;
-import pl.edu.icm.jlargearrays.LargeArrayUtils;
+import org.visnow.jlargearrays.LargeArrayUtils;
+import org.visnow.jlargearrays.LongLargeArray;
/**
* @author mario.arias,Lyudmila Balakireva
@@ -55,8 +54,8 @@
public class SequenceLog64Big implements DynamicSequence {
private static final byte W = 64;
private static final int INDEX = 1073741824;
-
- LongLargeArray data;
+
+ LongLargeArray data;
private int numbits;
private long numentries=0;
private long maxvalue;
@@ -77,7 +76,7 @@ public SequenceLog64Big(int numbits, long capacity) {
long size = numWordsFor(numbits, capacity);
LongLargeArray.setMaxSizeOf32bitArray(SequenceLog64Big.INDEX);
- data = new LongLargeArray(Math.max((int)size,1));
+ data = new LongLargeArray(Math.max(size,1));
}
public SequenceLog64Big(int numbits, long capacity, boolean initialize) {
@@ -158,11 +157,7 @@ private void resizeArray(long size) {
//data = Arrays.copyOf(data, size);
if(size > 0) {
LongLargeArray a = new LongLargeArray(size);
- if (size < data.length()) {
- LargeArrayUtils.arraycopy(data, 0, a, 0, size);
- } else {
- LargeArrayUtils.arraycopy(data, 0, a, 0, data.length());
- }
+ LargeArrayUtils.arraycopy(data, 0, a, 0, Math.min(size, data.length()));
data = a;
}else{
this.numentries = 0;
@@ -182,7 +177,7 @@ public void add(Iterator elements) {
// Count and calculate number of bits needed per element.
while(elements.hasNext()) {
long val = elements.next();
- max = val>max ? val : max;
+ max = Math.max(val, max);
numentries++;
}
@@ -208,7 +203,7 @@ public void addIntegers(ArrayList elements) {
// Count and calculate number of bits needed per element.
for (int i=0;i<elements.size();i++) {
long val = elements.get(i).longValue();
- max = val>max ? val : max;
+ max = Math.max(val, max);
numentries++;
}
diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/HDTImpl.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/HDTImpl.java
index 7c5e829d..f69f1bcb 100644
--- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/HDTImpl.java
+++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/HDTImpl.java
@@ -786,7 +786,7 @@ public void diffBit(String location, HDT hdt, Bitmap deleteBitmap, ProgressListe
log.debug("Generating Triples...");
il.notifyProgress(40, "Generating Triples...");
// map the triples based on the new dictionary
- BitmapTriplesIteratorMapDiff mapIter = new BitmapTriplesIteratorMapDiff(hdt, deleteBitmap, diff, iter.getCount() + 1);
+ BitmapTriplesIteratorMapDiff mapIter = new BitmapTriplesIteratorMapDiff(hdt, deleteBitmap, diff);
BitmapTriples triples = new BitmapTriples(spec);
triples.load(mapIter, listener);
diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriples.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriples.java
index 957cdbb7..bfe186ac 100644
--- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriples.java
+++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriples.java
@@ -129,8 +129,8 @@ public void load(IteratorTripleID it, ProgressListener listener) {
long number = it.estimatedNumResults();
- SequenceLog64 vectorY = new SequenceLog64(BitUtil.log2(number), number);
- SequenceLog64 vectorZ = new SequenceLog64(BitUtil.log2(number), number);
+ DynamicSequence vectorY = new SequenceLog64Big(BitUtil.log2(number), number);
+ DynamicSequence vectorZ = new SequenceLog64Big(BitUtil.log2(number), number);
ModifiableBitmap bitY = new Bitmap375(number);
ModifiableBitmap bitZ = new Bitmap375(number);
diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriplesIteratorMapDiff.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriplesIteratorMapDiff.java
index 3b760248..7a86d2f4 100644
--- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriplesIteratorMapDiff.java
+++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriplesIteratorMapDiff.java
@@ -32,13 +32,13 @@ public class BitmapTriplesIteratorMapDiff implements IteratorTripleID {
TripleIDComparator tripleIDComparator = new TripleIDComparator(TripleComponentOrder.SPO);
Bitmap bitArrayDisk;
- public BitmapTriplesIteratorMapDiff(HDT hdtOriginal, Bitmap deleteBitmap, DictionaryDiff dictionaryDiff, long countTriples) {
+ public BitmapTriplesIteratorMapDiff(HDT hdtOriginal, Bitmap deleteBitmap, DictionaryDiff dictionaryDiff) {
this.subjMapping = dictionaryDiff.getAllMappings().get("subject");
this.objMapping = dictionaryDiff.getAllMappings().get("object");
this.predMapping = dictionaryDiff.getAllMappings().get("predicate");
this.sharedMapping = dictionaryDiff.getAllMappings().get("shared");
this.dictionaryDiff = dictionaryDiff;
- this.countTriples = countTriples;
+ this.countTriples = Math.max(0, hdtOriginal.getTriples().getNumberOfElements() - deleteBitmap.countOnes());
this.triples = hdtOriginal.getTriples();
this.bitArrayDisk = deleteBitmap;
list = getTripleID(0).listIterator();
diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/IOUtil.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/IOUtil.java
index f4275796..bbd84658 100644
--- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/IOUtil.java
+++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/IOUtil.java
@@ -30,7 +30,7 @@
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.util.string.ByteStringUtil;
-import pl.edu.icm.jlargearrays.LargeArrayUtils;
+import org.visnow.jlargearrays.LargeArrayUtils;
import java.io.*;
import java.net.URL;
diff --git a/hdt-java-core/src/test/java/org/rdfhdt/hdt/compact/sequence/LargeArrayTest.java b/hdt-java-core/src/test/java/org/rdfhdt/hdt/compact/sequence/LargeArrayTest.java
new file mode 100644
index 00000000..4eeaee88
--- /dev/null
+++ b/hdt-java-core/src/test/java/org/rdfhdt/hdt/compact/sequence/LargeArrayTest.java
@@ -0,0 +1,20 @@
+package org.rdfhdt.hdt.compact.sequence;
+
+import org.junit.Test;
+import org.visnow.jlargearrays.LargeArray;
+import org.visnow.jlargearrays.LongLargeArray;
+
+public class LargeArrayTest {
+ // Smoke test for the JLargeArrays dependency: the allocation below passes as long as it does not throw.
+ @Test
+ public void allocationTest() {
+ int old = LargeArray.getMaxSizeOf32bitArray(); // remember the static limit so it can be restored afterwards
+ try {
+ LargeArray.setMaxSizeOf32bitArray(100); // presumably lowered so a tiny array exercises the large-array path - TODO confirm
+ long size = LargeArray.getMaxSizeOf32bitArray() + 2L; // two elements past the configured limit
+ new LongLargeArray(size);
+ } finally {
+ LargeArray.setMaxSizeOf32bitArray(old);
+ }
+ }
+}