Skip to content

Commit

Permalink
Merge pull request #174 from ate47/diff_fix
Browse files Browse the repository at this point in the history
Update JLargeArrays and use SequenceLog64Big instead of SequenceLog64
  • Loading branch information
D063520 authored Sep 28, 2022
2 parents 7222f8a + 9d69a25 commit 43c374c
Show file tree
Hide file tree
Showing 12 changed files with 136 additions and 58 deletions.
10 changes: 8 additions & 2 deletions hdt-java-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@
<artifactId>jena-arq</artifactId>
</dependency>
<dependency>
<groupId>pl.edu.icm</groupId>
<groupId>org.visnow</groupId>
<artifactId>JLargeArrays</artifactId>
<version>1.7-20220624.150242-7</version>
</dependency>
<dependency>
<groupId>pl.pragmatists</groupId>
Expand All @@ -63,7 +64,12 @@
<version>1.6</version>
</dependency>
</dependencies>

<repositories>
<repository>
<id>gitlab-maven</id>
<url>https://gitlab.com/api/v4/projects/375779/packages/maven</url>
</repository>
</repositories>
<build>
<plugins>
<plugin>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public boolean access(long bitIndex) {
if (bitIndex < 0)
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);

int wordIndex = wordIndex(bitIndex);
long wordIndex = wordIndex(bitIndex);
if(wordIndex>=words.length()) {
return false;
}
Expand Down Expand Up @@ -415,4 +415,4 @@ public void load(InputStream input, ProgressListener listener) throws IOExceptio
super.load(input, listener);
updateIndex();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import org.rdfhdt.hdt.compact.integer.VByte;
import org.rdfhdt.hdt.exceptions.NotImplementedException;
import org.rdfhdt.hdt.hdt.HDTVocabulary;
import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.util.BitUtil;
import org.rdfhdt.hdt.util.crc.CRC32;
Expand All @@ -37,7 +38,7 @@
/**
* Version of Bitmap64 which is backed up on disk
*/
public class Bitmap64Disk implements Closeable {
public class Bitmap64Disk implements Closeable, ModifiableBitmap {

// Constants
protected final static int LOGW = 6;
Expand All @@ -59,7 +60,7 @@ public Bitmap64Disk(String location, long nbits) {
/**
* Given a bit index, return word index containing it.
*/
protected static int wordIndex(long bitIndex) {
protected static long wordIndex(long bitIndex) {
    // Return the full 64-bit quotient. The previous (int) narrowing wrapped the
    // result for bit indexes at or beyond 2^37 (LOGW = 6), even though the
    // method is declared to return long.
    return bitIndex >>> LOGW;
}

Expand All @@ -78,7 +79,7 @@ protected static int lastWordNumBits(long numbits) {
return (int) ((numbits-1) % W)+1; // +1 To have output in the range 1-64, -1 to compensate.
}

protected final void ensureSize(int wordsRequired) {
protected final void ensureSize(long wordsRequired) {
// Asks the backing store to resize to the larger of twice its current getSize()
// and wordsRequired.
// NOTE(review): there is no "already large enough" check before resizing, so every
// call requests a resize; confirm that words.resize()/getSize() make this harmless.
words.resize(Math.max(words.getSize()*2, wordsRequired));
}

Expand All @@ -97,14 +98,73 @@ public boolean access(long bitIndex) {
if (bitIndex < 0)
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);

int wordIndex = wordIndex(bitIndex);
long wordIndex = wordIndex(bitIndex);
if(wordIndex>=words.length()) {
return false;
}

return (words.get(wordIndex) & (1L << bitIndex)) != 0;
}

/**
 * Not supported by this bitmap.
 *
 * @param pos bit position (ignored)
 * @return never returns normally
 * @throws NotImplementedException always
 */
@Override
public long rank1(long pos) {
throw new NotImplementedException();
}

/**
 * Not supported by this bitmap.
 *
 * @param pos bit position (ignored)
 * @return never returns normally
 * @throws NotImplementedException always
 */
@Override
public long rank0(long pos) {
throw new NotImplementedException();
}

/**
 * Finds the position of the first bit set to 1 at or after {@code fromIndex}.
 *
 * @param fromIndex first bit position to inspect (inclusive)
 * @return the position of the next set bit, or -1 when the starting word lies
 *         past the stored words or no later word has a set bit
 * @throws IndexOutOfBoundsException if {@code fromIndex} is negative
 */
@Override
public long selectNext1(long fromIndex) {
    if (fromIndex < 0) {
        throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);
    }

    long cursor = wordIndex(fromIndex);
    if (cursor >= words.length()) {
        return -1;
    }

    // In the first word, discard the bits located before fromIndex
    // (a long shift only uses the low six bits of the shift count).
    long pending = words.get(cursor) & (~0L << fromIndex);

    // Skip forward over words that contain no set bit.
    while (pending == 0) {
        cursor++;
        if (cursor == words.length()) {
            return -1;
        }
        pending = words.get(cursor);
    }
    return (cursor * W) + Long.numberOfTrailingZeros(pending);
}

/**
 * Not supported by this bitmap.
 *
 * @param n rank of the zero bit to locate (ignored)
 * @return never returns normally
 * @throws NotImplementedException always
 */
@Override
public long select0(long n) {
throw new NotImplementedException();
}

/**
 * Not supported by this bitmap.
 *
 * @param n rank of the set bit to locate (ignored)
 * @return never returns normally
 * @throws NotImplementedException always
 */
@Override
public long select1(long n) {
throw new NotImplementedException();
}

/**
 * Counts the bits set to 1 by summing the population count of every word from
 * index 0 up to the word containing bit {@code numbits}, clamped to the last
 * stored word.
 *
 * @return the number of set bits in the visited words, or 0 when no word is stored
 */
@Override
public long countOnes() {
    long wordCount = words.length();
    if (wordCount == 0) {
        return 0;
    }

    // Last word to visit (inclusive), never past the stored words.
    long end = Math.min(wordIndex(numbits), wordCount - 1);

    long acc = 0;
    // The counter must be a long: an int counter overflows before reaching `end`
    // once the bitmap holds more than Integer.MAX_VALUE words.
    for (long i = 0; i <= end; i++) {
        acc += Long.bitCount(words.get(i));
    }
    return acc;
}

/**
 * Counts the bits that are not set, measured over the whole stored capacity
 * ({@code words.length()} words of 64 bits each) rather than over {@code numbits}.
 *
 * @return stored capacity in bits minus {@link #countOnes()}
 */
@Override
public long countZeros() {
    long capacityBits = words.length() * 64L;
    return capacityBits - countOnes();
}

/* (non-Javadoc)
* @see hdt.compact.bitmap.ModifiableBitmap#append(boolean)
*/
Expand All @@ -116,7 +176,7 @@ public void set(long bitIndex, boolean value) {
if (bitIndex < 0)
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);

int wordIndex = wordIndex(bitIndex);
long wordIndex = wordIndex(bitIndex);
ensureSize(wordIndex+1);

if(value) {
Expand All @@ -128,27 +188,13 @@ public void set(long bitIndex, boolean value) {
this.numbits = Math.max(this.numbits, bitIndex+1);
}

public long selectPrev1(long start) {
throw new NotImplementedException();
@Override
public String getType() {
return HDTVocabulary.BITMAP_TYPE_PLAIN;
}

public long selectNext1(long fromIndex) {
if (fromIndex < 0)
throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);

int wordIndex = wordIndex(fromIndex);
if (wordIndex >= words.length())
return -1;

long word = words.get(wordIndex) & (~0L << fromIndex);

while (true) {
if (word != 0)
return ((long)wordIndex * W) + Long.numberOfTrailingZeros(word);
if (++wordIndex == words.length())
return -1;
word = words.get(wordIndex);
}
public long selectPrev1(long start) {
throw new NotImplementedException();
}

public long getWord(int word) {
Expand Down Expand Up @@ -211,4 +257,4 @@ public long getRealSizeBytes() {
public void close() throws IOException {
words.close();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,13 @@ public void set(long position, long value) {
@Override
public void append(long value) {
assert value>=0 && value<=Integer.MAX_VALUE;

if(data.length<numelements+1) {
resizeArray(data.length*2);

long neededSize = numelements+1L;
if (neededSize > Integer.MAX_VALUE - 5) {
throw new IllegalArgumentException("Needed size exceeds the maximum size of this data structure " + neededSize);
}
if(data.length < neededSize) {
resizeArray((int) Math.min(Integer.MAX_VALUE - 5L, data.length*2L));
}
data[numelements++] = (int) value;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,19 +94,23 @@ public long get(long position) {
@Override
public void set(long position, long value) {
assert position>=0 && position<=Integer.MAX_VALUE;
assert value>=0 && value<=Long.MAX_VALUE;
assert value>=0;

data[(int)position] = value;
numelements = (int) Math.max(numelements, position+1);
}

@Override
public void append(long value) {
assert value>=0 && value<=Long.MAX_VALUE;
assert value>=0;
assert numelements<Long.MAX_VALUE;

if(data.length<numelements+1) {
resizeArray(data.length*2);

long neededSize = numelements+1L;
if (neededSize > Integer.MAX_VALUE - 5) {
throw new IllegalArgumentException("Needed size exceeds the maximum size of this data structure " + neededSize);
}
if(data.length < neededSize) {
resizeArray((int) Math.min(Integer.MAX_VALUE - 5L, data.length*2L));
}
data[(int)numelements++] = value;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,11 @@ public void append(long value) {
}

long neededSize = numWordsFor(numbits, numentries+1);
if(data.length<neededSize) {
resizeArray(data.length*2);
if (neededSize > Integer.MAX_VALUE - 5) {
throw new IllegalArgumentException("Needed size exceeds the maximum size of this data structure " + neededSize);
}
if(data.length < neededSize) {
resizeArray((int) Math.min(Integer.MAX_VALUE - 5L, data.length*2L));
}

this.set((int)numentries, value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,8 @@
import org.rdfhdt.hdt.util.crc.CRCInputStream;
import org.rdfhdt.hdt.util.crc.CRCOutputStream;
import org.rdfhdt.hdt.util.io.IOUtil;

import pl.edu.icm.jlargearrays.LongLargeArray;
import pl.edu.icm.jlargearrays.LargeArrayUtils;
import org.visnow.jlargearrays.LargeArrayUtils;
import org.visnow.jlargearrays.LongLargeArray;

/**
* @author mario.arias,Lyudmila Balakireva
Expand All @@ -55,8 +54,8 @@
public class SequenceLog64Big implements DynamicSequence {
private static final byte W = 64;
private static final int INDEX = 1073741824;
LongLargeArray data;

LongLargeArray data;
private int numbits;
private long numentries=0;
private long maxvalue;
Expand All @@ -77,7 +76,7 @@ public SequenceLog64Big(int numbits, long capacity) {
long size = numWordsFor(numbits, capacity);
LongLargeArray.setMaxSizeOf32bitArray(SequenceLog64Big.INDEX);

data = new LongLargeArray(Math.max((int)size,1));
data = new LongLargeArray(Math.max(size,1));
}

public SequenceLog64Big(int numbits, long capacity, boolean initialize) {
Expand Down Expand Up @@ -158,11 +157,7 @@ private void resizeArray(long size) {
//data = Arrays.copyOf(data, size);
if(size > 0) {
LongLargeArray a = new LongLargeArray(size);
if (size < data.length()) {
LargeArrayUtils.arraycopy(data, 0, a, 0, size);
} else {
LargeArrayUtils.arraycopy(data, 0, a, 0, data.length());
}
LargeArrayUtils.arraycopy(data, 0, a, 0, Math.min(size, data.length()));
data = a;
}else{
this.numentries = 0;
Expand All @@ -182,7 +177,7 @@ public void add(Iterator<Long> elements) {
// Count and calculate number of bits needed per element.
while(elements.hasNext()) {
long val = elements.next();
max = val>max ? val : max;
max = Math.max(val, max);
numentries++;
}

Expand All @@ -208,7 +203,7 @@ public void addIntegers(ArrayList<Integer> elements) {
// Count and calculate number of bits needed per element.
for (int i=0;i<elements.size();i++){
long val = elements.get(i).longValue();
max = val>max ? val : max;
max = Math.max(val, max);
numentries++;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,7 @@ public void diffBit(String location, HDT hdt, Bitmap deleteBitmap, ProgressListe
log.debug("Generating Triples...");
il.notifyProgress(40, "Generating Triples...");
// map the triples based on the new dictionary
BitmapTriplesIteratorMapDiff mapIter = new BitmapTriplesIteratorMapDiff(hdt, deleteBitmap, diff, iter.getCount() + 1);
BitmapTriplesIteratorMapDiff mapIter = new BitmapTriplesIteratorMapDiff(hdt, deleteBitmap, diff);

BitmapTriples triples = new BitmapTriples(spec);
triples.load(mapIter, listener);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ public void load(IteratorTripleID it, ProgressListener listener) {

long number = it.estimatedNumResults();

SequenceLog64 vectorY = new SequenceLog64(BitUtil.log2(number), number);
SequenceLog64 vectorZ = new SequenceLog64(BitUtil.log2(number), number);
DynamicSequence vectorY = new SequenceLog64Big(BitUtil.log2(number), number);
DynamicSequence vectorZ = new SequenceLog64Big(BitUtil.log2(number), number);

ModifiableBitmap bitY = new Bitmap375(number);
ModifiableBitmap bitZ = new Bitmap375(number);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ public class BitmapTriplesIteratorMapDiff implements IteratorTripleID {
TripleIDComparator tripleIDComparator = new TripleIDComparator(TripleComponentOrder.SPO);
Bitmap bitArrayDisk;

public BitmapTriplesIteratorMapDiff(HDT hdtOriginal, Bitmap deleteBitmap, DictionaryDiff dictionaryDiff, long countTriples) {
public BitmapTriplesIteratorMapDiff(HDT hdtOriginal, Bitmap deleteBitmap, DictionaryDiff dictionaryDiff) {
this.subjMapping = dictionaryDiff.getAllMappings().get("subject");
this.objMapping = dictionaryDiff.getAllMappings().get("object");
this.predMapping = dictionaryDiff.getAllMappings().get("predicate");
this.sharedMapping = dictionaryDiff.getAllMappings().get("shared");
this.dictionaryDiff = dictionaryDiff;
this.countTriples = countTriples;
this.countTriples = Math.max(0, hdtOriginal.getTriples().getNumberOfElements() - deleteBitmap.countOnes());
this.triples = hdtOriginal.getTriples();
this.bitArrayDisk = deleteBitmap;
list = getTripleID(0).listIterator();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.util.string.ByteStringUtil;
import pl.edu.icm.jlargearrays.LargeArrayUtils;
import org.visnow.jlargearrays.LargeArrayUtils;

import java.io.*;
import java.net.URL;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.rdfhdt.hdt.compact.sequence;

import org.junit.Test;
import org.visnow.jlargearrays.LargeArray;
import org.visnow.jlargearrays.LongLargeArray;

/**
 * Smoke test for allocating a {@link LongLargeArray} whose length is just above
 * the configured 32-bit array threshold.
 */
public class LargeArrayTest {

    /**
     * Lowers the 32-bit array threshold to 100, allocates an array two elements
     * longer than that threshold, and restores the previous threshold afterwards.
     * The test passes when the allocation completes without throwing.
     */
    @Test
    public void allocationTest() {
        final int previousThreshold = LargeArray.getMaxSizeOf32bitArray();
        try {
            LargeArray.setMaxSizeOf32bitArray(100);
            final long elementCount = LargeArray.getMaxSizeOf32bitArray() + 2L;
            new LongLargeArray(elementCount);
        } finally {
            // Always put the global setting back so other tests are unaffected.
            LargeArray.setMaxSizeOf32bitArray(previousThreshold);
        }
    }
}

0 comments on commit 43c374c

Please sign in to comment.