Skip to content

Commit

Permalink
GH-560 Replace V11 calls with other indexes calls
Browse files Browse the repository at this point in the history
  • Loading branch information
ate47 committed Jan 6, 2025
1 parent cebf4b0 commit e465762
Show file tree
Hide file tree
Showing 5 changed files with 185 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,10 @@ public LoadFileResult loadFile(InputStream input, String filename) throws IOExce

Files.createDirectories(Paths.get(locationHdt));
Files.deleteIfExists(Paths.get(hdtOutput));
Files.deleteIfExists(Paths.get(EndpointFiles.getHDTIndexV11(locationHdt, hdtIndexName)));

for (String indexFile : EndpointFiles.getHDTIndexNames(locationHdt, hdtIndexName)) {
Files.deleteIfExists(Path.of(indexFile));
}

if (sparqlRepository.getOptions().getStorageMode().equals(SailCompilerSchema.ENDPOINTSTORE_STORAGE)) {
shutdown();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
import com.the_qa_company.qendpoint.compiler.ParsedStringValue;
import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder;
import com.the_qa_company.qendpoint.core.hdt.HDTVersion;
import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIndexFile;

import java.io.File;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

/**
* store the files used by the endpoint store
Expand Down Expand Up @@ -34,6 +37,42 @@ public static String getHDTIndexV11(String locationHdt, String hdtIndexName) {
return locationHdt + hdtIndexName + HDTVersion.get_index_suffix("-");
}

public static List<String> getHDTIndexNames(String locationHdt, String hdtIndexName) {
Path path = Path.of(locationHdt + hdtIndexName);

List<String> list = new ArrayList<>();

Path hdtPath = Path.of(getHDTIndex(locationHdt, hdtIndexName));

list.add(getHDTIndexV11(locationHdt, hdtIndexName));

for (TripleComponentOrder order : TripleComponentOrder.values()) {
if (order == TripleComponentOrder.Unknown)
continue;
list.add(BitmapTriplesIndexFile.getIndexPath(hdtPath, order).toAbsolutePath().toString());
}

return list;
}

public static List<Path> getHDTIndexNamesPath(String locationHdt, String hdtIndexName) {
Path path = Path.of(locationHdt + hdtIndexName);

List<Path> list = new ArrayList<>();

Path hdtPath = Path.of(getHDTIndex(locationHdt, hdtIndexName));

list.add(Path.of(getHDTIndexV11(locationHdt, hdtIndexName)));

for (TripleComponentOrder order : TripleComponentOrder.values()) {
if (order == TripleComponentOrder.Unknown)
continue;
list.add(BitmapTriplesIndexFile.getIndexPath(hdtPath, order));
}

return list;
}

// basic locations
private final String locationNative, locationHdt, hdtIndexName;
private final Path locationNativePath, locationHdtPath;
Expand Down Expand Up @@ -184,20 +223,25 @@ public String getHDTIndexV11() {
return getHDTIndexV11(locationHdt, hdtIndexName);
}

public List<String> getHDTIndexNames() {
return getHDTIndexNames(locationHdt, hdtIndexName);
}

public List<String> getHDTNewIndexNames() {
return getHDTIndexNames(locationHdt, hdtIndexName + ".new.hdt");
}

public List<Path> getHDTNewIndexNamesPath() {
return getHDTIndexNamesPath(locationHdt, hdtIndexName + ".new.hdt");
}

/**
* @return the HDT file
*/
public Path getHDTIndexPath() {
return Path.of(getHDTIndex());
}

/**
* @return the HDT file with HDT version
*/
public Path getHDTIndexV11Path() {
return Path.of(getHDTIndexV11());
}

/**
* @return the new HDT file
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
package com.the_qa_company.qendpoint.store;

import com.the_qa_company.qendpoint.core.hdt.HDTVersion;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

/**
* information on how a {@link EndpointStore} should be dumped
Expand Down Expand Up @@ -36,9 +35,31 @@ default void beforeMerge(EndpointStore store) throws IOException {
default void afterMerge(EndpointStore store, Path mergedDataset) throws IOException {
}

/**
* call after the indexing
*
* @param store store
* @param mergedDatasetIndex v11 index
* @throws IOException any
* @deprecated use {@link #afterIndexing(EndpointStore, List)} instead
*/
@Deprecated
default void afterIndexing(EndpointStore store, Path mergedDatasetIndex) throws IOException {
}

/**
* call after the indexing
*
* @param store store
* @param mergedDatasetIndexes indexes
* @throws IOException any
*/
default void afterIndexing(EndpointStore store, List<Path> mergedDatasetIndexes) throws IOException {
if (!mergedDatasetIndexes.isEmpty()) {
afterIndexing(store, mergedDatasetIndexes.get(0));
}
}

class EndpointStoreDumpDataset implements EndpointStoreDump {
protected final Path outputLocation;

Expand All @@ -53,11 +74,25 @@ public void afterMerge(EndpointStore store, Path mergedDataset) throws IOExcepti
Files.copy(mergedDataset, outputLocation.resolve("store.hdt"));
}

static String replaceHDTFilename(Path path, String newName) {
String filename = path.getFileName().toString();

int idx = filename.lastIndexOf(".hdt");

if (idx == -1)
throw new IllegalArgumentException("Not a HDT file");

return newName + filename.substring(idx);
}

@Override
public void afterIndexing(EndpointStore store, Path mergedDatasetIndex) throws IOException {
public void afterIndexing(EndpointStore store, List<Path> mergedDatasetIndexes) throws IOException {
Files.createDirectories(outputLocation.getParent());
// store the dataset
Files.copy(mergedDatasetIndex, outputLocation.resolve("store.hdt" + HDTVersion.get_index_suffix("-")));
for (Path path : mergedDatasetIndexes) {
Files.copy(path, outputLocation.resolve(replaceHDTFilename(path, "store")));

}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,14 @@ private static boolean exists(String file) {
return Files.exists(Path.of(file));
}

private static boolean existsAny(List<String> files) {
for (String file : files) {
if (Files.exists(Path.of(file)))
return true;
}
return false;
}

/**
* test if the file "file + {@link #OLD_EXT}" exists
*
Expand All @@ -207,6 +215,21 @@ private static boolean existsOld(String file) {
return exists(file + OLD_EXT);
}

/**
* test if the file "file + {@link #OLD_EXT}" exists for file in files
*
* @param files the files
* @return true if the file exists, false otherwise
*/
private static boolean existsOldAny(List<String> files) {
for (String file : files) {
if (exists(file + OLD_EXT)) {
return true;
}
}
return false;
}

/**
* rename file to "file + {@link #OLD_EXT}"
*
Expand All @@ -225,6 +248,14 @@ private static void renameFromOld(String file) {
rename(file + OLD_EXT, file);
}

private static void renameFromOld(List<String> files) {
for (String file : files) {
if (Files.exists(Path.of(file + OLD_EXT))) {
rename(file + OLD_EXT, file);
}
}
}

/**
* rename a file to another
*
Expand All @@ -241,6 +272,29 @@ private static void rename(String oldFile, String newFile) {
}
}

/**
* rename a file to another list
*
* @param oldFile the current name
* @param newFile the new name
*/
private static void rename(List<String> oldFile, List<String> newFile) {
if (oldFile.size() != newFile.size())
throw new RuntimeException("Bad list count: " + oldFile.size() + "/" + newFile.size());
try {
for (int i = 0; i < oldFile.size(); i++) {
Path old = Path.of(oldFile.get(i));
if (Files.exists(old)) {
Files.move(old, Path.of(newFile.get(i)), StandardCopyOption.REPLACE_EXISTING);
}
}
} catch (IOException e) {
logger.warn("Can't rename the file {} into {} ({})", oldFile, newFile, e.getClass().getName());
if (MergeRunnableStopPoint.debug)
throw new RuntimeException(e);
}
}

private static final Logger logger = LoggerFactory.getLogger(MergeRunnable.class);

public class MergeThread<T> extends Thread {
Expand Down Expand Up @@ -686,7 +740,7 @@ private Step3SubStep getStep3SubStep() {
boolean existsOldTripleDeleteTempArr = endpoint.getValidOrders().stream()
.anyMatch(order -> existsOld(endpointFiles.getTripleDeleteTempArr(order)));

if (existsOldTripleDeleteTempArr && !exists(endpointFiles.getHDTNewIndexV11())) {
if (existsOldTripleDeleteTempArr && !existsAny(endpointFiles.getHDTNewIndexNames())) {
// after rename(endpointFiles.getHDTNewIndexV11(),
// endpointFiles.getHDTIndexV11());
return Step3SubStep.AFTER_INDEX_V11_RENAME;
Expand All @@ -696,7 +750,7 @@ private Step3SubStep getStep3SubStep() {
// endpointFiles.getHDTIndex());
return Step3SubStep.AFTER_INDEX_RENAME;
}
if (existsOld(endpointFiles.getHDTIndexV11())) {
if (existsOldAny(endpointFiles.getHDTIndexNames())) {
// after renameToOld(endpointFiles.getHDTIndexV11());
return Step3SubStep.AFTER_INDEX_V11_OLD_RENAME;
}
Expand Down Expand Up @@ -725,11 +779,11 @@ private void preloadStep3() {

switch (step3SubStep) {
case AFTER_INDEX_V11_RENAME:
rename(endpointFiles.getHDTIndexV11(), endpointFiles.getHDTNewIndexV11());
rename(endpointFiles.getHDTIndexNames(), endpointFiles.getHDTNewIndexNames());
case AFTER_INDEX_RENAME:
rename(endpointFiles.getHDTIndex(), endpointFiles.getHDTNewIndex());
case AFTER_INDEX_V11_OLD_RENAME:
renameFromOld(endpointFiles.getHDTIndexV11());
renameFromOld(endpointFiles.getHDTIndexNames());
case AFTER_INDEX_OLD_RENAME:
renameFromOld(endpointFiles.getHDTIndex());
case AFTER_TRIPLEDEL_TMP_OLD_RENAME:
Expand Down Expand Up @@ -773,7 +827,7 @@ private synchronized void step3(boolean restarting, Lock lock, EndpointStoreDump
boolean graph;
try (HDT newHdt = HDTManager.mapIndexedHDT(endpointFiles.getHDTNewIndex(), endpoint.getHDTSpec(), null)) {
if (dumpInfo != null) {
dumpInfo.afterIndexing(endpoint, Path.of(endpointFiles.getHDTNewIndexV11()));
dumpInfo.afterIndexing(endpoint, endpointFiles.getHDTNewIndexNamesPath());
endpoint.setDumping(endpoint.getDumpRef().get() != null);
}
graph = newHdt.getDictionary().supportGraphs();
Expand All @@ -795,12 +849,15 @@ private synchronized void step3(boolean restarting, Lock lock, EndpointStoreDump
this.endpoint.resetDeleteArray(newHdt);
}

Path hdtIndexV11 = Path.of(endpointFiles.getHDTIndexV11());
// if the index.hdt.index.v1-1 doesn't exist, the hdt is empty, so we
// create a mock index file
// (ignored by RDF-HDT)
if (!Files.exists(hdtIndexV11)) {
Files.writeString(hdtIndexV11, "");
if (!endpoint.getHDTSpec().getBoolean(HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, false)) {
Path hdtIndexV11 = Path.of(endpointFiles.getHDTIndexV11());
// if the index.hdt.index.v1-1 doesn't exist, the hdt is empty, so
// we
// create a mock index file
// (ignored by RDF-HDT)
if (!Files.exists(hdtIndexV11)) {
Files.writeString(hdtIndexV11, "");
}
}

// rename new hdt to old hdt name so that they are replaces
Expand All @@ -817,7 +874,7 @@ private synchronized void step3(boolean restarting, Lock lock, EndpointStoreDump
rename(endpointFiles.getHDTNewIndex(), endpointFiles.getHDTIndex());
debugStepPoint(MergeRunnableStopPoint.STEP3_FILES_MID2);
// AFTER_INDEX_RENAME
rename(endpointFiles.getHDTNewIndexV11(), endpointFiles.getHDTIndexV11());
rename(endpointFiles.getHDTNewIndexNames(), endpointFiles.getHDTIndexNames());
// AFTER_INDEX_V11_RENAME

HDT tempHdt = endpoint.loadIndex();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package com.the_qa_company.qendpoint.store;

import org.junit.Test;

import java.nio.file.Path;

import static org.junit.Assert.*;

public class EndpointStoreDumpTest {

@Test
public void nameTest() {
assertEquals("aaaa.hdt",
EndpointStoreDump.EndpointStoreDumpDataset.replaceHDTFilename(Path.of("test.hdt"), "aaaa"));
assertEquals("bbbb.hdt",
EndpointStoreDump.EndpointStoreDumpDataset.replaceHDTFilename(Path.of("test.hdt"), "bbbb"));
assertEquals("bbbb.hdt.idx",
EndpointStoreDump.EndpointStoreDumpDataset.replaceHDTFilename(Path.of("test.hdt.idx"), "bbbb"));
assertEquals("bbbb.hdt",
EndpointStoreDump.EndpointStoreDumpDataset.replaceHDTFilename(Path.of("test.idx.hdt"), "bbbb"));
}
}

0 comments on commit e465762

Please sign in to comment.