Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-560 Manage permutes #562

Merged
merged 1 commit into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,10 @@ public LoadFileResult loadFile(InputStream input, String filename) throws IOExce

Files.createDirectories(Paths.get(locationHdt));
Files.deleteIfExists(Paths.get(hdtOutput));
Files.deleteIfExists(Paths.get(EndpointFiles.getHDTIndexV11(locationHdt, hdtIndexName)));

for (String indexFile : EndpointFiles.getHDTIndexNames(locationHdt, hdtIndexName)) {
Files.deleteIfExists(Path.of(indexFile));
}

if (sparqlRepository.getOptions().getStorageMode().equals(SailCompilerSchema.ENDPOINTSTORE_STORAGE)) {
shutdown();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
import com.the_qa_company.qendpoint.compiler.ParsedStringValue;
import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder;
import com.the_qa_company.qendpoint.core.hdt.HDTVersion;
import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriplesIndexFile;

import java.io.File;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

/**
* store the files used by the endpoint store
Expand Down Expand Up @@ -34,6 +37,42 @@ public static String getHDTIndexV11(String locationHdt, String hdtIndexName) {
return locationHdt + hdtIndexName + HDTVersion.get_index_suffix("-");
}

public static List<String> getHDTIndexNames(String locationHdt, String hdtIndexName) {
Path path = Path.of(locationHdt + hdtIndexName);

List<String> list = new ArrayList<>();

Path hdtPath = Path.of(getHDTIndex(locationHdt, hdtIndexName));

list.add(getHDTIndexV11(locationHdt, hdtIndexName));

for (TripleComponentOrder order : TripleComponentOrder.values()) {
if (order == TripleComponentOrder.Unknown)
continue;
list.add(BitmapTriplesIndexFile.getIndexPath(hdtPath, order).toAbsolutePath().toString());
}

return list;
}

public static List<Path> getHDTIndexNamesPath(String locationHdt, String hdtIndexName) {
Path path = Path.of(locationHdt + hdtIndexName);

List<Path> list = new ArrayList<>();

Path hdtPath = Path.of(getHDTIndex(locationHdt, hdtIndexName));

list.add(Path.of(getHDTIndexV11(locationHdt, hdtIndexName)));

for (TripleComponentOrder order : TripleComponentOrder.values()) {
if (order == TripleComponentOrder.Unknown)
continue;
list.add(BitmapTriplesIndexFile.getIndexPath(hdtPath, order));
}

return list;
}

// basic locations
private final String locationNative, locationHdt, hdtIndexName;
private final Path locationNativePath, locationHdtPath;
Expand Down Expand Up @@ -184,20 +223,25 @@ public String getHDTIndexV11() {
return getHDTIndexV11(locationHdt, hdtIndexName);
}

public List<String> getHDTIndexNames() {
return getHDTIndexNames(locationHdt, hdtIndexName);
}

public List<String> getHDTNewIndexNames() {
return getHDTIndexNames(locationHdt, hdtIndexName + ".new.hdt");
}

public List<Path> getHDTNewIndexNamesPath() {
return getHDTIndexNamesPath(locationHdt, hdtIndexName + ".new.hdt");
}

/**
* @return the HDT file
*/
public Path getHDTIndexPath() {
return Path.of(getHDTIndex());
}

/**
* @return the HDT file with HDT version
*/
public Path getHDTIndexV11Path() {
return Path.of(getHDTIndexV11());
}

/**
* @return the new HDT file
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
package com.the_qa_company.qendpoint.store;

import com.the_qa_company.qendpoint.core.hdt.HDTVersion;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

/**
* information on how a {@link EndpointStore} should be dumped
Expand Down Expand Up @@ -36,9 +35,31 @@ default void beforeMerge(EndpointStore store) throws IOException {
default void afterMerge(EndpointStore store, Path mergedDataset) throws IOException {
}

/**
* call after the indexing
*
* @param store store
* @param mergedDatasetIndex v11 index
* @throws IOException any
* @deprecated use {@link #afterIndexing(EndpointStore, List)} instead
*/
@Deprecated
default void afterIndexing(EndpointStore store, Path mergedDatasetIndex) throws IOException {
}

/**
* call after the indexing
*
* @param store store
* @param mergedDatasetIndexes indexes
* @throws IOException any
*/
default void afterIndexing(EndpointStore store, List<Path> mergedDatasetIndexes) throws IOException {
if (!mergedDatasetIndexes.isEmpty()) {
afterIndexing(store, mergedDatasetIndexes.get(0));
}
}

class EndpointStoreDumpDataset implements EndpointStoreDump {
protected final Path outputLocation;

Expand All @@ -53,11 +74,25 @@ public void afterMerge(EndpointStore store, Path mergedDataset) throws IOExcepti
Files.copy(mergedDataset, outputLocation.resolve("store.hdt"));
}

static String replaceHDTFilename(Path path, String newName) {
String filename = path.getFileName().toString();

int idx = filename.lastIndexOf(".hdt");

if (idx == -1)
throw new IllegalArgumentException("Not a HDT file");

return newName + filename.substring(idx);
}

@Override
public void afterIndexing(EndpointStore store, Path mergedDatasetIndex) throws IOException {
public void afterIndexing(EndpointStore store, List<Path> mergedDatasetIndexes) throws IOException {
Files.createDirectories(outputLocation.getParent());
// store the dataset
Files.copy(mergedDatasetIndex, outputLocation.resolve("store.hdt" + HDTVersion.get_index_suffix("-")));
for (Path path : mergedDatasetIndexes) {
Files.copy(path, outputLocation.resolve(replaceHDTFilename(path, "store")));

}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,14 @@ private static boolean exists(String file) {
return Files.exists(Path.of(file));
}

private static boolean existsAny(List<String> files) {
for (String file : files) {
if (Files.exists(Path.of(file)))
return true;
}
return false;
}

/**
* test if the file "file + {@link #OLD_EXT}" exists
*
Expand All @@ -207,6 +215,21 @@ private static boolean existsOld(String file) {
return exists(file + OLD_EXT);
}

/**
* test if the file "file + {@link #OLD_EXT}" exists for file in files
*
* @param files the files
* @return true if the file exists, false otherwise
*/
private static boolean existsOldAny(List<String> files) {
for (String file : files) {
if (exists(file + OLD_EXT)) {
return true;
}
}
return false;
}

/**
* rename file to "file + {@link #OLD_EXT}"
*
Expand All @@ -225,6 +248,14 @@ private static void renameFromOld(String file) {
rename(file + OLD_EXT, file);
}

private static void renameFromOld(List<String> files) {
for (String file : files) {
if (Files.exists(Path.of(file + OLD_EXT))) {
rename(file + OLD_EXT, file);
}
}
}

/**
* rename a file to another
*
Expand All @@ -241,6 +272,29 @@ private static void rename(String oldFile, String newFile) {
}
}

/**
* rename a file to another list
*
* @param oldFile the current name
* @param newFile the new name
*/
private static void rename(List<String> oldFile, List<String> newFile) {
if (oldFile.size() != newFile.size())
throw new RuntimeException("Bad list count: " + oldFile.size() + "/" + newFile.size());
try {
for (int i = 0; i < oldFile.size(); i++) {
Path old = Path.of(oldFile.get(i));
if (Files.exists(old)) {
Files.move(old, Path.of(newFile.get(i)), StandardCopyOption.REPLACE_EXISTING);
}
}
} catch (IOException e) {
logger.warn("Can't rename the file {} into {} ({})", oldFile, newFile, e.getClass().getName());
if (MergeRunnableStopPoint.debug)
throw new RuntimeException(e);
}
}

private static final Logger logger = LoggerFactory.getLogger(MergeRunnable.class);

public class MergeThread<T> extends Thread {
Expand Down Expand Up @@ -686,7 +740,7 @@ private Step3SubStep getStep3SubStep() {
boolean existsOldTripleDeleteTempArr = endpoint.getValidOrders().stream()
.anyMatch(order -> existsOld(endpointFiles.getTripleDeleteTempArr(order)));

if (existsOldTripleDeleteTempArr && !exists(endpointFiles.getHDTNewIndexV11())) {
if (existsOldTripleDeleteTempArr && !existsAny(endpointFiles.getHDTNewIndexNames())) {
// after rename(endpointFiles.getHDTNewIndexV11(),
// endpointFiles.getHDTIndexV11());
return Step3SubStep.AFTER_INDEX_V11_RENAME;
Expand All @@ -696,7 +750,7 @@ private Step3SubStep getStep3SubStep() {
// endpointFiles.getHDTIndex());
return Step3SubStep.AFTER_INDEX_RENAME;
}
if (existsOld(endpointFiles.getHDTIndexV11())) {
if (existsOldAny(endpointFiles.getHDTIndexNames())) {
// after renameToOld(endpointFiles.getHDTIndexV11());
return Step3SubStep.AFTER_INDEX_V11_OLD_RENAME;
}
Expand Down Expand Up @@ -725,11 +779,11 @@ private void preloadStep3() {

switch (step3SubStep) {
case AFTER_INDEX_V11_RENAME:
rename(endpointFiles.getHDTIndexV11(), endpointFiles.getHDTNewIndexV11());
rename(endpointFiles.getHDTIndexNames(), endpointFiles.getHDTNewIndexNames());
case AFTER_INDEX_RENAME:
rename(endpointFiles.getHDTIndex(), endpointFiles.getHDTNewIndex());
case AFTER_INDEX_V11_OLD_RENAME:
renameFromOld(endpointFiles.getHDTIndexV11());
renameFromOld(endpointFiles.getHDTIndexNames());
case AFTER_INDEX_OLD_RENAME:
renameFromOld(endpointFiles.getHDTIndex());
case AFTER_TRIPLEDEL_TMP_OLD_RENAME:
Expand Down Expand Up @@ -773,7 +827,7 @@ private synchronized void step3(boolean restarting, Lock lock, EndpointStoreDump
boolean graph;
try (HDT newHdt = HDTManager.mapIndexedHDT(endpointFiles.getHDTNewIndex(), endpoint.getHDTSpec(), null)) {
if (dumpInfo != null) {
dumpInfo.afterIndexing(endpoint, Path.of(endpointFiles.getHDTNewIndexV11()));
dumpInfo.afterIndexing(endpoint, endpointFiles.getHDTNewIndexNamesPath());
endpoint.setDumping(endpoint.getDumpRef().get() != null);
}
graph = newHdt.getDictionary().supportGraphs();
Expand All @@ -795,12 +849,15 @@ private synchronized void step3(boolean restarting, Lock lock, EndpointStoreDump
this.endpoint.resetDeleteArray(newHdt);
}

Path hdtIndexV11 = Path.of(endpointFiles.getHDTIndexV11());
// if the index.hdt.index.v1-1 doesn't exist, the hdt is empty, so we
// create a mock index file
// (ignored by RDF-HDT)
if (!Files.exists(hdtIndexV11)) {
Files.writeString(hdtIndexV11, "");
if (!endpoint.getHDTSpec().getBoolean(HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, false)) {
Path hdtIndexV11 = Path.of(endpointFiles.getHDTIndexV11());
// if the index.hdt.index.v1-1 doesn't exist, the hdt is empty, so
// we
// create a mock index file
// (ignored by RDF-HDT)
if (!Files.exists(hdtIndexV11)) {
Files.writeString(hdtIndexV11, "");
}
}

// rename new hdt to old hdt name so that they are replaces
Expand All @@ -817,7 +874,7 @@ private synchronized void step3(boolean restarting, Lock lock, EndpointStoreDump
rename(endpointFiles.getHDTNewIndex(), endpointFiles.getHDTIndex());
debugStepPoint(MergeRunnableStopPoint.STEP3_FILES_MID2);
// AFTER_INDEX_RENAME
rename(endpointFiles.getHDTNewIndexV11(), endpointFiles.getHDTIndexV11());
rename(endpointFiles.getHDTNewIndexNames(), endpointFiles.getHDTIndexNames());
// AFTER_INDEX_V11_RENAME

HDT tempHdt = endpoint.loadIndex();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package com.the_qa_company.qendpoint.store;

import org.junit.Test;

import java.nio.file.Path;

import static org.junit.Assert.*;

public class EndpointStoreDumpTest {

@Test
public void nameTest() {
assertEquals("aaaa.hdt",
EndpointStoreDump.EndpointStoreDumpDataset.replaceHDTFilename(Path.of("test.hdt"), "aaaa"));
assertEquals("bbbb.hdt",
EndpointStoreDump.EndpointStoreDumpDataset.replaceHDTFilename(Path.of("test.hdt"), "bbbb"));
assertEquals("bbbb.hdt.idx",
EndpointStoreDump.EndpointStoreDumpDataset.replaceHDTFilename(Path.of("test.hdt.idx"), "bbbb"));
assertEquals("bbbb.hdt",
EndpointStoreDump.EndpointStoreDumpDataset.replaceHDTFilename(Path.of("test.idx.hdt"), "bbbb"));
}
}
Loading