Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev #437

Merged
merged 3 commits into from
Dec 11, 2023
Merged

Dev #437

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions qendpoint-cli/bin/hdtDiffCat.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@echo off
REM Windows launcher for the HDTDiffCat command line tool.
REM Every argument given to this script (%*) is forwarded unchanged to the Java main class.

REM Run the javaenv.bat helper located next to this script; it is expected to
REM define the JAVACMD and JAVAOPTIONS variables used below.
call "%~dp0\javaenv.bat"

REM The classpath is quoted so that an installation directory containing spaces
REM is still passed to the JVM as one single argument. The trailing "*" is
REM expanded by the Java launcher itself, not by cmd.exe.
"%JAVACMD%" %JAVAOPTIONS% -classpath "%~dp0\..\lib\*" com.the_qa_company.qendpoint.core.tools.HDTDiffCat %*
28 changes: 28 additions & 0 deletions qendpoint-cli/bin/hdtDiffCat.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<#
.SYNOPSIS
PowerShell launcher for the HDTDiffCat command line tool.

.DESCRIPTION
Declares the options understood by the Java class
com.the_qa_company.qendpoint.core.tools.HDTDiffCat and hands every bound
parameter over to the javaenv.ps1 helper located next to this script.

.PARAMETER options
HDT conversion options (override those of the config file).

.PARAMETER config
Conversion config file.

.PARAMETER diff
File to use to do the diff.

.PARAMETER index
Also generate external indices.

.PARAMETER version
Print the HDT version number.

.PARAMETER quiet
Do not show progress of the conversion.

.PARAMETER color
Print using color (if available).

.PARAMETER OtherParams
Remaining positional arguments: the input HDTs followed by the output HDT.
#>
param(
    [Parameter()][String]$options,
    [Parameter()][String]$config,
    [Parameter()][String]$diff,
    [Parameter()][Switch]$index,
    [Parameter()][Switch]$version,
    [Parameter()][Switch]$quiet,
    [Parameter()][Switch]$color,
    [Parameter(ValueFromRemainingArguments, Position = 0)][string[]]$OtherParams
)

# javaenv.ps1 sits in the same directory as this script; it receives the Java
# main class name and the parameters that were actually bound on this call.
$javaEnvScript = "$(Get-Item $PSScriptRoot)/javaenv.ps1"
& $javaEnvScript "com.the_qa_company.qendpoint.core.tools.HDTDiffCat" -RequiredParameters $PSBoundParameters
10 changes: 10 additions & 0 deletions qendpoint-cli/bin/hdtDiffCat.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
# Launcher for the HDTDiffCat command line tool.
# Every argument given to this script is forwarded unchanged to the Java main class.

# Load the javaenv.sh helper located next to this script; it is expected to
# define JAVA, JAVA_OPTIONS and CP. The path is quoted so that an installation
# directory containing spaces still resolves.
source "$(dirname "$0")/javaenv.sh"

#export MAVEN_OPTS="-Xmx6g"
#mvn exec:java -Dexec.mainClass="com.the_qa_company.qendpoint.core.tools.HDTDiffCat" -Dexec.args="$*"

# "$@" keeps each original argument intact (file names with spaces or glob
# characters are not re-split or expanded). $JAVA_OPTIONS is left unquoted on
# purpose so that it splits into separate JVM flags.
$JAVA $JAVA_OPTIONS -cp "$CP:$CLASSPATH" com.the_qa_company.qendpoint.core.tools.HDTDiffCat "$@"

exit $?
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
package com.the_qa_company.qendpoint.core.tools;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.internal.Lists;
import com.the_qa_company.qendpoint.core.compact.bitmap.Bitmap64Big;
import com.the_qa_company.qendpoint.core.enums.RDFNotation;
import com.the_qa_company.qendpoint.core.exceptions.NotFoundException;
import com.the_qa_company.qendpoint.core.exceptions.ParserException;
import com.the_qa_company.qendpoint.core.hdt.HDT;
import com.the_qa_company.qendpoint.core.hdt.HDTManager;
import com.the_qa_company.qendpoint.core.hdt.HDTVersion;
import com.the_qa_company.qendpoint.core.listener.MultiThreadListener;
import com.the_qa_company.qendpoint.core.options.HDTOptions;
import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys;
import com.the_qa_company.qendpoint.core.rdf.RDFParserCallback;
import com.the_qa_company.qendpoint.core.rdf.RDFParserFactory;
import com.the_qa_company.qendpoint.core.triples.IteratorTripleString;
import com.the_qa_company.qendpoint.core.util.StopWatch;
import com.the_qa_company.qendpoint.core.util.io.Closer;
import com.the_qa_company.qendpoint.core.util.listener.ColorTool;
import com.the_qa_company.qendpoint.core.util.listener.MultiThreadListenerConsole;
import org.apache.commons.io.FileUtils;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Command line tool that concatenates several HDT files into a single output
 * HDT and, when a diff file is supplied with {@code -diff}, removes the
 * triples listed in that file from the inputs while doing so.
 * <p>
 * The positional parameters are the input HDTs followed by the output HDT, so
 * at least two of them are required.
 */
public class HDTDiffCat {

    /**
     * Console logger. Set by {@link #main(String[])}; created lazily by
     * {@link #execute()} when the tool is driven programmatically.
     */
    private ColorTool colorTool;

    @Parameter(description = "<input HDTs>+ <output HDT>")
    public List<String> parameters = Lists.newArrayList();

    @Parameter(names = "-options", description = "HDT Conversion options (override those of config file)")
    public String options;

    @Parameter(names = "-config", description = "Conversion config file")
    public String configFile;

    @Parameter(names = "-diff", description = "File to use to do the diff")
    public String diff;

    @Parameter(names = "-index", description = "Generate also external indices to solve all queries")
    public boolean generateIndex;

    @Parameter(names = "-version", description = "Prints the HDT version number")
    public static boolean showVersion;

    @Parameter(names = "-quiet", description = "Do not show progress of the conversion")
    public boolean quiet;

    @Parameter(names = "-color", description = "Print using color (if available)")
    public boolean color;

    /**
     * Builds the resulting HDT from all the inputs (every positional
     * parameter except the last one).
     * <p>
     * Without a diff file this is a plain cat of the inputs. With a diff file,
     * every triple of the diff is searched in each input; the position of a
     * match is flagged in a per-input bitmap, and the flagged triples are
     * deleted by the final diff+cat operation.
     *
     * @param location working directory computed by the caller (currently
     *                 not read by this method)
     * @param spec     HDT options used for the parser and the cat operations
     * @param listener progress listener, may be {@code null}
     * @return the HDT produced by the cat / diff+cat operation
     * @throws IOException       on I/O failure
     * @throws ParserException   if the diff file cannot be parsed
     * @throws NotFoundException propagated from the underlying HDT calls
     */
    private HDT diffcat(String location, HDTOptions spec, MultiThreadListener listener)
            throws IOException, ParserException, NotFoundException {
        List<String> inputs = parameters.subList(0, parameters.size() - 1);

        if (diff == null || diff.isEmpty()) {
            // nothing to remove: plain concatenation
            return HDTManager.catHDT(inputs, spec, listener);
        }

        RDFNotation type = RDFNotation.guess(diff);

        // one delete bitmap and one mapped HDT per input, same index
        Bitmap64Big[] bms = new Bitmap64Big[inputs.size()];
        HDT[] inputsMap = new HDT[inputs.size()];

        try {
            for (int i = 0; i < bms.length; i++) {
                inputsMap[i] = HDTManager.mapHDT(inputs.get(i));
                bms[i] = Bitmap64Big.memory(inputsMap[i].getTriples().getNumberOfElements());
            }

            // called once per triple of the diff file
            RDFParserCallback.RDFCallback callback = ((triple, pos) -> {
                for (int i = 0; i < inputsMap.length; i++) {
                    IteratorTripleString find;
                    try {
                        find = inputsMap[i].search(triple);
                    } catch (NotFoundException e) {
                        // the callback cannot throw a checked exception
                        throw new RuntimeException(e);
                    }

                    if (find.hasNext()) {
                        find.next();
                        // flag the matched triple of input i for deletion
                        bms[i].set(find.getLastTriplePosition(), true);
                    }
                }
            });

            if (type == RDFNotation.HDT) {
                // the diff is itself an HDT: iterate over all of its triples
                try (HDT diffHDT = HDTManager.mapHDT(diff)) {
                    IteratorTripleString it = diffHDT.searchAll();
                    while (it.hasNext()) {
                        callback.processTriple(it.next(), 0);
                    }
                }
            } else {
                RDFParserCallback parser = RDFParserFactory.getParserCallback(type, spec);
                parser.doParse(diff, "", type, true, callback);
            }
        } catch (Throwable t) {
            // release the mapped inputs without hiding the original failure
            try {
                Closer.closeSingle(inputsMap);
            } catch (Throwable t2) {
                t.addSuppressed(t2);
            }
            throw t;
        }
        // the mapped inputs were only needed to fill the bitmaps
        Closer.closeSingle(inputsMap);

        return HDTManager.diffBitCatHDT(inputs, List.of(bms), spec, listener);
    }

    /**
     * Logs the triple count and the dictionary section sizes of an HDT.
     *
     * @param hdt the HDT to describe
     */
    private void logStats(HDT hdt) {
        colorTool.logValue("Total Triples ......... ", String.valueOf(hdt.getTriples().getNumberOfElements()));
        colorTool.logValue("Different subjects .... ", String.valueOf(hdt.getDictionary().getNsubjects()));
        colorTool.logValue("Different predicates .. ", String.valueOf(hdt.getDictionary().getNpredicates()));
        colorTool.logValue("Different objects ..... ", String.valueOf(hdt.getDictionary().getNobjects()));
        colorTool.logValue("Common Subject/Object . ", String.valueOf(hdt.getDictionary().getNshared()));
    }

    /**
     * Runs the tool with the values currently stored in the option fields:
     * loads the options, computes the result HDT, saves it to the last
     * positional parameter, removes the working directory and optionally
     * generates the index.
     *
     * @throws IllegalArgumentException if fewer than two positional
     *                                  parameters (one input and the output)
     *                                  were supplied
     * @throws IOException              on I/O failure
     * @throws ParserException          if the diff file cannot be parsed
     * @throws NotFoundException        propagated from the underlying HDT
     *                                  calls
     */
    public void execute() throws IOException, ParserException, NotFoundException {
        if (parameters.size() < 2) {
            // fail early with a clear message instead of an index error later
            throw new IllegalArgumentException("Expected at least one input HDT and one output HDT, but got "
                    + parameters.size() + " parameter(s)");
        }
        if (colorTool == null) {
            // execute() was called without going through main()
            colorTool = new ColorTool(color, quiet);
        }

        HDTOptions spec;
        if (configFile != null) {
            spec = HDTOptions.readFromFile(configFile);
        } else {
            spec = HDTOptions.of();
        }
        if (options != null) {
            // command line options are applied on top of the config file
            spec.setOptions(options);
        }

        String hdtOutput = parameters.get(parameters.size() - 1);
        File file = new File(hdtOutput);

        String locationOpt = spec.get(HDTOptionsKeys.HDTCAT_LOCATION);

        if (locationOpt == null) {
            // default working directory: "<output>_tmp"
            locationOpt = file.getAbsolutePath() + "_tmp";
            spec.set(HDTOptionsKeys.HDTCAT_LOCATION, locationOpt);
        }

        File theDir = new File(locationOpt);
        Files.createDirectories(theDir.toPath());
        String location = theDir.getAbsolutePath() + "/";

        MultiThreadListener listenerConsole = !quiet ? new MultiThreadListenerConsole(color) : null;
        StopWatch startCat = new StopWatch();
        try (HDT hdt = diffcat(location, spec, listenerConsole)) {
            colorTool.logValue("Files catdiff in ...... ", startCat.stopAndShow(), true);
            assert hdt != null;
            // Show Basic stats
            if (!quiet) {
                logStats(hdt);
            }

            // Dump to HDT file
            StopWatch sw = new StopWatch();
            hdt.saveToHDT(hdtOutput, listenerConsole);
            colorTool.logValue("HDT saved to file in .. ", sw.stopAndShow());

            // Remove the working files and the working directory itself
            Files.deleteIfExists(Path.of(location + "dictionary"));
            Files.deleteIfExists(Path.of(location + "triples"));
            FileUtils.deleteDirectory(theDir);

            // Generate index and dump it to .hdt.index file
            sw.reset();
            if (generateIndex) {
                HDTManager.indexedHDT(hdt, listenerConsole);
                colorTool.logValue("Index generated and saved in ", sw.stopAndShow());
            }
        }
    }

    /**
     * Command line entry point: parses the arguments, handles
     * {@code -version} and the usage message, then calls {@link #execute()}.
     *
     * @param args command line arguments
     * @throws Throwable any failure raised while running the tool
     */
    public static void main(String[] args) throws Throwable {
        HDTDiffCat diffcat = new HDTDiffCat();
        JCommander com = new JCommander(diffcat);
        com.parse(args);
        com.setProgramName("hdtDiffCat");
        diffcat.colorTool = new ColorTool(diffcat.color, diffcat.quiet);

        if (showVersion) {
            diffcat.colorTool.log(HDTVersion.get_version_string("."));
            System.exit(0);
        } else if (diffcat.parameters.size() < 2) {
            // at least one input and the output are required
            com.usage();
            System.exit(1);
        }

        diffcat.colorTool.log("DiffCat "
                + diffcat.parameters.stream().limit(diffcat.parameters.size() - 1).collect(Collectors.joining(", "))
                + " to " + diffcat.parameters.get(diffcat.parameters.size() - 1));
        diffcat.execute();
    }
}
Loading