Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First version of a serverless library for QLever #1669

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,7 @@ target_precompile_headers(engine PRIVATE ${PRECOMPILED_HEADER_FILES_ENGINE})
add_subdirectory(src/index)
add_subdirectory(src/util)
add_subdirectory(benchmark)
add_subdirectory(src/libqlever)

enable_testing()
option(SINGLE_TEST_BINARY "Link all unit tests into a single binary. This is useful e.g. for code coverage tools" OFF)
Expand All @@ -419,7 +420,7 @@ add_executable(IndexBuilderMain src/index/IndexBuilderMain.cpp)
qlever_target_link_libraries(IndexBuilderMain index ${CMAKE_THREAD_LIBS_INIT} Boost::program_options compilationInfo)

add_executable(ServerMain src/ServerMain.cpp)
qlever_target_link_libraries(ServerMain engine ${CMAKE_THREAD_LIBS_INIT} Boost::program_options compilationInfo)
qlever_target_link_libraries(ServerMain engine server ${CMAKE_THREAD_LIBS_INIT} Boost::program_options compilationInfo)
target_precompile_headers(ServerMain REUSE_FROM engine)

add_executable(VocabularyMergerMain src/VocabularyMergerMain.cpp)
Expand Down
4 changes: 3 additions & 1 deletion src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ add_library(engine
Engine.cpp QueryExecutionTree.cpp Operation.cpp Result.cpp LocalVocab.cpp
IndexScan.cpp Join.cpp Sort.cpp
Distinct.cpp OrderBy.cpp Filter.cpp
Server.cpp QueryPlanner.cpp QueryPlanningCostFactors.cpp
QueryPlanner.cpp QueryPlanningCostFactors.cpp
OptionalJoin.cpp CountAvailablePredicates.cpp GroupBy.cpp HasPredicateScan.cpp
Union.cpp MultiColumnJoin.cpp TransitivePathBase.cpp
TransitivePathHashMap.cpp TransitivePathBinSearch.cpp Service.cpp
Expand All @@ -14,4 +14,6 @@ add_library(engine
CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp
TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp
CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp)
add_library(server Server.cpp)
qlever_target_link_libraries(server)
qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2)
1 change: 0 additions & 1 deletion src/engine/Service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include "util/HashMap.h"
#include "util/HashSet.h"
#include "util/StringUtils.h"
#include "util/http/HttpUtils.h"

// ____________________________________________________________________________
Service::Service(QueryExecutionContext* qec,
Expand Down
4 changes: 4 additions & 0 deletions src/engine/Service.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
#include "util/LazyJsonParser.h"
#include "util/http/HttpClient.h"

// Forward declarations to reduce dependencies
struct HttpOrHttpsResponse;
namespace ad_utility {}

// The SERVICE operation. Sends a query to the remote endpoint specified by the
// service IRI, gets the result as JSON, parses it, and writes it into a result
// table.
Expand Down
1 change: 0 additions & 1 deletion src/index/IndexBuilderMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ int main(int argc, char** argv) {
bool onlyPsoAndPos = false;
bool addWordsFromLiterals = false;
std::optional<ad_utility::MemorySize> stxxlMemory;
optind = 1;

Index index{ad_utility::makeUnlimitedAllocator<Id>()};

Expand Down
5 changes: 5 additions & 0 deletions src/libqlever/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

# The serverless QLever library, built from `Qlever.cpp`.
add_library(qlever Qlever.cpp)
qlever_target_link_libraries(qlever parser engine util index absl::strings)
# A small example executable (`LibQLeverExample.cpp`) that links against the
# `qlever` library defined above.
add_executable(LibQLeverExample LibQLeverExample.cpp)
qlever_target_link_libraries(LibQLeverExample parser engine util index qlever absl::strings)
16 changes: 16 additions & 0 deletions src/libqlever/LibQLeverExample.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>

#include <iostream>

#include "libqlever/Qlever.h"

int main() {
qlever::QleverConfig config;
config.baseName = "exampleIndex";
config.inputFiles.emplace_back("/dev/stdin", qlever::Filetype::Turtle);
qlever::Qlever::buildIndex(config);
qlever::Qlever qlever{config};
std::cout << qlever.query("SELECT * {?s ?p ?o}") << std::endl;
}
159 changes: 159 additions & 0 deletions src/libqlever/Qlever.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>

#include "libqlever/Qlever.h"

namespace qlever {
// Return the name of the STXXL configuration file for the given `location`,
// which is `location` with the suffix `.stxxl` appended.
static std::string getStxxlConfigFileName(const string& location) {
  constexpr const char* configSuffix = ".stxxl";
  return absl::StrCat(location, configSuffix);
}

// Return the name of the STXXL disk file, which is the concatenation of
// `location`, `tail`, and the suffix `.stxxl-disk`.
static std::string getStxxlDiskFileName(const string& location,
                                        const string& tail) {
  constexpr const char* diskSuffix = ".stxxl-disk";
  return absl::StrCat(location, tail, diskSuffix);
}

// Write a `.stxxl` config file for the given `location` and point STXXL to it
// via the `STXXLCFG` environment variable.
// All we need is a place with sufficient free space. We use `location` and a
// constant size (`STXXL_DISK_SIZE_INDEX_BUILDER`) for now. The required size
// can only be estimated anyway, because the index size depends on the
// structure of the words files rather than only on their size (because of the
// "multiplications" performed).
static void writeStxxlConfigFile(const string& location, const string& tail) {
  const string stxxlConfigFileName = getStxxlConfigFileName(location);
  // `makeOfstream` already opens the file for writing; the additional
  // `ad_utility::File` handle that used to be opened here was never used.
  auto configFile = ad_utility::makeOfstream(stxxlConfigFileName);
  // Inform stxxl about the location of the `.stxxl` file. The last argument
  // makes `setenv` overwrite a previously set value.
  setenv("STXXLCFG", stxxlConfigFileName.c_str(), true);
  configFile << "disk=" << getStxxlDiskFileName(location, tail) << ","
             << STXXL_DISK_SIZE_INDEX_BUILDER << ",syscall\n";
}

// _____________________________________________________________________________
// Load the on-disk index with the basename `config.baseName` and set up the
// members that are needed for query processing (allocator, cache settings,
// and sort performance estimates).
// NOTE(review): `config.memoryLimit.value()` throws
// `std::bad_optional_access` if the optional is empty, whereas `buildIndex`
// explicitly tolerates an empty `memoryLimit`.
Qlever::Qlever(const QleverConfig& config)
: allocator_{ad_utility::AllocatorWithLimit<Id>{
ad_utility::makeAllocationMemoryLeftThreadsafeObject(
config.memoryLimit.value())}},
index_{allocator_} {
// Route the global logging stream to the static `ignoreLogStream`.
ad_utility::setGlobalLoggingStream(&ignoreLogStream);
// This also directly triggers the update functions and propagates the
// values of the parameters to the cache.
// NOTE(review): the callbacks capture `this`. If `RuntimeParameters()`
// outlives this object, the stored callbacks would refer to a destroyed
// `cache_` -- confirm the intended lifetime.
RuntimeParameters().setOnUpdateAction<"cache-max-num-entries">(
[this](size_t newValue) { cache_.setMaxNumEntries(newValue); });
RuntimeParameters().setOnUpdateAction<"cache-max-size">(
[this](ad_utility::MemorySize newValue) { cache_.setMaxSize(newValue); });
RuntimeParameters().setOnUpdateAction<"cache-max-size-single-entry">(
[this](ad_utility::MemorySize newValue) {
cache_.setMaxSizeSingleEntry(newValue);
});
// Patterns (and the pattern trick in the query planner) are used unless
// `config.noPatterns` is set.
index_.usePatterns() = !config.noPatterns;
enablePatternTrick_ = !config.noPatterns;
index_.loadAllPermutations() = !config.onlyPsoAndPos;

// Init the index.
index_.createFromOnDiskIndex(config.baseName);
// TODO<joka921> Enable the loading of the text index via the QLever lib.
/*
if (useText) {
index_.addTextFromOnDiskIndex();
}
*/

// The second argument is `PERCENTAGE_OF_TRIPLES_FOR_SORT_ESTIMATE` percent
// of the number of triples in the index.
sortPerformanceEstimator_.computeEstimatesExpensively(
allocator_, index_.numTriples().normalAndInternal_() *
PERCENTAGE_OF_TRIPLES_FOR_SORT_ESTIMATE / 100);
}

// _____________________________________________________________________________
// Build a persistent on-disk index as specified by `config`. The triples are
// read from `config.inputFiles` (unless `config.onlyAddTextIndex` is set), and
// the text index and the docs DB are added if the corresponding files are
// specified. If anything fails with a `std::exception`, the error is logged
// and the exception is rethrown to the caller.
void Qlever::buildIndex(QleverConfig config) {
  // Note: From here on, all log output (including the error message in the
  // `catch` clause below) goes to `ignoreLogStream`.
  ad_utility::setGlobalLoggingStream(&ignoreLogStream);
  Index index{ad_utility::makeUnlimitedAllocator<Id>()};

  if (config.memoryLimit.has_value()) {
    index.memoryLimitIndexBuilding() = config.memoryLimit.value();
  }
  // If no text index name was specified, take the part of the wordsfile after
  // the last slash.
  if (config.textIndexName.empty() && !config.wordsfile.empty()) {
    config.textIndexName =
        ad_utility::getLastPartOfString(config.wordsfile, '/');
  }
  try {
    LOG(TRACE) << "Configuring STXXL..." << std::endl;
    // Split the basename into the directory part (including the trailing
    // slash) and the rest. If there is no slash, `rfind` returns `npos` and
    // `npos + 1` wraps around to `0`, so `location` is empty and `tail` is the
    // complete basename.
    size_t posOfLastSlash = config.baseName.rfind('/');
    string location = config.baseName.substr(0, posOfLastSlash + 1);
    string tail = config.baseName.substr(posOfLastSlash + 1);
    writeStxxlConfigFile(location, tail);
    string stxxlFileName = getStxxlDiskFileName(location, tail);
    LOG(TRACE) << "done." << std::endl;

    index.setKbName(config.kbIndexName);
    index.setTextName(config.textIndexName);
    index.usePatterns() = !config.noPatterns;
    index.setOnDiskBase(config.baseName);
    index.setKeepTempFiles(config.keepTemporaryFiles);
    index.setSettingsFile(config.settingsFile);
    index.loadAllPermutations() = !config.onlyPsoAndPos;

    if (!config.onlyAddTextIndex) {
      AD_CONTRACT_CHECK(!config.inputFiles.empty());
      index.createFromFiles(config.inputFiles);
    }

    if (!config.wordsfile.empty() || config.addWordsFromLiterals) {
      index.addTextFromContextFile(config.wordsfile,
                                   config.addWordsFromLiterals);
    }

    if (!config.docsfile.empty()) {
      index.buildDocsDB(config.docsfile);
    }
    // The STXXL disk file is only removed if everything above succeeded.
    ad_utility::deleteFile(stxxlFileName, false);
  } catch (const std::exception& e) {
    // The exception is only inspected, so catch it by const reference.
    LOG(ERROR) << "Creating the index for QLever failed with the following "
                  "exception: "
               << e.what() << std::endl;
    throw;
  }
}

// ___________________________________________________________________________
// Parse and plan the given SPARQL `query`, execute it on the loaded index, and
// return the complete result as a single string in the `sparqlJson` media
// type.
std::string Qlever::query(std::string query) {
  QueryExecutionContext qec{index_, &cache_, allocator_,
                            sortPerformanceEstimator_};
  // `query` is a by-value parameter that is not used again below, so it can be
  // moved into the parser instead of being copied.
  auto parsedQuery = SparqlParser::parseQuery(std::move(query));
  auto handle = std::make_shared<ad_utility::CancellationHandle<>>();
  QueryPlanner qp{&qec, handle};
  qp.setEnablePatternTrick(enablePatternTrick_);
  auto qet = qp.createExecutionTree(parsedQuery);
  qet.isRoot() = true;
  auto& limitOffset = parsedQuery._limitOffset;

  // TODO<joka921> For cancellation we have to call
  // `recursivelySetCancellationHandle` (see `Server::parseAndPlan`).

  // TODO<joka921> The following interface looks fishy and should be
  // incorporated directly in the query planner or somewhere else
  // (it is used identically in `Server.cpp`).

  // Make sure that the offset is not applied again when exporting the result
  // (it is already applied by the root operation in the query execution
  // tree). Note that we don't need this for the limit because applying a
  // fixed limit is idempotent.
  AD_CORRECTNESS_CHECK(limitOffset._offset >=
                       qet.getRootOperation()->getLimit()._offset);
  limitOffset._offset -= qet.getRootOperation()->getLimit()._offset;

  ad_utility::Timer timer{ad_utility::Timer::Started};
  auto responseGenerator = ExportQueryExecutionTrees::computeResult(
      parsedQuery, qet, ad_utility::MediaType::sparqlJson, timer,
      std::move(handle));
  std::string result;
  // NOTE(review): This writes to `stdout` from inside a library function,
  // which looks like a debugging leftover -- confirm whether it is intended.
  std::cout << "Writing the result:" << std::endl;
  // The result is produced in batches, which are concatenated here.
  for (const auto& batch : responseGenerator) {
    result += batch;
  }
  return result;
}
} // namespace qlever
99 changes: 99 additions & 0 deletions src/libqlever/Qlever.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>

#pragma once

#include <util/MemorySize/MemorySize.h>

#include <optional>
#include <string>
#include <utility>
#include <vector>

#include "engine/ExportQueryExecutionTrees.h"
#include "engine/QueryExecutionContext.h"
#include "engine/QueryPlanner.h"
#include "global/RuntimeParameters.h"
#include "index/Index.h"
#include "index/InputFileSpecification.h"
#include "parser/SparqlParser.h"
#include "util/AllocatorWithLimit.h"
#include "util/http/MediaTypes.h"

namespace qlever {

// A configuration for a QLever instance. The same struct is used for building
// an index (`Qlever::buildIndex`) and for loading it (the `Qlever`
// constructor); not all members are read by both.
struct QleverConfig {
// A basename for all files that QLever will write as part of the index
// building.
std::string baseName;

// The specification of the input files (Turtle/NT or NQuad) from which the
// index will be built.
std::vector<qlever::InputFileSpecification> inputFiles;

// A memory limit that will be applied during the index building as well as
// during the query processing. The default is 1 GB.
// NOTE(review): The `Qlever` constructor calls `.value()` on this member, so
// it must not be empty when loading an index.
std::optional<ad_utility::MemorySize> memoryLimit =
ad_utility::MemorySize::gigabytes(1);

// If set to true, then no so-called patterns will be built. Patterns are
// useful for autocompletion and for certain statistics queries, but not for
// typical SELECT queries.
bool noPatterns = false;

// Only build two permutations. This is sufficient if all queries have a fixed
// predicate.
// TODO<joka921> We haven't tested this mode in a while, it is currently
// probably broken because the UPDATE mechanism doesn't support only two
// permutations.
bool onlyPsoAndPos = false;

// Optionally a filename to a .json file with additional settings...
// TODO<joka921> Make these settings part of this struct directly
// TODO<joka921> Document these additional settings.
std::string settingsFile;

// The following members are only required if QLever's full-text search
// extension is to be used, see `IndexBuilderMain.cpp` for additional details.

// Passed to `Index::addTextFromContextFile` together with `wordsfile`. If set
// to true, the text index is built even if `wordsfile` is empty.
bool addWordsFromLiterals = false;
// Passed to `Index::setKbName` when building the index.
std::string kbIndexName;
// The words file from which the text index is built (if not empty).
std::string wordsfile;
// If not empty, `Index::buildDocsDB` is called with this file.
std::string docsfile;
// Passed to `Index::setTextName`. If empty and `wordsfile` is not empty, the
// part of `wordsfile` after the last slash is used instead.
std::string textIndexName;
// If set to true, `Qlever::buildIndex` does not build the index from
// `inputFiles` and only performs the text-related steps.
bool onlyAddTextIndex = false;

// If set to true, then certain temporary files which are created while
// building the index are not deleted. This can be useful for debugging.
bool keepTemporaryFiles = false;
};

// A class that can be used to run QLever without the HTTP server, e.g. as part
// of another program. An index is first built once via the static function
// `buildIndex`, and can then be loaded and queried via an instance of this
// class.
class Qlever {
private:
// The cache that is passed to the `QueryExecutionContext` of each query.
QueryResultCache cache_;
// The allocator used for query processing, limited by
// `QleverConfig::memoryLimit`.
ad_utility::AllocatorWithLimit<Id> allocator_;
SortPerformanceEstimator sortPerformanceEstimator_;
// NOTE: `index_` is initialized from `allocator_` in the constructor, so it
// has to be declared after `allocator_` (members are initialized in
// declaration order).
Index index_;
// Whether the query planner may use the pattern trick; set to
// `!QleverConfig::noPatterns` in the constructor.
bool enablePatternTrick_;
// The stream to which the global logging stream is redirected by the
// constructor and by `buildIndex`.
// NOTE(review): `std::ostringstream` requires `<sstream>`, which is not
// included directly in this header. Also, a string stream presumably keeps
// everything that is written to it in memory -- confirm that this is
// acceptable for long-running programs.
static inline std::ostringstream ignoreLogStream;

public:
// Build a persistent on disk index using the `config`.
static void buildIndex(QleverConfig config);

// Load the qlever index from file.
explicit Qlever(const QleverConfig& config);

// Run the given query on the index. Currently only SELECT and ASK queries are
// supported, and the result will always be in sparql-results+json format.
// TODO<joka921> Support other formats + CONSTRUCT queries, support
// cancellation, time limits, and observable queries.
std::string query(std::string query);

// TODO<joka921> Give access to the RuntimeParameters() which allow for
// further tweaking of the qlever instance.
};
} // namespace qlever
2 changes: 1 addition & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,6 @@ addLinkAndDiscoverTest(SparqlExpressionGeneratorsTest engine)

addLinkAndDiscoverTest(UrlParserTest)

addLinkAndDiscoverTest(ServerTest engine)
addLinkAndDiscoverTest(ServerTest engine server)

addLinkAndDiscoverTest(ExecuteUpdateTest engine)