Skip to content

Commit

Permalink
start compact lib and hdt file reading
Browse files Browse the repository at this point in the history
  • Loading branch information
ate47 committed Mar 23, 2024
1 parent b89e73f commit 660a346
Show file tree
Hide file tree
Showing 7 changed files with 363 additions and 170 deletions.
57 changes: 43 additions & 14 deletions src/cli/tools/lib/actslibtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,23 +320,51 @@ namespace {
return tool::BASIC_ERROR;
}

try {
actslib::hdt::HDTCookie cookie{ is };
is.close();
LOG_INFO("Type: {}", actslib::hdt::FormatName(cookie.GetType()));
LOG_INFO("Format: {}", cookie.GetFormat());

LOG_INFO("Props:");
for (const auto& [key, val] : cookie) {
LOG_INFO("'{}' = {}", key, val);
}
actslib::ToClose tc{ is };
actslib::hdt::HDTCookie cookie{ is };
LOG_INFO("Type: {}", actslib::hdt::FormatName(cookie.GetType()));
LOG_INFO("Format: {}", cookie.GetFormat());

LOG_INFO("Props:");
for (const auto& [key, val] : cookie) {
LOG_INFO("'{}' = {}", key, val);
}
catch (std::runtime_error& err) {
LOG_ERROR("Can't read profiler {}", err.what());
is.close();

return tool::OK;
}

// Tool entry point: load an HDT file and log its global cookie (type, format,
// properties) followed by every triple of its header.
//
// proc: process handle given by the tool framework (not used here)
// argc: number of entries in argv
// argv: tool arguments, argv[2] is the path of the HDT file to read
//
// Returns tool::BAD_USAGE when no path is given, tool::BASIC_ERROR when the
// file can't be opened and tool::OK once everything was logged.
// Exceptions thrown by HDT::LoadStream are not caught here.
int actslibhdt(Process& proc, int argc, const char* argv[]) {
    if (argc < 3) {
        return tool::BAD_USAGE;
    }

    std::ifstream is{ argv[2], std::ios::binary };

    if (!is) {
        LOG_ERROR("Can't open {}", argv[2]);
        return tool::BASIC_ERROR;
    }
    // The stream has to stay open until LoadStream() is done with it, so it is
    // not closed by hand here.
    // NOTE(review): assumes actslib::ToClose closes `is` when leaving the scope - confirm.
    actslib::ToClose tc{ is };

    actslib::hdt::HDT hdt{};

    hdt.LoadStream(is);

    const auto& cookie = hdt.GetCookie();

    LOG_INFO("Type: {}", actslib::hdt::FormatName(cookie.GetType()));
    LOG_INFO("Format: {}", cookie.GetFormat());

    LOG_INFO("Props:");
    for (const auto& [key, val] : cookie.GetProperties()) {
        LOG_INFO("'{}' = {}", key, val);
    }
    LOG_INFO("Header:");

    // LoadStream() either sets the header or throws, so it is available here
    for (const auto* triple : hdt.GetHeader()->data) {
        LOG_INFO("- {}", *triple);
    }

    return tool::OK;
}
Expand All @@ -346,7 +374,8 @@ namespace {
#ifndef CI_BUILD
// Tools only registered when CI_BUILD isn't defined.
ADD_TOOL("actslibtest", "", "Acts lib test", nullptr, actslibtest);

// "actslibhdt" is registered once, for the full HDT reader; the cookie reader
// has its own name.
// NOTE(review): "actslibhdtcooke" looks like a typo of "actslibhdtcookie". It is
// presumably the name used to call the tool, so it is left as is - confirm
// before renaming.
ADD_TOOL("actslibhdtcooke", " [hdt]", "Read hdt cookie", nullptr, actslibhdtcookie);
ADD_TOOL("actslibhdt", " [hdt]", "Read hdt", nullptr, actslibhdt);
#endif

ADD_TOOL("actslibprofiler", " [profile file]", "Read profiler", nullptr, actslibprofiler);
47 changes: 45 additions & 2 deletions src/lib/actslib/data/compact.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,52 @@ namespace actslib::data::compact {
};

// Sequence of unsigned integers of `numbits` bits each, packed back to back in
// an array of 64 bits words. Entry `id` starts at bit `id * numbits`; the low
// bits of an entry are stored first and an entry can span two consecutive
// words.
//
// The sequence doesn't own its buffer and doesn't check indexes: the caller
// has to keep the buffer alive and to stay inside it.
class Sequence {
    uint64_t* data{};
    size_t datalen{};
    size_t numbits{};
    uint64_t maxValue{};

    // Biggest value fitting in numbits bits.
    // Throws std::runtime_error if numbits is above WORD_LEN. The check is made
    // before the shift because shifting by more than the word size is undefined.
    static uint64_t MaxValueFor(size_t numbits) {
        if (numbits > WORD_LEN) {
            throw std::runtime_error(actslib::va("numbits can't be above %lld: %lld > %lld",
                static_cast<long long>(WORD_LEN), static_cast<long long>(numbits), static_cast<long long>(WORD_LEN)));
        }
        return numbits == WORD_LEN ? ~uint64_t{ 0 } : ((uint64_t{ 1 } << numbits) - 1);
    }

public:
    // Number of bits in one storage word
    static constexpr size_t WORD_LEN = sizeof(uint64_t) * 8;

    // Empty sequence: no buffer and entries of 0 bits.
    Sequence() {}

    // data:    buffer containing the packed entries, read and written as 64 bits
    //          words (NOTE(review): assumes the buffer is suitably aligned for
    //          uint64_t - confirm with the callers)
    // datalen: length of the buffer, forwarded as is to the CRC update
    //          (presumably a byte count - TODO confirm)
    // numbits: size of one entry in bits, at most WORD_LEN
    //
    // Throws std::runtime_error if numbits is above WORD_LEN.
    Sequence(char* data, size_t datalen, size_t numbits) :
        data(reinterpret_cast<uint64_t*>(data)), datalen(datalen), numbits(numbits),
        maxValue(MaxValueFor(numbits)) {
    }

    // Read the entry at the index id.
    uint64_t operator[](size_t id) const {
        if (numbits == 0) {
            // entries without any bit can only be 0, the shifts below would
            // also be done with a count of WORD_LEN, which is undefined
            return 0;
        }
        const size_t offset = id * numbits;
        const size_t word = offset / WORD_LEN;
        const size_t wordOff = offset % WORD_LEN;

        if (wordOff + numbits > WORD_LEN) {
            // 2 words: low part at the top of data[word], high part at the
            // bottom of data[word + 1]
            return (data[word] >> wordOff) | ((data[word + 1] << ((WORD_LEN << 1) - wordOff - numbits)) >> (WORD_LEN - numbits));
        }
        // 1 word: drop the bits above the entry, then the bits below it
        return (data[word] << (WORD_LEN - wordOff - numbits)) >> (WORD_LEN - numbits);
    }

    // Write value in the entry at the index id, using the layout read by
    // operator[]. The bits of the neighbour entries are kept.
    //
    // Throws std::runtime_error if value doesn't fit in numbits bits.
    void Set(size_t id, uint64_t value) {
        if (value > maxValue) {
            throw std::runtime_error(actslib::va("value is above limit for this structure %llu > %llu",
                static_cast<unsigned long long>(value), static_cast<unsigned long long>(maxValue)));
        }
        if (numbits == 0) {
            return; // nothing to store
        }
        const size_t offset = id * numbits;
        const size_t word = offset / WORD_LEN;
        const size_t wordOff = offset % WORD_LEN;

        // part of the entry inside the first word, the bits going past the end
        // of the word are dropped by the shift
        const uint64_t lowMask = maxValue << wordOff;
        data[word] = (data[word] & ~lowMask) | (value << wordOff);

        if (wordOff + numbits > WORD_LEN) {
            // 2 words, wordOff can't be 0 here so both shift counts are in [1, WORD_LEN - 1]
            const size_t spill = wordOff + numbits - WORD_LEN; // bits stored in the second word
            const uint64_t keepMask = ~uint64_t{ 0 } << spill;
            data[word + 1] = (data[word + 1] & keepMask) | (value >> (WORD_LEN - wordOff));
        }
    }

    // Add the buffer of the sequence to crc.
    void UpdateCRC(actslib::crc::CRC32& crc) const {
        crc.Update(reinterpret_cast<char*>(data), 0, datalen);
    }
};


Expand Down
18 changes: 18 additions & 0 deletions src/lib/actslib/hdt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,22 @@ namespace actslib::hdt {
// Name of a cookie type, "bad_type" when the type is outside of [0, HCT_COUNT).
const char* FormatName(HDTCookieType type) {
    const bool known = type >= 0 && type < HDTCookieType::HCT_COUNT;
    if (!known) {
        return "bad_type";
    }
    return HDTCookieTypeNames[type];
}

// Read a header section from is: the cookie of the section is read first, then
// the header itself using the format given by the cookie.
//
// The header is allocated with new and returned to the caller.
// Throws std::runtime_error if the cookie isn't a header cookie or if the
// format isn't "ntriples".
Header* LoadHeader(std::istream& is) {
    HDTCookie cookie{ is };

    // only a header section can be read here
    if (cookie.GetType() != HCT_HEADER) {
        throw std::runtime_error(actslib::va("Excepted header type, find: %s", FormatName(cookie.GetType())));
    }

    const auto& format = cookie.GetFormat();

    // "ntriples" is the only known format
    if (format != "ntriples") {
        throw std::runtime_error(actslib::va("Unknown header format: %s", format.c_str()));
    }

    return new PlainHeader(is, cookie);
}
}
96 changes: 92 additions & 4 deletions src/lib/actslib/hdt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ namespace actslib::hdt {
using const_iterator = decltype(props)::const_iterator;

// Create a cookie from its type and its format.
HDTCookie(HDTCookieType type, std::string format) : type(type), format(format) {}
HDTCookie(std::istream& is) {

void LoadCookie(std::istream& is) {
actslib::crc::CRC16 crc{};
char buffer[sizeof(HDT_COOKIE_MAGIC) - 1];
is.read(buffer, sizeof(HDT_COOKIE_MAGIC) - 1);
Expand Down Expand Up @@ -72,6 +73,10 @@ namespace actslib::hdt {
}
}

// Create a cookie by reading it from is, the reading is done by LoadCookie.
HDTCookie(std::istream& is) {
LoadCookie(is);
}

void Save(std::ostream& os) const {
actslib::crc::CRC16 crc{};
os.write(HDT_COOKIE_MAGIC, sizeof(HDT_COOKIE_MAGIC) - 1);
Expand Down Expand Up @@ -153,7 +158,7 @@ namespace actslib::hdt {
}

// Constant iterator on the first entry of props.
const_iterator cbegin() const {
    return props.cbegin();
}

const_iterator cend() const {
Expand All @@ -176,8 +181,41 @@ namespace actslib::hdt {
}
};

class PlainHeader {
PlainHeader(std::istream& is, HDTCookie& cookie) {
// Base class of the HDT headers: a list of triples owned by the header.
class Header {
public:
    // Triples of the header, every pointer is deleted by the destructor
    std::vector<rdf::TripleAlloc*> data{};
    using iterator = decltype(data)::iterator;
    using const_iterator = decltype(data)::const_iterator;

    Header() {}

    // A copy would share the triple pointers with the original and both
    // destructors would delete them.
    Header(const Header&) = delete;
    Header& operator=(const Header&) = delete;

    virtual ~Header() {
        for (auto* t : data) {
            delete t;
        }
    }

    iterator begin() {
        return data.begin();
    }

    iterator end() {
        return data.end();
    }

    // const overloads so a const Header can be used in a range-for loop
    const_iterator begin() const {
        return data.begin();
    }

    const_iterator end() const {
        return data.end();
    }

    const_iterator cbegin() const {
        return data.cbegin();
    }

    const_iterator cend() const {
        return data.cend();
    }

};

class PlainHeader : public Header {
HDTCookie cookie;
public:
PlainHeader(std::istream& is, HDTCookie& cookie) : cookie(cookie) {
if (cookie.GetType() != HCT_HEADER) {
throw std::invalid_argument("Cookie not valid for plain header");
}
Expand All @@ -192,8 +230,58 @@ namespace actslib::hdt {
raw.resize(length);
is.read(raw.data(), length);

std::stringstream ss{ raw, std::ios::in };

rdf::RDFParserNTriple parser{ ss };

while (parser) {
const auto& triple = *parser;

data.emplace_back(new rdf::TripleAlloc(triple));

++parser;
}

}

};

// Read a header section (cookie + content) from is. The returned header is
// allocated with new; throws std::runtime_error if the section isn't a header
// or if its format is unknown.
Header* LoadHeader(std::istream& is);

// Format of the global cookie of an HDT v1 file
constexpr const char HDTV1[] = "<http://purl.org/HDT/hdt#HDTv1>";

// HDT file: the global cookie and the header section read by LoadStream().
class HDT {
    // Header of the last loaded stream, owned by this object, null if nothing is loaded
    Header* header{};
    HDTCookie cookie{ HCT_GLOBAL, HDTV1 };

public:
    HDT() {}

    // A copy would share the header pointer with the original and both
    // destructors would delete it.
    HDT(const HDT&) = delete;
    HDT& operator=(const HDT&) = delete;

    ~HDT() {
        delete header; // deleting a null pointer does nothing
    }


    // Load the global cookie then the header from is. A previously loaded
    // header is released first.
    //
    // Throws std::runtime_error if the global cookie isn't an HDT v1 global
    // cookie. When an exception is thrown, GetHeader() returns null.
    void LoadStream(std::istream& is) {
        delete header;
        header = nullptr;

        cookie.LoadCookie(is);

        if (cookie.GetType() != HCT_GLOBAL || cookie.GetFormat() != HDTV1) {
            throw std::runtime_error(actslib::va("Global HDT v1 cookie excepted, find %s/%s", FormatName(cookie.GetType()), cookie.GetFormat().c_str()));
        }

        header = LoadHeader(is);
    }

    // Global cookie, the one read by the last LoadStream() call if any
    const HDTCookie& GetCookie() const {
        return cookie;
    }

    // Loaded header, null if no header is loaded; still owned by this object
    const Header* GetHeader() const {
        return header;
    }
};
}
Loading

0 comments on commit 660a346

Please sign in to comment.