Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement type system #92

Merged
merged 7 commits into from
Mar 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ include_directories(
"src/algorithms/association-rules"
"src/algorithms/depminer"
"src/model"
"src/model/types"
"src/util"
"src/parser"
"src/parser/json"
Expand Down
1 change: 1 addition & 0 deletions src/algorithms/AlgoFactory.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once


#include <boost/any.hpp>
#include <boost/mp11/algorithm.hpp>
#include <enum.h>
Expand Down
28 changes: 28 additions & 0 deletions src/model/AbstractColumnData.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#pragma once

#include <string>

#include "Column.h"

namespace model {

/// Common base for per-column data holders.
///
/// Keeps a pointer to the Column being described (the pointer is stored as
/// given and is never deleted by this class) and requires every concrete
/// subclass to provide a textual representation.
class AbstractColumnData {
public:
    AbstractColumnData(AbstractColumnData const&) = default;
    AbstractColumnData& operator=(AbstractColumnData const&) = default;
    AbstractColumnData(AbstractColumnData&&) noexcept = default;
    AbstractColumnData& operator=(AbstractColumnData&&) noexcept = default;

    /// Returns the column pointer that was handed to the constructor.
    Column const* GetColumn() const { return column_; }

    /// Textual description of the data; implemented by subclasses.
    virtual std::string ToString() const = 0;
    virtual ~AbstractColumnData() = default;

protected:
    /// Construction is reserved for subclasses; `column` is stored without any check.
    explicit AbstractColumnData(Column const* column) noexcept : column_(column) {}

    Column const* column_;
};

} // namespace model

66 changes: 0 additions & 66 deletions src/model/ColumnData.cpp

This file was deleted.

27 changes: 8 additions & 19 deletions src/model/ColumnData.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,45 +7,34 @@

#include <vector>

#include "AbstractColumnData.h"
#include "Column.h"
#include "PositionListIndex.h"

// NOTE(review): this span appears to come from a rendered diff, so the previous and the
// current declaration of ColumnData are interleaved (two class headers, two constructors,
// two ToString variants). Comments below describe each line on its own.
class ColumnData {
private:
Column const* column_;
//std::variant<std::unique_ptr<PositionListIndex>, PositionListIndex*> position_list_index_;
// Column data backed by a position list index (PLI); derives from model::AbstractColumnData.
class ColumnData final : public model::AbstractColumnData {
// Shared handle to this column's PLI.
std::shared_ptr<util::PositionListIndex> position_list_index_;

public:
ColumnData(Column const* column, std::unique_ptr<util::PositionListIndex> position_list_index);
// Takes over the given PLI (moved into the shared_ptr member) and immediately calls
// ForceCacheProbingTable() on it.
ColumnData(Column const* column, std::unique_ptr<util::PositionListIndex> position_list_index)
: AbstractColumnData(column), position_list_index_(std::move(position_list_index)) {
position_list_index_->ForceCacheProbingTable();
}
// Invariant: the constructor guarantees that the PLI held by ColumnData has a cached ProbingTable
// Returns the PLI's cached probing table by reference.
std::vector<int> const& GetProbingTable() const {
return *position_list_index_->GetCachedProbingTable();
}
Column const* GetColumn() const { return column_; }
// Returns the probing-table entry for tuple_index; the index is not range-checked here.
int GetProbingTableValue(int tuple_index) const {
return (*position_list_index_->GetCachedProbingTable())[tuple_index];
}
// Plain-pointer access to the PLI; ownership stays with this object's shared_ptr.
util::PositionListIndex const* GetPositionListIndex() const {
return position_list_index_.get();
}
// TODO: check what happens to performance if a pointer to the PT is stored directly here;
// in theory, inlining should already optimize this away
 
// Transfers position_list_index_ ownership to the outside world. BE CAREFUL - other methods
// of ColumnData get invalidated while the PLI is moved out
// std::unique_ptr<PositionListIndex> moveOutPositionListIndex();

// Returns a copy of the shared_ptr, so the caller shares ownership of the PLI.
std::shared_ptr<util::PositionListIndex> GetPliOwnership() { return position_list_index_; }
// Const overload: same shared ownership, but the PLI is exposed as const.
std::shared_ptr<util::PositionListIndex const> GetPliOwnership() const {
return position_list_index_;
}

// Moves a PLI under the ownership of ColumnData
// void moveInPositionListIndex(std::unique_ptr<PositionListIndex> positionListIndex ) { position_list_index_ = std::move(positionListIndex); }

//void shuffle();

std::string ToString() { return "Data for " + column_->ToString(); }
bool operator==(const ColumnData& rhs);
// Returns "Data for " followed by the column's own ToString() output.
std::string ToString() const final { return "Data for " + column_->ToString(); }
};

23 changes: 1 addition & 22 deletions src/model/ColumnLayoutRelationData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,6 @@
#include <utility>

#include <easylogging++.h>
// Builds the relation from a ready schema and per-column data. Both arguments are moved:
// the schema into the RelationData base, the columns into column_data_.
ColumnLayoutRelationData::ColumnLayoutRelationData(std::unique_ptr<RelationalSchema> schema,
std::vector<ColumnData> column_data)
: RelationData(std::move(schema)), column_data_(std::move(column_data)) {}

// Returns a mutable reference to the data of the column at column_index.
// The index goes straight to operator[]; no range check is performed here.
ColumnData& ColumnLayoutRelationData::GetColumnData(int column_index) {
return column_data_[column_index];
}

//ColumnData const& ColumnLayoutRelationData::GetColumnData(int columnIndex) const ;

//unsigned int ColumnLayoutRelationData::GetNumRows() const

std::vector<int> ColumnLayoutRelationData::GetTuple(int tuple_index) const {
int num_columns = schema_->GetNumColumns();
Expand All @@ -30,17 +19,6 @@ std::vector<int> ColumnLayoutRelationData::GetTuple(int tuple_index) const {
return tuple;
}

/*void ColumnLayoutRelationData::shuffleColumns() {
for (auto &columnDatum : column_data_){
columnDatum.shuffle();
}
}*/

// Convenience overload: forwards to the four-argument CreateFrom, passing -1 for both
// max_cols and max_rows (presumably meaning "no limit" -- confirm against that overload).
std::unique_ptr<ColumnLayoutRelationData> ColumnLayoutRelationData::CreateFrom(
CSVParser& file_input, bool is_null_eq_null) {
return CreateFrom(file_input, is_null_eq_null, -1, -1);
}

std::unique_ptr<ColumnLayoutRelationData> ColumnLayoutRelationData::CreateFrom(
CSVParser& file_input, bool is_null_eq_null, int max_cols, long max_rows) {
auto schema = std::make_unique<RelationalSchema>(file_input.GetRelationName(), is_null_eq_null);
Expand Down Expand Up @@ -102,3 +80,4 @@ std::unique_ptr<ColumnLayoutRelationData> ColumnLayoutRelationData::CreateFrom(

return std::make_unique<ColumnLayoutRelationData>(std::move(schema), std::move(column_data));
}

31 changes: 11 additions & 20 deletions src/model/ColumnLayoutRelationData.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,20 @@
#include "RelationalSchema.h"
#include "RelationData.h"

// NOTE(review): this span appears to come from a rendered diff, so the previous and the
// current declaration of ColumnLayoutRelationData are interleaved (two class headers,
// duplicated GetNumRows/GetTuple, overlapping CreateFrom prototypes).
class ColumnLayoutRelationData : public RelationData {
private:
std::vector<ColumnData> column_data_;

class ColumnLayoutRelationData final : public RelationData {
public:
// Whole-container accessors (mutable and const).
std::vector<ColumnData>& GetColumnData() override { return column_data_; };
std::vector<ColumnData> const& GetColumnData() const override { return column_data_; };
// Single-column accessors; the index is not range-checked in the inline const overload.
ColumnData& GetColumnData(int column_index) override;
ColumnData const& GetColumnData(int column_index) const override {
return column_data_[column_index];
}
// Row count is the size of the first column's probing table; column_data_[0] is
// accessed without checking that any column exists.
unsigned int GetNumRows() const override { return column_data_[0].GetProbingTable().size(); }
std::vector<int> GetTuple(int tuple_index) const override;

//void shuffleColumns() override = 0;
static constexpr int kNullValueId = -1;

// Natural logarithm of the number of rows.
double GetMaximumEntropy() const { return std::log(GetNumRows()); }
// Inherit the base class template's constructors.
using RelationData::AbstractRelationData;

ColumnLayoutRelationData(std::unique_ptr<RelationalSchema> schema,
std::vector<ColumnData> column_data);
// Row count is the size of the first column's probing table; column_data_[0] is
// accessed without checking that any column exists.
[[nodiscard]] unsigned int GetNumRows() const final {
return column_data_[0].GetProbingTable().size();
}
[[nodiscard]] std::vector<int> GetTuple(int tuple_index) const;

// Factory prototypes reading from a CSVParser; the lines below mix the old pair of
// overloads with the new single prototype whose max_cols/max_rows default to -1.
static std::unique_ptr<ColumnLayoutRelationData> CreateFrom(CSVParser& file_input,
bool is_null_eq_null);
static std::unique_ptr<ColumnLayoutRelationData> CreateFrom(
CSVParser& file_input, bool is_null_eq_null, int max_cols, long max_rows);
bool is_null_eq_null,
int max_cols = -1,
long max_rows = -1);
};

64 changes: 64 additions & 0 deletions src/model/ColumnLayoutTypedRelationData.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#include "ColumnLayoutTypedRelationData.h"

#include <easylogging++.h>

namespace model {

/// Reads rows from `file_input` and builds a column-oriented relation of typed columns.
///
/// @param file_input       parser that supplies the relation name, the column names and
///                         the rows (via GetHasNext()/ParseNext()).
/// @param is_null_eq_null  forwarded to the RelationalSchema and to
///                         TypedColumnDataFactory::CreateFrom().
/// @param max_cols         when positive, only the first min(file columns, max_cols)
///                         columns are kept; otherwise all columns are kept.
/// @param max_rows         when positive, reading stops once that many rows were accepted;
///                         otherwise all rows are read.
/// @return the newly built relation.
///
/// Rows whose field count differs from the file's column count are skipped with a warning.
std::unique_ptr<ColumnLayoutTypedRelationData> ColumnLayoutTypedRelationData::CreateFrom(
        CSVParser& file_input, bool is_null_eq_null, int max_cols, long max_rows) {
    auto schema = std::make_unique<RelationalSchema>(file_input.GetRelationName(), is_null_eq_null);

    // Width of a well-formed row in the file, independent of how many columns we keep.
    int const file_columns = file_input.GetNumberOfColumns();
    int num_columns = file_columns;
    if (max_cols > 0) {
        num_columns = std::min(num_columns, max_cols);
    }

    std::vector<std::vector<std::string>> columns(num_columns);
    // Same width as max_rows so the comparison below cannot overflow first.
    long row_num = 0;
    std::vector<std::string> row;

    /* Parsing is very similar to ColumnLayoutRelationData::CreateFrom().
     * Maybe we need column-based parsing in addition to row-based in CSVParser */
    while (file_input.GetHasNext()) {
        row = file_input.ParseNext();

        // Rows are validated against the file's own column count. Comparing against the
        // truncated num_columns would reject every row as soon as max_cols cuts columns off.
        if (row.empty() && file_columns == 1) {
            // An empty line in a single-column file is a row holding one empty field.
            row.emplace_back("");
        } else if (static_cast<int>(row.size()) != file_columns) {
            LOG(WARNING) << "Skipping incomplete rows";
            continue;
        }

        if (max_rows > 0 && row_num >= max_rows) {
            // NOTE(review): row_num equals max_rows here, so the reported remainder is
            // always 0; the message text is left unchanged.
            LOG(WARNING) << "Processed " << row_num << " rows and " << max_rows - row_num
                         << " rows remain unprocessed due to `max_rows` parameter.";
            break;
        }

        // Keep only the first num_columns fields; any trailing fields are dropped.
        for (int index = 0; index < num_columns; ++index) {
            columns[index].push_back(std::move(row[index]));
        }
        ++row_num;
    }

    std::vector<TypedColumnData> column_data;
    column_data.reserve(num_columns);
    for (int i = 0; i < num_columns; ++i) {
        Column column(schema.get(), file_input.GetColumnName(i), i);
        schema->AppendColumn(std::move(column));
        TypedColumnData typed_column_data = model::TypedColumnDataFactory::CreateFrom(
                schema->GetColumn(i), std::move(columns[i]), is_null_eq_null);
        column_data.emplace_back(std::move(typed_column_data));
    }

    schema->Init();

    return std::make_unique<ColumnLayoutTypedRelationData>(std::move(schema),
                                                           std::move(column_data));
}

} // namespace model
29 changes: 29 additions & 0 deletions src/model/ColumnLayoutTypedRelationData.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#pragma once

#include "CSVParser.h"
#include "RelationData.h"
#include "TypedColumnData.h"

namespace model {

using TypedRelationData = AbstractRelationData<TypedColumnData>;

/// Relation stored column by column, with each column held as a TypedColumnData.
class ColumnLayoutTypedRelationData final : public TypedRelationData {
public:
    // Reuse the constructors of the AbstractRelationData<TypedColumnData> base.
    using TypedRelationData::AbstractRelationData;

    /// Row count as reported by the first column, or 0 when there are no columns.
    unsigned int GetNumRows() const final {
        if (!column_data_.empty()) {
            return column_data_.front().GetNumRows();
        }
        return 0;
    }

    /// Builds a relation from the rows supplied by `file_input`.
    /// `max_cols` and `max_rows` default to -1.
    static std::unique_ptr<ColumnLayoutTypedRelationData> CreateFrom(CSVParser& file_input,
                                                                     bool is_null_eq_null,
                                                                     int max_cols = -1,
                                                                     long max_rows = -1);
};

} // namespace model
3 changes: 0 additions & 3 deletions src/model/RelationData.cpp

This file was deleted.

Loading