Skip to content

Commit

Permalink
accelerate x = c(x, y) with AppendEidosValues()
Browse files Browse the repository at this point in the history
  • Loading branch information
bhaller committed Dec 26, 2023
1 parent aabea2f commit 602dad1
Show file tree
Hide file tree
Showing 7 changed files with 240 additions and 2 deletions.
1 change: 1 addition & 0 deletions VERSIONS
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ development head (in the master branch):
big changes to Eidos under the hood - removal of the singleton/vector distinction and EidosValue subclasses, add constness flag, differentiate casting vs. non-casting accesses, etc.
policy change: float indices are no longer legal for subsetting, indices must be integer (or a logical vector, as usual); this was inherited from R and is a bad idea for Eidos
policy change: assignment into object properties must match the type of the property; no more promotion to integer/float from lower types
add some internal magic to accelerate statements of the form "x = c(x,y)" by appending y onto the end of x directly


version 4.1 (Eidos version 3.1):
Expand Down
17 changes: 17 additions & 0 deletions eidos/eidos_ast_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,23 @@ void EidosASTNode::_OptimizeAssignments(void) const
}
}
}
else if ((child1_token_type == EidosTokenType::kTokenLParen) && (child1->children_.size() == 3))
{
// ... the rvalue is a function call with three children...
EidosASTNode *left_operand = child1->children_[0];

if ((left_operand->token_->token_type_ == EidosTokenType::kTokenIdentifier) && (left_operand->token_->token_string_ == gEidosStr_c))
{
// ... it's a call to c()...
EidosASTNode *middle_operand = child1->children_[1];

if ((middle_operand->token_->token_type_ == EidosTokenType::kTokenIdentifier) && (middle_operand->token_->token_string_ == child0->token_->token_string_))
{
// ... the first argument to c() is the same as the lvalue, so we have x = c(x, <expression>), so we mark that in the tree for Evaluate_Assign()
cached_append_assignment_ = true;
}
}
}
}
}
}
Expand Down
1 change: 1 addition & 0 deletions eidos/eidos_ast_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class EidosASTNode
mutable uint8_t cached_for_references_index_ = true; // pre-cached as true if the index variable is referenced at all in the loop
mutable uint8_t cached_for_assigns_index_ = true; // pre-cached as true if the index variable is assigned to in the loop
mutable uint8_t cached_compound_assignment_ = false; // pre-cached on assignment nodes if they are of the form "x=x+1" or "x=x-1" only
mutable uint8_t cached_append_assignment_ = false; // pre-cached on assignment nodes if they are of the form "x=c(x, y)" only

mutable EidosTypeSpecifier typespec_; // only valid for type-specifier nodes inside function declarations
mutable bool hit_eof_in_tolerant_parse_ = false; // only valid for compound statement nodes; used by the type-interpreter to handle scoping
Expand Down
108 changes: 108 additions & 0 deletions eidos/eidos_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1190,6 +1190,114 @@ EidosValue_SP SubsetEidosValue(const EidosValue *p_original_value, const EidosVa
return result_SP;
}

EidosValue_SP AppendEidosValues(EidosValue_SP x_value, EidosValue_SP y_value)
{
	// Fast path behind "x = c(x, <expr>)" in EidosInterpreter::Evaluate_Assign(): the elements of y_value are appended
	// directly onto x_value, so ConcatenateEidosValues() does not have to allocate a fresh value.
	//
	// Return contract:
	//   - a null EidosValue_SP means x_value was extended in place and the caller has nothing further to do;
	//   - a non-null EidosValue_SP is a newly built value (from ConcatenateEidosValues()) that must replace x.
	//
	// Matrix/array attributes are deliberately ignored; like c(), the in-place result is always a plain vector.
	const EidosValueType x_type = x_value->Type();
	
	// The in-place append only applies when no type promotion is needed and x may be modified; every other
	// combination is delegated to ConcatenateEidosValues()
	if ((y_value->Type() != x_type) || x_value->IsConstant())
	{
		std::vector<EidosValue_SP> concat_args;
		
		concat_args.push_back(x_value);
		concat_args.push_back(y_value);
		
		// allow NULL but not VOID; the returned value needs to replace x's value
		return ConcatenateEidosValues(concat_args, true, false);
	}
	
	// Same type, and x is not a constant: perform a true append.  Both counts are captured before x is grown, and in
	// each case below the source pointer is fetched only after x has been grown, so that the append remains correct
	// even when y_value is the very same object as x_value.
	const int old_count = x_value->Count();
	const int added_count = y_value->Count();
	const int new_count = old_count + added_count;
	
	switch (x_type)
	{
		case EidosValueType::kValueLogical:
		{
			EidosValue_Logical *dest = static_cast<EidosValue_Logical *>(x_value.get());
			
			dest->resize_by_expanding_no_initialize(new_count);
			
			const eidos_logical_t *src = y_value->LogicalData();
			
			for (int i = 0; i < added_count; ++i)
				dest->set_logical_no_check(src[i], old_count + i);
			
			break;
		}
		case EidosValueType::kValueInt:
		{
			EidosValue_Int *dest = static_cast<EidosValue_Int *>(x_value.get());
			
			dest->resize_by_expanding_no_initialize(new_count);
			
			const int64_t *src = y_value->IntData();
			
			for (int i = 0; i < added_count; ++i)
				dest->set_int_no_check(src[i], old_count + i);
			
			break;
		}
		case EidosValueType::kValueFloat:
		{
			EidosValue_Float *dest = static_cast<EidosValue_Float *>(x_value.get());
			
			dest->resize_by_expanding_no_initialize(new_count);
			
			const double *src = y_value->FloatData();
			
			for (int i = 0; i < added_count; ++i)
				dest->set_float_no_check(src[i], old_count + i);
			
			break;
		}
		case EidosValueType::kValueString:
		{
			EidosValue_String *dest = static_cast<EidosValue_String *>(x_value.get());
			
			// strings are pushed one at a time, so here we reserve room up front rather than resizing
			dest->Reserve(new_count);
			
			const std::string *src = y_value->StringData();
			
			for (int i = 0; i < added_count; ++i)
				dest->PushString(src[i]);
			
			break;
		}
		case EidosValueType::kValueObject:
		{
			EidosValue_Object *dest = static_cast<EidosValue_Object *>(x_value.get());
			
			// the _RR resize leaves the new slots uninitialized, so only the "no previous" / NORR setters are used below
			dest->resize_by_expanding_no_initialize_RR(new_count);
			
			EidosObject * const *src = y_value->ObjectData();
			
			if (dest->UsesRetainRelease())
			{
				for (int i = 0; i < added_count; ++i)
					dest->set_object_element_no_check_no_previous_RR(src[i], old_count + i);
			}
			else
			{
				for (int i = 0; i < added_count; ++i)
					dest->set_object_element_no_check_NORR(src[i], old_count + i);
			}
			
			break;
		}
		default:
			// any other type: no elements are copied
			break;
	}
	
	// drop any matrix/array dimensions so that x is a plain vector, as c() would produce
	x_value->SetDimensions(1, nullptr);
	
	// a null pointer tells the caller that the append was handled in place
	return EidosValue_SP();
}




Expand Down
1 change: 1 addition & 0 deletions eidos/eidos_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ EidosValue_SP ConcatenateEidosValues(const std::vector<EidosValue_SP> &p_argumen
EidosValue_SP UniqueEidosValue(const EidosValue *p_value, bool p_preserve_order);
EidosValue_SP Eidos_ExecuteLambdaInternal(const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter, bool p_execute_in_outer_scope);
EidosValue_SP SubsetEidosValue(const EidosValue *p_value, const EidosValue *p_indices, EidosToken *p_error_token, bool p_raise_range_errors);
// Appends y_value onto the end of x_value to accelerate "x = c(x, y)".  A null EidosValue_SP return means x_value was
// modified in place; a non-null return is a new value from ConcatenateEidosValues() that must replace x's value.
EidosValue_SP AppendEidosValues(EidosValue_SP x_value, EidosValue_SP y_value);


#pragma mark -
Expand Down
30 changes: 28 additions & 2 deletions eidos/eidos_interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3746,8 +3746,34 @@ EidosValue_SP EidosInterpreter::Evaluate_Assign(const EidosASTNode *p_node)

// and then we drop through to be handled normally by the standard assign operator code
}
else if (p_node->cached_append_assignment_)
{
// we have an assignment statement of the form x = c(x, y), where x is a simple identifier and y is any single expression node
// as above, we will try to modify the value of x in place if we can, which should be safe in this context
EidosASTNode *lvalue_node = p_node->children_[0];
EidosValue_SP lvalue_SP = global_symbols_->GetValueOrRaiseForASTNode(lvalue_node);
EidosASTNode *call_node = p_node->children_[1];
EidosASTNode *rvalue_node = call_node->children_[2]; // "c" is [0], "x" is [1], "y" is [2]
EidosValue_SP rvalue_SP = FastEvaluateNode(rvalue_node);

EidosValue_SP result_SP = AppendEidosValues(lvalue_SP, rvalue_SP);

if (!result_SP)
{
// a nullptr return means the append was successful, so we're done
goto compoundAssignmentSuccess;
}
else
{
// a non-nullptr return means that a new value had to be created for x = c(x,y), so we need to replace
// the value of x with that new value; std::swap(lvalue_SP, result_SP) does not do it because lvalue_SP
// is not the EidosValue_SP that is inside the symbol table, it just points to the same EidosValue!
global_symbols_->SetValueForSymbolNoCopy(lvalue_node->cached_stringID_, std::move(result_SP));
goto compoundAssignmentSuccess;
}
}

// we can drop through to here even if cached_compound_assignment_ is set, if the code above bailed for some reason
// we can drop through to here even if cached_compound_assignment_ or cached_append_assignment_ is set, if the code above bailed for some reason
#ifdef SLIMGUI
compoundAssignmentSkip:
#endif
Expand Down Expand Up @@ -3794,7 +3820,7 @@ EidosValue_SP EidosInterpreter::Evaluate_Assign(const EidosASTNode *p_node)

compoundAssignmentSuccess:

// by design, assignment does not yield a usable value; instead it produces void – this prevents the error "if (x = 3) ..."
// by design, assignment does not yield a usable value; instead it produces void – this prevents the bug "if (x = 3) ..."
// since the condition is void and will raise; the loss of legitimate uses of "if (x = 3)" seems a small price to pay
EidosValue_SP result_SP = gStaticEidosValueVOID;

Expand Down
84 changes: 84 additions & 0 deletions eidos/eidos_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,26 @@ class EidosValue_Logical final : public EidosValue
// vector lookalike methods; not virtual, only for clients with a EidosValue_Logical*
EidosValue_Logical *reserve(size_t p_reserved_size); // as in std::vector
EidosValue_Logical *resize_no_initialize(size_t p_new_size); // does not zero-initialize, unlike std::vector!

inline void resize_by_expanding_no_initialize(size_t p_new_size)
{
	// Sets the element count to exactly p_new_size.  When the current capacity is too small, the capacity is
	// grown geometrically (a floor of 16, then doubled until it fits) so that a series of repeated resizes
	// does not trigger a realloc every time.  This method itself writes nothing into the newly exposed slots.
	WILL_MODIFY(this);
	
	if (capacity_ < p_new_size)
	{
		size_t grown_capacity = capacity_;
		
		if (capacity_ < 16)
			grown_capacity = 16;
		
		while (grown_capacity < p_new_size)
			grown_capacity *= 2;
		
		reserve(grown_capacity);
	}
	
	// whatever capacity we ended up with, the size becomes exactly what was requested
	count_ = p_new_size;
}

void expand(void); // expand to fit (at least) one new value
void erase_index(size_t p_index); // a weak substitute for erase()

Expand Down Expand Up @@ -680,6 +700,25 @@ class EidosValue_Int final : public EidosValue
return this;
}

inline void resize_by_expanding_no_initialize(size_t p_new_size)
{
	// Sets the element count to exactly p_new_size.  When the current capacity is too small, the capacity is
	// grown geometrically (a floor of 16, then doubled until it fits) so that a series of repeated resizes
	// does not trigger a realloc every time.  This method itself writes nothing into the newly exposed slots.
	WILL_MODIFY(this);
	
	if (capacity_ < p_new_size)
	{
		size_t grown_capacity = capacity_;
		
		if (capacity_ < 16)
			grown_capacity = 16;
		
		while (grown_capacity < p_new_size)
			grown_capacity *= 2;
		
		reserve(grown_capacity);
	}
	
	// whatever capacity we ended up with, the size becomes exactly what was requested
	count_ = p_new_size;
}

inline __attribute__((always_inline)) int64_t *data_mutable(void) { WILL_MODIFY(this); return values_; }
inline __attribute__((always_inline)) const int64_t *data(void) const { return values_; }
inline __attribute__((always_inline)) void push_int(int64_t p_int)
Expand Down Expand Up @@ -787,6 +826,25 @@ class EidosValue_Float final : public EidosValue
return this;
}

inline void resize_by_expanding_no_initialize(size_t p_new_size)
{
	// Sets the element count to exactly p_new_size.  When the current capacity is too small, the capacity is
	// grown geometrically (a floor of 16, then doubled until it fits) so that a series of repeated resizes
	// does not trigger a realloc every time.  This method itself writes nothing into the newly exposed slots.
	WILL_MODIFY(this);
	
	if (capacity_ < p_new_size)
	{
		size_t grown_capacity = capacity_;
		
		if (capacity_ < 16)
			grown_capacity = 16;
		
		while (grown_capacity < p_new_size)
			grown_capacity *= 2;
		
		reserve(grown_capacity);
	}
	
	// whatever capacity we ended up with, the size becomes exactly what was requested
	count_ = p_new_size;
}

inline __attribute__((always_inline)) double *data_mutable(void) { WILL_MODIFY(this); return values_; }
inline __attribute__((always_inline)) const double *data(void) const { return values_; }
inline __attribute__((always_inline)) void push_float(double p_float)
Expand Down Expand Up @@ -922,6 +980,32 @@ class EidosValue_Object final : public EidosValue
EidosValue_Object *reserve(size_t p_reserved_size); // as in std::vector
EidosValue_Object *resize_no_initialize(size_t p_new_size); // does not zero-initialize, unlike std::vector!
EidosValue_Object *resize_no_initialize_RR(size_t p_new_size); // doesn't zero-initialize even for the RR case (set_object_element_no_check_RR may not be used, use set_object_element_no_check_no_previous_RR)

//inline void resize_by_expanding_no_initialize(size_t p_new_size)
// not implemented: would, like EidosValue_Object::resize_no_initialize(),
// zero out new slots in the RR case to avoid having pointers in a bad state

inline void resize_by_expanding_no_initialize_RR(size_t p_new_size)
{
	// Sets the element count to exactly p_new_size.  When the current capacity is too small, the capacity is
	// grown geometrically (a floor of 16, then doubled until it fits) so that a series of repeated resizes
	// does not trigger a realloc every time.
	//
	// Unlike a non-_RR variant would, this version does not zero out the new entries, even in the RR case;
	// callers must fill them with set_object_element_no_check_no_previous_RR() after this call.
	WILL_MODIFY(this);
	
	if (capacity_ < p_new_size)
	{
		size_t grown_capacity = capacity_;
		
		if (capacity_ < 16)
			grown_capacity = 16;
		
		while (grown_capacity < p_new_size)
			grown_capacity *= 2;
		
		reserve(grown_capacity);
	}
	
	// whatever capacity we ended up with, the size becomes exactly what was requested
	count_ = p_new_size;
}

void expand(void); // expand to fit (at least) one new value
void erase_index(size_t p_index); // a weak substitute for erase()

Expand Down

0 comments on commit 602dad1

Please sign in to comment.