-
Notifications
You must be signed in to change notification settings - Fork 92
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0359e1b
commit 9c9d2e5
Showing
7 changed files
with
455 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
115 changes: 115 additions & 0 deletions
115
extensions/standard-processors/processors/SegmentContent.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
/** | ||
* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "SegmentContent.h" | ||
|
||
#include "core/FlowFile.h" | ||
#include "core/ProcessContext.h" | ||
#include "core/ProcessSession.h" | ||
#include "core/Resource.h" | ||
#include "range/v3/view/split.hpp" | ||
#include "utils/ProcessorConfigUtils.h" | ||
|
||
namespace org::apache::nifi::minifi::processors { | ||
|
||
// Upper bound, in bytes, on a single read from the incoming content stream while segmenting.
constexpr size_t BUFFER_TARGET_SIZE{1024};
|
||
void SegmentContent::initialize() { | ||
setSupportedProperties(Properties); | ||
setSupportedRelationships(Relationships); | ||
} | ||
|
||
// Intentionally empty: the only property (Segment Size) is read per FlowFile in onTrigger,
// so nothing is cached at schedule time.
void SegmentContent::onSchedule(core::ProcessContext& /*context*/, core::ProcessSessionFactory& /*session_factory*/) {
}
|
||
namespace { | ||
// Creates a new, empty FlowFile in the given session that will hold one segment.
// Throws Exception(PROCESSOR_EXCEPTION) if the session returns a null FlowFile.
std::shared_ptr<core::FlowFile> createSegment(core::ProcessSession& session) {
  if (auto segment = session.create()) {
    return segment;
  }
  throw Exception(PROCESSOR_EXCEPTION, "Couldn't create FlowFile");
}
|
||
// Stamps every segment with the fragment.* / segment.original.filename attributes and routes it to Segments.
//  - session:  session used to transfer the segments
//  - splits:   the segments, in content order
//  - original: the FlowFile the segments were cut from; only its filename attribute is read
// All segments share one freshly generated identifier; indices are one-based.
void updateSplitAttributesAndTransfer(core::ProcessSession& session, const std::vector<std::shared_ptr<core::FlowFile>>& splits, const core::FlowFile& original) {
  const std::string shared_fragment_id = utils::IdGenerator::getIdGenerator()->generate().to_string();
  const std::string total_fragments = std::to_string(splits.size());
  const std::string parent_filename = original.getAttribute(core::SpecialFlowAttribute::FILENAME).value_or("");

  size_t one_based_index = 0;
  for (const auto& segment : splits) {
    ++one_based_index;
    segment->setAttribute(SegmentContent::FragmentCountOutputAttribute.name, total_fragments);
    segment->setAttribute(SegmentContent::FragmentIndexOutputAttribute.name, std::to_string(one_based_index));
    segment->setAttribute(SegmentContent::FragmentIdentifierOutputAttribute.name, shared_fragment_id);
    segment->setAttribute(SegmentContent::SegmentOriginalFilenameOutputAttribute.name, parent_filename);
    session.transfer(segment, SegmentContent::Segments);
  }
}
} // namespace | ||
|
||
/**
 * Cuts the content of the incoming FlowFile into consecutive segments of at most "Segment Size" bytes.
 *
 * Each segment becomes its own FlowFile routed to Segments (with the fragment.* attributes set), and the
 * incoming FlowFile is routed to Original. An empty input still yields exactly one (empty) segment.
 * Yields when there is no incoming FlowFile.
 *
 * @param context used to evaluate the Segment Size property against the incoming FlowFile
 * @param session used to fetch the input, create the segments and transfer the results
 * @throws Exception (PROCESSOR_EXCEPTION) if Segment Size is missing, unparsable or zero, if the content
 *         stream cannot be opened or read, or if a segment FlowFile cannot be created
 */
void SegmentContent::onTrigger(core::ProcessContext& context, core::ProcessSession& session) {
  const auto original = session.get();
  if (!original) {
    context.yield();
    return;
  }

  size_t max_segment_size{};
  const auto segment_size_str = context.getProperty(SegmentSize, original.get());
  // A size of 0 must be rejected as well: it would make every read request 0 bytes, so the loop below
  // would stop immediately and emit a single empty segment, silently dropping the content.
  if (!segment_size_str || !core::DataSizeValue::StringToInt(*segment_size_str, max_segment_size) || max_segment_size == 0) {
    throw Exception(PROCESSOR_EXCEPTION, fmt::format("Invalid Segment Size {}", segment_size_str));
  }

  const auto ff_content_stream = session.getFlowFileContentStream(*original);
  if (!ff_content_stream) {
    throw Exception(PROCESSOR_EXCEPTION, fmt::format("Couldn't access the ContentStream of {}", original->getUUID().to_string()));
  }

  std::vector<std::byte> buffer;
  std::vector<std::shared_ptr<core::FlowFile>> segments{};

  // The first segment is created eagerly so that an empty input still produces one segment.
  segments.push_back(createSegment(session));
  size_t current_segment_size = 0;
  // Follow-up segments are created lazily, only once more data has actually been read; this avoids
  // a trailing empty segment when the content length is an exact multiple of the segment size.
  bool needs_new_segment = false;
  while (true) {
    // Never request more than what still fits into the current segment.
    const size_t segment_remaining_size = max_segment_size - current_segment_size;
    const size_t buffer_size = std::min(BUFFER_TARGET_SIZE, segment_remaining_size);
    buffer.resize(buffer_size);
    const size_t ret = ff_content_stream->read(buffer);
    if (io::isError(ret)) {
      // Abort instead of transferring: the segments built so far are truncated, and routing them
      // (with fragment.count set) would present a partial split as a complete one.
      logger_->log_error("Error while reading from {}", original->getUUID().to_string());
      throw Exception(PROCESSOR_EXCEPTION, fmt::format("Error while reading from {}", original->getUUID().to_string()));
    }
    if (ret == 0) {  // No more data
      break;
    }
    if (needs_new_segment) {
      segments.push_back(createSegment(session));
      needs_new_segment = false;
    }
    buffer.resize(ret);
    session.appendBuffer(segments.back(), buffer);
    current_segment_size += ret;
    if (current_segment_size >= max_segment_size) {  // Defensive >= (read shouldn't read larger than requested size)
      needs_new_segment = true;
      current_segment_size = 0;
    }
  }

  updateSplitAttributesAndTransfer(session, segments, *original);
  session.transfer(original, Original);
}
|
||
// Registers SegmentContent as a Processor resource; presumably this is what lets the framework instantiate it by class name - see core/Resource.h to confirm. | ||
REGISTER_RESOURCE(SegmentContent, Processor); | ||
|
||
} // namespace org::apache::nifi::minifi::processors |
78 changes: 78 additions & 0 deletions
78
extensions/standard-processors/processors/SegmentContent.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
/** | ||
* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#pragma once | ||
|
||
#include <memory> | ||
#include <optional> | ||
#include <string_view> | ||
#include <utility> | ||
|
||
#include "FlowFileRecord.h" | ||
#include "core/ProcessSession.h" | ||
#include "core/Processor.h" | ||
#include "core/PropertyDefinition.h" | ||
#include "core/PropertyDefinitionBuilder.h" | ||
#include "core/RelationshipDefinition.h" | ||
#include "utils/Export.h" | ||
|
||
namespace org::apache::nifi::minifi::processors { | ||
|
||
class SegmentContent final : public core::Processor { | ||
public: | ||
explicit SegmentContent(const std::string_view name, const utils::Identifier& uuid = {}) : Processor(name, uuid) {} | ||
|
||
EXTENSIONAPI static constexpr auto Description = "Segments a FlowFile into multiple smaller segments on byte boundaries."; | ||
|
||
EXTENSIONAPI static constexpr auto SegmentSize = | ||
core::PropertyDefinitionBuilder<2>::createProperty("Segment Size") | ||
.withDescription("The maximum data size in bytes for each segment") | ||
.isRequired(true) | ||
.supportsExpressionLanguage(true) | ||
.build(); | ||
|
||
EXTENSIONAPI static constexpr auto Properties = std::to_array<core::PropertyReference>({SegmentSize}); | ||
|
||
EXTENSIONAPI static constexpr auto Segments = core::RelationshipDefinition{ | ||
"segments", "All segments will be sent to this relationship. If the file was small enough that it was not segmented, a copy of the original is sent to this relationship as well as original"}; | ||
EXTENSIONAPI static constexpr auto Original = core::RelationshipDefinition{"original", "The original FlowFile will be sent to this relationship"}; | ||
EXTENSIONAPI static constexpr auto Relationships = std::array{Original, Segments}; | ||
|
||
EXTENSIONAPI static constexpr auto FragmentIdentifierOutputAttribute = | ||
core::OutputAttributeDefinition<0>{"fragment.identifier", {}, "All segments produced from the same parent FlowFile will have the same randomly generated UUID added for this attribute"}; | ||
EXTENSIONAPI static constexpr auto FragmentIndexOutputAttribute = | ||
core::OutputAttributeDefinition<0>{"fragment.index", {}, "A one-up number that indicates the ordering of the segments that were created from a single parent FlowFile"}; | ||
EXTENSIONAPI static constexpr auto FragmentCountOutputAttribute = core::OutputAttributeDefinition<0>{"fragment.count", {}, "The number of segments generated from the parent FlowFile"}; | ||
EXTENSIONAPI static constexpr auto SegmentOriginalFilenameOutputAttribute = core::OutputAttributeDefinition<0>{"segment.original.filename", {}, "The filename of the parent FlowFile"}; | ||
EXTENSIONAPI static constexpr auto OutputAttributes = | ||
std::to_array<core::OutputAttributeReference>({FragmentIdentifierOutputAttribute, FragmentIndexOutputAttribute, FragmentCountOutputAttribute, SegmentOriginalFilenameOutputAttribute}); | ||
|
||
EXTENSIONAPI static constexpr bool SupportsDynamicProperties = false; | ||
EXTENSIONAPI static constexpr bool SupportsDynamicRelationships = false; | ||
EXTENSIONAPI static constexpr auto InputRequirement = core::annotation::Input::INPUT_REQUIRED; | ||
EXTENSIONAPI static constexpr bool IsSingleThreaded = false; | ||
ADD_COMMON_VIRTUAL_FUNCTIONS_FOR_PROCESSORS | ||
|
||
void onSchedule(core::ProcessContext& context, core::ProcessSessionFactory& session_factory) override; | ||
void onTrigger(core::ProcessContext& context, core::ProcessSession& session) override; | ||
void initialize() override; | ||
|
||
private: | ||
std::shared_ptr<core::logging::Logger> logger_ = core::logging::LoggerFactory<SegmentContent>::getLogger(uuid_); | ||
}; | ||
|
||
} // namespace org::apache::nifi::minifi::processors |
Oops, something went wrong.