@@ -18,10 +18,11 @@ class TBaseParserFixture : public TBaseFixture {
1818 using TPtr = TIntrusivePtr<TParsedDataConsumer>;
1919
2020 public:
// Constructs a consumer tied to the owning fixture `self`, the column schema
// and the per-batch `callback`.
// `checkOffsets` (default true, so existing callers are unaffected) is stored
// in CheckOffsets; the consumer only runs its per-offset sequence assertions
// when this flag is set.
21- TParsedDataConsumer (const TBaseParserFixture& self, const TVector<TSchemaColumn>& columns, TCallback callback)
21+ TParsedDataConsumer (const TBaseParserFixture& self, const TVector<TSchemaColumn>& columns, TCallback callback, bool checkOffsets = true )
2222 : Self(self)
2323 , Columns(columns)
2424 , Callback(callback)
25+ , CheckOffsets(checkOffsets)
2526 {}
2627
2728 void ExpectColumnError (ui64 columnId, TStatusCode statusCode, const TString& message) {
@@ -55,9 +56,11 @@ class TBaseParserFixture : public TBaseFixture {
5556
5657 const auto & offsets = Self.Parser ->GetOffsets ();
5758 UNIT_ASSERT_VALUES_EQUAL_C (offsets.size (), numberRows, " Unexpected offsets size" );
58- for (const ui64 offset : offsets) {
59- UNIT_ASSERT_VALUES_EQUAL_C (offset, CurrentOffset, " Unexpected offset" );
60- CurrentOffset++;
59+ if (CheckOffsets) {
60+ for (const ui64 offset : offsets) {
61+ UNIT_ASSERT_VALUES_EQUAL_C (offset, CurrentOffset, " Unexpected offset" );
62+ CurrentOffset++;
63+ }
6164 }
6265
6366 TVector<std::span<NYql::NUdf::TUnboxedValue>> result (Columns.size ());
@@ -80,6 +83,7 @@ class TBaseParserFixture : public TBaseFixture {
8083 const TBaseParserFixture& Self;
8184 const TVector<TSchemaColumn> Columns;
8285 const TCallback Callback;
86+ const bool CheckOffsets;
8387
8488 std::optional<std::pair<TStatusCode, TString>> ExpectedCommonError;
8589 std::unordered_map<ui64, std::pair<TStatusCode, TString>> ExpectedErrors;
@@ -97,8 +101,8 @@ class TBaseParserFixture : public TBaseFixture {
97101 }
98102
99103public:
100- TStatus MakeParser (TVector<TSchemaColumn> columns, TCallback callback) {
101- ParserHandler = MakeIntrusive<TParsedDataConsumer>(*this , columns, callback);
104+ TStatus MakeParser (TVector<TSchemaColumn> columns, TCallback callback, bool checkOffsets = true ) {
105+ ParserHandler = MakeIntrusive<TParsedDataConsumer>(*this , columns, callback, checkOffsets );
102106
103107 auto parserStatus = CreateParser ();
104108 if (parserStatus.IsFail ()) {
@@ -109,12 +113,12 @@ class TBaseParserFixture : public TBaseFixture {
109113 return TStatus::Success ();
110114 }
111115
// Convenience overload: creates one TSchemaColumn per entry of `columnNames`,
// all sharing `columnType` as their TypeYson, then delegates to the
// TVector<TSchemaColumn> overload.
// `checkOffsets` (default true) is passed through unchanged to that overload.
112- TStatus MakeParser (TVector<TString> columnNames, TString columnType, TCallback callback) {
116+ TStatus MakeParser (TVector<TString> columnNames, TString columnType, TCallback callback, bool checkOffsets = true ) {
113117 TVector<TSchemaColumn> columns;
114118 for (const auto & columnName : columnNames) {
115119 columns.push_back ({.Name = columnName, .TypeYson = columnType});
116120 }
117- return MakeParser (columns, callback);
121+ return MakeParser (columns, callback, checkOffsets );
118122 }
119123
120124 TStatus MakeParser (TVector<TString> columnNames, TString columnType) {
@@ -151,17 +155,19 @@ class TBaseParserFixture : public TBaseFixture {
151155 ui64 ExpectedBatches = 0 ;
152156};
153157
154- class TJsonParserFixture : public TBaseParserFixture {
158+ template <bool SkipErrors = false >
159+ class TJsonParserBaseFixture : public TBaseParserFixture {
155160 using TBase = TBaseParserFixture;
156161
157162public:
// Default constructor: initializes the base fixture and fills Config with the
// function registry, batch size, latency limit and buffer cell count.
// The class template parameter SkipErrors is copied into Config.SkipErrors, so
// the error-skipping mode is selected at compile time by the fixture type.
158- TJsonParserFixture ()
163+ TJsonParserBaseFixture ()
159164 : TBase()
160165 , Config({
161166 .FunctionRegistry = FunctionRegistry,
162167 .BatchSize = 1_MB,
163168 .LatencyLimit = TDuration::Zero (),
164- .BufferCellCount = 1000
169+ .BufferCellCount = 1000 ,
170+ .SkipErrors = SkipErrors
165171 })
166172 {}
167173
@@ -174,6 +180,9 @@ class TJsonParserFixture : public TBaseParserFixture {
174180 TJsonParserConfig Config;
175181};
176182
// Concrete fixture aliases: the first keeps the original name with
// SkipErrors = false, the second instantiates the fixture with SkipErrors = true.
183+ using TJsonParserFixture = TJsonParserBaseFixture<false >;
184+ using TJsonParserFixtureSkipErrors = TJsonParserBaseFixture<true >;
185+
177186class TRawParserFixture : public TBaseParserFixture {
178187protected:
179188 TValueStatus<ITopicParser::TPtr> CreateParser () override {
@@ -476,6 +485,85 @@ Y_UNIT_TEST_SUITE(TestJsonParser) {
476485 CheckBatchError (R"( {"a1": "x"} {"a1": "y"})" , EStatusId::INTERNAL_ERROR, TStringBuilder () << " Failed to parse json messages, expected 1 json rows from offset " << FIRST_OFFSET + 2 << " but got 2 (expected one json row for each offset from topic API in json each row format, maybe initial data was corrupted or messages is not in json format), current data batch: {\" a1\" : \" x\" } {\" a1\" : \" y\" }" );
477486 CheckBatchError (R"( {)" , EStatusId::INTERNAL_ERROR, TStringBuilder () << " Failed to parse json messages, expected 1 json rows from offset " << FIRST_OFFSET + 3 << " but got 0 (expected one json row for each offset from topic API in json each row format, maybe initial data was corrupted or messages is not in json format), current data batch: {" );
478487 }
488+
// Baseline case for the SkipErrors = true fixture: a single message is pushed
// at FIRST_OFFSET and the callback asserts that exactly one row with two
// columns arrives, checking the string value of a1 and the optional ui64
// value of a2.
// The message also carries an "event" field that is not part of the schema.
489+ Y_UNIT_TEST_F (SkipErrors_Simple1, TJsonParserFixtureSkipErrors) {
490+ CheckSuccess (MakeParser ({{" a1" , " [DataType; String]" }, {" a2" , " [OptionalType; [DataType; Uint64]]" }}, [](ui64 numberRows, TVector<std::span<NYql::NUdf::TUnboxedValue>> result) {
491+ UNIT_ASSERT_VALUES_EQUAL (1 , numberRows);
492+ UNIT_ASSERT_VALUES_EQUAL (2 , result.size ());
493+ UNIT_ASSERT_VALUES_EQUAL (" hello1" , TString (result[0 ][0 ].AsStringRef ()));
494+ UNIT_ASSERT_VALUES_EQUAL (101 , result[1 ][0 ].GetOptionalValue ().Get <ui64>());
495+ }));
496+ PushToParser (FIRST_OFFSET, R"( {"a1": "hello1", "a2": 101, "event": "event1"})" );
497+ }
498+
// Three messages are parsed in one ParseMessages call against two String
// columns. The second message supplies a2 as a JSON number instead of a
// string; the callback asserts that only two rows are delivered and that both
// carry the same a1/a2 values.
// A single batch is expected (ExpectedBatches = 1).
499+ Y_UNIT_TEST_F (SkipErrors_StringValidation, TJsonParserFixtureSkipErrors) {
500+ ExpectedBatches = 1 ;
501+ CheckSuccess (MakeParser ({" a1" , " a2" }, " [DataType; String]" , [&](ui64 numberRows, TVector<std::span<NYql::NUdf::TUnboxedValue>> result) {
502+ UNIT_ASSERT_VALUES_EQUAL (2 , numberRows);
503+ UNIT_ASSERT_VALUES_EQUAL (2 , result.size ());
504+ for (size_t i = 0 ; i < numberRows; ++i) {
505+ UNIT_ASSERT_VALUES_EQUAL_C (" hello1" , TString (result[0 ][i].AsStringRef ()), i);
506+ UNIT_ASSERT_VALUES_EQUAL_C (" 101" , TString (result[1 ][i].AsStringRef ()), i);
507+ }
// The trailing `false` is checkOffsets: it turns off the consumer's sequential
// offset assertions. NOTE(review): presumably because dropped rows leave gaps
// in the reported offsets; confirm against the parser.
508+ }, false ));
509+
510+ Parser->ParseMessages ({
511+ GetMessage (FIRST_OFFSET, R"( {"a1": "hello1", "a2": "101", "event": "event1"})" ),
512+ GetMessage (FIRST_OFFSET + 1 , R"( {"a1": "hello1", "a2": 999, "event": "event2"})" ),
513+ GetMessage (FIRST_OFFSET + 2 , R"( {"a2": "101", "a1": "hello1", "event": "event3"})" )
514+ });
515+ }
516+
// Two messages are parsed against two String columns. The first message has
// no a2 field at all; the callback asserts that exactly one row is delivered
// and checks its a1/a2 values.
// A single batch is expected, and the consumer's sequential offset assertions
// are disabled via the trailing `false` (checkOffsets).
517+ Y_UNIT_TEST_F (SkipErrors_NoField, TJsonParserFixtureSkipErrors) {
518+ ExpectedBatches = 1 ;
519+ CheckSuccess (MakeParser ({" a1" , " a2" }, " [DataType; String]" , [&](ui64 numberRows, TVector<std::span<NYql::NUdf::TUnboxedValue>> result) {
520+ UNIT_ASSERT_VALUES_EQUAL (1 , numberRows);
521+ UNIT_ASSERT_VALUES_EQUAL (2 , result.size ());
522+ for (size_t i = 0 ; i < numberRows; ++i) {
523+ UNIT_ASSERT_VALUES_EQUAL_C (" hello1" , TString (result[0 ][i].AsStringRef ()), i);
524+ UNIT_ASSERT_VALUES_EQUAL_C (" 101" , TString (result[1 ][i].AsStringRef ()), i);
525+ }
526+ }, false ));
527+
528+ Parser->ParseMessages ({
529+ GetMessage (FIRST_OFFSET, R"( {"a1": "hello1", "event": "event1"})" ),
530+ GetMessage (FIRST_OFFSET + 1 , R"( {"a1": "hello1", "a2": "101", "event": "event2"})" )
531+ });
532+ }
533+
// Six messages are parsed in one call; several payloads are not JSON objects
// (a lone \x80 byte, a lone closing brace, bare text). The callback only
// asserts the delivered row count (2) and ignores the column values.
// A single batch is expected, and the consumer's sequential offset assertions
// are disabled via the trailing `false` (checkOffsets).
534+ Y_UNIT_TEST_F (SkipErrors_NoJson, TJsonParserFixtureSkipErrors) {
535+ ExpectedBatches = 1 ;
536+ CheckSuccess (MakeParser ({" a1" , " a2" }, " [DataType; String]" , [&](ui64 numberRows, TVector<std::span<NYql::NUdf::TUnboxedValue>> /* result*/ ) {
537+ UNIT_ASSERT_VALUES_EQUAL (2 , numberRows);
538+ }, false ));
539+
540+ Parser->ParseMessages ({
541+ GetMessage (FIRST_OFFSET, R"( {"a1": "hello0", "a2": "100"})" ),
542+ GetMessage (FIRST_OFFSET + 1 , " \x80 " ),
543+ GetMessage (FIRST_OFFSET + 2 , R"( })" ),
544+ GetMessage (FIRST_OFFSET + 3 , R"( lalala)" ),
// NOTE(review): the next payload repeats the key "a2" and contains no "a1",
// unlike the first message. Confirm whether the duplicate key is intentional
// or a typo for "a1", since the test expects two rows in total.
545+ GetMessage (FIRST_OFFSET + 4 , R"( {"a2": "hello2", "a2": "102"})" ),
546+ GetMessage (FIRST_OFFSET + 5 , " \x80 " ),
547+ });
548+ }
549+
// Both columns are declared as optional String. Three messages are parsed:
// the second supplies a2 as a JSON number, the third has no a1 field.
// The callback asserts that two rows are delivered: row 0 has both values
// set, while row 1 has an empty a1 and a populated a2.
// A single batch is expected, and the consumer's sequential offset assertions
// are disabled via the trailing `false` (checkOffsets).
550+ Y_UNIT_TEST_F (SkipErrors_Optional, TJsonParserFixtureSkipErrors) {
551+ ExpectedBatches = 1 ;
552+ CheckSuccess (MakeParser ({{" a1" , " [OptionalType; [DataType; String]]" }, {" a2" , " [OptionalType; [DataType; String]]" }}, [&](ui64 numberRows, TVector<std::span<NYql::NUdf::TUnboxedValue>> result) {
553+ UNIT_ASSERT_VALUES_EQUAL (2 , numberRows);
554+ UNIT_ASSERT_VALUES_EQUAL (2 , result.size ());
555+ UNIT_ASSERT_VALUES_EQUAL (" hello0" , TString (result[0 ][0 ].AsStringRef ()));
556+ UNIT_ASSERT_VALUES_EQUAL (" 100" , TString (result[1 ][0 ].AsStringRef ()));
// Row 1, column a1 is asserted to be empty (falsy unboxed value).
557+ UNIT_ASSERT (!result[0 ][1 ]);
558+ UNIT_ASSERT_VALUES_EQUAL (" 102" , TString (result[1 ][1 ].AsStringRef ()));
559+ }, false ));
560+
561+ Parser->ParseMessages ({
562+ GetMessage (FIRST_OFFSET, R"( {"a1": "hello0", "a2": "100"})" ),
563+ GetMessage (FIRST_OFFSET + 1 , R"( {"a1": "hello1", "a2": 101})" ),
564+ GetMessage (FIRST_OFFSET + 2 , R"( {"a2": "102"})" )
565+ });
566+ }
479567}
480568
481569Y_UNIT_TEST_SUITE (TestRawParser) {
0 commit comments