Skip to content

Commit 0c4ca8f

Browse files
authored
[25-3-1] Fix snapshot cleanup on ModifyScheme failures during vector index build (#27454) (#27652)
2 parents 901222f + 5ee1ca3 commit 0c4ca8f

File tree

4 files changed

+117
-2
lines changed

4 files changed

+117
-2
lines changed

ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2538,6 +2538,23 @@ struct TSchemeShard::TIndexBuilder::TTxReplyModify: public TSchemeShard::TIndexB
25382538
case TIndexBuildInfo::EState::DropBuild:
25392539
case TIndexBuildInfo::EState::CreateBuild:
25402540
case TIndexBuildInfo::EState::LockBuild:
2541+
{
2542+
Y_ENSURE(txId == buildInfo.ApplyTxId);
2543+
2544+
if (record.GetStatus() != NKikimrScheme::StatusAccepted &&
2545+
record.GetStatus() != NKikimrScheme::StatusAlreadyExists) {
2546+
// Otherwise we won't cancel the index build correctly
2547+
buildInfo.ApplyTxId = {};
2548+
buildInfo.ApplyTxStatus = NKikimrScheme::StatusSuccess;
2549+
buildInfo.ApplyTxDone = false;
2550+
} else {
2551+
buildInfo.ApplyTxStatus = record.GetStatus();
2552+
}
2553+
Self->PersistBuildIndexApplyTxStatus(db, buildInfo);
2554+
2555+
ifErrorMoveTo(TIndexBuildInfo::EState::Rejection_Applying);
2556+
break;
2557+
}
25412558
case TIndexBuildInfo::EState::Applying:
25422559
case TIndexBuildInfo::EState::Rejection_Applying:
25432560
{

ydb/core/tx/schemeshard/ut_helpers/helpers.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2842,7 +2842,7 @@ namespace NSchemeShardUT_Private {
28422842
return WaitNextValResult(runtime, sender, expectedStatus);
28432843
}
28442844

2845-
NKikimrMiniKQL::TResult ReadTable(TTestActorRuntime& runtime, ui64 tabletId,
2845+
NKikimrMiniKQL::TResult ReadSystemTable(TTestActorRuntime& runtime, ui64 tabletId,
28462846
const TString& table, const TVector<TString>& pk, const TVector<TString>& columns,
28472847
const TString& rangeFlags)
28482848
{
@@ -2857,7 +2857,7 @@ namespace NSchemeShardUT_Private {
28572857
NKikimrProto::EReplyStatus status = LocalMiniKQL(runtime, tabletId, Sprintf(R"((
28582858
(let range '(%s%s))
28592859
(let columns '(%s))
2860-
(let result (SelectRange '__user__%s range columns '()))
2860+
(let result (SelectRange '%s range columns '()))
28612861
(return (AsList (SetResult 'Result result) ))
28622862
))", rangeFlags.data(), keyFmt.data(), columnsFmt.data(), table.data()), result, error);
28632863
UNIT_ASSERT_VALUES_EQUAL_C(status, NKikimrProto::EReplyStatus::OK, error);
@@ -2866,6 +2866,13 @@ namespace NSchemeShardUT_Private {
28662866
return result;
28672867
}
28682868

2869+
// Convenience wrapper around ReadSystemTable: prefixes `table` with "__user__"
// and forwards every other argument unchanged, returning its result as-is.
NKikimrMiniKQL::TResult ReadTable(TTestActorRuntime& runtime, ui64 tabletId,
2870+
const TString& table, const TVector<TString>& pk, const TVector<TString>& columns,
2871+
const TString& rangeFlags)
2872+
{
2873+
return ReadSystemTable(runtime, tabletId, "__user__"+table, pk, columns, rangeFlags);
2874+
}
2875+
28692876
ui32 CountRows(TTestActorRuntime& runtime, ui64 schemeshardId, const TString& table) {
28702877
auto tableDesc = DescribePath(runtime, schemeshardId, table, true, false, true);
28712878
const auto& pathDesc = tableDesc.GetPathDescription();

ydb/core/tx/schemeshard/ut_helpers/helpers.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,8 @@ namespace NSchemeShardUT_Private {
715715
TTestActorRuntime& runtime, const TString& path,
716716
Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS);
717717

718+
// Reads from `table` on tablet `tabletId` via a local MiniKQL SelectRange,
// using the table name exactly as given (no prefix is added).
// `rangeFlags` defaults to an empty string.
NKikimrMiniKQL::TResult ReadSystemTable(TTestActorRuntime& runtime, ui64 tabletId,
719+
const TString& table, const TVector<TString>& pk, const TVector<TString>& columns, const TString& rangeFlags = "");
718720
// Same as ReadSystemTable, but the definition prepends "__user__" to `table`
// before delegating to it.
NKikimrMiniKQL::TResult ReadTable(TTestActorRuntime& runtime, ui64 tabletId,
719721
const TString& table, const TVector<TString>& pk, const TVector<TString>& columns, const TString& rangeFlags = "");
720722

ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <ydb/public/lib/deprecated/kicli/kicli.h>
12
#include <ydb/core/base/table_index.h>
23
#include <ydb/core/protos/schemeshard/operations.pb.h>
34
#include <ydb/core/tx/schemeshard/ut_helpers/helpers.h>
@@ -1577,4 +1578,92 @@ Y_UNIT_TEST_SUITE(VectorIndexBuildTest) {
15771578
UNIT_ASSERT_STRING_CONTAINS(buildIndexOperation.DebugString(), "Processed: UploadRows: 0 UploadBytes: 0 ReadRows: 0 ReadBytes: 0");
15781579
}
15791580
}
1581+
1582+
// Regression test for snapshot cleanup when a ModifyScheme proposal issued
// during a vector index build is rejected. It rewrites the impl-table creation
// request so that it asks for more partitions than MaxShardsInPath, then checks:
//   1. the build ends in STATE_REJECTED and reports the partition-count error,
//   2. the SchemeShard SnapshotTables table holds no rows afterwards,
//   3. a second build of the same index on the same table reaches STATE_DONE.
Y_UNIT_TEST(CreateBuildProposeReject) {
1583+
TTestBasicRuntime runtime;
1584+
TTestEnv env(runtime);
1585+
ui64 txId = 100;
1586+
1587+
runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE);
1588+
runtime.SetLogPriority(NKikimrServices::BUILD_INDEX, NLog::PRI_TRACE);
1589+
1590+
TestCreateTable(runtime, ++txId, "/MyRoot", R"(
1591+
Name: "vectors"
1592+
Columns { Name: "id" Type: "Uint64" }
1593+
Columns { Name: "embedding" Type: "String" }
1594+
KeyColumnNames: [ "id" ]
1595+
)");
1596+
env.TestWaitNotification(runtime, txId);
1597+
1598+
// NOTE(review): globalIndexSettings and kmeansTreeSettings are populated here
// but are not referenced again anywhere in this test - confirm whether they
// are still needed.
NYdb::NTable::TGlobalIndexSettings globalIndexSettings;
1599+
1600+
std::unique_ptr<NYdb::NTable::TKMeansTreeSettings> kmeansTreeSettings;
1601+
{
1602+
Ydb::Table::KMeansTreeSettings proto;
1603+
UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(R"(
1604+
settings {
1605+
metric: DISTANCE_COSINE
1606+
vector_type: VECTOR_TYPE_FLOAT
1607+
vector_dimension: 1024
1608+
}
1609+
levels: 5
1610+
clusters: 4
1611+
)", &proto));
1612+
using T = NYdb::NTable::TKMeansTreeSettings;
1613+
kmeansTreeSettings = std::make_unique<T>(T::FromProto(proto));
1614+
}
1615+
1616+
// Fetch the MaxShardsInPath scheme limit so the rewritten request below can
// exceed it by exactly one.
const auto maxShards = DescribePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot/vectors")
1617+
.GetPathDescription().GetDomainDescription().GetSchemeLimits().GetMaxShardsInPath();
1618+
1619+
// The filter always returns false and is used only to mutate matching
// TEvModifySchemeTransaction events in place.
// NOTE(review): presumably a false return lets the event through unblocked -
// confirm against TBlockEvents.
TBlockEvents<TEvSchemeShard::TEvModifySchemeTransaction> blocker(runtime, [&](auto& ev) {
1620+
auto& modifyScheme = *ev->Get()->Record.MutableTransaction(0);
1621+
if (modifyScheme.GetOperationType() == NKikimrSchemeOp::ESchemeOpInitiateBuildIndexImplTable) {
1622+
auto& op = *modifyScheme.MutableCreateTable();
1623+
// make shard count exceed the limit to fail the operation
1624+
op.SetUniformPartitionsCount(maxShards+1);
1625+
}
1626+
return false;
1627+
});
1628+
1629+
const ui64 buildIndexTx = ++txId;
1630+
AsyncBuildVectorIndex(runtime, buildIndexTx, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/vectors", "index1", {"embedding"});
1631+
1632+
env.TestWaitNotification(runtime, buildIndexTx);
1633+
1634+
// First build: must be rejected, and its description must carry the error text.
{
1635+
auto buildIndexOperation = TestGetBuildIndex(runtime, TTestTxConfig::SchemeShard, "/MyRoot", buildIndexTx);
1636+
Cout << "BuildIndex 1 " << buildIndexOperation.DebugString() << Endl;
1637+
UNIT_ASSERT_VALUES_EQUAL_C(
1638+
buildIndexOperation.GetIndexBuild().GetState(), Ydb::Table::IndexBuildState::STATE_REJECTED,
1639+
buildIndexOperation.DebugString()
1640+
);
1641+
UNIT_ASSERT_STRING_CONTAINS(buildIndexOperation.DebugString(), "Invalid partition count specified");
1642+
}
1643+
1644+
// NOTE(review): presumably this ends the interception so the second build
// below is proposed unmodified - confirm Stop()/Unblock() semantics.
blocker.Stop().Unblock();
1645+
1646+
// Read the SnapshotTables table by its exact name (ReadSystemTable adds no
// prefix); any remaining row fails the test as a leaked snapshot.
{
1647+
auto result = ReadSystemTable(runtime, TTestTxConfig::SchemeShard, "SnapshotTables", {"Id", "TableOwnerId", "TableLocalId"}, {"Id"});
1648+
auto value = NClient::TValue::Create(result);
1649+
auto rowCount = value["Result"]["List"].Size();
1650+
UNIT_ASSERT_VALUES_EQUAL_C(rowCount, 0, "Snapshot is not removed after rejecting index build");
1651+
}
1652+
1653+
// The next index build should succeed
1654+
1655+
const ui64 buildIndexTx2 = ++txId;
1656+
AsyncBuildVectorIndex(runtime, buildIndexTx2, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/vectors", "index1", {"embedding"});
1657+
env.TestWaitNotification(runtime, buildIndexTx2);
1658+
1659+
// Second build: same table, index name and column as the first; must finish.
{
1660+
auto buildIndexOperation = TestGetBuildIndex(runtime, TTestTxConfig::SchemeShard, "/MyRoot", buildIndexTx2);
1661+
Cout << "BuildIndex 2 " << buildIndexOperation.DebugString() << Endl;
1662+
UNIT_ASSERT_VALUES_EQUAL_C(
1663+
buildIndexOperation.GetIndexBuild().GetState(), Ydb::Table::IndexBuildState::STATE_DONE,
1664+
buildIndexOperation.DebugString()
1665+
);
1666+
}
1667+
1668+
}
15801669
}

0 commit comments

Comments
 (0)