@@ -48,7 +48,7 @@ namespace {
4848 return Ydb::Table::VectorIndexSettings::METRIC_UNSPECIFIED;
4949 }
5050 };
51-
51+
5252 Ydb::Table::VectorIndexSettings_Metric ParseSimilarity (const TString& similarity_, TString& error) {
5353 const TString similarity = to_lower (similarity_);
5454 if (similarity == " cosine" )
@@ -60,7 +60,7 @@ namespace {
6060 return Ydb::Table::VectorIndexSettings::METRIC_UNSPECIFIED;
6161 }
6262 };
63-
63+
6464 Ydb::Table::VectorIndexSettings_VectorType ParseVectorType (const TString& vectorType_, TString& error) {
6565 const TString vectorType = to_lower (vectorType_);
6666 if (vectorType == " float" )
@@ -491,6 +491,62 @@ std::unique_ptr<IClusters> CreateClusters(const Ydb::Table::VectorIndexSettings&
491491 }
492492}
493493
494+ std::unique_ptr<IClusters> CreateClustersAutoDetect (Ydb::Table::VectorIndexSettings settings, const TStringBuf& targetVector, ui32 maxRounds, TString& error) {
495+ if (targetVector.empty ()) {
496+ error = " Target vector is empty" ;
497+ return nullptr ;
498+ }
499+
500+ const auto setLinearType = [&](Ydb::Table::VectorIndexSettings::VectorType type, size_t elementSize, TStringBuf typeName) -> bool {
501+ if (targetVector.size () < HeaderLen + elementSize) {
502+ error = TStringBuilder () << " Target vector too short for " << typeName << " type" ;
503+ return false ;
504+ }
505+ settings.set_vector_type (type);
506+ settings.set_vector_dimension ((targetVector.size () - HeaderLen) / elementSize);
507+ return true ;
508+ };
509+
510+ const ui8 formatByte = static_cast <ui8>(targetVector.back ());
511+ switch (formatByte) {
512+ case EFormat::FloatVector:
513+ if (!setLinearType (Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT, sizeof (float ), " float" )) {
514+ return nullptr ;
515+ }
516+ break ;
517+ case EFormat::Uint8Vector:
518+ if (!setLinearType (Ydb::Table::VectorIndexSettings::VECTOR_TYPE_UINT8, sizeof (ui8), " uint8" )) {
519+ return nullptr ;
520+ }
521+ break ;
522+ case EFormat::Int8Vector:
523+ if (!setLinearType (Ydb::Table::VectorIndexSettings::VECTOR_TYPE_INT8, sizeof (i8 ), " int8" )) {
524+ return nullptr ;
525+ }
526+ break ;
527+ case EFormat::BitVector: {
528+ if (targetVector.size () < HeaderLen + 2 ) {
529+ error = " Target vector too short for bit type" ;
530+ return nullptr ;
531+ }
532+ const ui8 paddingBits = static_cast <ui8>(targetVector[targetVector.size () - 2 ]);
533+ const size_t payloadBits = (targetVector.size () - HeaderLen - 1 ) * 8 ;
534+ if (payloadBits < paddingBits) {
535+ error = " Invalid bit vector padding" ;
536+ return nullptr ;
537+ }
538+ settings.set_vector_type (Ydb::Table::VectorIndexSettings::VECTOR_TYPE_BIT);
539+ settings.set_vector_dimension (payloadBits - paddingBits);
540+ break ;
541+ }
542+ default :
543+ error = TStringBuilder () << " Unknown vector format byte: " << static_cast <int >(formatByte);
544+ return nullptr ;
545+ }
546+
547+ return CreateClusters (settings, maxRounds, error);
548+ }
549+
494550bool ValidateSettings (const Ydb::Table::KMeansTreeSettings& settings, TString& error) {
495551 error = " " ;
496552
@@ -503,16 +559,16 @@ bool ValidateSettings(const Ydb::Table::KMeansTreeSettings& settings, TString& e
503559 return false ;
504560 }
505561
506- if (!ValidateSettingInRange (" levels" ,
507- settings.has_levels () ? std::optional<ui64>(settings.levels ()) : std::nullopt ,
562+ if (!ValidateSettingInRange (" levels" ,
563+ settings.has_levels () ? std::optional<ui64>(settings.levels ()) : std::nullopt ,
508564 MinLevels, MaxLevels,
509565 error))
510566 {
511567 return false ;
512568 }
513569
514- if (!ValidateSettingInRange (" clusters" ,
515- settings.has_clusters () ? std::optional<ui64>(settings.clusters ()) : std::nullopt ,
570+ if (!ValidateSettingInRange (" clusters" ,
571+ settings.has_clusters () ? std::optional<ui64>(settings.clusters ()) : std::nullopt ,
516572 MinClusters, MaxClusters,
517573 error))
518574 {
@@ -529,7 +585,7 @@ bool ValidateSettings(const Ydb::Table::KMeansTreeSettings& settings, TString& e
529585 }
530586
531587 if (settings.settings ().vector_dimension () * settings.clusters () > MaxVectorDimensionMultiplyClusters) {
532- error = TStringBuilder () << " Invalid vector_dimension*clusters: " << settings.settings ().vector_dimension () << " *" << settings.clusters ()
588+ error = TStringBuilder () << " Invalid vector_dimension*clusters: " << settings.settings ().vector_dimension () << " *" << settings.clusters ()
533589 << " should be less than " << MaxVectorDimensionMultiplyClusters;
534590 return false ;
535591 }
@@ -557,8 +613,8 @@ bool ValidateSettings(const Ydb::Table::VectorIndexSettings& settings, TString&
557613 return false ;
558614 }
559615
560- if (!ValidateSettingInRange (" vector_dimension" ,
561- settings.has_vector_dimension () ? std::optional<ui64>(settings.vector_dimension ()) : std::nullopt ,
616+ if (!ValidateSettingInRange (" vector_dimension" ,
617+ settings.has_vector_dimension () ? std::optional<ui64>(settings.vector_dimension ()) : std::nullopt ,
562618 MinVectorDimension, MaxVectorDimension,
563619 error))
564620 {
0 commit comments