@@ -142,8 +142,19 @@ def update_chunk_size_input() -> None:
142142#############################################################################
143143# Helper Functions
144144#############################################################################
145- def _render_embedding_configuration (embed_models_enabled : dict , embed_request : DatabaseVectorStorage ) -> None :
146- """Render the embedding configuration section"""
145+ def _render_embedding_configuration (
146+ embed_models_enabled : dict ,
147+ embed_request : DatabaseVectorStorage ,
148+ show_vs_config : bool = True
149+ ) -> None :
150+ """Render the embedding configuration section
151+
152+ Args:
153+ embed_models_enabled: Dictionary of enabled embedding models
154+ embed_request: The database vector storage request object
155+ show_vs_config: If True, show chunk size, overlap, distance metric, and index type options.
156+ If False, these are determined by the selected existing vector store.
157+ """
147158 st .header ("Embedding Configuration" , divider = "red" )
148159 embed_request .model = st .selectbox (
149160 "Embedding models available: " ,
@@ -160,62 +171,75 @@ def _render_embedding_configuration(embed_models_enabled: dict, embed_request: D
160171 st .rerun ()
161172 st .stop ()
162173
163- chunk_size_max = embed_models_enabled [embed_request .model ]["max_chunk_size" ]
164- col1_1 , col1_2 = st .columns ([0.8 , 0.2 ])
165- with col1_1 :
166- st .slider (
167- "Chunk Size (tokens):" ,
168- min_value = 0 ,
169- max_value = chunk_size_max ,
170- value = chunk_size_max ,
171- key = "selected_chunk_size_slider" ,
172- on_change = update_chunk_size_input ,
173- help = help_text .help_dict ["chunk_size" ],
174- )
175- st .slider (
176- "Chunk Overlap (% of Chunk Size)" ,
177- min_value = 0 ,
178- max_value = 100 ,
179- value = 20 ,
180- step = 5 ,
181- key = "selected_chunk_overlap_slider" ,
182- on_change = update_chunk_overlap_input ,
183- format = "%d%%" ,
184- help = help_text .help_dict ["chunk_overlap" ],
185- )
174+ # Only show the vector store configuration options when creating a new vector store
175+ if show_vs_config :
176+ chunk_size_max = embed_models_enabled [embed_request .model ]["max_chunk_size" ]
177+ col1_1 , col1_2 = st .columns ([0.8 , 0.2 ])
178+ with col1_1 :
179+ st .slider (
180+ "Chunk Size (tokens):" ,
181+ min_value = 0 ,
182+ max_value = chunk_size_max ,
183+ value = chunk_size_max ,
184+ key = "selected_chunk_size_slider" ,
185+ on_change = update_chunk_size_input ,
186+ help = help_text .help_dict ["chunk_size" ],
187+ )
188+ st .slider (
189+ "Chunk Overlap (% of Chunk Size)" ,
190+ min_value = 0 ,
191+ max_value = 100 ,
192+ value = 20 ,
193+ step = 5 ,
194+ key = "selected_chunk_overlap_slider" ,
195+ on_change = update_chunk_overlap_input ,
196+ format = "%d%%" ,
197+ help = help_text .help_dict ["chunk_overlap" ],
198+ )
199+
200+ with col1_2 :
201+ embed_request .chunk_size = st .number_input (
202+ "Chunk Size (tokens):" ,
203+ label_visibility = "hidden" ,
204+ min_value = 0 ,
205+ max_value = chunk_size_max ,
206+ value = chunk_size_max ,
207+ key = "selected_chunk_size_input" ,
208+ on_change = update_chunk_size_slider ,
209+ )
210+ chunk_overlap_pct = st .number_input (
211+ "Chunk Overlap (% of Chunk Size):" ,
212+ label_visibility = "hidden" ,
213+ min_value = 0 ,
214+ max_value = 100 ,
215+ value = 20 ,
216+ step = 5 ,
217+ key = "selected_chunk_overlap_input" ,
218+ on_change = update_chunk_overlap_slider ,
219+ )
220+ embed_request .chunk_overlap = math .ceil ((chunk_overlap_pct / 100 ) * embed_request .chunk_size )
186221
187- with col1_2 :
188- embed_request .chunk_size = st .number_input (
189- "Chunk Size (tokens):" ,
190- label_visibility = "hidden" ,
191- min_value = 0 ,
192- max_value = chunk_size_max ,
193- value = chunk_size_max ,
194- key = "selected_chunk_size_input" ,
195- on_change = update_chunk_size_slider ,
222+ col2_1 , col2_2 = st .columns ([0.5 , 0.5 ])
223+ embed_request .distance_metric = col2_1 .selectbox (
224+ "Distance Metric:" ,
225+ list (DistanceMetrics .__args__ ),
226+ key = "selected_distance_metric" ,
227+ help = help_text .help_dict ["distance_metric" ],
196228 )
197- chunk_overlap_pct = st .number_input (
198- "Chunk Overlap (% of Chunk Size):" ,
199- label_visibility = "hidden" ,
200- min_value = 0 ,
201- max_value = 100 ,
202- value = 20 ,
203- step = 5 ,
204- key = "selected_chunk_overlap_input" ,
205- on_change = update_chunk_overlap_slider ,
229+ embed_request .index_type = col2_2 .selectbox (
230+ "Index Type:" , list (IndexTypes .__args__ ), key = "selected_index_type" , help = help_text .help_dict ["index_type" ]
206231 )
207- embed_request .chunk_overlap = math .ceil ((chunk_overlap_pct / 100 ) * embed_request .chunk_size )
208-
209- col2_1 , col2_2 = st .columns ([0.5 , 0.5 ])
210- embed_request .distance_metric = col2_1 .selectbox (
211- "Distance Metric:" ,
212- list (DistanceMetrics .__args__ ),
213- key = "selected_distance_metric" ,
214- help = help_text .help_dict ["distance_metric" ],
215- )
216- embed_request .index_type = col2_2 .selectbox (
217- "Index Type:" , list (IndexTypes .__args__ ), key = "selected_index_type" , help = help_text .help_dict ["index_type" ]
218- )
232+ else :
233+ # These values are taken from the selected existing vector store.
234+ # Set defaults here to avoid errors; they will be overwritten in _render_vector_store_section
235+ if not hasattr (embed_request , 'chunk_size' ) or embed_request .chunk_size is None :
236+ embed_request .chunk_size = embed_models_enabled [embed_request .model ]["max_chunk_size" ]
237+ if not hasattr (embed_request , 'chunk_overlap' ) or embed_request .chunk_overlap is None :
238+ embed_request .chunk_overlap = 0
239+ if not hasattr (embed_request , 'distance_metric' ) or embed_request .distance_metric is None :
240+ embed_request .distance_metric = list (DistanceMetrics .__args__ )[0 ]
241+ if not hasattr (embed_request , 'index_type' ) or embed_request .index_type is None :
242+ embed_request .index_type = list (IndexTypes .__args__ )[0 ]
219243
220244def _render_file_source_section (file_sources : list , oci_setup : dict ) -> FileSourceData :
221245 """Render file source selection and return processing data"""
@@ -341,8 +365,16 @@ def _display_file_list_expander(file_list_response: dict) -> None:
341365 st .info ("No files found in this vector store." )
342366
343367
344- def _render_vector_store_section (embed_request : DatabaseVectorStorage ) -> tuple :
345- """Render vector store configuration section and return validation status and rate limit"""
368+ def _render_vector_store_section (embed_request : DatabaseVectorStorage , create_new_vs : bool ) -> tuple :
369+ """Render vector store configuration section and return validation status and rate limit
370+
371+ Args:
372+ embed_request: The database vector storage request object
373+ create_new_vs: If True, allow creating new vector store. If False, select from existing only.
374+
375+ Returns:
376+ Tuple of (embed_alias_invalid, rate_limit, existing_vs)
377+ """
346378 st .header ("Populate Vector Store" , divider = "red" )
347379 database_lookup = st_common .state_configs_lookup ("database_configs" , "name" )
348380 existing_vs = database_lookup .get (state .client_settings .get ("database" , {}).get ("alias" ), {}).get (
@@ -353,50 +385,65 @@ def _render_vector_store_section(embed_request: DatabaseVectorStorage) -> tuple:
353385 embed_alias_invalid = False
354386 embed_request .vector_store = None
355387
356- # Filter vector stores by matching chunk size and overlap
357- matching_vs = [
358- vs for vs in existing_vs
359- if vs .get ("chunk_size" ) == embed_request .chunk_size
360- and vs .get ("chunk_overlap" ) == embed_request .chunk_overlap
361- and vs .get ("alias" )
362- ]
363- matching_vs_names = [vs .get ("alias" , "" ) for vs in matching_vs ]
364- vs_options = ["Create new..." ] + matching_vs_names
365-
366388 with embed_alias_size :
367- # Dropdown for existing vector stores
368- selected_vs = st .selectbox (
369- "Select or Create Vector Store:" ,
370- options = vs_options ,
371- index = 0 ,
372- help = "Only showing vector stores with matching chunk size and overlap configuration" ,
373- key = "selected_vs_dropdown"
374- )
375-
376- # Show text input if "Create new..." is selected or for editing
377- if selected_vs == "Create new..." :
389+ if create_new_vs :
390+ # Creating new vector store: just show text input for new VS name
378391 embed_request .alias = st .text_input (
379392 "New Vector Store Alias:" ,
380393 max_chars = 20 ,
381394 help = help_text .help_dict ["embed_alias" ],
382395 key = "selected_embed_alias" ,
383- placeholder = "Press Enter to set. " ,
396+ placeholder = "Enter a name for the new vector store " ,
384397 )
385398 else :
386- # Use the selected existing vector store name
399+ # Using existing mode: show only VS created with the same embedding model
400+ # Filter by model to prevent mixing embeddings from different models
401+ vs_lookup = {
402+ vs .get ("alias" ): vs
403+ for vs in existing_vs
404+ if vs .get ("alias" ) and vs .get ("model" ) == embed_request .model
405+ }
406+ vs_options = list (vs_lookup .keys ())
407+
408+ if not vs_options :
409+ st .warning (
410+ f"No existing vector stores found for embedding model '{ embed_request .model } '. "
411+ f"Toggle 'Create New Vector Store' to create one." ,
412+ icon = "⚠️"
413+ )
414+
415+ selected_vs = st .selectbox (
416+ "Select Existing Vector Store:" ,
417+ options = vs_options if vs_options else ["" ],
418+ index = 0 if vs_options else None ,
419+ help = "Only showing vector stores created with the same embedding model to prevent mixing embeddings" ,
420+ key = "selected_vs_dropdown" ,
421+ disabled = not vs_options
422+ )
387423 embed_request .alias = selected_vs
424+
425+ # Get VS properties from selected existing VS and update embed_request
426+ if selected_vs and selected_vs in vs_lookup :
427+ selected_vs_props = vs_lookup [selected_vs ]
428+ embed_request .chunk_size = selected_vs_props .get ("chunk_size" , embed_request .chunk_size )
429+ embed_request .chunk_overlap = selected_vs_props .get ("chunk_overlap" , embed_request .chunk_overlap )
430+ embed_request .distance_metric = selected_vs_props .get ("distance_metric" , embed_request .distance_metric )
431+ embed_request .index_type = selected_vs_props .get ("index_type" , embed_request .index_type )
432+
433+ # Show disabled text input with alias
388434 st .text_input (
389435 "Vector Store Alias:" ,
390- value = selected_vs ,
436+ value = selected_vs if selected_vs else "" ,
391437 max_chars = 20 ,
392438 help = help_text .help_dict ["embed_alias" ],
393- key = "selected_embed_alias " ,
439+ key = "selected_embed_alias_readonly " ,
394440 disabled = True ,
395441 )
442+
396443 pattern = r"^[A-Za-z][A-Za-z0-9_]*$"
397444
398445 # Check if alias is empty when creating new vector store
399- if selected_vs == "Create new..." and not embed_request .alias :
446+ if create_new_vs and not embed_request .alias :
400447 st .warning ("Please enter a Vector Store Alias to continue." )
401448 embed_alias_invalid = True
402449 elif embed_request .alias and not re .match (pattern , embed_request .alias ):
@@ -405,16 +452,23 @@ def _render_vector_store_section(embed_request: DatabaseVectorStorage) -> tuple:
405452 )
406453 embed_alias_invalid = True
407454
408- if not embed_alias_invalid :
455+ if not embed_alias_invalid and embed_request . alias :
409456 embed_request .vector_store , _ = functions .get_vs_table (
410457 ** embed_request .model_dump (exclude = {"database" , "vector_store" })
411458 )
412- vs_msg = f"{ embed_request .vector_store } , will be created."
413459 vs_exists = any (d .get ("vector_store" ) == embed_request .vector_store for d in existing_vs )
414- if vs_exists :
415- vs_msg = f" { embed_request . vector_store } exists, new chunks will be added."
460+
461+ # Show full vector store table name
416462 st .markdown (f"##### **Vector Store:** `{ embed_request .vector_store } `" )
417- st .caption (f"{ vs_msg } " )
463+
464+ # Different messages based on mode
465+ if create_new_vs :
466+ if vs_exists :
467+ st .caption ("Vector store already exists. New chunks will be added." )
468+ else :
469+ st .caption ("New vector store will be created." )
470+ else :
471+ st .caption ("Adding files to existing vector store." )
418472
419473 # Display files in existing vector store
420474 if vs_exists and embed_request .vector_store :
@@ -611,11 +665,22 @@ def display_split_embed() -> None:
611665
612666 embed_request = DatabaseVectorStorage ()
613667
614- _render_embedding_configuration (embed_models_enabled , embed_request )
668+ # Toggle between creating new vector store or using existing
669+ create_new_vs = st .toggle (
670+ "Create New Vector Store" ,
671+ key = "selected_create_new_vs" ,
672+ value = True ,
673+ help = "Toggle between creating a new vector store or adding to an existing one. "
674+ "When using an existing vector store, chunk size, overlap, distance metric, "
675+ "and index type are already defined and cannot be changed." ,
676+ )
677+
678+ # Render embedding configuration - only show VS config options when creating new
679+ _render_embedding_configuration (embed_models_enabled , embed_request , show_vs_config = create_new_vs )
615680
616681 source_data = _render_file_source_section (file_sources , oci_setup )
617682
618- embed_alias_invalid , rate_limit , existing_vs = _render_vector_store_section (embed_request )
683+ embed_alias_invalid , rate_limit , existing_vs = _render_vector_store_section (embed_request , create_new_vs )
619684
620685 if not embed_alias_invalid :
621686 _handle_vector_store_population (
0 commit comments