forked from robertopc1/data-api-builder
-
Notifications
You must be signed in to change notification settings - Fork 2
Phase 3: Stored Procedure Parameter Substitution (embed:true) #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
prshri-msft
wants to merge
11
commits into
ajtiwari07:add-internal-text-embedding-system
Choose a base branch
from
prshri-msft:phase3/embed-parameter-substitution
base: add-internal-text-embedding-system
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from all commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
0057718
Phase 3 Stage 1: Add embed flag to stored procedure parameters
prshri-msft 2bb3c0c
Phase 3 Stage 2: Wire EmbeddingService into stored procedure execution
prshri-msft 326f580
Phase 3 Stage 3.5: Address review findings
prshri-msft c435d12
Phase 3 Stage 3.6: Reject non-VECTOR types for embed:true at startup
prshri-msft 0fde9af
Phase 3 Stage 3.7: Round-2 reviewer polish fixes
prshri-msft a586e55
Phase 3 Stage 3.8: Revert two over-corrections from Stage 3.7
prshri-msft 82ab565
Phase 3 Stage 3.9: Multi-embed batching + clarify embed/default ratio…
prshri-msft 8462fbc
Phase 3 Stage 4.1: Refactors enabling Phase 3 unit-testability
prshri-msft d9e45a3
Phase 3 Stage 4.2: Add ParameterEmbeddingHelper unit tests (28 tests)
prshri-msft 029bc41
Phase 3 Stage 4.3: Add validator + metadata override unit tests (16 t…
prshri-msft 059d4c6
Phase 3 Stage 3.10: Address PR review feedback from ajtiwari07
prshri-msft File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -98,6 +98,7 @@ public void ValidateConfigProperties() | |
| ValidateAzureLogAnalyticsAuth(runtimeConfig); | ||
| ValidateFileSinkPath(runtimeConfig); | ||
| ValidateEmbeddingsOptions(runtimeConfig); | ||
| ValidateEmbedParameters(runtimeConfig); | ||
| } | ||
|
|
||
| /// <summary> | ||
|
|
@@ -421,7 +422,154 @@ public void ValidateEmbeddingsOptions(RuntimeConfig runtimeConfig) | |
| subStatusCode: DataApiBuilderException.SubStatusCodes.ConfigValidationError)); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Validates that parameters with embed=true are only used on stored-procedure entities | ||
| /// and that runtime.embeddings is configured when embed parameters are present. | ||
| /// </summary> | ||
| /// <remarks> | ||
| /// Internal (rather than private) to allow direct unit testing via the | ||
| /// <c>InternalsVisibleTo</c> attribute on Azure.DataApiBuilder.Core. Callers outside | ||
| /// the assembly should still go through <see cref="ValidateConfigProperties"/>. | ||
| /// </remarks> | ||
| internal void ValidateEmbedParameters(RuntimeConfig runtimeConfig) | ||
| { | ||
| // Check once whether the embedding service is configured and enabled. | ||
| // Example: "runtime": { "embeddings": { "enabled": true, "provider": "azure-openai" } } → true | ||
| // Example: embeddings section missing or "enabled": false → false | ||
| bool embeddingsConfigured = runtimeConfig.Runtime?.Embeddings is not null | ||
| && runtimeConfig.Runtime.Embeddings.Enabled; | ||
|
|
||
| // Loop through every entity in the config. | ||
| // Example entities: "Product" (table), "Category" (table), "SearchProducts" (sproc) | ||
| foreach ((string entityName, Entity entity) in runtimeConfig.Entities) | ||
| { | ||
| // Skip entities that have no parameters defined. | ||
| // Tables and views typically don't have parameters. | ||
| // Example: "Product": { "source": { "type": "table" } } → Parameters is null → skip | ||
| if (entity.Source.Parameters is null) | ||
| { | ||
| continue; | ||
| } | ||
|
|
||
| // Fast-path: skip entities with no embed:true parameters entirely. | ||
| // Avoids the data-source lookup and inner loop for the common case of | ||
| // entities whose params are all normal pass-through. | ||
| if (!entity.Source.Parameters.Any(p => p.Embed)) | ||
| { | ||
| continue; | ||
| } | ||
|
|
||
| // Hoist data source lookup outside the param loop — it's entity-scoped, not param-scoped. | ||
| // Looked up once per entity instead of once per parameter (was duplicated work in Stage 3.5). | ||
| DataSource entityDataSource = runtimeConfig.GetDataSourceFromEntityName(entityName); | ||
|
|
||
| // Check each parameter for the embed flag. | ||
| // Example: iterates over { "name": "query_vector", "embed": true } and { "name": "top_k", "default": "5" } | ||
| foreach (ParameterMetadata param in entity.Source.Parameters) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ALternatively we can remove for loop, instead check if Any param has embed true or default set. |
||
| { | ||
| // Skip parameters that don't have embed: true. Most params are normal pass-through. | ||
| // Example: "top_k" has Embed=false (default) → skip | ||
| // Example: "query_vector" has Embed=true → continue to validation checks | ||
| if (!param.Embed) | ||
| { | ||
| continue; | ||
| } | ||
|
|
||
| // Rule 0: embed:true is only supported on Azure SQL / SQL Server data sources. | ||
| // The metadata type override (Byte[] → String) only exists in MsSqlMetadataProvider. | ||
| // For PostgreSQL/MySQL/Cosmos, the request would fail at runtime with a confusing | ||
| // type error. Reject at startup instead. | ||
| // Example FAIL: PostgreSQL entity with embed:true → "embed feature only supported for MSSQL" | ||
| // TODO: Extend to PostgreSQL/MySQL once their metadata providers grow embed-aware type-override logic. | ||
| if (entityDataSource.DatabaseType != DatabaseType.MSSQL) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe add a TODO for other data stores. |
||
| { | ||
| HandleOrRecordException(new DataApiBuilderException( | ||
| message: $"Entity '{entityName}': parameter '{param.Name}' has 'embed: true' but the data source type is '{entityDataSource.DatabaseType}'. The embed feature is currently only supported for Azure SQL / SQL Server.", | ||
| statusCode: HttpStatusCode.ServiceUnavailable, | ||
| subStatusCode: DataApiBuilderException.SubStatusCodes.ConfigValidationError)); | ||
| } | ||
|
|
||
| // Rule 1: embed:true is only valid on stored-procedure entities. | ||
| // Tables/views don't have user-supplied parameters that get passed to SQL. | ||
| // Example PASS: "SearchProducts": { "source": { "type": "stored-procedure" } } | ||
| // Example FAIL: "Product": { "source": { "type": "table", "parameters": [{"name":"x","embed":true}] } } | ||
| // → Error: "Entity 'Product': parameter 'x' has 'embed: true' but is only valid on stored-procedure entities." | ||
| if (entity.Source.Type is not EntitySourceType.StoredProcedure) | ||
| { | ||
| HandleOrRecordException(new DataApiBuilderException( | ||
| message: $"Entity '{entityName}': parameter '{param.Name}' has 'embed: true' but is only valid on stored-procedure entities.", | ||
| statusCode: HttpStatusCode.ServiceUnavailable, | ||
| subStatusCode: DataApiBuilderException.SubStatusCodes.ConfigValidationError)); | ||
| } | ||
|
|
||
| // Rule 2: embed:true requires runtime.embeddings to be configured and enabled. | ||
| // Can't convert text to vectors without an embedding service. | ||
| // Example PASS: "embeddings": { "enabled": true, "provider": "azure-openai", "api-key": "..." } | ||
| // Example FAIL: "embeddings": { "enabled": false } or embeddings section missing | ||
| // → Error: "parameter 'query_vector' has 'embed: true' but runtime.embeddings is not configured or not enabled." | ||
| if (!embeddingsConfigured) | ||
| { | ||
| HandleOrRecordException(new DataApiBuilderException( | ||
| message: $"Entity '{entityName}': parameter '{param.Name}' has 'embed: true' but runtime.embeddings is not configured or not enabled.", | ||
| statusCode: HttpStatusCode.ServiceUnavailable, | ||
| subStatusCode: DataApiBuilderException.SubStatusCodes.ConfigValidationError)); | ||
| } | ||
|
|
||
| // Rule 3: embed:true with a default value is not supported. | ||
| // | ||
| // An embed parameter represents the user's text input that gets converted | ||
| // to a vector at request time — typically a semantic-search query. | ||
| // | ||
| // Setting a default for an embed parameter would mean: if the client doesn't | ||
| // supply a search query, the server invents one (e.g., "wireless headphones"), | ||
| // embeds it, and runs a semantic search the user never asked for. That isn't | ||
| // a fallback — it's the server fabricating user input. In any real UX, a | ||
| // missing search query indicates a client bug or an empty search box, not an | ||
| // invitation for the server to substitute a canned query on the user's behalf. | ||
| // | ||
| // (Defaults on non-embed parameters of the same sproc are unaffected by this | ||
| // rule and continue to work as before.) | ||
| // | ||
| // Even setting aside the UX concern, supporting embed-defaults would be | ||
| // non-trivial: | ||
| // - GraphQL schema defaults are baked in at startup as typed literals | ||
| // (GraphQLStoredProcedureBuilder.ConvertValueToGraphQLType). There is no | ||
| // VECTOR literal type in GraphQL, and the literal text would surface in | ||
| // introspection as a misleading default value for an embedded parameter. | ||
| // - REST/MCP defaults are injected as plain text into the resolved-parameter | ||
| // dictionary, then would be re-embedded by ParameterEmbeddingHelper on | ||
| // every request — a hidden per-request cost for a value the client never | ||
| // sent. | ||
| // - Embedding the default once at startup would couple application startup | ||
| // to the embedding provider's network availability (validation runs in | ||
| // CLI / startup contexts that may not have outbound access). | ||
| // | ||
| // What happens today if a client forgets to supply an embed parameter: | ||
| // - {"query_vector": null} or "" → 400 BadRequest "has 'embed: true' but | ||
| // the provided text is empty or whitespace." (caught by ParameterEmbeddingHelper) | ||
| // - field omitted entirely → 400 DatabaseInputError "expects parameter | ||
| // '@query_vector', which was not supplied." (SQL Server error, parsed | ||
| // by MsSqlDbExceptionParser) | ||
| // Both produce a clear, actionable client error — no silent failure. | ||
| // | ||
| // If a real use case for embed-defaults ever emerges, this rule can be lifted | ||
| // with the matching runtime support added. For now, embed parameters should | ||
| // always be supplied by the client. | ||
| // | ||
| // Example PASS: { "name": "query_vector", "embed": true } (no default) | ||
| // Example FAIL: { "name": "query_vector", "embed": true, "default": "wireless headphones" } | ||
| // → Error: "parameter 'query_vector' has both 'embed: true' and a 'default' value. Embed parameters cannot have default values." | ||
| if (param.Default is not null) | ||
| { | ||
| HandleOrRecordException(new DataApiBuilderException( | ||
| message: $"Entity '{entityName}': parameter '{param.Name}' has both 'embed: true' and a 'default' value. Embed parameters cannot have default values.", | ||
| statusCode: HttpStatusCode.ServiceUnavailable, | ||
| subStatusCode: DataApiBuilderException.SubStatusCodes.ConfigValidationError)); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we make this little less verbose?