diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 448837be..dab5903e 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -15,7 +15,10 @@ on: - 'tests/**' schedule: - cron: "0 10,22 * * *" # Runs at 10:00 AM and 10:00 PM GMT - +permissions: + id-token: write + contents: read + actions: read env: GPT_CAPACITY: 150 TEXT_EMBEDDING_CAPACITY: 200 @@ -23,6 +26,7 @@ env: jobs: deploy: runs-on: ubuntu-latest + environment: production outputs: RESOURCE_GROUP_NAME: ${{ steps.get_webapp_url.outputs.RESOURCE_GROUP_NAME }} KUBERNETES_RESOURCE_GROUP_NAME: ${{ steps.get_webapp_url.outputs.KUBERNETES_RESOURCE_GROUP_NAME }} @@ -35,12 +39,6 @@ jobs: - name: Checkout Code uses: actions/checkout@v5 # Checks out your repository - - name: Install Azure CLI - shell: bash - run: | - curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash - az --version # Verify installation - - name: Install Kubernetes CLI (kubectl) shell: bash run: | @@ -82,6 +80,14 @@ jobs: with: driver: docker + - name: Login to Azure + uses: azure/login@v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + enable-AzPSSession: true + - name: Run Quota Check id: quota-check shell: pwsh @@ -109,9 +115,6 @@ jobs: } env: AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} - AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} - AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }} GPT_MIN_CAPACITY: ${{ env.GPT_CAPACITY }} TEXT_EMBEDDING_MIN_CAPACITY: ${{ env.TEXT_EMBEDDING_CAPACITY }} AZURE_REGIONS: "${{ vars.AZURE_REGIONS }}" @@ -144,10 +147,8 @@ jobs: - name: Install Bicep CLI run: az bicep install - - name: Install Azure Developer CLI - run: | - curl -fsSL https://aka.ms/install-azd.sh | bash - shell: bash + - name: Install azd + uses: Azure/setup-azd@v2 - name: Set Deployment Region run: | @@ -164,11 +165,6 @@ jobs: echo 
"RESOURCE_GROUP_NAME=${UNIQUE_RG_NAME}" >> $GITHUB_ENV echo "Generated RESOURCE_GROUP_NAME: ${UNIQUE_RG_NAME}" - - name: Login to Azure - run: | - az login --service-principal -u ${{ secrets.AZURE_CLIENT_ID }} -p ${{ secrets.AZURE_CLIENT_SECRET }} --tenant ${{ secrets.AZURE_TENANT_ID }} - az account set --subscription ${{ secrets.AZURE_SUBSCRIPTION_ID }} - - name: Check and Create Resource Group id: check_create_rg run: | @@ -223,7 +219,7 @@ jobs: enableRedundancy=false \ enableScalability=false \ createdBy="Pipeline" \ - tags="{'SecurityControl':'Ignore','Purpose':'Deploying and Cleaning Up Resources for Validation','CreatedDate':'$current_date'}" + tags="{'Purpose':'Deploying and Cleaning Up Resources for Validation','CreatedDate':'$current_date'}" - name: Get Deployment Output and extract Values id: get_output @@ -258,11 +254,8 @@ jobs: Write-Host "Resource Group Name is ${{ env.RESOURCE_GROUP_NAME }}" Write-Host "Kubernetes resource group is ${{ env.AZURE_AKS_NAME }}" env: - # From GitHub secrets (for login) + # From GitHub secrets AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} - AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} - AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }} # From deployment outputs step (these come from $GITHUB_ENV) RESOURCE_GROUP_NAME: ${{ env.RESOURCE_GROUP_NAME }} @@ -298,10 +291,9 @@ jobs: if az account show &> /dev/null; then echo "Azure CLI is authenticated." else - echo "Azure CLI is not authenticated. Logging in..." - az login --service-principal --username ${{ secrets.AZURE_CLIENT_ID }} --password ${{ secrets.AZURE_CLIENT_SECRET }} --tenant ${{ secrets.AZURE_TENANT_ID }} + echo "Azure CLI is not authenticated. Please check the OIDC login step." + exit 1 fi - az account set --subscription ${{ secrets.AZURE_SUBSCRIPTION_ID }} # Get the Web App URL and save it to GITHUB_OUTPUT echo "Retrieving Web App URL..." 
@@ -356,6 +348,7 @@ jobs: - name: Run Post Deployment Script shell: pwsh + continue-on-error: true run: | Write-Host "Running post deployment script to upload files..." cd Deployment @@ -398,6 +391,7 @@ jobs: if: always() needs: [deploy, e2e-test] runs-on: ubuntu-latest + environment: production env: RESOURCE_GROUP_NAME: ${{ needs.deploy.outputs.RESOURCE_GROUP_NAME }} KUBERNETES_RESOURCE_GROUP_NAME: ${{ needs.deploy.outputs.KUBERNETES_RESOURCE_GROUP_NAME }} @@ -406,17 +400,12 @@ jobs: VALID_REGION: ${{ needs.deploy.outputs.VALID_REGION }} steps: - - name: Install Azure CLI - shell: bash - run: | - curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash - az --version # Verify installation - - name: Login to Azure - shell: bash - run: | - az login --service-principal --username ${{ secrets.AZURE_CLIENT_ID }} --password ${{ secrets.AZURE_CLIENT_SECRET }} --tenant ${{ secrets.AZURE_TENANT_ID }} - az account set --subscription "${{ secrets.AZURE_SUBSCRIPTION_ID }}" + uses: azure/login@v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - name: Delete Resource Groups if: env.RESOURCE_GROUP_NAME != '' diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 7a7342cb..d56a9fb2 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -8,10 +8,6 @@ on: - 'App/frontend-app/**' - 'App/kernel-memory/**' - '.github/workflows/codeql.yml' - paths-ignore: - - '**/.gitignore' - - '**/Dockerfile' - - '**/.dockerignore' pull_request: branches: [ "main", "dev", "demo" ] paths: @@ -19,10 +15,6 @@ on: - 'App/frontend-app/**' - 'App/kernel-memory/**' - '.github/workflows/codeql.yml' - paths-ignore: - - '**/.gitignore' - - '**/Dockerfile' - - '**/.dockerignore' schedule: - cron: '37 2 * * 5' diff --git a/.github/workflows/deploy-orchestrator.yml b/.github/workflows/deploy-orchestrator.yml new file mode 100644 index 00000000..aa2cdfc2 --- 
/dev/null +++ b/.github/workflows/deploy-orchestrator.yml @@ -0,0 +1,112 @@ +name: Deployment orchestrator + +on: + workflow_call: + inputs: + azure_location: + description: 'Azure Location For Deployment' + required: false + default: 'australiaeast' + type: string + resource_group_name: + description: 'Resource Group Name (Optional)' + required: false + default: '' + type: string + waf_enabled: + description: 'Enable WAF' + required: false + default: false + type: boolean + EXP: + description: 'Enable EXP' + required: false + default: false + type: boolean + cleanup_resources: + description: 'Cleanup Deployed Resources' + required: false + default: false + type: boolean + run_e2e_tests: + description: 'Run End-to-End Tests' + required: false + default: 'GoldenPath-Testing' + type: string + AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: + description: 'Log Analytics Workspace ID (Optional)' + required: false + default: '' + type: string + existing_webapp_url: + description: 'Existing Container WebApp URL (Skips Deployment)' + required: false + default: '' + type: string + trigger_type: + description: 'Trigger type (workflow_dispatch, pull_request, schedule)' + required: true + type: string + +env: + AZURE_DEV_COLLECT_TELEMETRY: ${{ vars.AZURE_DEV_COLLECT_TELEMETRY }} + +jobs: + deploy: + if: "!cancelled() && (inputs.trigger_type != 'workflow_dispatch' || inputs.existing_webapp_url == '' || inputs.existing_webapp_url == null)" + uses: ./.github/workflows/job-deploy.yml + with: + trigger_type: ${{ inputs.trigger_type }} + azure_location: ${{ inputs.azure_location }} + resource_group_name: ${{ inputs.resource_group_name }} + waf_enabled: ${{ inputs.waf_enabled }} + EXP: ${{ inputs.EXP }} + existing_webapp_url: ${{ inputs.existing_webapp_url }} + AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: ${{ inputs.AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID }} + run_e2e_tests: ${{ inputs.run_e2e_tests }} + cleanup_resources: ${{ inputs.cleanup_resources }} + secrets: inherit + + e2e-test: + if: 
"!cancelled() && ((needs.deploy.outputs.WEB_APPURL != '' && needs.deploy.outputs.WEB_APPURL != null) || (inputs.existing_webapp_url != '' && inputs.existing_webapp_url != null)) && (inputs.trigger_type != 'workflow_dispatch' || (inputs.run_e2e_tests != 'None' && inputs.run_e2e_tests != '' && inputs.run_e2e_tests != null))" + needs: [deploy] + uses: ./.github/workflows/test-automation-v2.yml + with: + TEST_URL: ${{ needs.deploy.outputs.WEB_APPURL || inputs.existing_webapp_url }} + TEST_SUITE: ${{ inputs.trigger_type == 'workflow_dispatch' && inputs.run_e2e_tests || 'GoldenPath-Testing' }} + secrets: inherit + + send-notification: + # if: "!cancelled()" + if: false # Temporarily disable notification job + needs: [deploy, e2e-test] + uses: ./.github/workflows/job-send-notification.yml + with: + trigger_type: ${{ inputs.trigger_type }} + waf_enabled: ${{ inputs.waf_enabled }} + EXP: ${{ inputs.EXP }} + run_e2e_tests: ${{ inputs.run_e2e_tests }} + existing_webapp_url: ${{ inputs.existing_webapp_url }} + deploy_result: ${{ needs.deploy.result }} + e2e_test_result: ${{ needs.e2e-test.result }} + WEB_APPURL: ${{ needs.deploy.outputs.WEB_APPURL || inputs.existing_webapp_url }} + RESOURCE_GROUP_NAME: ${{ needs.deploy.outputs.RESOURCE_GROUP_NAME }} + QUOTA_FAILED: ${{ needs.deploy.outputs.QUOTA_FAILED }} + TEST_SUCCESS: ${{ needs.e2e-test.outputs.TEST_SUCCESS }} + TEST_REPORT_URL: ${{ needs.e2e-test.outputs.TEST_REPORT_URL }} + secrets: inherit + + cleanup-deployment: + if: "!cancelled() && needs.deploy.outputs.RESOURCE_GROUP_NAME != '' && inputs.existing_webapp_url == '' && (inputs.trigger_type != 'workflow_dispatch' || inputs.cleanup_resources)" + needs: [deploy, e2e-test] + uses: ./.github/workflows/job-cleanup-deployment.yml + with: + trigger_type: ${{ inputs.trigger_type }} + cleanup_resources: ${{ inputs.cleanup_resources }} + existing_webapp_url: ${{ inputs.existing_webapp_url }} + RESOURCE_GROUP_NAME: ${{ needs.deploy.outputs.RESOURCE_GROUP_NAME }} + AZURE_LOCATION: 
${{ needs.deploy.outputs.AZURE_LOCATION }} + AZURE_ENV_OPENAI_LOCATION: ${{ needs.deploy.outputs.AZURE_ENV_OPENAI_LOCATION }} + ENV_NAME: ${{ needs.deploy.outputs.ENV_NAME }} + IMAGE_TAG: ${{ needs.deploy.outputs.IMAGE_TAG }} + secrets: inherit diff --git a/.github/workflows/deploy-v2.yml b/.github/workflows/deploy-v2.yml new file mode 100644 index 00000000..e52e6d7e --- /dev/null +++ b/.github/workflows/deploy-v2.yml @@ -0,0 +1,231 @@ +name: Deploy-Test-Cleanup (v2) +on: + push: + branches: + - main # Adjust this to the branch you want to trigger the deployment on + - dev + - demo + schedule: + - cron: "0 10,22 * * *" # Runs at 10:00 AM and 10:00 PM UTC + + workflow_dispatch: + inputs: + azure_location: + description: 'Azure Location For Deployment' + required: false + default: 'australiaeast' + type: choice + options: + - 'australiaeast' + - 'centralus' + - 'eastasia' + - 'eastus2' + - 'japaneast' + - 'northeurope' + - 'southeastasia' + - 'uksouth' + resource_group_name: + description: 'Resource Group Name (Optional)' + required: false + default: '' + type: string + + waf_enabled: + description: 'Enable WAF' + required: false + default: false + type: boolean + EXP: + description: 'Enable EXP' + required: false + default: false + type: boolean + + cleanup_resources: + description: 'Cleanup Deployed Resources' + required: false + default: false + type: boolean + + run_e2e_tests: + description: 'Run End-to-End Tests' + required: false + default: 'GoldenPath-Testing' + type: choice + options: + - 'GoldenPath-Testing' + - 'Smoke-Testing' + - 'None' + + AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: + description: 'Log Analytics Workspace ID (Optional)' + required: false + default: '' + type: string + existing_webapp_url: + description: 'Existing WebApp URL (Skips Deployment)' + required: false + default: '' + type: string + +permissions: + id-token: write + contents: read + actions: read + +jobs: + validate-inputs: + name: Validate Input Parameters + runs-on: ubuntu-latest + 
outputs: + validation_passed: ${{ steps.validate.outputs.passed }} + azure_location: ${{ steps.validate.outputs.azure_location }} + resource_group_name: ${{ steps.validate.outputs.resource_group_name }} + waf_enabled: ${{ steps.validate.outputs.waf_enabled }} + exp: ${{ steps.validate.outputs.exp }} + cleanup_resources: ${{ steps.validate.outputs.cleanup_resources }} + run_e2e_tests: ${{ steps.validate.outputs.run_e2e_tests }} + azure_env_log_analytics_workspace_id: ${{ steps.validate.outputs.azure_env_log_analytics_workspace_id }} + existing_webapp_url: ${{ steps.validate.outputs.existing_webapp_url }} + + steps: + - name: Validate Workflow Input Parameters + id: validate + shell: bash + env: + INPUT_AZURE_LOCATION: ${{ github.event.inputs.azure_location }} + INPUT_RESOURCE_GROUP_NAME: ${{ github.event.inputs.resource_group_name }} + INPUT_WAF_ENABLED: ${{ github.event.inputs.waf_enabled }} + INPUT_EXP: ${{ github.event.inputs.EXP }} + INPUT_CLEANUP_RESOURCES: ${{ github.event.inputs.cleanup_resources }} + INPUT_RUN_E2E_TESTS: ${{ github.event.inputs.run_e2e_tests }} + INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: ${{ github.event.inputs.AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID }} + INPUT_EXISTING_WEBAPP_URL: ${{ github.event.inputs.existing_webapp_url }} + + run: | + echo "🔍 Validating workflow input parameters..." + VALIDATION_FAILED=false + + # Validate azure_location (Azure region format) + LOCATION="${INPUT_AZURE_LOCATION:-australiaeast}" + + if [[ ! "$LOCATION" =~ ^[a-z0-9]+$ ]]; then + echo "❌ ERROR: azure_location '$LOCATION' is invalid. Must contain only lowercase letters and numbers" + VALIDATION_FAILED=true + else + echo "✅ azure_location: '$LOCATION' is valid" + fi + + # Validate resource_group_name (Azure naming convention, optional) + if [[ -n "$INPUT_RESOURCE_GROUP_NAME" ]]; then + if [[ ! 
"$INPUT_RESOURCE_GROUP_NAME" =~ ^[a-zA-Z0-9._\(\)-]+$ ]] || [[ "$INPUT_RESOURCE_GROUP_NAME" =~ \.$ ]]; then + echo "❌ ERROR: resource_group_name '$INPUT_RESOURCE_GROUP_NAME' is invalid. Must contain only alphanumerics, periods, underscores, hyphens, and parentheses. Cannot end with period." + VALIDATION_FAILED=true + elif [[ ${#INPUT_RESOURCE_GROUP_NAME} -gt 90 ]]; then + echo "❌ ERROR: resource_group_name '$INPUT_RESOURCE_GROUP_NAME' exceeds 90 characters (length: ${#INPUT_RESOURCE_GROUP_NAME})" + VALIDATION_FAILED=true + else + echo "✅ resource_group_name: '$INPUT_RESOURCE_GROUP_NAME' is valid" + fi + else + echo "✅ resource_group_name: Not provided (will be auto-generated)" + fi + + # Validate waf_enabled (boolean) + WAF_ENABLED="${INPUT_WAF_ENABLED:-false}" + if [[ "$WAF_ENABLED" != "true" && "$WAF_ENABLED" != "false" ]]; then + echo "❌ ERROR: waf_enabled must be 'true' or 'false', got: '$WAF_ENABLED'" + VALIDATION_FAILED=true + else + echo "✅ waf_enabled: '$WAF_ENABLED' is valid" + fi + + # Validate EXP (boolean) + EXP_ENABLED="${INPUT_EXP:-false}" + if [[ "$EXP_ENABLED" != "true" && "$EXP_ENABLED" != "false" ]]; then + echo "❌ ERROR: EXP must be 'true' or 'false', got: '$EXP_ENABLED'" + VALIDATION_FAILED=true + else + echo "✅ EXP: '$EXP_ENABLED' is valid" + fi + + # Validate cleanup_resources (boolean) + CLEANUP_RESOURCES="${INPUT_CLEANUP_RESOURCES:-false}" + if [[ "$CLEANUP_RESOURCES" != "true" && "$CLEANUP_RESOURCES" != "false" ]]; then + echo "❌ ERROR: cleanup_resources must be 'true' or 'false', got: '$CLEANUP_RESOURCES'" + VALIDATION_FAILED=true + else + echo "✅ cleanup_resources: '$CLEANUP_RESOURCES' is valid" + fi + + # Validate run_e2e_tests (specific allowed values) + TEST_OPTION="${INPUT_RUN_E2E_TESTS:-GoldenPath-Testing}" + if [[ "$TEST_OPTION" != "GoldenPath-Testing" && "$TEST_OPTION" != "Smoke-Testing" && "$TEST_OPTION" != "None" ]]; then + echo "❌ ERROR: run_e2e_tests must be one of: GoldenPath-Testing, Smoke-Testing, None, got: '$TEST_OPTION'" 
+ VALIDATION_FAILED=true + else + echo "✅ run_e2e_tests: '$TEST_OPTION' is valid" + fi + + # Validate AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID (optional, Azure Resource ID format) + if [[ -n "$INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID" ]]; then + if [[ ! "$INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID" =~ ^/subscriptions/[a-fA-F0-9-]+/[Rr]esource[Gg]roups/[^/]+/providers/[Mm]icrosoft\.[Oo]perational[Ii]nsights/[Ww]orkspaces/[^/]+$ ]]; then + echo "❌ ERROR: AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID is invalid. Must be a valid Azure Resource ID format:" + echo " /subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.OperationalInsights/workspaces/{workspaceName}" + echo " Got: '$INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID'" + VALIDATION_FAILED=true + else + echo "✅ AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: Valid Resource ID format" + fi + else + echo "✅ AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: Not provided (optional)" + fi + + # Validate existing_webapp_url (optional, must start with https) + if [[ -n "$INPUT_EXISTING_WEBAPP_URL" ]]; then + if [[ ! "$INPUT_EXISTING_WEBAPP_URL" =~ ^https:// ]]; then + echo "❌ ERROR: existing_webapp_url must start with 'https://', got: '$INPUT_EXISTING_WEBAPP_URL'" + VALIDATION_FAILED=true + else + echo "✅ existing_webapp_url: '$INPUT_EXISTING_WEBAPP_URL' is valid" + fi + else + echo "✅ existing_webapp_url: Not provided (will perform deployment)" + fi + + # Fail workflow if any validation failed + if [[ "$VALIDATION_FAILED" == "true" ]]; then + echo "" + echo "❌ Parameter validation failed. Please correct the errors above and try again." + exit 1 + fi + + echo "" + echo "✅ All input parameters validated successfully!" 
+ + # Output validated values + echo "passed=true" >> $GITHUB_OUTPUT + echo "azure_location=$LOCATION" >> $GITHUB_OUTPUT + echo "resource_group_name=$INPUT_RESOURCE_GROUP_NAME" >> $GITHUB_OUTPUT + echo "waf_enabled=$WAF_ENABLED" >> $GITHUB_OUTPUT + echo "exp=$EXP_ENABLED" >> $GITHUB_OUTPUT + echo "cleanup_resources=$CLEANUP_RESOURCES" >> $GITHUB_OUTPUT + echo "run_e2e_tests=$TEST_OPTION" >> $GITHUB_OUTPUT + echo "azure_env_log_analytics_workspace_id=$INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID" >> $GITHUB_OUTPUT + echo "existing_webapp_url=$INPUT_EXISTING_WEBAPP_URL" >> $GITHUB_OUTPUT + + Run: + needs: validate-inputs + if: needs.validate-inputs.outputs.validation_passed == 'true' + uses: ./.github/workflows/deploy-orchestrator.yml + with: + azure_location: ${{ needs.validate-inputs.outputs.azure_location || 'australiaeast' }} + resource_group_name: ${{ needs.validate-inputs.outputs.resource_group_name || '' }} + waf_enabled: ${{ needs.validate-inputs.outputs.waf_enabled == 'true' }} + EXP: ${{ needs.validate-inputs.outputs.exp == 'true' }} + cleanup_resources: ${{ needs.validate-inputs.outputs.cleanup_resources == 'true' }} + run_e2e_tests: ${{ needs.validate-inputs.outputs.run_e2e_tests || 'GoldenPath-Testing' }} + AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: ${{ needs.validate-inputs.outputs.azure_env_log_analytics_workspace_id || '' }} + existing_webapp_url: ${{ needs.validate-inputs.outputs.existing_webapp_url || '' }} + trigger_type: ${{ github.event_name }} + secrets: inherit diff --git a/.github/workflows/job-cleanup-deployment.yml b/.github/workflows/job-cleanup-deployment.yml new file mode 100644 index 00000000..082dffe8 --- /dev/null +++ b/.github/workflows/job-cleanup-deployment.yml @@ -0,0 +1,204 @@ +name: Cleanup Deployment Job +on: + workflow_call: + inputs: + trigger_type: + description: 'Trigger type (workflow_dispatch, pull_request, schedule)' + required: true + type: string + cleanup_resources: + description: 'Cleanup Deployed Resources' + required: 
false + default: false + type: boolean + existing_webapp_url: + description: 'Existing Container WebApp URL (Skips Deployment)' + required: false + default: '' + type: string + RESOURCE_GROUP_NAME: + description: 'Resource Group Name to cleanup' + required: true + type: string + AZURE_LOCATION: + description: 'Azure Location' + required: true + type: string + AZURE_ENV_OPENAI_LOCATION: + description: 'Azure OpenAI Location' + required: true + type: string + ENV_NAME: + description: 'Environment Name' + required: true + type: string + IMAGE_TAG: + description: 'Docker Image Tag' + required: true + type: string + +jobs: + cleanup-deployment: + runs-on: ubuntu-latest + environment: production + continue-on-error: true + env: + RESOURCE_GROUP_NAME: ${{ inputs.RESOURCE_GROUP_NAME }} + AZURE_LOCATION: ${{ inputs.AZURE_LOCATION }} + AZURE_ENV_OPENAI_LOCATION: ${{ inputs.AZURE_ENV_OPENAI_LOCATION }} + ENV_NAME: ${{ inputs.ENV_NAME }} + IMAGE_TAG: ${{ inputs.IMAGE_TAG }} + steps: + - name: Validate Workflow Input Parameters + shell: bash + env: + INPUT_TRIGGER_TYPE: ${{ inputs.trigger_type }} + INPUT_CLEANUP_RESOURCES: ${{ inputs.cleanup_resources }} + INPUT_EXISTING_WEBAPP_URL: ${{ inputs.existing_webapp_url }} + INPUT_RESOURCE_GROUP_NAME: ${{ inputs.RESOURCE_GROUP_NAME }} + INPUT_AZURE_LOCATION: ${{ inputs.AZURE_LOCATION }} + INPUT_AZURE_ENV_OPENAI_LOCATION: ${{ inputs.AZURE_ENV_OPENAI_LOCATION }} + INPUT_ENV_NAME: ${{ inputs.ENV_NAME }} + INPUT_IMAGE_TAG: ${{ inputs.IMAGE_TAG }} + run: | + echo "🔍 Validating workflow input parameters..." + VALIDATION_FAILED=false + + # Validate trigger_type (required - alphanumeric with underscores) + if [[ -z "$INPUT_TRIGGER_TYPE" ]]; then + echo "❌ ERROR: trigger_type is required but was not provided" + VALIDATION_FAILED=true + elif [[ ! "$INPUT_TRIGGER_TYPE" =~ ^[a-zA-Z0-9_]+$ ]]; then + echo "❌ ERROR: trigger_type '$INPUT_TRIGGER_TYPE' is invalid. 
Must contain only alphanumeric characters and underscores" + VALIDATION_FAILED=true + fi + + # Validate cleanup_resources (boolean) + if [[ "$INPUT_CLEANUP_RESOURCES" != "true" && "$INPUT_CLEANUP_RESOURCES" != "false" ]]; then + echo "❌ ERROR: cleanup_resources must be 'true' or 'false', got '$INPUT_CLEANUP_RESOURCES'" + VALIDATION_FAILED=true + fi + + # Validate existing_webapp_url (optional - must start with https if provided) + if [[ -n "$INPUT_EXISTING_WEBAPP_URL" ]]; then + if [[ ! "$INPUT_EXISTING_WEBAPP_URL" =~ ^https:// ]]; then + echo "❌ ERROR: existing_webapp_url must start with 'https://', got '$INPUT_EXISTING_WEBAPP_URL'" + VALIDATION_FAILED=true + fi + fi + + # Validate RESOURCE_GROUP_NAME (required - Azure resource group naming convention) + if [[ -z "$INPUT_RESOURCE_GROUP_NAME" ]]; then + echo "❌ ERROR: RESOURCE_GROUP_NAME is required but was not provided" + VALIDATION_FAILED=true + elif [[ ! "$INPUT_RESOURCE_GROUP_NAME" =~ ^[a-zA-Z0-9._\(\)-]+$ ]] || [[ "$INPUT_RESOURCE_GROUP_NAME" =~ \.$ ]]; then + echo "❌ ERROR: RESOURCE_GROUP_NAME is invalid. Must contain only alphanumerics, periods, underscores, hyphens, and parentheses. Cannot end with period." + VALIDATION_FAILED=true + elif [[ ${#INPUT_RESOURCE_GROUP_NAME} -gt 90 ]]; then + echo "❌ ERROR: RESOURCE_GROUP_NAME exceeds 90 characters" + VALIDATION_FAILED=true + fi + + # Validate AZURE_LOCATION (required - Azure region format) + if [[ -z "$INPUT_AZURE_LOCATION" ]]; then + echo "❌ ERROR: AZURE_LOCATION is required but was not provided" + VALIDATION_FAILED=true + elif [[ ! "$INPUT_AZURE_LOCATION" =~ ^[a-z0-9]+$ ]]; then + echo "❌ ERROR: AZURE_LOCATION '$INPUT_AZURE_LOCATION' is invalid. 
Must contain only lowercase letters and numbers" + VALIDATION_FAILED=true + fi + + # Validate AZURE_ENV_OPENAI_LOCATION (required - Azure region format) + if [[ -z "$INPUT_AZURE_ENV_OPENAI_LOCATION" ]]; then + echo "❌ ERROR: AZURE_ENV_OPENAI_LOCATION is required but was not provided" + VALIDATION_FAILED=true + elif [[ ! "$INPUT_AZURE_ENV_OPENAI_LOCATION" =~ ^[a-z0-9]+$ ]]; then + echo "❌ ERROR: AZURE_ENV_OPENAI_LOCATION '$INPUT_AZURE_ENV_OPENAI_LOCATION' is invalid. Must contain only lowercase letters and numbers" + VALIDATION_FAILED=true + fi + + # Validate ENV_NAME (required - alphanumeric with underscores and hyphens) + if [[ -z "$INPUT_ENV_NAME" ]]; then + echo "❌ ERROR: ENV_NAME is required but was not provided" + VALIDATION_FAILED=true + elif [[ ! "$INPUT_ENV_NAME" =~ ^[a-zA-Z0-9_-]+$ ]]; then + echo "❌ ERROR: ENV_NAME '$INPUT_ENV_NAME' is invalid. Must contain only alphanumeric characters, underscores, and hyphens" + VALIDATION_FAILED=true + fi + + # Validate IMAGE_TAG (required - Docker tag pattern) + if [[ -z "$INPUT_IMAGE_TAG" ]]; then + echo "❌ ERROR: IMAGE_TAG is required but was not provided" + VALIDATION_FAILED=true + elif [[ ! "$INPUT_IMAGE_TAG" =~ ^[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}$ ]]; then + echo "❌ ERROR: IMAGE_TAG '$INPUT_IMAGE_TAG' is invalid. Must be a valid Docker tag (alphanumeric start, up to 128 chars)" + VALIDATION_FAILED=true + fi + + if [[ "$VALIDATION_FAILED" == "true" ]]; then + echo "❌ Input validation failed. Please check the errors above." 
+ exit 1 + fi + + echo "✅ All input parameters validated successfully" + + - name: Setup Azure CLI + shell: bash + run: | + if [[ "${{ runner.os }}" == "Linux" ]]; then + curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash + fi + az --version + + - name: Login to Azure + uses: azure/login@v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + + - name: Delete Resource Group (Optimized Cleanup) + id: delete_rg + shell: bash + run: | + set -e + echo "🗑️ Starting optimized resource cleanup..." + echo "Deleting resource group: ${{ env.RESOURCE_GROUP_NAME }}" + + az group delete \ + --name "${{ env.RESOURCE_GROUP_NAME }}" \ + --yes \ + --no-wait + + echo "✅ Resource group deletion initiated (running asynchronously)" + echo "Note: Resources will be cleaned up in the background" + + - name: Logout from Azure + if: always() + shell: bash + run: | + az logout || echo "Warning: Failed to logout from Azure CLI" + echo "Logged out from Azure." 
+ + - name: Generate Cleanup Job Summary + if: always() + shell: bash + run: | + echo "## 🧹 Cleanup Job Summary" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Field | Value |" >> $GITHUB_STEP_SUMMARY + echo "|-------|--------|" >> $GITHUB_STEP_SUMMARY + echo "| **Resource Group deletion Status** | ${{ steps.delete_rg.outcome == 'success' && '✅ Initiated' || '❌ Failed' }} |" >> $GITHUB_STEP_SUMMARY + echo "| **Resource Group** | \`${{ env.RESOURCE_GROUP_NAME }}\` |" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + if [[ "${{ steps.delete_rg.outcome }}" == "success" ]]; then + echo "### ✅ Cleanup Details" >> $GITHUB_STEP_SUMMARY + echo "- Successfully initiated deletion for Resource Group \`${{ env.RESOURCE_GROUP_NAME }}\`" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + else + echo "### ❌ Cleanup Failed" >> $GITHUB_STEP_SUMMARY + echo "- Cleanup process encountered an error" >> $GITHUB_STEP_SUMMARY + echo "- Manual cleanup may be required for:" >> $GITHUB_STEP_SUMMARY + echo " - Resource Group: \`${{ env.RESOURCE_GROUP_NAME }}\`" >> $GITHUB_STEP_SUMMARY + echo "- Check the cleanup-deployment job logs for detailed error information" >> $GITHUB_STEP_SUMMARY + fi diff --git a/.github/workflows/job-deploy-linux.yml b/.github/workflows/job-deploy-linux.yml new file mode 100644 index 00000000..ca9488ab --- /dev/null +++ b/.github/workflows/job-deploy-linux.yml @@ -0,0 +1,460 @@ +name: Deploy Steps + +on: + workflow_call: + inputs: + ENV_NAME: + required: true + type: string + AZURE_ENV_OPENAI_LOCATION: + required: true + type: string + AZURE_LOCATION: + required: true + type: string + RESOURCE_GROUP_NAME: + required: true + type: string + IMAGE_TAG: + required: true + type: string + EXP: + required: true + type: string + WAF_ENABLED: + required: false + type: string + default: 'false' + AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: + required: false + type: string + outputs: + WEB_APPURL: + description: "Container Web App URL" + value: 
${{ jobs.deploy-linux.outputs.WEB_APPURL }} + +jobs: + deploy-linux: + runs-on: ubuntu-latest + environment: production + env: + AZURE_DEV_COLLECT_TELEMETRY: ${{ vars.AZURE_DEV_COLLECT_TELEMETRY }} + outputs: + WEB_APPURL: ${{ steps.get_webapp_url.outputs.WEB_APPURL }} + steps: + - name: Validate Workflow Input Parameters + shell: bash + env: + INPUT_ENV_NAME: ${{ inputs.ENV_NAME }} + INPUT_AZURE_ENV_OPENAI_LOCATION: ${{ inputs.AZURE_ENV_OPENAI_LOCATION }} + INPUT_AZURE_LOCATION: ${{ inputs.AZURE_LOCATION }} + INPUT_RESOURCE_GROUP_NAME: ${{ inputs.RESOURCE_GROUP_NAME }} + INPUT_IMAGE_TAG: ${{ inputs.IMAGE_TAG }} + INPUT_EXP: ${{ inputs.EXP }} + INPUT_WAF_ENABLED: ${{ inputs.WAF_ENABLED }} + INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: ${{ inputs.AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID }} + run: | + echo "🔍 Validating workflow input parameters..." + VALIDATION_FAILED=false + + # Validate ENV_NAME (required - alphanumeric) + if [[ -z "$INPUT_ENV_NAME" ]]; then + echo "❌ ERROR: ENV_NAME is required but was not provided" + VALIDATION_FAILED=true + elif [[ ! "$INPUT_ENV_NAME" =~ ^[a-zA-Z0-9_-]+$ ]]; then + echo "❌ ERROR: ENV_NAME '$INPUT_ENV_NAME' is invalid. Must contain only alphanumeric characters, underscores, and hyphens" + VALIDATION_FAILED=true + else + echo "✅ ENV_NAME: '$INPUT_ENV_NAME' is valid" + fi + + # Validate AZURE_ENV_OPENAI_LOCATION (required - Azure region format) + if [[ -z "$INPUT_AZURE_ENV_OPENAI_LOCATION" ]]; then + echo "❌ ERROR: AZURE_ENV_OPENAI_LOCATION is required but was not provided" + VALIDATION_FAILED=true + elif [[ ! "$INPUT_AZURE_ENV_OPENAI_LOCATION" =~ ^[a-z0-9]+$ ]]; then + echo "❌ ERROR: AZURE_ENV_OPENAI_LOCATION '$INPUT_AZURE_ENV_OPENAI_LOCATION' is invalid. 
Must contain only lowercase letters and numbers (e.g., 'australiaeast', 'westus2')" + VALIDATION_FAILED=true + else + echo "✅ AZURE_ENV_OPENAI_LOCATION: '$INPUT_AZURE_ENV_OPENAI_LOCATION' is valid" + fi + + # Validate AZURE_LOCATION (required - Azure region format) + if [[ -z "$INPUT_AZURE_LOCATION" ]]; then + echo "❌ ERROR: AZURE_LOCATION is required but was not provided" + VALIDATION_FAILED=true + elif [[ ! "$INPUT_AZURE_LOCATION" =~ ^[a-z0-9]+$ ]]; then + echo "❌ ERROR: AZURE_LOCATION '$INPUT_AZURE_LOCATION' is invalid. Must contain only lowercase letters and numbers (e.g., 'australiaeast', 'westus2')" + VALIDATION_FAILED=true + else + echo "✅ AZURE_LOCATION: '$INPUT_AZURE_LOCATION' is valid" + fi + + # Validate RESOURCE_GROUP_NAME (required - Azure resource group naming convention) + if [[ -z "$INPUT_RESOURCE_GROUP_NAME" ]]; then + echo "❌ ERROR: RESOURCE_GROUP_NAME is required but was not provided" + VALIDATION_FAILED=true + elif [[ ! "$INPUT_RESOURCE_GROUP_NAME" =~ ^[a-zA-Z0-9._\(\)-]+$ ]] || [[ "$INPUT_RESOURCE_GROUP_NAME" =~ \.$ ]]; then + echo "❌ ERROR: RESOURCE_GROUP_NAME '$INPUT_RESOURCE_GROUP_NAME' is invalid. Must contain only alphanumerics, periods, underscores, hyphens, and parentheses. Cannot end with period." + VALIDATION_FAILED=true + elif [[ ${#INPUT_RESOURCE_GROUP_NAME} -gt 90 ]]; then + echo "❌ ERROR: RESOURCE_GROUP_NAME '$INPUT_RESOURCE_GROUP_NAME' exceeds 90 characters" + VALIDATION_FAILED=true + else + echo "✅ RESOURCE_GROUP_NAME: '$INPUT_RESOURCE_GROUP_NAME' is valid" + fi + + # Validate IMAGE_TAG (required - Docker tag pattern) + if [[ -z "$INPUT_IMAGE_TAG" ]]; then + echo "❌ ERROR: IMAGE_TAG is required but was not provided" + VALIDATION_FAILED=true + elif [[ ! "$INPUT_IMAGE_TAG" =~ ^[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}$ ]]; then + echo "❌ ERROR: IMAGE_TAG '$INPUT_IMAGE_TAG' is invalid. 
Must start with alphanumeric or underscore, contain only alphanumerics, underscores, periods, hyphens, and be max 128 characters" + VALIDATION_FAILED=true + else + echo "✅ IMAGE_TAG: '$INPUT_IMAGE_TAG' is valid" + fi + + # Validate EXP (required - must be 'true' or 'false') + if [[ -z "$INPUT_EXP" ]]; then + echo "❌ ERROR: EXP is required but was not provided" + VALIDATION_FAILED=true + elif [[ "$INPUT_EXP" != "true" && "$INPUT_EXP" != "false" ]]; then + echo "❌ ERROR: EXP must be 'true' or 'false', got: '$INPUT_EXP'" + VALIDATION_FAILED=true + else + echo "✅ EXP: '$INPUT_EXP' is valid" + fi + + # Validate WAF_ENABLED (must be 'true' or 'false') + if [[ "$INPUT_WAF_ENABLED" != "true" && "$INPUT_WAF_ENABLED" != "false" ]]; then + echo "❌ ERROR: WAF_ENABLED must be 'true' or 'false', got: '$INPUT_WAF_ENABLED'" + VALIDATION_FAILED=true + else + echo "✅ WAF_ENABLED: '$INPUT_WAF_ENABLED' is valid" + fi + + # Validate AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID (optional - Azure Resource ID format) + if [[ -n "$INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID" ]]; then + if [[ ! "$INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID" =~ ^/subscriptions/[a-fA-F0-9-]+/[Rr]esource[Gg]roups/[^/]+/providers/[Mm]icrosoft\.[Oo]perational[Ii]nsights/[Ww]orkspaces/[^/]+$ ]]; then + echo "❌ ERROR: AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID is invalid. Must be a valid Azure Resource ID format:" + echo " /subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.OperationalInsights/workspaces/{workspaceName}" + echo " Got: '$INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID'" + VALIDATION_FAILED=true + else + echo "✅ AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: Valid Resource ID format" + fi + fi + + # Fail workflow if any validation failed + if [[ "$VALIDATION_FAILED" == "true" ]]; then + echo "" + echo "❌ Parameter validation failed. Please correct the errors above and try again." + exit 1 + fi + + echo "" + echo "✅ All input parameters validated successfully!" 
+ + - name: Checkout Code + uses: actions/checkout@v4 + + - name: Configure Parameters Based on WAF Setting + shell: bash + env: + INPUT_WAF_ENABLED: ${{ inputs.WAF_ENABLED }} + run: | + if [[ "$INPUT_WAF_ENABLED" == "true" ]]; then + cp infra/main.waf.parameters.json infra/main.parameters.json + echo "✅ Successfully copied WAF parameters to main parameters file" + else + echo "🔧 Configuring Non-WAF deployment - using default main.parameters.json..." + fi + + - name: Install Azure CLI + shell: bash + run: | + curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash + az --version # Verify installation + + - name: Install Kubernetes CLI (kubectl) + shell: bash + run: | + az aks install-cli + az extension add --name aks-preview + + - name: Install Helm + shell: bash + run: | + # If helm is already available on the runner, print version and skip installation + if command -v helm >/dev/null 2>&1; then + echo "helm already installed: $(helm version --short 2>/dev/null || true)" + exit 0 + fi + + # Ensure prerequisites are present + sudo apt-get update + sudo apt-get install -y apt-transport-https ca-certificates curl gnupg lsb-release + + # Ensure keyrings dir exists + sudo mkdir -p /usr/share/keyrings + + # Add Helm GPG key (use -fS to fail fast on curl errors) + curl -fsSL https://baltocdn.com/helm/signing.asc | gpg --dearmor | sudo tee /usr/share/keyrings/helm.gpg >/dev/null + + # Add the Helm apt repository + echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/helm.gpg] https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list + + # Install helm + sudo apt-get update + sudo apt-get install -y helm + + # Verify + echo "Installed helm version:" + helm version + + - name: Set up Docker + uses: docker/setup-buildx-action@v3 + with: + driver: docker + + - name: Setup Azure Developer CLI + uses: Azure/setup-azd@v2 + + - name: Login to Azure + uses: azure/login@v2 + with: + client-id: ${{ 
secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + + - name: Login to azd + run: | + azd auth login --client-id ${{ secrets.AZURE_CLIENT_ID }} --federated-credential-provider "github" --tenant-id ${{ secrets.AZURE_TENANT_ID }} + + + - name: Deploy using azd up + id: azd_deploy + shell: pwsh + env: + INPUT_ENV_NAME: ${{ inputs.ENV_NAME }} + INPUT_AZURE_ENV_OPENAI_LOCATION: ${{ inputs.AZURE_ENV_OPENAI_LOCATION }} + INPUT_AZURE_LOCATION: ${{ inputs.AZURE_LOCATION }} + INPUT_RESOURCE_GROUP_NAME: ${{ inputs.RESOURCE_GROUP_NAME }} + INPUT_IMAGE_TAG: ${{ inputs.IMAGE_TAG }} + INPUT_EXP: ${{ inputs.EXP }} + INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: ${{ inputs.AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID }} + run: | + # Create azd environment + azd env new $env:INPUT_ENV_NAME --no-prompt + + # Set environment variables + azd config set defaults.subscription ${{ secrets.AZURE_SUBSCRIPTION_ID }} + azd env set AZURE_SUBSCRIPTION_ID="${{ secrets.AZURE_SUBSCRIPTION_ID }}" + azd env set AZURE_ENV_OPENAI_LOCATION="$env:INPUT_AZURE_ENV_OPENAI_LOCATION" + azd env set AZURE_LOCATION="$env:INPUT_AZURE_LOCATION" + azd env set AZURE_RESOURCE_GROUP="$env:INPUT_RESOURCE_GROUP_NAME" + azd env set AZURE_ENV_IMAGE_TAG="$env:INPUT_IMAGE_TAG" + + # Set AI model capacity parameters + azd env set AZURE_ENV_MODEL_CAPACITY="150" + azd env set AZURE_ENV_EMBEDDING_MODEL_CAPACITY="200" + + if ($env:INPUT_EXP -eq "true") { + Write-Host "✅ EXP ENABLED - Setting EXP parameters..." 
+ + # Set EXP variables dynamically + if ($env:INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID -ne "") { + $EXP_LOG_ANALYTICS_ID = $env:INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID + } else { + $EXP_LOG_ANALYTICS_ID = "${{ secrets.AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID }}" + } + + Write-Host "AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: $EXP_LOG_ANALYTICS_ID" + azd env set AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID="$EXP_LOG_ANALYTICS_ID" + } else { + Write-Host "❌ EXP DISABLED - Skipping EXP parameters" + } + + # Deploy + azd up --no-prompt + + echo "✅ Azure Developer CLI (azd) deployment completed" + + - name: Get Deployment Outputs + id: get_output + env: + INPUT_RESOURCE_GROUP_NAME: ${{ inputs.RESOURCE_GROUP_NAME }} + run: | + # Get outputs from azd + azd env get-values --output json > /tmp/azd_output.json + cat /tmp/azd_output.json + + # Extract values and write to GITHUB_ENV using bash + while IFS='=' read -r key value; do + # Remove quotes from value + value=$(echo "$value" | tr -d '"') + echo "${key}=${value}" >> $GITHUB_ENV + done < <(jq -r 'to_entries[] | "\(.key)=\(.value)"' /tmp/azd_output.json) + + # Get AKS node resource group if AKS exists + if [ -n "$AZURE_AKS_NAME" ]; then + krg_name=$(az aks show --name "$AZURE_AKS_NAME" --resource-group "$INPUT_RESOURCE_GROUP_NAME" --query "nodeResourceGroup" -o tsv || echo "") + if [ -n "$krg_name" ]; then + echo "krg_name=$krg_name" >> $GITHUB_ENV + echo "AKS node resource group: $krg_name" + fi + fi + + - name: Login to Azure to refresh credentials for subsequent steps + uses: azure/login@v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + enable-AzPSSession: true + + - name: Run Deployment Script with Input + shell: pwsh + env: + # From GitHub secrets + AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + + # From workflow inputs and deployment outputs + RESOURCE_GROUP_NAME: ${{ inputs.RESOURCE_GROUP_NAME }} + 
AZURE_RESOURCE_GROUP_ID: ${{ env.AZURE_RESOURCE_GROUP_ID }} + STORAGE_ACCOUNT_NAME: ${{ env.STORAGE_ACCOUNT_NAME }} + AZURE_SEARCH_SERVICE_NAME: ${{ env.AZURE_SEARCH_SERVICE_NAME }} + AZURE_AKS_NAME: ${{ env.AZURE_AKS_NAME }} + AZURE_AKS_MI_ID: ${{ env.AZURE_AKS_MI_ID }} + AZURE_CONTAINER_REGISTRY_NAME: ${{ env.AZURE_CONTAINER_REGISTRY_NAME }} + AZURE_COGNITIVE_SERVICE_NAME: ${{ env.AZURE_COGNITIVE_SERVICE_NAME }} + AZURE_COGNITIVE_SERVICE_ENDPOINT: ${{ env.AZURE_COGNITIVE_SERVICE_ENDPOINT }} + AZURE_OPENAI_SERVICE_NAME: ${{ env.AZURE_OPENAI_SERVICE_NAME }} + AZURE_OPENAI_SERVICE_ENDPOINT: ${{ env.AZURE_OPENAI_SERVICE_ENDPOINT }} + AZURE_COSMOSDB_NAME: ${{ env.AZURE_COSMOSDB_NAME }} + AZ_GPT4O_MODEL_NAME: ${{ env.AZ_GPT4O_MODEL_NAME }} + AZ_GPT4O_MODEL_ID: ${{ env.AZ_GPT4O_MODEL_ID }} + AZ_GPT_EMBEDDING_MODEL_NAME: ${{ env.AZ_GPT_EMBEDDING_MODEL_NAME }} + AZ_GPT_EMBEDDING_MODEL_ID: ${{ env.AZ_GPT_EMBEDDING_MODEL_ID }} + AZURE_APP_CONFIG_ENDPOINT: ${{ env.AZURE_APP_CONFIG_ENDPOINT }} + AZURE_APP_CONFIG_NAME: ${{ env.AZURE_APP_CONFIG_NAME }} + run: | + cd Deployment + $input = @" + ${{ secrets.EMAIL }} + yes + "@ + $input | pwsh ./resourcedeployment.ps1 + Write-Host "Resource Group: $env:RESOURCE_GROUP_NAME" + Write-Host "AKS Cluster Name: $env:AZURE_AKS_NAME" + Write-Host "AKS Node Resource Group: $env:krg_name" + + - name: Retrieve Web App URL + id: get_webapp_url + shell: bash + run: | + # Get the Web App URL and save it to GITHUB_OUTPUT + echo "Retrieving Web App URL..." + public_ip_name=$(az network public-ip list --resource-group ${{ env.krg_name }} --query "[?contains(name, 'kubernetes-')].name" -o tsv) + fqdn=$(az network public-ip show --resource-group ${{ env.krg_name }} --name $public_ip_name --query "dnsSettings.fqdn" -o tsv) + if [ -n "$fqdn" ]; then + echo "WEB_APPURL=https://$fqdn" >> $GITHUB_OUTPUT + echo "Web App URL is https://$fqdn" + else + echo "Failed to retrieve Web App URL." 
+ exit 1 + fi + + - name: Validate Deployment + shell: bash + run: | + webapp_url="${{ steps.get_webapp_url.outputs.WEB_APPURL }}" + echo "Validating web app at: $webapp_url" + + # Enhanced health check with retry logic + max_attempts=7 + attempt=1 + success=false + + while [ $attempt -le $max_attempts ] && [ "$success" = false ]; do + echo "Attempt $attempt/$max_attempts: Checking web app health..." + + # Check if web app responds + http_code=$(curl -s -o /dev/null -w "%{http_code}" "$webapp_url" || echo "000") + + if [ "$http_code" -eq 200 ]; then + echo "✅ Web app is healthy (HTTP $http_code)" + success=true + elif [ "$http_code" -eq 404 ]; then + echo "❌ Web app not found (HTTP 404)" + break + elif [ "$http_code" -eq 503 ] || [ "$http_code" -eq 502 ]; then + echo "⚠️ Web app temporarily unavailable (HTTP $http_code), retrying..." + sleep 20 + else + echo "⚠️ Web app returned HTTP $http_code, retrying..." + sleep 20 + fi + + attempt=$((attempt + 1)) + done + + if [ "$success" = false ]; then + echo "❌ Web app validation failed after $max_attempts attempts" + exit 1 + fi + + - name: Run Post Deployment Script + continue-on-error: true + shell: pwsh + run: | + Write-Host "Running post deployment script to upload files..." + cd Deployment + try { + .\uploadfiles.ps1 -EndpointUrl ${{ steps.get_webapp_url.outputs.WEB_APPURL }} + Write-Host "ExitCode: $LASTEXITCODE" + if ($LASTEXITCODE -eq $null -or $LASTEXITCODE -eq 0) { + Write-Host "✅ Post deployment script completed successfully." 
+ } else { + Write-Host "❌ Post deployment script failed with exit code: $LASTEXITCODE" + exit 1 + } + } + catch { + Write-Host "❌ Post deployment script failed with error: $($_.Exception.Message)" + exit 1 + } + + - name: Generate Deploy Job Summary + if: always() + shell: bash + run: | + echo "## 🚀 Deploy Job Summary (Linux)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Field | Value |" >> $GITHUB_STEP_SUMMARY + echo "|-------|--------|" >> $GITHUB_STEP_SUMMARY + echo "| **Job Status** | ${{ job.status == 'success' && '✅ Success' || '❌ Failed' }} |" >> $GITHUB_STEP_SUMMARY + echo "| **Resource Group** | \`${{ inputs.RESOURCE_GROUP_NAME }}\` |" >> $GITHUB_STEP_SUMMARY + echo "| **Configuration Type** | \`${{ inputs.WAF_ENABLED == 'true' && inputs.EXP == 'true' && 'WAF + EXP' || inputs.WAF_ENABLED == 'true' && inputs.EXP != 'true' && 'WAF + Non-EXP' || inputs.WAF_ENABLED != 'true' && inputs.EXP == 'true' && 'Non-WAF + EXP' || 'Non-WAF + Non-EXP' }}\` |" >> $GITHUB_STEP_SUMMARY + echo "| **Azure Region (Infrastructure)** | \`${{ inputs.AZURE_LOCATION }}\` |" >> $GITHUB_STEP_SUMMARY + echo "| **Azure OpenAI Region** | \`${{ inputs.AZURE_ENV_OPENAI_LOCATION }}\` |" >> $GITHUB_STEP_SUMMARY + echo "| **Docker Image Tag** | \`${{ inputs.IMAGE_TAG }}\` |" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + if [[ "${{ job.status }}" == "success" ]]; then + echo "### ✅ Deployment Details" >> $GITHUB_STEP_SUMMARY + echo "- **Web App URL**: [${{ steps.get_webapp_url.outputs.WEB_APPURL }}](${{ steps.get_webapp_url.outputs.WEB_APPURL }})" >> $GITHUB_STEP_SUMMARY + echo "- Successfully deployed to Azure with all resources configured" >> $GITHUB_STEP_SUMMARY + echo "- Post-deployment scripts executed successfully" >> $GITHUB_STEP_SUMMARY + else + echo "### ❌ Deployment Failed" >> $GITHUB_STEP_SUMMARY + echo "- Deployment process encountered an error" >> $GITHUB_STEP_SUMMARY + echo "- Check the deploy job for detailed error information" >> 
$GITHUB_STEP_SUMMARY + fi + + - name: Logout from Azure + if: always() + shell: bash + run: | + az logout || true + echo "Logged out from Azure." diff --git a/.github/workflows/job-deploy.yml b/.github/workflows/job-deploy.yml new file mode 100644 index 00000000..1464b151 --- /dev/null +++ b/.github/workflows/job-deploy.yml @@ -0,0 +1,457 @@ +name: Deploy Job + +on: + workflow_call: + inputs: + trigger_type: + description: 'Trigger type (workflow_dispatch, pull_request, schedule)' + required: true + type: string + azure_location: + description: 'Azure Location For Deployment' + required: false + default: 'australiaeast' + type: string + resource_group_name: + description: 'Resource Group Name (Optional)' + required: false + default: '' + type: string + waf_enabled: + description: 'Enable WAF' + required: false + default: false + type: boolean + EXP: + description: 'Enable EXP' + required: false + default: false + type: boolean + cleanup_resources: + description: 'Cleanup Deployed Resources' + required: false + default: false + type: boolean + run_e2e_tests: + description: 'Run End-to-End Tests' + required: false + default: 'GoldenPath-Testing' + type: string + existing_webapp_url: + description: 'Existing Container WebApp URL (Skips Deployment)' + required: false + default: '' + type: string + AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: + description: 'Log Analytics Workspace ID (Optional)' + required: false + default: '' + type: string + outputs: + RESOURCE_GROUP_NAME: + description: "Resource Group Name" + value: ${{ jobs.azure-setup.outputs.RESOURCE_GROUP_NAME }} + WEB_APPURL: + description: "Container Web App URL" + value: ${{ jobs.deploy-linux.outputs.WEB_APPURL }} + ENV_NAME: + description: "Environment Name" + value: ${{ jobs.azure-setup.outputs.ENV_NAME }} + AZURE_LOCATION: + description: "Azure Location" + value: ${{ jobs.azure-setup.outputs.AZURE_LOCATION }} + AZURE_ENV_OPENAI_LOCATION: + description: "Azure OpenAI Location" + value: ${{ 
jobs.azure-setup.outputs.AZURE_ENV_OPENAI_LOCATION }} + IMAGE_TAG: + description: "Docker Image Tag Used" + value: ${{ jobs.azure-setup.outputs.IMAGE_TAG }} + QUOTA_FAILED: + description: "Quota Check Failed Flag" + value: ${{ jobs.azure-setup.outputs.QUOTA_FAILED }} + +env: + GPT_MIN_CAPACITY: 150 + TEXT_EMBEDDING_MIN_CAPACITY: 80 + BRANCH_NAME: ${{ github.event.workflow_run.head_branch || github.head_ref || github.ref_name }} + WAF_ENABLED: ${{ inputs.trigger_type == 'workflow_dispatch' && (inputs.waf_enabled || false) || false }} + EXP: ${{ inputs.trigger_type == 'workflow_dispatch' && (inputs.EXP || false) || false }} + CLEANUP_RESOURCES: ${{ inputs.trigger_type != 'workflow_dispatch' || inputs.cleanup_resources }} + RUN_E2E_TESTS: ${{ inputs.trigger_type == 'workflow_dispatch' && (inputs.run_e2e_tests || 'GoldenPath-Testing') || 'GoldenPath-Testing' }} + + +jobs: + azure-setup: + name: Azure Setup + if: inputs.trigger_type != 'workflow_dispatch' || inputs.existing_webapp_url == '' || inputs.existing_webapp_url == null + runs-on: ubuntu-latest + environment: production + outputs: + RESOURCE_GROUP_NAME: ${{ steps.check_create_rg.outputs.RESOURCE_GROUP_NAME }} + ENV_NAME: ${{ steps.generate_env_name.outputs.ENV_NAME }} + AZURE_LOCATION: ${{ steps.set_region.outputs.AZURE_LOCATION }} + AZURE_ENV_OPENAI_LOCATION: ${{ steps.set_region.outputs.AZURE_ENV_OPENAI_LOCATION }} + IMAGE_TAG: ${{ steps.determine_image_tag.outputs.IMAGE_TAG }} + QUOTA_FAILED: ${{ steps.quota_failure_output.outputs.QUOTA_FAILED }} + EXP_ENABLED: ${{ steps.configure_exp.outputs.EXP_ENABLED }} + + steps: + - name: Validate Workflow Input Parameters + shell: bash + env: + INPUT_TRIGGER_TYPE: ${{ inputs.trigger_type }} + INPUT_AZURE_LOCATION: ${{ inputs.azure_location }} + INPUT_RESOURCE_GROUP_NAME: ${{ inputs.resource_group_name }} + INPUT_WAF_ENABLED: ${{ inputs.waf_enabled }} + INPUT_EXP: ${{ inputs.EXP }} + INPUT_CLEANUP_RESOURCES: ${{ inputs.cleanup_resources }} + INPUT_RUN_E2E_TESTS: ${{ 
inputs.run_e2e_tests }} + INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: ${{ inputs.AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID }} + INPUT_EXISTING_WEBAPP_URL: ${{ inputs.existing_webapp_url }} + run: | + echo "🔍 Validating workflow input parameters..." + VALIDATION_FAILED=false + + # Validate trigger_type (required - alphanumeric with underscores) + if [[ -z "$INPUT_TRIGGER_TYPE" ]]; then + echo "❌ ERROR: trigger_type is required but was not provided" + VALIDATION_FAILED=true + elif [[ ! "$INPUT_TRIGGER_TYPE" =~ ^[a-zA-Z0-9_]+$ ]]; then + echo "❌ ERROR: trigger_type '$INPUT_TRIGGER_TYPE' is invalid. Must contain only alphanumeric characters and underscores" + VALIDATION_FAILED=true + else + echo "✅ trigger_type: '$INPUT_TRIGGER_TYPE' is valid" + fi + + # Validate azure_location (Azure region format) + if [[ -n "$INPUT_AZURE_LOCATION" ]]; then + if [[ ! "$INPUT_AZURE_LOCATION" =~ ^[a-z0-9]+$ ]]; then + echo "❌ ERROR: azure_location '$INPUT_AZURE_LOCATION' is invalid. Must contain only lowercase letters and numbers (e.g., 'australiaeast', 'westus2')" + VALIDATION_FAILED=true + else + echo "✅ azure_location: '$INPUT_AZURE_LOCATION' is valid" + fi + fi + + # Validate resource_group_name (Azure resource group naming convention) + if [[ -n "$INPUT_RESOURCE_GROUP_NAME" ]]; then + if [[ ! "$INPUT_RESOURCE_GROUP_NAME" =~ ^[a-zA-Z0-9._\(\)-]+$ ]] || [[ "$INPUT_RESOURCE_GROUP_NAME" =~ \.$ ]]; then + echo "❌ ERROR: resource_group_name '$INPUT_RESOURCE_GROUP_NAME' is invalid. Must contain only alphanumerics, periods, underscores, hyphens, and parentheses. Cannot end with period." 
+ VALIDATION_FAILED=true + elif [[ ${#INPUT_RESOURCE_GROUP_NAME} -gt 90 ]]; then + echo "❌ ERROR: resource_group_name '$INPUT_RESOURCE_GROUP_NAME' exceeds 90 characters" + VALIDATION_FAILED=true + else + echo "✅ resource_group_name: '$INPUT_RESOURCE_GROUP_NAME' is valid" + fi + fi + + # Validate waf_enabled (boolean) + if [[ "$INPUT_WAF_ENABLED" != "true" && "$INPUT_WAF_ENABLED" != "false" ]]; then + echo "❌ ERROR: waf_enabled must be 'true' or 'false', got: '$INPUT_WAF_ENABLED'" + VALIDATION_FAILED=true + else + echo "✅ waf_enabled: '$INPUT_WAF_ENABLED' is valid" + fi + + # Validate EXP (boolean) + if [[ "$INPUT_EXP" != "true" && "$INPUT_EXP" != "false" ]]; then + echo "❌ ERROR: EXP must be 'true' or 'false', got: '$INPUT_EXP'" + VALIDATION_FAILED=true + else + echo "✅ EXP: '$INPUT_EXP' is valid" + fi + + # Validate cleanup_resources (boolean) + if [[ "$INPUT_CLEANUP_RESOURCES" != "true" && "$INPUT_CLEANUP_RESOURCES" != "false" ]]; then + echo "❌ ERROR: cleanup_resources must be 'true' or 'false', got: '$INPUT_CLEANUP_RESOURCES'" + VALIDATION_FAILED=true + else + echo "✅ cleanup_resources: '$INPUT_CLEANUP_RESOURCES' is valid" + fi + + # Validate run_e2e_tests (specific allowed values) + if [[ -n "$INPUT_RUN_E2E_TESTS" ]]; then + ALLOWED_VALUES=("None" "GoldenPath-Testing" "Smoke-Testing") + if [[ ! " ${ALLOWED_VALUES[@]} " =~ " ${INPUT_RUN_E2E_TESTS} " ]]; then + echo "❌ ERROR: run_e2e_tests '$INPUT_RUN_E2E_TESTS' is invalid. Allowed values: ${ALLOWED_VALUES[*]}" + VALIDATION_FAILED=true + else + echo "✅ run_e2e_tests: '$INPUT_RUN_E2E_TESTS' is valid" + fi + fi + + # Validate AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID (Azure Resource ID format) + if [[ -n "$INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID" ]]; then + if [[ ! "$INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID" =~ ^/subscriptions/[a-fA-F0-9-]+/[Rr]esource[Gg]roups/[^/]+/providers/[Mm]icrosoft\.[Oo]perational[Ii]nsights/[Ww]orkspaces/[^/]+$ ]]; then + echo "❌ ERROR: AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID is invalid. 
Must be a valid Azure Resource ID format:" + echo " /subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.OperationalInsights/workspaces/{workspaceName}" + echo " Got: '$INPUT_AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID'" + VALIDATION_FAILED=true + else + echo "✅ AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: Valid Resource ID format" + fi + fi + + # Validate existing_webapp_url (must start with https) + if [[ -n "$INPUT_EXISTING_WEBAPP_URL" ]]; then + if [[ ! "$INPUT_EXISTING_WEBAPP_URL" =~ ^https:// ]]; then + echo "❌ ERROR: existing_webapp_url must start with 'https://', got: '$INPUT_EXISTING_WEBAPP_URL'" + VALIDATION_FAILED=true + else + echo "✅ existing_webapp_url: '$INPUT_EXISTING_WEBAPP_URL' is valid" + fi + fi + + # Fail workflow if any validation failed + if [[ "$VALIDATION_FAILED" == "true" ]]; then + echo "" + echo "❌ Parameter validation failed. Please correct the errors above and try again." + exit 1 + fi + + echo "" + echo "✅ All input parameters validated successfully!" + + - name: Validate and Auto-Configure EXP + id: configure_exp + shell: bash + env: + INPUT_EXP: ${{ inputs.EXP }} + INPUT_LOG_ANALYTICS_WORKSPACE_ID: ${{ inputs.AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID }} + run: | + echo "🔍 Validating EXP configuration..." + + EXP_ENABLED="false" + + if [[ "$INPUT_EXP" == "true" ]]; then + EXP_ENABLED="true" + echo "✅ EXP explicitly enabled by user input" + elif [[ -n "$INPUT_LOG_ANALYTICS_WORKSPACE_ID" ]]; then + echo "🔧 AUTO-ENABLING EXP: Log Analytics Workspace ID was provided but EXP was not explicitly enabled." + echo "" + echo "You provided values for:" + echo " - Azure Log Analytics Workspace ID: '$INPUT_LOG_ANALYTICS_WORKSPACE_ID'" + echo "" + echo "✅ Automatically enabling EXP to use these values." 
+ EXP_ENABLED="true" + fi + + echo "EXP_ENABLED=$EXP_ENABLED" >> $GITHUB_ENV + echo "EXP_ENABLED=$EXP_ENABLED" >> $GITHUB_OUTPUT + echo "Final EXP status: $EXP_ENABLED" + + - name: Checkout Code + uses: actions/checkout@v4 + + - name: Login to Azure + uses: azure/login@v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + enable-AzPSSession: true + + - name: Run Quota Check + id: quota-check + shell: pwsh + run: | + $ErrorActionPreference = "Stop" # Ensure that any error stops the pipeline + + # Path to the PowerShell script for quota check + $quotaCheckScript = "Deployment/checkquota.ps1" + + # Check if the script exists + if (-not (Test-Path $quotaCheckScript)) { + Write-Host "❌ Error: Quota check script not found." + exit 1 + } + + # Run the script and capture its output (stdout and stderr) + $output = & $quotaCheckScript 2>&1 + $exitCode = $LASTEXITCODE + + # Check the execution output for the quota failure message + $quotaFailedMessage = "No region with sufficient quota found" + if ($output -match [Regex]::Escape($quotaFailedMessage) -or $exitCode -ne 0) { + echo "QUOTA_FAILED=true" >> $env:GITHUB_ENV + } + env: + AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + GPT_MIN_CAPACITY: ${{ env.GPT_MIN_CAPACITY }} + TEXT_EMBEDDING_MIN_CAPACITY: ${{ env.TEXT_EMBEDDING_MIN_CAPACITY }} + AZURE_REGIONS: "${{ vars.AZURE_REGIONS }}" + + - name: Set Quota Failure Output + id: quota_failure_output + if: env.QUOTA_FAILED == 'true' + shell: bash + run: | + echo "QUOTA_FAILED=true" >> $GITHUB_OUTPUT + echo "Quota check failed - will notify via separate notification job" + + - name: Fail Pipeline if Quota Check Fails + if: env.QUOTA_FAILED == 'true' + shell: bash + run: exit 1 + + - name: Set Deployment Region + id: set_region + shell: bash + env: + INPUT_TRIGGER_TYPE: ${{ inputs.trigger_type }} + INPUT_AZURE_LOCATION: ${{ inputs.azure_location }} + run: | + if [[ 
-z "$VALID_REGION" ]]; then + echo "❌ ERROR: VALID_REGION is not set. The quota check script (Deployment/checkquota.ps1) must set this variable before this step runs." >&2 + exit 1 + fi + echo "Selected Region from Quota Check: $VALID_REGION" + echo "AZURE_ENV_OPENAI_LOCATION=$VALID_REGION" >> $GITHUB_ENV + echo "AZURE_ENV_OPENAI_LOCATION=$VALID_REGION" >> $GITHUB_OUTPUT + + if [[ "$INPUT_TRIGGER_TYPE" == "workflow_dispatch" && -n "$INPUT_AZURE_LOCATION" ]]; then + USER_SELECTED_LOCATION="$INPUT_AZURE_LOCATION" + echo "Using user-selected Azure location: $USER_SELECTED_LOCATION" + echo "AZURE_LOCATION=$USER_SELECTED_LOCATION" >> $GITHUB_ENV + echo "AZURE_LOCATION=$USER_SELECTED_LOCATION" >> $GITHUB_OUTPUT + else + echo "Using location from quota check for automatic triggers: $VALID_REGION" + echo "AZURE_LOCATION=$VALID_REGION" >> $GITHUB_ENV + echo "AZURE_LOCATION=$VALID_REGION" >> $GITHUB_OUTPUT + fi + + - name: Generate Resource Group Name + id: generate_rg_name + shell: bash + env: + INPUT_RESOURCE_GROUP_NAME: ${{ inputs.resource_group_name }} + run: | + # Check if a resource group name was provided as input + if [[ -n "$INPUT_RESOURCE_GROUP_NAME" ]]; then + echo "Using provided Resource Group name: $INPUT_RESOURCE_GROUP_NAME" + echo "RESOURCE_GROUP_NAME=$INPUT_RESOURCE_GROUP_NAME" >> $GITHUB_ENV + else + echo "Generating a unique resource group name..." + ACCL_NAME="dkm" # Account name as specified + SHORT_UUID=$(uuidgen | cut -d'-' -f1) + UNIQUE_RG_NAME="arg-${ACCL_NAME}-${SHORT_UUID}" + echo "RESOURCE_GROUP_NAME=${UNIQUE_RG_NAME}" >> $GITHUB_ENV + echo "Generated RESOURCE_GROUP_NAME: ${UNIQUE_RG_NAME}" + fi + + - name: Install Bicep CLI + shell: bash + run: az bicep install + + - name: Check and Create Resource Group + id: check_create_rg + shell: bash + run: | + set -e + echo "🔍 Checking if resource group '$RESOURCE_GROUP_NAME' exists..." 
+ rg_exists=$(az group exists --name $RESOURCE_GROUP_NAME) + if [ "$rg_exists" = "false" ]; then + echo "📦 Resource group does not exist. Creating new resource group '$RESOURCE_GROUP_NAME' in location '$AZURE_LOCATION'..." + az group create --name $RESOURCE_GROUP_NAME --location $AZURE_LOCATION || { echo "❌ Error creating resource group"; exit 1; } + echo "✅ Resource group '$RESOURCE_GROUP_NAME' created successfully." + else + echo "✅ Resource group '$RESOURCE_GROUP_NAME' already exists. Deploying to existing resource group." + fi + echo "RESOURCE_GROUP_NAME=$RESOURCE_GROUP_NAME" >> $GITHUB_OUTPUT + echo "RESOURCE_GROUP_NAME=$RESOURCE_GROUP_NAME" >> $GITHUB_ENV + + - name: Determine Docker Image Tag + id: determine_image_tag + run: | + echo "🏷️ Using existing Docker image based on branch..." + BRANCH_NAME="${{ env.BRANCH_NAME }}" + echo "Current branch: $BRANCH_NAME" + + # Determine image tag based on branch + if [[ "$BRANCH_NAME" == "main" ]]; then + IMAGE_TAG="latest_waf" + echo "Using main branch - image tag: latest_waf" + elif [[ "$BRANCH_NAME" == "dev" ]]; then + IMAGE_TAG="dev" + echo "Using dev branch - image tag: dev" + elif [[ "$BRANCH_NAME" == "demo" ]]; then + IMAGE_TAG="demo" + echo "Using demo branch - image tag: demo" + else + IMAGE_TAG="latest_waf" + echo "Using default for branch '$BRANCH_NAME' - image tag: latest_waf" + fi + + echo "Using existing Docker image tag: $IMAGE_TAG" + + echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_ENV + echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_OUTPUT + + - name: Generate Unique Environment Name + id: generate_env_name + shell: bash + run: | + COMMON_PART="pslc" + TIMESTAMP=$(date +%s) + UPDATED_TIMESTAMP=$(echo $TIMESTAMP | tail -c 6) + UNIQUE_ENV_NAME="${COMMON_PART}${UPDATED_TIMESTAMP}" + echo "ENV_NAME=${UNIQUE_ENV_NAME}" >> $GITHUB_ENV + echo "Generated Environment Name: ${UNIQUE_ENV_NAME}" + echo "ENV_NAME=${UNIQUE_ENV_NAME}" >> $GITHUB_OUTPUT + + - name: Display Workflow Configuration to GitHub Summary + shell: bash + env: 
+ INPUT_TRIGGER_TYPE: ${{ inputs.trigger_type }} + INPUT_AZURE_LOCATION: ${{ inputs.azure_location }} + INPUT_RESOURCE_GROUP_NAME: ${{ inputs.resource_group_name }} + STEP_EVENT_NAME: ${{ github.event_name }} + STEP_BRANCH_NAME: ${{ env.BRANCH_NAME }} + STEP_WAF_ENABLED: ${{ env.WAF_ENABLED }} + STEP_EXP: ${{ env.EXP }} + STEP_RUN_E2E_TESTS: ${{ env.RUN_E2E_TESTS }} + STEP_CLEANUP_RESOURCES: ${{ env.CLEANUP_RESOURCES }} + run: | + echo "## 📋 Workflow Configuration Summary" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Configuration | Value |" >> $GITHUB_STEP_SUMMARY + echo "|---------------|-------|" >> $GITHUB_STEP_SUMMARY + echo "| **WAF Enabled** | ${{ env.WAF_ENABLED == 'true' && '✅ Yes' || '❌ No' }} |" >> $GITHUB_STEP_SUMMARY + echo "| **EXP Enabled** | ${{ env.EXP == 'true' && '✅ Yes' || '❌ No' }} |" >> $GITHUB_STEP_SUMMARY + echo "| **Run E2E Tests** | \`${{ env.RUN_E2E_TESTS }}\` |" >> $GITHUB_STEP_SUMMARY + echo "| **Cleanup Resources** | ${{ env.CLEANUP_RESOURCES == 'true' && '✅ Yes' || '❌ No' }} |" >> $GITHUB_STEP_SUMMARY + + if [[ "${{ inputs.trigger_type }}" == "workflow_dispatch" && -n "${{ inputs.azure_location }}" ]]; then + echo "| **Azure Location** | \`${{ inputs.azure_location }}\` (User Selected) |" >> $GITHUB_STEP_SUMMARY + fi + + if [[ -n "${{ inputs.resource_group_name }}" ]]; then + echo "| **Resource Group** | \`${{ inputs.resource_group_name }}\` (Pre-specified) |" >> $GITHUB_STEP_SUMMARY + else + echo "| **Resource Group** | \`${{ env.RESOURCE_GROUP_NAME }}\` (Auto-generated) |" >> $GITHUB_STEP_SUMMARY + fi + + echo "" >> $GITHUB_STEP_SUMMARY + + if [[ "${{ inputs.trigger_type }}" != "workflow_dispatch" ]]; then + echo "ℹ️ **Note:** Automatic Trigger - Using Non-WAF + Non-EXP configuration" >> $GITHUB_STEP_SUMMARY + else + echo "ℹ️ **Note:** Manual Trigger - Using user-specified configuration" >> $GITHUB_STEP_SUMMARY + fi + + deploy-linux: + name: Deploy + needs: azure-setup + if: "!cancelled() && 
needs.azure-setup.result == 'success'" + uses: ./.github/workflows/job-deploy-linux.yml + with: + ENV_NAME: ${{ needs.azure-setup.outputs.ENV_NAME }} + AZURE_ENV_OPENAI_LOCATION: ${{ needs.azure-setup.outputs.AZURE_ENV_OPENAI_LOCATION }} + AZURE_LOCATION: ${{ needs.azure-setup.outputs.AZURE_LOCATION }} + RESOURCE_GROUP_NAME: ${{ needs.azure-setup.outputs.RESOURCE_GROUP_NAME }} + IMAGE_TAG: ${{ needs.azure-setup.outputs.IMAGE_TAG }} + EXP: ${{ needs.azure-setup.outputs.EXP_ENABLED || inputs.EXP || 'false' }} + WAF_ENABLED: ${{ inputs.waf_enabled == true && 'true' || 'false' }} + AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID: ${{ inputs.AZURE_ENV_LOG_ANALYTICS_WORKSPACE_ID }} + secrets: inherit diff --git a/.github/workflows/job-send-notification.yml b/.github/workflows/job-send-notification.yml new file mode 100644 index 00000000..5ca35243 --- /dev/null +++ b/.github/workflows/job-send-notification.yml @@ -0,0 +1,224 @@ +name: Send Notification Job + +on: + workflow_call: + inputs: + trigger_type: + description: 'Trigger type (workflow_dispatch, pull_request, schedule)' + required: true + type: string + waf_enabled: + description: 'Enable WAF' + required: false + default: false + type: boolean + EXP: + description: 'Enable EXP' + required: false + default: false + type: boolean + run_e2e_tests: + description: 'Run End-to-End Tests' + required: false + default: 'GoldenPath-Testing' + type: string + existing_webapp_url: + description: 'Existing Container WebApp URL (Skips Deployment)' + required: false + default: '' + type: string + deploy_result: + description: 'Deploy job result (success, failure, skipped)' + required: true + type: string + e2e_test_result: + description: 'E2E test job result (success, failure, skipped)' + required: true + type: string + WEB_APPURL: + description: 'Container Web App URL' + required: false + default: '' + type: string + RESOURCE_GROUP_NAME: + description: 'Resource Group Name' + required: false + default: '' + type: string + QUOTA_FAILED: + 
description: 'Quota Check Failed Flag' + required: false + default: 'false' + type: string + TEST_SUCCESS: + description: 'Test Success Flag' + required: false + default: '' + type: string + TEST_REPORT_URL: + description: 'Test Report URL' + required: false + default: '' + type: string + +env: + GPT_MIN_CAPACITY: 100 + BRANCH_NAME: ${{ github.event.workflow_run.head_branch || github.head_ref || github.ref_name }} + WAF_ENABLED: ${{ inputs.trigger_type == 'workflow_dispatch' && (inputs.waf_enabled || false) || false }} + EXP: ${{ inputs.trigger_type == 'workflow_dispatch' && (inputs.EXP || false) || false }} + RUN_E2E_TESTS: ${{ inputs.trigger_type == 'workflow_dispatch' && (inputs.run_e2e_tests || 'GoldenPath-Testing') || 'GoldenPath-Testing' }} + +jobs: + send-notification: + runs-on: ubuntu-latest + continue-on-error: true + env: + accelerator_name: "DKM" + steps: + - name: Determine Test Suite Display Name + id: test_suite + shell: bash + run: | + if [ "${{ env.RUN_E2E_TESTS }}" = "GoldenPath-Testing" ]; then + TEST_SUITE_NAME="Golden Path Testing" + elif [ "${{ env.RUN_E2E_TESTS }}" = "Smoke-Testing" ]; then + TEST_SUITE_NAME="Smoke Testing" + elif [ "${{ env.RUN_E2E_TESTS }}" = "None" ]; then + TEST_SUITE_NAME="None" + else + TEST_SUITE_NAME="${{ env.RUN_E2E_TESTS }}" + fi + echo "TEST_SUITE_NAME=$TEST_SUITE_NAME" >> $GITHUB_OUTPUT + echo "Test Suite: $TEST_SUITE_NAME" + + - name: Send Quota Failure Notification + if: inputs.deploy_result == 'failure' && inputs.QUOTA_FAILED == 'true' + shell: bash + run: | + RUN_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" + EMAIL_BODY=$(cat <Dear Team,

We would like to inform you that the ${{ env.accelerator_name }} deployment has failed due to insufficient quota in the requested regions.

Issue Details:
• Quota check failed for GPT model
• Required GPT Capacity: ${{ env.GPT_MIN_CAPACITY }}
• Checked Regions: ${{ vars.AZURE_REGIONS }}

Run URL: ${RUN_URL}

Please resolve the quota issue and retry the deployment.

Best regards,
Your Automation Team

", + "subject": "${{ env.accelerator_name }} Pipeline - Failed (Insufficient Quota)" + } + EOF + ) + + curl -X POST "${{ secrets.EMAILNOTIFICATION_LOGICAPP_URL_TA }}" \ + -H "Content-Type: application/json" \ + -d "$EMAIL_BODY" || echo "Failed to send quota failure notification" + + - name: Send Deployment Failure Notification + if: inputs.deploy_result == 'failure' && inputs.QUOTA_FAILED != 'true' + shell: bash + run: | + RUN_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" + RESOURCE_GROUP="${{ inputs.RESOURCE_GROUP_NAME }}" + + EMAIL_BODY=$(cat <Dear Team,

We would like to inform you that the ${{ env.accelerator_name }} deployment process has encountered an issue and has failed to complete successfully.

Deployment Details:
• Resource Group: ${RESOURCE_GROUP}
• WAF Enabled: ${{ env.WAF_ENABLED }}
• EXP Enabled: ${{ env.EXP }}

Run URL: ${RUN_URL}

Please investigate the deployment failure at your earliest convenience.

Best regards,
Your Automation Team

", + "subject": "${{ env.accelerator_name }} Pipeline - Failed" + } + EOF + ) + + curl -X POST "${{ secrets.EMAILNOTIFICATION_LOGICAPP_URL_TA }}" \ + -H "Content-Type: application/json" \ + -d "$EMAIL_BODY" || echo "Failed to send deployment failure notification" + + - name: Send Success Notification + if: inputs.deploy_result == 'success' && (inputs.e2e_test_result == 'skipped' || inputs.TEST_SUCCESS == 'true') + shell: bash + run: | + RUN_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" + WEBAPP_URL="${{ inputs.WEB_APPURL || inputs.existing_webapp_url }}" + RESOURCE_GROUP="${{ inputs.RESOURCE_GROUP_NAME }}" + TEST_REPORT_URL="${{ inputs.TEST_REPORT_URL }}" + TEST_SUITE_NAME="${{ steps.test_suite.outputs.TEST_SUITE_NAME }}" + + if [ "${{ inputs.e2e_test_result }}" = "skipped" ]; then + EMAIL_BODY=$(cat <Dear Team,

We would like to inform you that the ${{ env.accelerator_name }} deployment has completed successfully.

Deployment Details:
• Resource Group: ${RESOURCE_GROUP}
• Web App URL: ${WEBAPP_URL}
• E2E Tests: Skipped (as configured)

Configuration:
• WAF Enabled: ${{ env.WAF_ENABLED }}
• EXP Enabled: ${{ env.EXP }}

Run URL: ${RUN_URL}

Best regards,
Your Automation Team

", + "subject": "${{ env.accelerator_name }} Pipeline - Deployment Success" + } + EOF + ) + else + EMAIL_BODY=$(cat <Dear Team,

We would like to inform you that the ${{ env.accelerator_name }} deployment and testing process has completed successfully.

Deployment Details:
• Resource Group: ${RESOURCE_GROUP}
• Web App URL: ${WEBAPP_URL}
• E2E Tests: Passed ✅
• Test Suite: ${TEST_SUITE_NAME}
• Test Report: View Report

Configuration:
• WAF Enabled: ${{ env.WAF_ENABLED }}
• EXP Enabled: ${{ env.EXP }}

Run URL: ${RUN_URL}

Best regards,
Your Automation Team

", + "subject": "${{ env.accelerator_name }} Pipeline - Test Automation - Success" + } + EOF + ) + fi + + curl -X POST "${{ secrets.EMAILNOTIFICATION_LOGICAPP_URL_TA }}" \ + -H "Content-Type: application/json" \ + -d "$EMAIL_BODY" || echo "Failed to send success notification" + + - name: Send Test Failure Notification + if: inputs.deploy_result == 'success' && inputs.e2e_test_result != 'skipped' && inputs.TEST_SUCCESS != 'true' + shell: bash + run: | + RUN_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" + TEST_REPORT_URL="${{ inputs.TEST_REPORT_URL }}" + WEBAPP_URL="${{ inputs.WEB_APPURL || inputs.existing_webapp_url }}" + RESOURCE_GROUP="${{ inputs.RESOURCE_GROUP_NAME }}" + TEST_SUITE_NAME="${{ steps.test_suite.outputs.TEST_SUITE_NAME }}" + + EMAIL_BODY=$(cat <Dear Team,

We would like to inform you that the ${{ env.accelerator_name }} accelerator test automation process has encountered issues and failed to complete successfully.

Deployment Details:
• Resource Group: ${RESOURCE_GROUP}
• Web App URL: ${WEBAPP_URL}
• Deployment Status: ✅ Success
• E2E Tests: ❌ Failed
• Test Suite: ${TEST_SUITE_NAME}

Test Details:
• Test Report: View Report

Run URL: ${RUN_URL}

Please investigate the matter at your earliest convenience.

Best regards,
Your Automation Team

", + "subject": "${{ env.accelerator_name }} Pipeline - Test Automation - Failed" + } + EOF + ) + + curl -X POST "${{ secrets.EMAILNOTIFICATION_LOGICAPP_URL_TA }}" \ + -H "Content-Type: application/json" \ + -d "$EMAIL_BODY" || echo "Failed to send test failure notification" + + - name: Send Existing URL Success Notification + if: inputs.deploy_result == 'skipped' && inputs.existing_webapp_url != '' && inputs.e2e_test_result == 'success' + shell: bash + run: | + RUN_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" + EXISTING_URL="${{ inputs.existing_webapp_url }}" + TEST_REPORT_URL="${{ inputs.TEST_REPORT_URL }}" + TEST_SUITE_NAME="${{ steps.test_suite.outputs.TEST_SUITE_NAME }}" + + EMAIL_BODY=$(cat <Dear Team,

The ${{ env.accelerator_name }} pipeline executed against the existing WebApp URL and the testing process has completed successfully.

Test Results:
• Status: ✅ Passed
• Test Suite: ${TEST_SUITE_NAME}
${TEST_REPORT_URL:+• Test Report: View Report}
• Target URL: ${EXISTING_URL}

Deployment: Skipped

Run URL: ${RUN_URL}

Best regards,
Your Automation Team

", + "subject": "${{ env.accelerator_name }} Pipeline - Test Automation Passed (Existing URL)" + } + EOF + ) + + curl -X POST "${{ secrets.EMAILNOTIFICATION_LOGICAPP_URL_TA }}" \ + -H "Content-Type: application/json" \ + -d "$EMAIL_BODY" || echo "Failed to send existing URL success notification" + + - name: Send Existing URL Test Failure Notification + if: inputs.deploy_result == 'skipped' && inputs.existing_webapp_url != '' && inputs.e2e_test_result == 'failure' + shell: bash + run: | + RUN_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" + EXISTING_URL="${{ inputs.existing_webapp_url }}" + TEST_REPORT_URL="${{ inputs.TEST_REPORT_URL }}" + TEST_SUITE_NAME="${{ steps.test_suite.outputs.TEST_SUITE_NAME }}" + + EMAIL_BODY=$(cat <Dear Team,

The ${{ env.accelerator_name }} pipeline executed against the existing WebApp URL and the test automation has encountered issues and failed to complete successfully.

Failure Details:
• Target URL: ${EXISTING_URL}
${TEST_REPORT_URL:+• Test Report: View Report}
• Test Suite: ${TEST_SUITE_NAME}
• Deployment: Skipped

Run URL: ${RUN_URL}

Best regards,
Your Automation Team

", + "subject": "${{ env.accelerator_name }} Pipeline - Test Automation Failed (Existing URL)" + } + EOF + ) + + curl -X POST "${{ secrets.EMAILNOTIFICATION_LOGICAPP_URL_TA }}" \ + -H "Content-Type: application/json" \ + -d "$EMAIL_BODY" || echo "Failed to send existing URL test failure notification" diff --git a/.github/workflows/test-automation-v2.yml b/.github/workflows/test-automation-v2.yml new file mode 100644 index 00000000..20e8e48a --- /dev/null +++ b/.github/workflows/test-automation-v2.yml @@ -0,0 +1,195 @@ +name: Test Automation Dkm-v2 + +on: + workflow_call: + inputs: + TEST_URL: + required: true + type: string + description: "Web URL for Dkm" + TEST_SUITE: + required: false + type: string + default: "GoldenPath-Testing" + description: "Test suite to run: 'Smoke-Testing', 'GoldenPath-Testing' " + outputs: + TEST_SUCCESS: + description: "Whether tests passed" + value: ${{ jobs.test.outputs.TEST_SUCCESS }} + TEST_REPORT_URL: + description: "URL to test report artifact" + value: ${{ jobs.test.outputs.TEST_REPORT_URL }} + +env: + url: ${{ inputs.TEST_URL }} + accelerator_name: "DKM" + test_suite: ${{ inputs.TEST_SUITE }} + +jobs: + test: + runs-on: ubuntu-latest + environment: production + outputs: + TEST_SUCCESS: ${{ steps.test1.outcome == 'success' || steps.test2.outcome == 'success' || steps.test3.outcome == 'success' }} + TEST_REPORT_URL: ${{ steps.upload_report.outputs.artifact-url }} + steps: + - name: Checkout repository + uses: actions/checkout@v5 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.13' + + - name: Login to Azure + uses: azure/login@v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r tests/e2e-test/requirements.txt + + - name: Ensure browsers are installed + run: python -m playwright install 
--with-deps chromium + + - name: Validate URL + run: | + if [ -z "${{ env.url }}" ]; then + echo "ERROR: No URL provided for testing" + exit 1 + fi + + echo "Testing URL: ${{ env.url }}" + echo "Test Suite: ${{ env.test_suite }}" + + + - name: Wait for Application to be Ready + run: | + echo "Waiting for application to be ready at ${{ env.url }} " + max_attempts=10 + attempt=1 + + while [ $attempt -le $max_attempts ]; do + echo "Attempt $attempt: Checking if application is ready..." + if curl -f -s "${{ env.url }}" > /dev/null; then + echo "Application is ready!" + break + + fi + + if [ $attempt -eq $max_attempts ]; then + echo "Application is not ready after $max_attempts attempts" + exit 1 + fi + + echo "Application not ready, waiting 30 seconds..." + sleep 30 + attempt=$((attempt + 1)) + done + + - name: Run tests(1) + id: test1 + run: | + if [ "${{ env.test_suite }}" == "GoldenPath-Testing" ]; then + xvfb-run pytest -m goldenpath --headed --html=report/report.html --self-contained-html + else + xvfb-run pytest --headed --html=report/report.html --self-contained-html + fi + working-directory: tests/e2e-test + continue-on-error: true + + - name: Sleep for 30 seconds + if: ${{ steps.test1.outcome == 'failure' }} + run: sleep 30s + shell: bash + + - name: Run tests(2) + id: test2 + if: ${{ steps.test1.outcome == 'failure' }} + run: | + if [ "${{ env.test_suite }}" == "GoldenPath-Testing" ]; then + xvfb-run pytest -m goldenpath --headed --html=report/report.html --self-contained-html + else + xvfb-run pytest --headed --html=report/report.html --self-contained-html + fi + working-directory: tests/e2e-test + continue-on-error: true + + - name: Sleep for 60 seconds + if: ${{ steps.test2.outcome == 'failure' }} + run: sleep 60s + shell: bash + + - name: Run tests(3) + id: test3 + if: ${{ steps.test2.outcome == 'failure' }} + run: | + if [ "${{ env.test_suite }}" == "GoldenPath-Testing" ]; then + xvfb-run pytest -m goldenpath --headed --html=report/report.html 
--self-contained-html + else + xvfb-run pytest --headed --html=report/report.html --self-contained-html + fi + working-directory: tests/e2e-test + + - name: Upload test report + id: upload_report + uses: actions/upload-artifact@v4 + if: ${{ !cancelled() }} + with: + name: test-report + path: tests/e2e-test/report/* + + - name: Generate E2E Test Summary + if: always() + run: | + # Determine test suite type for title + if [ "${{ env.test_suite }}" == "GoldenPath-Testing" ]; then + echo "## 🧪 E2E Test Job Summary : Golden Path Testing" >> $GITHUB_STEP_SUMMARY + else + echo "## 🧪 E2E Test Job Summary : Smoke Testing" >> $GITHUB_STEP_SUMMARY + fi + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Field | Value |" >> $GITHUB_STEP_SUMMARY + echo "|-------|--------|" >> $GITHUB_STEP_SUMMARY + + # Determine overall test result + OVERALL_SUCCESS="${{ steps.test1.outcome == 'success' || steps.test2.outcome == 'success' || steps.test3.outcome == 'success' }}" + if [[ "$OVERALL_SUCCESS" == "true" ]]; then + echo "| **Job Status** | ✅ Success |" >> $GITHUB_STEP_SUMMARY + else + echo "| **Job Status** | ❌ Failed |" >> $GITHUB_STEP_SUMMARY + fi + + echo "| **Target URL** | [${{ env.url }}](${{ env.url }}) |" >> $GITHUB_STEP_SUMMARY + echo "| **Test Suite** | \`${{ env.test_suite }}\` |" >> $GITHUB_STEP_SUMMARY + echo "| **Test Report** | [Download Artifact](${{ steps.upload_report.outputs.artifact-url }}) |" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + + echo "### 📋 Test Execution Details" >> $GITHUB_STEP_SUMMARY + echo "| Attempt | Status | Notes |" >> $GITHUB_STEP_SUMMARY + echo "|---------|--------|-------|" >> $GITHUB_STEP_SUMMARY + echo "| **Test Run 1** | ${{ steps.test1.outcome == 'success' && '✅ Passed' || '❌ Failed' }} | Initial test execution |" >> $GITHUB_STEP_SUMMARY + + if [[ "${{ steps.test1.outcome }}" == "failure" ]]; then + echo "| **Test Run 2** | ${{ steps.test2.outcome == 'success' && '✅ Passed' || steps.test2.outcome == 'failure' && '❌ Failed' || '⏸️ 
Skipped' }} | Retry after 30s delay |" >> $GITHUB_STEP_SUMMARY + fi + + if [[ "${{ steps.test2.outcome }}" == "failure" ]]; then + echo "| **Test Run 3** | ${{ steps.test3.outcome == 'success' && '✅ Passed' || steps.test3.outcome == 'failure' && '❌ Failed' || '⏸️ Skipped' }} | Final retry after 60s delay |" >> $GITHUB_STEP_SUMMARY + fi + + echo "" >> $GITHUB_STEP_SUMMARY + + if [[ "$OVERALL_SUCCESS" == "true" ]]; then + echo "### ✅ Test Results" >> $GITHUB_STEP_SUMMARY + echo "- End-to-end tests completed successfully" >> $GITHUB_STEP_SUMMARY + echo "- Application is functioning as expected" >> $GITHUB_STEP_SUMMARY + else + echo "### ❌ Test Results" >> $GITHUB_STEP_SUMMARY + echo "- All test attempts failed" >> $GITHUB_STEP_SUMMARY + echo "- Check the e2e-test/test job for detailed error information" >> $GITHUB_STEP_SUMMARY + fi \ No newline at end of file diff --git a/.github/workflows/test-automation.yml b/.github/workflows/test-automation.yml index 6bf45965..aa0ae606 100644 --- a/.github/workflows/test-automation.yml +++ b/.github/workflows/test-automation.yml @@ -15,7 +15,6 @@ on: env: url: ${{ inputs.DKM_URL }} accelerator_name: "DKM" - jobs: test: runs-on: ubuntu-latest diff --git a/Deployment/checkquota.ps1 b/Deployment/checkquota.ps1 index cc5c4822..c16a0b85 100644 --- a/Deployment/checkquota.ps1 +++ b/Deployment/checkquota.ps1 @@ -8,24 +8,20 @@ Write-Output "📍 Processed Regions: $($REGIONS -join ', ')" $SUBSCRIPTION_ID = $env:AZURE_SUBSCRIPTION_ID $GPT_MIN_CAPACITY = $env:GPT_MIN_CAPACITY $TEXT_EMBEDDING_MIN_CAPACITY = $env:TEXT_EMBEDDING_MIN_CAPACITY -$AZURE_CLIENT_ID = $env:AZURE_CLIENT_ID -$AZURE_TENANT_ID = $env:AZURE_TENANT_ID -$AZURE_CLIENT_SECRET = $env:AZURE_CLIENT_SECRET - -# Authenticate using Service Principal -Write-Host "Authentication using Service Principal..." 
# Ensure Azure PowerShell module is installed and imported Install-Module -Name Az -AllowClobber -Force -Scope CurrentUser Import-Module Az -# Create a PSCredential object for authentication -$creds = New-Object -TypeName System.Management.Automation.PSCredential -ArgumentList $AZURE_CLIENT_ID, (ConvertTo-SecureString $AZURE_CLIENT_SECRET -AsPlainText -Force) - -# Attempt to connect using Service Principal +# Verify existing Azure session (authentication is handled by the caller workflow via OIDC) try { - Connect-AzAccount -ServicePrincipal -TenantId $AZURE_TENANT_ID -Credential $creds + $context = Get-AzContext + if (-not $context) { + Write-Host "❌ Error: No active Azure session found. Ensure the caller workflow authenticates via azure/login@v2 with enable-AzPSSession: true." + exit 1 + } + Write-Host "✅ Using existing Azure session: $($context.Account.Id)" } catch { - Write-Host "❌ Error: Failed to authenticate using Service Principal. $_" + Write-Host "❌ Error: Failed to verify Azure session. $_" exit 1 } diff --git a/Deployment/resourcedeployment.ps1 b/Deployment/resourcedeployment.ps1 index fcbe4cd2..393e20ca 100644 --- a/Deployment/resourcedeployment.ps1 +++ b/Deployment/resourcedeployment.ps1 @@ -120,11 +120,14 @@ function LoginAzure([string]$tenantId, [string]$subscriptionID) { } } if ($env:CI -eq "true"){ - az login --service-principal ` - --username $env:AZURE_CLIENT_ID ` - --password $env:AZURE_CLIENT_SECRET ` - --tenant $env:AZURE_TENANT_ID ` - Write-Host "CI deployment mode" + # Authentication is handled by the caller workflow via OIDC (azure/login@v2) + $account = az account show 2>&1 + if ($LASTEXITCODE -ne 0) { + Write-Host "❌ Error: No active Azure CLI session found. Ensure the caller workflow authenticates via azure/login@v2." 
-ForegroundColor Red + failureBanner + exit 1 + } + Write-Host "CI deployment mode - using existing OIDC session" } else{ az login --tenant $tenantId diff --git a/Deployment/send-filestoendpoint.psm1 b/Deployment/send-filestoendpoint.psm1 index e7964467..05a2fe55 100644 --- a/Deployment/send-filestoendpoint.psm1 +++ b/Deployment/send-filestoendpoint.psm1 @@ -27,6 +27,10 @@ function Send-FilesToEndpoint { $totalFiles = $files.Count $currentFileIndex = 0 + $maxRetries = 3 + $retryDelaySeconds = 5 + $failedFiles = @() + $successfulFiles = 0 foreach ($file in $files) { $currentFileIndex++ @@ -35,43 +39,70 @@ function Send-FilesToEndpoint { # Check file size if ($file.Length -eq 0) { - Write-Host "File cannot be uploaded: $($file.Name) (File size is 0)" + Write-Host "⚠️ File cannot be uploaded: $($file.Name) (File size is 0)" -ForegroundColor Yellow + $failedFiles += @{FileName = $file.Name; Reason = "File size is 0"} continue } # Check file type $allowedExtensions = @(".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".pdf", ".tif", ".tiff", ".jpg", ".jpeg", ".png", ".bmp", ".txt") if (-Not ($allowedExtensions -contains $file.Extension.ToLower())) { - Write-Host "File cannot be uploaded: $($file.Name) (Unsupported file type)" + Write-Host "⚠️ File cannot be uploaded: $($file.Name) (Unsupported file type)" -ForegroundColor Yellow + $failedFiles += @{FileName = $file.Name; Reason = "Unsupported file type"} continue } - try { - # Read the file content as byte array - $fileContent = [System.IO.File]::ReadAllBytes($file.FullName) + # Retry logic for file upload + $uploadSuccess = $false + $attempt = 0 + + while ($attempt -lt $maxRetries -and -not $uploadSuccess) { + $attempt++ + try { + if ($attempt -gt 1) { + Write-Host "🔄 Retry attempt $attempt of $maxRetries for file: $($file.Name)" -ForegroundColor Cyan + Start-Sleep -Seconds $retryDelaySeconds + } + + # Read the file content as byte array + $fileContent = [System.IO.File]::ReadAllBytes($file.FullName) - # Create the 
multipart form data content - $content = [System.Net.Http.MultipartFormDataContent]::new() - $fileContentByteArray = [System.Net.Http.ByteArrayContent]::new($fileContent) - $fileContentByteArray.Headers.ContentDisposition = [System.Net.Http.Headers.ContentDispositionHeaderValue]::new("form-data") - $fileContentByteArray.Headers.ContentDisposition.Name = '"file"' - $fileContentByteArray.Headers.ContentDisposition.FileName = '"' + $file.Name + '"' - $content.Add($fileContentByteArray) + # Create the multipart form data content + $content = [System.Net.Http.MultipartFormDataContent]::new() + $fileContentByteArray = [System.Net.Http.ByteArrayContent]::new($fileContent) + $fileContentByteArray.Headers.ContentDisposition = [System.Net.Http.Headers.ContentDispositionHeaderValue]::new("form-data") + $fileContentByteArray.Headers.ContentDisposition.Name = '"file"' + $fileContentByteArray.Headers.ContentDisposition.FileName = '"' + $file.Name + '"' + $content.Add($fileContentByteArray) - # Upload the file content to the HTTP endpoint - $response = $httpClient.PostAsync($EndpointUrl, $content).GetAwaiter().GetResult() - - - # Check the response status - if ($response.IsSuccessStatusCode) { - Write-Host "File uploaded successfully: $($file.Name)" - } - else { - Write-Error "Failed to upload file: $($file.Name). Status code: $($response.StatusCode)" + # Upload the file content to the HTTP endpoint + $response = $httpClient.PostAsync($EndpointUrl, $content).GetAwaiter().GetResult() + + + # Check the response status + if ($response.IsSuccessStatusCode) { + Write-Host "✅ File uploaded successfully: $($file.Name)" -ForegroundColor Green + $uploadSuccess = $true + $successfulFiles++ + } + else { + $statusCode = $response.StatusCode + if ($attempt -lt $maxRetries) { + Write-Host "⚠️ Failed to upload file: $($file.Name). Status code: $statusCode. Will retry..." -ForegroundColor Yellow + } else { + Write-Host "❌ Failed to upload file: $($file.Name). Status code: $statusCode. 
Max retries reached." -ForegroundColor Red + $failedFiles += @{FileName = $file.Name; Reason = "HTTP Status: $statusCode"} + } + } + } + catch { + if ($attempt -lt $maxRetries) { + Write-Host "⚠️ Error uploading file: $($file.Name). Error: $($_.Exception.Message). Will retry..." -ForegroundColor Yellow + } else { + Write-Host "❌ Error uploading file: $($file.Name). Error: $($_.Exception.Message). Max retries reached." -ForegroundColor Red + $failedFiles += @{FileName = $file.Name; Reason = $_.Exception.Message} + } } - } - catch { - Write-Error "An error occurred while uploading the file: $($file.Name). Error: $_" } } # Dispose HttpClient @@ -79,6 +110,26 @@ function Send-FilesToEndpoint { # Clear the progress bar Write-Progress -Activity "Uploading Files" -Status "Completed" -PercentComplete 100 + + # Print summary report + Write-Host "`n========================================" -ForegroundColor Cyan + Write-Host "📊 File Upload Summary" -ForegroundColor Cyan + Write-Host "========================================" -ForegroundColor Cyan + Write-Host "Total files processed: $totalFiles" -ForegroundColor White + Write-Host "✅ Successfully uploaded: $successfulFiles" -ForegroundColor Green + Write-Host "❌ Failed uploads: $($failedFiles.Count)" -ForegroundColor Red + + if ($failedFiles.Count -gt 0) { + Write-Host "`n❌ Failed Files Details:" -ForegroundColor Red + foreach ($failed in $failedFiles) { + Write-Host " • $($failed.FileName) - Reason: $($failed.Reason)" -ForegroundColor Yellow + } + Write-Host "`n⚠️ Warning: Some files failed to upload after $maxRetries retry attempts." -ForegroundColor Yellow + Write-Host "You can manually retry uploading the failed files later." -ForegroundColor Yellow + } else { + Write-Host "`n✅ All files uploaded successfully!" 
-ForegroundColor Green + } + Write-Host "========================================`n" -ForegroundColor Cyan } Export-ModuleMember -Function Send-FilesToEndpoint \ No newline at end of file diff --git a/README.md b/README.md index 826f60e9..ecc27d2c 100644 --- a/README.md +++ b/README.md @@ -194,11 +194,21 @@ Check out similar solution accelerators | [Conversation knowledge mining](https://github.com/microsoft/Conversation-Knowledge-Mining-Solution-Accelerator) | Derive insights from volumes of conversational data using generative AI. It offers key phrase extraction, topic modeling, and interactive chat experiences through an intuitive web interface. | | [Content processing](https://github.com/microsoft/content-processing-solution-accelerator) | Programmatically extract data and apply schemas to unstructured documents across text-based and multi-modal content using Azure AI Foundry, Azure OpenAI, Azure AI Content Understanding, and Azure Cosmos DB. | | [Build your own copilot - client advisor](https://github.com/microsoft/build-your-own-copilot-solution-accelerator) | This copilot helps client advisors to save time and prepare relevant discussion topics for scheduled meetings. It provides an overview of daily client meetings with seamless navigation between viewing client profiles and chatting with structured data. | - +|[Document Processing Accelerator](https://github.com/Azure/doc-proc-solution-accelerator/) | Modular document AI pipeline that automatically extracts, analyzes, and indexes information from unstructured documents (PDFs, images, etc.) at scale. It offers plug-and-play components for OCR, classification, summarization, and integration to search or chatbots – speeding up data ingestion with enterprise security.| +|[GPT-RAG Accelerator](https://github.com/Azure/gpt-rag)| Secure enterprise GPT assistant framework that uses Retrieval-Augmented Generation to ground answers on your data. 
It provides a ready architecture (Azure OpenAI + knowledge search) for building AI chatbots that “know” your enterprise content, with built-in security and scalability.|
+💡 Want to get familiar with Microsoft's AI and Data Engineering best practices? Check out our playbooks to learn more + +| Playbook | Description | +|:---|:---| +| [AI playbook](https://learn.microsoft.com/en-us/ai/playbook/) | The Artificial Intelligence (AI) Playbook provides enterprise software engineers with solutions, capabilities, and code developed to solve real-world AI problems. | +| [Data playbook](https://learn.microsoft.com/en-us/data-engineering/playbook/understanding-data-playbook) | The data playbook provides enterprise software engineers with solutions which contain code developed to solve real-world problems. Everything in the playbook is developed with, and validated by, some of Microsoft's largest and most influential customers and partners. | + +
+ ## Provide feedback Have questions, find a bug, or want to request a feature? [Submit a new issue](https://github.com/microsoft/document-knowledge-mining-solution-accelerator/issues) on this repo and we'll connect. diff --git a/docs/CustomizingAzdParameters.md b/docs/CustomizingAzdParameters.md index 15625e6b..8756c99f 100644 --- a/docs/CustomizingAzdParameters.md +++ b/docs/CustomizingAzdParameters.md @@ -10,6 +10,7 @@ By default this template will use the environment name as the prefix to prevent | ------------------------------- | ------ | ----------------- | --------------------------------------------------------------------------------------------------- | | `AZURE_ENV_NAME` | string | `dkm` | Used as a prefix for all resource names to ensure uniqueness across environments. | | `AZURE_LOCATION` | string | `` | Location of the Azure resources. Controls where the infrastructure will be deployed. | +| `AZURE_ENV_OPENAI_LOCATION` | string | `` | Location for Azure OpenAI resources. Can be different from AZURE_LOCATION for optimized AI service placement. | | `AZURE_ENV_MODEL_DEPLOYMENT_TYPE` | string | `GlobalStandard` | Defines the deployment type for the AI model (e.g., Standard, GlobalStandard). | | `AZURE_ENV_MODEL_NAME` | string | `gpt-4.1` | Specifies the name of the GPT model to be deployed. | | `AZURE_ENV_MODEL_CAPACITY` | int | `100` | Sets the GPT model capacity. | diff --git a/docs/TroubleShootingSteps.md b/docs/TroubleShootingSteps.md index effd48cd..0b177d7e 100644 --- a/docs/TroubleShootingSteps.md +++ b/docs/TroubleShootingSteps.md @@ -1,609 +1,163 @@ # 🛠️ Troubleshooting - + When deploying Azure resources, you may come across different error codes that stop or delay the deployment process. This section lists some of the most common errors along with possible causes and step-by-step resolutions. - + Use these as quick reference guides to unblock your deployments. 
-> **💡 Need deployment recovery help?** If your deployment failed and you need to start over, see the [Recover from Failed Deployment](./DeploymentGuide.md#recover-from-failed-deployment) section in the deployment guide. - -## Error Codes - -
-ReadOnlyDisabledSubscription - -- Check if you have an active subscription before starting the deployment. - -
-
- MissingSubscriptionRegistration/ AllowBringYourOwnPublicIpAddress/ InvalidAuthenticationToken - -Enable `AllowBringYourOwnPublicIpAddress` Feature - -Before deploying the resources, you may need to enable the **Bring Your Own Public IP Address** feature in Azure. This is required only once per subscription. - -### Steps - -1. **Run the following command to register the feature:** - - ```bash - az feature register --namespace Microsoft.Network --name AllowBringYourOwnPublicIpAddress - ``` - -2. **Wait for the registration to complete.** - You can check the status using: - - ```bash - az feature show --namespace Microsoft.Network --name AllowBringYourOwnPublicIpAddress --query properties.state - ``` - -3. **The output should show:** - "Registered" - -4. **Once the feature is registered, refresh the provider:** - - ```bash - az provider register --namespace Microsoft.Network - ``` - - 💡 Note: Feature registration may take several minutes to complete. This needs to be done only once per Azure subscription. - -
-
-ResourceGroupNotFound - -## Option 1 - -### Steps - -1. Go to [Azure Portal](https:/portal.azure.com/#home). - -2. Click on the **"Resource groups"** option available on the Azure portal home page. - ![alt text](./images/troubleshooting/rg_not_found0.png) - -3. In the Resource Groups search bar, search for the resource group you intend to target for deployment. If it exists, you can proceed with using it. - ![alt text](./images/troubleshooting/rg_not_found.png) - -## Option 2 - -- This error can occur if you deploy the template using the same .env file - from a previous deployment. -- To avoid this issue, create a new environment before redeploying. -- You can use the following command to create a new environment: - -``` -azd env new -``` - -
- -
-ResourceGroupBeingDeleted - -To prevent this issue, please ensure that the resource group you are targeting for deployment is not currently being deleted. You can follow steps to verify resource group is being deleted or not. - -### Steps: - -1. Go to [Azure Portal](https://portal.azure.com/#home) -2. Go to resource group option and search for targeted resource group -3. If Targeted resource group is there and deletion for this is in progress, it means you cannot use this, you can create new or use any other resource group - -
- -
-InternalSubscriptionIsOverQuotaForSku/ManagedEnvironmentProvisioningError - -Quotas are applied per resource group, subscriptions, accounts, and other scopes. For example, your subscription might be configured to limit the number of vCPUs for a region. If you attempt to deploy a virtual machine with more vCPUs than the permitted amount, you receive an error that the quota was exceeded. -For PowerShell, use the `Get-AzVMUsage` cmdlet to find virtual machine quotas. - -```ps -Get-AzVMUsage -Location "West US" -``` - -based on available quota you can deploy application otherwise, you can request for more quota - -
-
-InsufficientQuota - -- Check if you have sufficient quota available in your subscription before deployment. -- To verify, refer to the [Quota Check documentation](./QuotaCheck.md) for details. - -
- -
-LinkedInvalidPropertyId/ ResourceNotFound/DeploymentOutputEvaluationFailed/ CanNotRestoreANonExistingResource - -- Before using any resource ID, ensure it follows the correct format. -- Verify that the resource ID you are passing actually exists. -- Make sure there are no typos in the resource ID. -- Verify that the provisioning state of the existing resource is `Succeeded` by running the following command to avoid this error while deployment or restoring the resource. - - ``` - az resource show --ids --query "properties.provisioningState" - ``` - -- Sample Resource IDs format - - Log Analytics Workspace Resource ID - ``` - /subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.OperationalInsights/workspaces/{workspaceName} - ``` - - Azure AI Foundry Project Resource ID - ``` - /subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.MachineLearningServices/workspaces/{name} - ``` -- For more information refer [Resource Not Found errors solutions](https://learn.microsoft.com/en-us/azure/azure-resource-manager/troubleshooting/error-not-found?tabs=bicep) - -
- -
-ResourceNameInvalid - -- Ensure the resource name is within the allowed length and naming rules defined for that specific resource type, you can refer [Resource Naming Convention](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/resource-name-rules) document. - -
- -
-ServiceUnavailable/ResourceNotFound - -- Regions are restricted to guarantee compatibility with paired regions and replica locations for data redundancy and failover scenarios based on articles [Azure regions list](https://learn.microsoft.com/en-us/azure/reliability/regions-list) and [Reliability in Azure Cosmos DB for NoSQL](https://learn.microsoft.com/en-us/azure/reliability/reliability-cosmos-db-nosql). - -- You can request more quota for Cosmos DB, refer [Quota Request](https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/create-support-request-quota-increase) Documentation - -
- -
-Workspace Name - InvalidParameter - -To avoid this errors in workspace ID follow below rules. - -1. Must start and end with an alphanumeric character (letter or number). -2. Allowed characters: - `a–z` - `0–9` - `- (hyphen)` -3. Cannot start or end with a hyphen -. -4. No spaces, underscores (\_), periods (.), or special characters. -5. Must be unique within the Azure region & subscription. -6. Length: 3–33 characters (for AML workspaces). -
- -
-BadRequest: Dns record under zone Document is already taken - -This error can occur only when user hardcoding the CosmosDB Service name. To avoid this you can try few below suggestions. - -- Verify resource names are globally unique. -- If you already created an account/resource with same name in another subscription or resource group, check and delete it before reusing the name. -- By default in this template we are using unique prefix with every resource/account name to avoid this kind for errors. -
- -
-NetcfgSubnetRangeOutsideVnet - -- Ensure the subnet’s IP address range falls within the virtual network’s address space. -- Always validate that the subnet CIDR block is a subset of the VNet range. -- For Azure Bastion, the AzureBastionSubnet must be at least /27. -- Confirm that the AzureBastionSubnet is deployed inside the VNet. -
- -
-DisableExport_PublicNetworkAccessMustBeDisabled - -- Check container source: Confirm whether the deployment is using a Docker image or Azure Container Registry (ACR). -- Verify ACR configuration: If ACR is included, review its settings to ensure they comply with Azure requirements. -- Check export settings: If export is disabled in ACR, make sure public network access is also disabled. -- Dedeploy after fix: Correct the configuration and redeploy. This will prevent the Conflict error during deployment. -- For more information refer [ACR Data Loss Prevention](https://learn.microsoft.com/en-us/azure/container-registry/data-loss-prevention) document. -
- -
-AccountProvisioningStateInvalid - -- The AccountProvisioningStateInvalid error occurs when you try to use resources while they are still in the Accepted provisioning state. -- This means the deployment has not yet fully completed. -- To avoid this error, wait until the provisioning state changes to Succeeded. -- Only use the resources once the deployment is fully completed. -
- -
-VaultNameNotValid - -In this template Vault name will be unique everytime, but if you are trying to hard code the name then please make sure below points. - -1. Check name length - - Ensure the Key Vault name is between 3 and 24 characters. -2. Validate allowed characters - - The name can only contain letters (a–z, A–Z) and numbers (0–9). - - Hyphens are allowed, but not at the beginning or end, and not consecutive (--). -3. Ensure proper start and end - - The name must start with a letter. - - The name must end with a letter or digit (not a hyphen). -4. Test with a new name - - - Example of a valid vault name: - ✅ cartersaikeyvault1 ✅ securevaultdemo ✅ kv-project123 - -
- -
-DeploymentCanceled - -There might be multiple reasons for this error you can follow below steps to troubleshoot. - -1. Check deployment history - - Go to Azure Portal → Resource Group → Deployments. - - Look at the detailed error message for the deployment that was canceled — this will show which resource failed and why. -2. Identify the root cause - - A DeploymentCanceled usually means: - - A dependent resource failed to deploy. - - A validation error occurred earlier. - - A manual cancellation was triggered. - - Expand the failed deployment logs for inner error messages. -3. Validate your template (ARM/Bicep) - Run: - ``` - az deployment group validate --resource-group --template-file main.bicep - ``` -4. Check resource limits/quotas - - Ensure you have not exceeded quotas (vCPUs, IPs, storage accounts, etc.), which can silently cause cancellation. -5. Fix the failed dependency - - If a specific resource shows BadRequest, Conflict, or ValidationError, resolve that first. - - Re-run the deployment after fixing the root cause. -6. Retry deployment -Once corrected, redeploy with: -`az deployment group create --resource-group --template-file main.bicep` -Essentially: DeploymentCanceled itself is just a wrapper error — you need to check inner errors in the deployment logs to find the actual failure. -
- -
-LocationNotAvailableForResourceType - -- You may encounter a LocationNotAvailableForResourceType error if you set the secondary location to 'Australia Central' in the main.bicep file. -- This happens because 'Australia Central' is not a supported region for that resource type. -- Always refer to the README file or Azure documentation to check the list of supported regions. -- Update the deployment with a valid supported region to resolve the issue. - -
- -
-InvalidResourceLocation - -- You may encounter an InvalidResourceLocation error if you change the region for Cosmos DB or the Storage Account (secondary location) multiple times in the main.bicep file and redeploy. -- Azure resources like Cosmos DB and Storage Accounts do not support changing regions after deployment. -- If you need to change the region again, first delete the existing deployment. -- Then redeploy the resources with the updated region configuration. - -
- -
-DeploymentActive - -- This issue occurs when a deployment is already in progress and another deployment is triggered in the same resource group, causing a DeploymentActive error. -- Cancel the ongoing deployment before starting a new one. -- Do not initiate a new deployment in the same resource group until the previous one is completed. -
- -
-ResourceOperationFailure/ProvisioningDisabled - -- This error occurs when provisioning of a resource is restricted in the selected region. - It usually happens because the service is not available in that region or provisioning has been temporarily disabled. - -- Regions are restricted to guarantee compatibility with paired regions and replica locations for data redundancy and failover scenarios based on articles [Azure regions list](https://learn.microsoft.com/en-us/azure/reliability/regions-list) and [Reliability in Azure Cosmos DB for NoSQL](https://learn.microsoft.com/en-us/azure/reliability/reliability-cosmos-db-nosql). - -- If you need to use the same region, you can request a quota or provisioning exception. - Refer to [Quota Request](https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/create-support-request-quota-increase) for more details. - -
- -
-MaxNumberOfRegionalEnvironmentsInSubExceeded - -- This error occurs when you try to create more than the allowed number of **Azure Container App Environments (ACA Environments)** in the same region for a subscription. -- For example, in **Sweden Central**, only **1 Container App Environment** is allowed per subscription. - -The subscription 'xxxx-xxxx' cannot have more than 1 Container App Environments in Sweden Central. - -- To fix this, you can: - - Deploy the Container App Environment in a **different region**, OR - - Request a quota increase via Azure Support → [Quota Increase Request](https://go.microsoft.com/fwlink/?linkid=2208872) - -
- -
-Unauthorized - Operation cannot be completed without additional quota - -- You can check your quota usage using `az vm list-usage`. - - ``` - az vm list-usage --location "" -o table - ``` - -- To Request more quota refer [VM Quota Request](https://techcommunity.microsoft.com/blog/startupsatmicrosoftblog/how-to-increase-quota-for-specific-types-of-azure-virtual-machines/3792394). - -
- -
ParentResourceNotfound - - -- You can refer to the [Parent Resource Not found](https://learn.microsoft.com/en-us/azure/azure-resource-manager/troubleshooting/error-parent-resource?tabs=bicep) documentation if you encounter this error. - -
- -
ResourceProviderError - -- This error occurs when the resource provider is not registered in your subscription. -- To register it, refer to [Register Resource Provider](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/resource-providers-and-types) documentation. - -
- -
-Conflict - Cannot use the SKU Basic with File Change Audit for site. - -- This error happens because File Change Audit logs aren’t supported on Basic SKU App Service Plans. - -- Upgrading to Premium/Isolated SKU (supports File Change Audit), or - -- Disabling File Change Audit in Diagnostic Settings if you must stay on Basic. -- Always cross-check the [supported log types](https://aka.ms/supported-log-types) - before adding diagnostic logs to your Bicep templates. - -
- -
- -AccountPropertyCannotBeUpdated - -- The property **`isHnsEnabled`** (Hierarchical Namespace for Data Lake Gen2) is **read-only** and can only be set during **storage account creation**. -- Once a storage account is created, this property **cannot be updated**. -- Trying to update it via ARM template, Bicep, CLI, or Portal will fail. - -- **Resolution** -- Create a **new storage account** with `isHnsEnabled=true` if you require hierarchical namespace. -- Migration may be needed if you already have data. -- Refer to [Storage Account Update Restrictions](https://aka.ms/storageaccountupdate) for more details. - -
- -
- -InvalidRequestContent - -- The deployment values either include values that aren't recognized, or required values are missing. -- Confirm the values for your resource type. -- You can refer to the [Invalid Request Content error documentation](https://learn.microsoft.com/en-us/azure/azure-resource-manager/troubleshooting/error-invalid-request-content). - -
- -
-ReadOnlyDisabledSubscription - -- Depending on the type of the Azure Subscription, the expiration date might have been reached. - -- You have to activate the Azure Subscription before creating any Azure resource. - -- You can refer Reactivate a disabled Azure subscription Documentation. - -
- -
- -SkuNotAvailable - -- You receive this error in the following scenarios: - - When the resource SKU you've selected, such as VM size, isn't available for a location or zone. - - If you're deploying an Azure Spot VM or Spot scale set instance, there isn't any capacity for Azure Spot in this location. For more information, see Spot error messages. - -
- -
-CrossTenantDeploymentNotPermitted - -- **Check tenant match:** - Ensure your deployment identity (user/SP) and the target resource group are in the same tenant. - - ```bash - az account show - az group show --name - ``` - -- **Verify pipeline/service principal:** - If using CI/CD, confirm that the service principal belongs to the same tenant and has permissions on the resource group. - -- **Avoid cross-tenant references:** - Make sure your Bicep doesn’t reference subscriptions, resource groups, or resources in another tenant. - -- **Test minimal deployment:** - Deploy a simple resource to the same resource group to confirm that identity and tenant are correct. - -- **Guest/external accounts:** - Avoid using guest users from other tenants; use native accounts or SPs in the tenant. - -
- -
-RequestDisallowedByPolicy - -- This typically indicates that an Azure Policy is preventing the requested action due to policy restrictions in your subscription. -- For more details and guidance on resolving this issue, please refer to the official Microsoft documentation: [RequestDisallowedByPolicy](https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/create-upgrade-delete/error-code-requestdisallowedbypolicy) - -
- -
-FlagMustBeSetForRestore/NameUnavailable/CustomDomainInUse - -- This error occurs when you try to deploy a Cognitive Services resource that was soft-deleted earlier. -- Azure requires you to explicitly set the `restore` flag to `true` if you want to recover the soft-deleted resource. -- If you don’t want to restore the resource, you must purge the deleted resource first before redeploying. - -**Example causes:** - -- Trying to redeploy a Cognitive Services account with the same name as a previously deleted one. -- The deleted resource still exists in a soft-delete retention state. - -**How to fix:** - -1. If you want to restore → add `"restore": true` in your template properties. -2. If you want a fresh deployment → purge the resource using: - ```bash - az cognitiveservices account purge \ - --name \ - --resource-group \ - --location - ``` - -- For more details, refer to [Soft delete and resource restore.](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/delete-resource-group?tabs=azure-powershell) - -
- -
-PrincipalNotFound - -- This error occurs when the principal ID (Service Principal, User, or Group) specified in a role assignment or deployment does not exist in the Azure Active Directory tenant. -- It can also happen due to replication delays right after creating a new principal. - -**Example causes:** - -- The specified Object ID is invalid or belongs to another tenant. -- The principal was recently created, but Azure AD has not yet replicated it. -- Attempting to assign a role to a non-existing or deleted Service Principal/User/Group. - -**How to fix:** - -1. Verify that the principal ID is correct and exists in the same directory/tenant. - ```bash - az ad sp show --id - ``` -2. If the principal was just created, wait a few minutes and retry. -3. Explicitly set the principalType property (ServicePrincipal, User, or Group) in your ARM/Bicep template to avoid replication delays. -4. If the principal does not exist, create it again before assigning roles. - -- For more details, see [Azure PrincipalType documentation](https://learn.microsoft.com/en-us/azure/role-based-access-control/troubleshooting?tabs=bicep) - -
- -
- -RedundancyConfigurationNotAvailableInRegion - -- This issue happens when you try to create a Storage Account with a redundancy configuration (e.g., Standard_GRS) that is not supported in the selected Azure region. - -- Example: Creating a storage account with GRS in italynorth will fail with this error. - - ``` - az storage account create -n mystorageacct123 -g myResourceGroup -l italynorth --sku Standard_GRS --kind StorageV2 - - ``` - -- To check supported SKUs for your region: - ``` - az storage account list-skus -l italynorth -o table - ``` -- Use a supported redundancy option (e.g., Standard_LRS) in the same region Or deploy the Storage Account in a region that supports your chosen redundancy. For more details, refer to [Azure Storage redundancy documentation.](https://learn.microsoft.com/en-us/azure/storage/common/storage-redundancy?utm_source=chatgpt.com) - -
- -
- -DeploymentNotFound - -- This issue occurs when the user deletes a previous deployment along with the resource group (RG), and then redeploys the same RG with the same environment name but in a different location. - -- To avoid the DeploymentNotFound error, do not change the location when redeploying a deleted RG, or Use new names for the RG and environment during redeployment. - -
- -
- -DeploymentCanceled(user.canceled) -- Indicates that the deployment was manually canceled by the user (Portal, CLI, or pipeline). - -- Check deployment history and logs to confirm who/when it was canceled. - -- If accidental, retry the deployment. - -- For pipelines, ensure no automation or timeout is triggering cancellation. - -- Use deployment locks or retry logic to prevent accidental cancellations. - -
- -
- -ResourceGroupDeletionTimeout - -- Some resources in the resource group may be stuck deleting or have dependencies; check RG resources and status. - -- Ensure no resource locks or Azure Policies are blocking deletion. - -- Retry deletion via CLI/PowerShell (```az group delete --name --yes --no-wait```). - -- Check Activity Log to identify failing resources; escalate to Azure Support if deletion is stuck. - -
- -
- -BadRequest - DatabaseAccount is in a failed provisioning state because the previous attempt to create it was not successful - -- This error occurs when a user attempts to redeploy a resource that previously failed to provision. - -- To resolve the issue, delete the failed deployment first, then start a new deployment. - -- For guidance on deleting a resource from a Resource Group, refer to the following link: [Delete an Azure Cosmos DB account](https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/manage-with-powershell#delete-account:~:text=%3A%24enableMultiMaster-,Delete%20an%20Azure%20Cosmos%20DB%20account,-This%20command%20deletes) - -
- -
- -SpecialFeatureOrQuotaIdRequired - -- This error occurs when your subscription does not have access to certain Azure OpenAI models. -- Example error message: - -SpecialFeatureOrQuotaIdRequired: The current subscription does not have access to this model 'Format:OpenAI,Name:o3,Version:2025-04-16'. -- Resolution: -To gain access, submit a request using the official form: - -[👉 Azure OpenAI Model Access Request](https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUQ1VGQUEzRlBIMVU2UFlHSFpSNkpOR0paRSQlQCN0PWcu) - - -You’ll need to use this form if you require access to the following restricted models: - - gpt-5 - - o3 - - o3-pro - - deep research - - reasoning summary - - gpt-image-1 - - Once your request is approved, redeploy your resources. - -
- -
- -Error During deployment +## ⚡ Most Frequently Encountered Errors + +| Error Code | Common Cause | Full Details | +|------------|--------------|--------------| +| **InsufficientQuota** | Not enough quota available in subscription | [View Solution](#quota--capacity-limitations) | +| **MissingSubscriptionRegistration** | Required feature not registered in subscription | [View Solution](#subscription--access-issues) | +| **ResourceGroupNotFound** | RG doesn't exist or using old .env file | [View Solution](#resource-group--deployment-management) | +| **DeploymentModelNotSupported** | Model not available in selected region | [View Solution](#regional--location-issues) | +| **DeploymentNotFound** | Deployment record not found or was deleted | [View Solution](#resource-group--deployment-management) | +| **ResourceNotFound** | Resource does not exist or cannot be found | [View Solution](#resource-identification--references) | +| **SpecialFeatureOrQuotaIdRequired** | Subscription lacks access to specific model | [View Solution](#subscription--access-issues) | +| **ContainerAppOperationError** | Improperly built container image | [View Solution](#miscellaneous) | +| **ServiceUnavailable** | Service not available in selected region | [View Solution](#regional--location-issues) | +| **BadRequest - DatabaseAccount is in a failed provisioning state** | Previous deployment failed | [View Solution](#resource-state--provisioning) | +| **Unauthorized - Operation cannot be completed
without additional quota** | Insufficient quota for requested operation | [View Solution](#subscription--access-issues) | +| **ResourceGroupBeingDeleted** | Resource group deletion in progress | [View Solution](#resource-group--deployment-management) | +| **FlagMustBeSetForRestore** | Soft-deleted resource requires restore flag or purge | [View Solution](#miscellaneous) | +| **ParentResourceNotFound** | Parent resource does not exist or cannot be found | [View Solution](#resource-identification--references) | +| **AccountProvisioningStateInvalid** | Resource used before provisioning completed | [View Solution](#resource-state--provisioning) | +| **InternalSubscriptionIsOverQuotaForSku** | Subscription quota exceeded for the requested SKU | [View Solution](#quota--capacity-limitations) | +| **InvalidResourceGroup** | Invalid resource group configuration | [View Solution](#resource-group--deployment-management) | +| **RequestDisallowedByPolicy** | Azure Policy blocking the requested operation | [View Solution](#subscription--access-issues) | + +## 📖 Table of Contents + +- [Subscription & Access Issues](#subscription--access-issues) +- [Quota & Capacity Limitations](#quota--capacity-limitations) +- [Regional & Location Issues](#regional--location-issues) +- [Resource Naming & Validation](#resource-naming--validation) +- [Resource Identification & References](#resource-identification--references) +- [Network & Infrastructure Configuration](#network--infrastructure-configuration) +- [Configuration & Property Errors](#configuration--property-errors) +- [Resource State & Provisioning](#resource-state--provisioning) +- [Miscellaneous](#miscellaneous) + +## Subscription & Access Issues + +| Issue/Error Code | Description | Steps to Resolve | +|-----------|-------------|------------------| +| **ReadOnlyDisabledSubscription** | Subscription is disabled or in read-only state |
  • Check if you have an active subscription before starting the deployment
  • Depending on the type of the Azure Subscription, the expiration date might have been reached
  • You have to activate the Azure Subscription before creating any Azure resource
  • Refer to [Reactivate a disabled Azure subscription](https://learn.microsoft.com/en-us/azure/cost-management-billing/manage/subscription-disabled) documentation
| +| **MissingSubscriptionRegistration/
AllowBringYourOwnPublicIpAddress** | Required feature not registered in subscription | **Enable `AllowBringYourOwnPublicIpAddress` Feature**

Before deploying the resources, you may need to enable the **Bring Your Own Public IP Address** feature in Azure. This is required only once per subscription.

**Steps:**
  • Run the following command to register the feature:
    `az feature register --namespace Microsoft.Network --name AllowBringYourOwnPublicIpAddress`
  • Wait for the registration to complete. Check the status using:
    `az feature show --namespace Microsoft.Network --name AllowBringYourOwnPublicIpAddress --query properties.state`
  • The output should show: "Registered"
  • Once the feature is registered, refresh the provider:
    `az provider register --namespace Microsoft.Network`
💡 Note: Feature registration may take several minutes to complete. This needs to be done only once per Azure subscription. | +| **Unauthorized - Operation cannot be completed without additional quota** | Insufficient quota for requested operation |
  • Check your quota usage using:
&nbsp;&nbsp;&nbsp;&nbsp;`az vm list-usage --location "<location>" -o table`<br/>
  • To request more quota refer to [VM Quota Request](https://techcommunity.microsoft.com/blog/startupsatmicrosoftblog/how-to-increase-quota-for-specific-types-of-azure-virtual-machines/3792394)
| +| **CrossTenantDeploymentNotPermitted** | Deployment across different Azure AD tenants not allowed |
  • **Check tenant match:** Ensure your deployment identity (user/SP) and the target resource group are in the same tenant:
    `az account show`
&nbsp;&nbsp;&nbsp;&nbsp;`az group show --name <resource-group-name>`<br/>
  • **Verify pipeline/service principal:** If using CI/CD, confirm the service principal belongs to the same tenant and has permissions on the resource group
  • **Avoid cross-tenant references:** Make sure your Bicep doesn't reference subscriptions, resource groups, or resources in another tenant
  • **Test minimal deployment:** Deploy a simple resource to the same resource group to confirm identity and tenant are correct
  • **Guest/external accounts:** Avoid using guest users from other tenants; use native accounts or SPs in the tenant
| +| **RequestDisallowedByPolicy** | Azure Policy blocking the requested operation |
  • This typically indicates that an Azure Policy is preventing the requested action due to policy restrictions in your subscription
  • For more details and guidance on resolving this issue, refer to: [RequestDisallowedByPolicy](https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/create-upgrade-delete/error-code-requestdisallowedbypolicy)
| +| **SpecialFeatureOrQuotaIdRequired** | Subscription lacks access to specific Azure OpenAI models | This error occurs when your subscription does not have access to certain Azure OpenAI models.

**Example error message:**
`SpecialFeatureOrQuotaIdRequired: The current subscription does not have access to this model 'Format:OpenAI,Name:o3,Version:2025-04-16'.`

**Resolution:**
To gain access, submit a request using the official form:
👉 [Azure OpenAI Model Access Request](https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUQ1VGQUEzRlBIMVU2UFlHSFpSNkpOR0paRSQlQCN0PWcu)

You'll need to use this form if you require access to the following restricted models:
  • gpt-5
  • o3
  • o3-pro
  • deep research
  • reasoning summary
  • gpt-image-1
Once your request is approved, redeploy your resource. | +| **ResourceProviderError** | Resource provider not registered in subscription |
  • This error occurs when the resource provider is not registered in your subscription
  • To register it, refer to [Register Resource Provider](https://learn.microsoft.com/en-us/azure/azure-resource-manager/troubleshooting/error-register-resource-provider?tabs=azure-cli) documentation
| + +-------------------------------- + +## Quota & Capacity Limitations + +| Issue/Error Code | Description | Steps to Resolve | +|-----------------|-------------|------------------| +| **InternalSubscriptionIsOverQuotaForSku/
ManagedEnvironmentProvisioningError** | Subscription quota exceeded for the requested SKU | Quotas are applied per resource group, subscriptions, accounts, and other scopes. For example, your subscription might be configured to limit the number of vCPUs for a region. If you attempt to deploy a virtual machine with more vCPUs than the permitted amount, you receive an error that the quota was exceeded.

For PowerShell, use the `Get-AzVMUsage` cmdlet to find virtual machine quotas:
`Get-AzVMUsage -Location "West US"`

Based on the available quota, you can deploy the application; otherwise, you can request more quota | +| **ServiceQuotaExceeded** | Free tier service quota limit reached for Azure AI Search | This error occurs when you attempt to deploy an Azure AI Search service but have already reached the **free tier quota limit** for your subscription. Each Azure subscription is limited to **one free tier Search service**.<br/>

**Example error message:**
`ServiceQuotaExceeded: Operation would exceed 'free' tier service quota. You are using 1 out of 1 'free' tier service quota.`

**Common causes:**
  • Already have a free tier Azure AI Search service in the subscription
  • Previous deployment created a free tier Search service that wasn't deleted
  • Attempting to deploy multiple environments with free tier Search services

**Resolution:**
  • **Option 1: Delete existing free tier Search service:**
    `az search service list --query "[?sku.name=='free']" -o table`
&nbsp;&nbsp;&nbsp;&nbsp;`az search service delete --name <search-service-name> --resource-group <resource-group-name> --yes`<br/>
  • **Option 2: Upgrade to a paid SKU:**
    Modify your Bicep/ARM template to use `basic`, `standard`, or higher SKU instead of `free`
  • **Option 3: Use existing Search service:**
    Reference the existing free tier Search service in your deployment instead of creating a new one
  • **Request quota increase:**
    Submit a support request with issue type 'Service and subscription limits (quota)' and quota type 'Search' via [Azure Quota Request](https://aka.ms/AddQuotaSubscription)

**Reference:**
  • [Azure AI Search service limits](https://learn.microsoft.com/en-us/azure/search/search-limits-quotas-capacity)
  • [Azure AI Search pricing tiers](https://learn.microsoft.com/en-us/azure/search/search-sku-tier)
| +| **InsufficientQuota** | Not enough quota available in subscription |
  • Check if you have sufficient quota available in your subscription before deployment
  • To verify, refer to the [quota_check](../docs/QuotaCheck.md) file for details
| +| **MaxNumberOfRegionalEnvironmentsInSubExceeded** | Maximum Container App Environments limit reached for region | This error occurs when you attempt to create more **Azure Container App Environments** than the regional quota limit allows for your subscription. Each Azure region has a specific limit on the number of Container App Environments that can be created per subscription.<br/>

**Common Causes:**
  • Deploying to regions with low quota limits (e.g., Sweden Central allows only 1 environment)
  • Multiple deployments without cleaning up previous environments
  • Exceeding the standard limit of 15 environments in most major regions

**Resolution:**
  • **Delete unused environments** in the target region, OR
  • **Deploy to a different region** with available capacity, OR
  • **Request quota increase** via [Azure Support](https://go.microsoft.com/fwlink/?linkid=2208872)

**Reference:**
  • [Azure Container Apps quotas](https://learn.microsoft.com/en-us/azure/container-apps/quotas)
  • [Azure subscription and service limits](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/azure-subscription-service-limits)
| +| **SkuNotAvailable** | Requested SKU not available in selected location or zone | You receive this error in the following scenarios:
  • When the resource SKU you've selected, such as VM size, isn't available for a location or zone
  • If you're deploying an Azure Spot VM or Spot scale set instance, and there isn't any capacity for Azure Spot in this location. For more information, see Spot error messages
| +| **Conflict - No available instances to satisfy this request** | Azure App Service has insufficient capacity in the region | This error occurs when Azure App Service doesn't have enough available compute instances in the selected region to provision or scale your app.

**Common Causes:**
  • High demand in the selected region (e.g., East US, West Europe)
  • Specific SKUs experiencing capacity constraints (Free, Shared, or certain Premium tiers)
  • Multiple rapid deployments in the same region

**Resolution:**
  • **Wait and Retry** (15-30 minutes): `azd up`
  • **Deploy to a New Resource Group** (Recommended for urgent cases):
    ```
    azd down --force --purge
    azd up
    ```
  • **Try a Different Region:**
    Update region in `main.bicep` or `azure.yaml` to a less congested region (e.g., `westus2`, `centralus`, `northeurope`)
  • **Use a Different SKU/Tier:**
    If using Free/Shared tier, upgrade to Basic or Standard
&nbsp;&nbsp;&nbsp;&nbsp;Check SKU availability: `az appservice list-locations --sku <sku-name>`<br/>

**Reference:** [Azure App Service Plans](https://learn.microsoft.com/en-us/azure/app-service/overview-hosting-plans) | + +-------------------------------- + +## Resource Group & Deployment Management + +| Issue/Error Code | Description | Steps to Resolve | +|-----------------|-------------|------------------| +| **ResourceGroupNotFound** | Specified resource group does not exist | **Option 1:**
  • Go to [Azure Portal](https://portal.azure.com/#home)
  • Click on **"Resource groups"** option
    ![alt text](../docs/images/AzureHomePage.png)
  • Search for the resource group in the search bar. If it exists, you can proceed
    ![alt text](../docs/images/resourcegroup1.png)

**Option 2:**
  • This error can occur if you deploy using the same .env file from a previous deployment
  • Create a new environment before redeploying:
&nbsp;&nbsp;&nbsp;&nbsp;`azd env new <environment-name>`<br/>
| +| **ResourceGroupBeingDeleted** | Resource group is currently being deleted | **Steps:**
  • Go to [Azure Portal](https://portal.azure.com/#home)
&nbsp;&nbsp;• Go to the resource group option and search for the targeted resource group<br/>
  • If the resource group is being deleted, you cannot use it. Create a new one or use a different resource group
| +| **DeploymentActive** | Another deployment is already in progress in this resource group |
  • This occurs when a deployment is already in progress and another deployment is triggered in the same resource group
  • Cancel the ongoing deployment before starting a new one
  • Do not initiate a new deployment until the previous one is completed
| +| **DeploymentCanceled** | Deployment was canceled before completion |
  • **Check deployment history:**
    Go to Azure Portal → Resource Group → Deployments
    Review the detailed error message
  • **Identify the root cause:**
    Dependent resource failed to deploy
    Validation error occurred
    Manual cancellation was triggered
  • **Validate template:**
&nbsp;&nbsp;&nbsp;&nbsp;`az deployment group validate --resource-group <resource-group-name> --template-file main.bicep`<br/>
  • **Check resource limits/quotas**
  • **Fix the failed dependency**
  • **Retry deployment:**
&nbsp;&nbsp;&nbsp;&nbsp;`az deployment group create --resource-group <resource-group-name> --template-file main.bicep`<br/>

💡 **Note:** DeploymentCanceled is a wrapper error — check inner errors in deployment logs | +| **DeploymentCanceled(user.canceled)** | User manually canceled the deployment |
  • Deployment was manually canceled by the user (Portal, CLI, or pipeline)
  • Check deployment history and logs to confirm who/when it was canceled
  • If accidental, retry the deployment
  • For pipelines, ensure no automation or timeout is triggering cancellation
  • Use deployment locks or retry logic to prevent accidental cancellations
| +| **DeploymentNotFound** | Deployment record not found or was deleted |
  • This occurs when the user deletes a previous deployment along with the resource group, then redeploys the same RG with the same environment name but in a different location
  • Do not change the location when redeploying a deleted RG, OR
  • Use new names for the RG and environment during redeployment
| +| **ResourceGroupDeletionTimeout** | Resource group deletion exceeded timeout limit |
  • Some resources may be stuck deleting or have dependencies; check RG resources and status
  • Ensure no resource locks or Azure Policies are blocking deletion
  • Retry deletion via CLI/PowerShell:
&nbsp;&nbsp;&nbsp;&nbsp;`az group delete --name <resource-group-name> --yes --no-wait`<br/>
  • Check Activity Log to identify failing resources
  • Escalate to Azure Support if deletion is stuck
| + +-------------------------------- + +## Regional & Location Issues + +| Issue/Error Code | Description | Steps to Resolve | +|-----------------|-------------|------------------| +| **LocationNotAvailableForResourceType** | Resource type not supported in selected region | This error occurs when you attempt to deploy a resource to a region that does not support that specific resource type or SKU.

**Resolution:**
  • **Verify resource availability by region:**
&nbsp;&nbsp;&nbsp;&nbsp;`az provider show --namespace <provider-namespace> --query "resourceTypes[?resourceType=='<resource-type>'].locations" -o table`<br/>
  • **Check Azure Products by Region:**
    [Azure Products by Region](https://azure.microsoft.com/en-us/explore/global-infrastructure/products-by-region/)
  • **Supported regions for this deployment:**
    • `australiaeast`
    • `centralus`
    • `eastasia`
    • `eastus2`
    • `japaneast`
    • `northeurope`
    • `southeastasia`
    • `uksouth`
  • **Redeploy:**
    `azd up`
| +| **InvalidResourceLocation** | Cannot change region for already deployed resources | This error occurs when you attempt to modify the location/region of a resource that has already been deployed. Azure resources **cannot change regions** after creation.

**Resolution:**
  • **Option 1: Delete and Redeploy:**
    `azd down --force --purge`
&nbsp;&nbsp;&nbsp;&nbsp;After the purge completes, redeploy the app with `azd up`<br/>
  • **Option 2: Create new environment with different region:**
&nbsp;&nbsp;&nbsp;&nbsp;`azd env new <environment-name>`<br/>
&nbsp;&nbsp;&nbsp;&nbsp;`azd env set AZURE_LOCATION <region>`<br/>
    `azd up`
  • **Option 3: Keep existing deployment:**
    Revert configuration files to use the original region

⚠️ **Important:** Backup critical data before deleting resources.

**Reference:** [Move Azure resources across regions](https://learn.microsoft.com/en-us/azure/resource-mover/overview) | +| **ServiceUnavailable/ResourceNotFound** | Service unavailable or restricted in selected region |
  • Regions are restricted to guarantee compatibility with paired regions and replica locations for data redundancy and failover scenarios based on articles [Azure regions list](https://learn.microsoft.com/en-us/azure/reliability/regions-list) and [Azure Database for MySQL Flexible Server - Azure Regions](https://learn.microsoft.com/azure/mysql/flexible-server/overview#azure-regions)
&nbsp;&nbsp;• To request more quota, refer to the [Quota Request](https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/create-support-request-quota-increase) documentation<br/>
| +| **ResourceOperationFailure/
ProvisioningDisabled** | Resource provisioning restricted or disabled in region |
  • This error occurs when provisioning of a resource is restricted in the selected region. It usually happens because the service is not available in that region or provisioning has been temporarily disabled
  • Regions are restricted to guarantee compatibility with paired regions and replica locations for data redundancy and failover scenarios based on articles [Azure regions list](https://learn.microsoft.com/en-us/azure/reliability/regions-list) and [Azure Database for MySQL Flexible Server - Azure Regions](https://learn.microsoft.com/azure/mysql/flexible-server/overview#azure-regions)
  • If you need to use the same region, you can request a quota or provisioning exception. Refer [Quota Request](https://docs.microsoft.com/en-us/azure/sql-database/quota-increase-request) for more details
| +| **RedundancyConfigurationNotAvailableInRegion** | Redundancy configuration not supported in selected region |
  • This issue happens when you try to create a **Storage Account** with a redundancy configuration (e.g., `Standard_GRS`) that is **not supported in the selected Azure region**
  • Example: Creating a storage account with **GRS** in **italynorth** will fail with error:
    `az storage account create -n mystorageacct123 -g myResourceGroup -l italynorth --sku Standard_GRS --kind StorageV2`
  • To check supported SKUs for your region:
    `az storage account list-skus -l italynorth -o table`
  • Use a supported redundancy option (e.g., Standard_LRS) in the same region or deploy the Storage Account in a region that supports your chosen redundancy
  • For more details, refer to [Azure Storage redundancy documentation](https://learn.microsoft.com/en-us/azure/storage/common/storage-redundancy)
| +| **NoRegisteredProviderFound** | Unsupported API version for resource type in specified location | This error occurs when you attempt to deploy an Azure resource using an **API version that is not supported** for the specified resource type and location.

**Example error message:**
`NoRegisteredProviderFound: No registered resource provider found for location 'westeurope' and API version '2020-06-30' for type 'searchServices'. The supported api-versions are '2014-07-31-Preview, 2015-02-28, 2015-08-19, 2019-10-01-Preview, 2020-03-13, 2020-08-01, 2020-08-01-Preview, 2021-04-01-Preview, 2021-06-06-Preview, 2022-09-01, 2023-11-01, 2024-03-01-Preview, 2024-06-01-Preview, 2025-02-01-Preview, 2025-05-01'.`

**Common causes:**
  • Using an outdated or invalid API version in Bicep/ARM templates
  • Referencing an Azure Verified Module (AVM) that uses a deprecated API version
  • Copy-pasting old template code with legacy API versions
  • The API version was never valid (typo or incorrect version number)

**Resolution:**
  • **Update the API version** in your Bicep/ARM template to a supported version listed in the error message. For example, change:
    `resource searchService 'Microsoft.Search/searchServices@2020-06-30'`
    to:
    `resource searchService 'Microsoft.Search/searchServices@2025-05-01'`
  • **Check supported API versions** for a resource type:
    `az provider show --namespace Microsoft.Search --query "resourceTypes[?resourceType=='searchServices'].apiVersions" -o table`
  • **Use the latest stable API version** when possible (avoid preview versions for production)
  • **Update Azure Verified Modules (AVM)** to their latest versions if using external modules
  • **Validate your template** before deployment:
    `az deployment group validate --resource-group <resource-group-name> --template-file main.bicep`

**Reference:**
  • [Azure Resource Manager API versions](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/resource-providers-and-types)
  • [Azure AI Search REST API versions](https://learn.microsoft.com/en-us/azure/search/search-api-versions)
| + +-------------------------------- + +## Resource Naming & Validation + +| Issue/Error Code | Description | Steps to Resolve | +|-----------------|-------------|------------------| +| **ResourceNameInvalid** | Resource name violates naming convention rules |
  • Ensure the resource name is within the allowed length and naming rules defined for that specific resource type, you can refer [Resource Naming Convention](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/resource-name-rules) document
| +| **Workspace Name - InvalidParameter** | Workspace name does not meet required format | To avoid these errors, follow the rules below for the workspace name:
  • Must start and end with an alphanumeric character (letter or number)
  • Allowed characters: `a–z`, `0–9`, `- (hyphen)`
  • Cannot start or end with a hyphen -
  • No spaces, underscores (_), periods (.), or special characters
  • Must be unique within the Azure region & subscription
  • Length: 3–33 characters (for AML workspaces)
| +| **VaultNameNotValid** | Key Vault name does not meet naming requirements | In this template the vault name is unique every time, but if you are trying to hard-code the name, please verify the points below:
  • **Check name length** - Ensure the Key Vault name is between 3 and 24 characters
  • **Validate allowed characters** - The name can only contain letters (a–z, A–Z) and numbers (0–9). Hyphens are allowed, but not at the beginning or end, and not consecutive (--)
  • **Ensure proper start and end** - The name must start with a letter. The name must end with a letter or digit (not a hyphen)
  • **Test with a new name** - Example of a valid vault name: ✅ `cartersaikeyvault1`, ✅ `securevaultdemo`, ✅ `kv-project123`
| +| **BadRequest: Dns record under zone Document is already taken** | DNS record name already in use | This error can occur only when a user hardcodes the CosmosDB service name. To avoid it, try the suggestions below:
  • Verify resource names are globally unique
  • If you already created an account/resource with same name in another subscription or resource group, check and delete it before reusing the name
  • By default in this template we use a unique prefix with every resource/account name to avoid this kind of error
| + +--------------------------------- + +## Resource Identification & References + +| Issue/Error Code | Description | Steps to Resolve | +|-----------------|-------------|------------------| +| **LinkedInvalidPropertyId/
ResourceNotFound/
DeploymentOutputEvaluationFailed/
CanNotRestoreANonExistingResource/
The language expression property array index is out of bounds** | Invalid or non-existent resource ID reference |
  • Before using any resource ID, ensure it follows the correct format
  • Verify that the resource ID you are passing actually exists
  • Make sure there are no typos in the resource ID
  • Verify that the provisioning state of the existing resource is `Succeeded` by running the following command to avoid this error while deployment or restoring the resource:
    `az resource show --ids <resource-id> --query "properties.provisioningState"`
  • Sample Resource IDs format:
    Log Analytics Workspace Resource ID: `/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.OperationalInsights/workspaces/{workspaceName}`
    Azure AI Foundry Project Resource ID: `/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.MachineLearningServices/workspaces/{name}`
  • You may encounter the error `The language expression property array index '8' is out of bounds` if the resource ID is incomplete. Please ensure your resource ID is correct and contains all required information, as shown in sample resource IDs
  • For more information refer [Resource Not Found errors solutions](https://learn.microsoft.com/en-us/azure/azure-resource-manager/troubleshooting/error-not-found?tabs=bicep)
| +| **ParentResourceNotFound** | Parent resource does not exist or cannot be found |
  • You can refer to the [Parent Resource Not found](https://learn.microsoft.com/en-us/azure/azure-resource-manager/troubleshooting/error-parent-resource?tabs=bicep) documentation if you encounter this error
| +| **PrincipalNotFound** | Principal ID does not exist in Azure AD tenant | This error occurs when the **principal ID** (Service Principal, User, or Group) specified in a role assignment or deployment does not exist in the Azure Active Directory tenant. It can also happen due to **replication delays** right after creating a new principal.

**Example causes:**
  • The specified **Object ID** is invalid or belongs to another tenant
  • The principal was recently created but Azure AD has not yet replicated it
  • Attempting to assign a role to a non-existing or deleted Service Principal/User/Group

**How to fix:**
  • Verify that the **principal ID is correct** and exists in the same directory/tenant:
    `az ad sp show --id <object-id>`
  • If the principal was just created, wait a few minutes and retry
  • Explicitly set the principalType property (ServicePrincipal, User, or Group) in your ARM/Bicep template to avoid replication delays
  • If the principal does not exist, create it again before assigning roles
  • For more details, see [Azure PrincipalType documentation](https://learn.microsoft.com/en-us/azure/role-based-access-control/troubleshooting?tabs=bicep)
| +| **SubscriptionDoesNotHaveServer** | Referenced SQL Server does not exist in subscription | This issue happens when you try to reference an **Azure SQL Server** (`Microsoft.Sql/servers`) that does not exist in the selected subscription.

**It can occur if:**
  • The SQL server name is typed incorrectly
  • The SQL server was **deleted** but is still being referenced
  • You are working in the **wrong subscription context**
  • The server exists in a **different subscription/tenant** where you don't have access

**Reproduce:**
Run an Azure CLI command with a non-existent server name:
`az sql db list --server sql-doesnotexist --resource-group myResourceGroup`
or
`az sql server show --name sql-caqfrhxr4i3hyj --resource-group myResourceGroup`

**Resolution:**
  • Verify the SQL Server name exists in your subscription:
    `az sql server list --output table`
  • Make sure you are targeting the correct subscription:
    `az account show`
    `az account set --subscription <subscription-id>`
  • If the server was deleted, either restore it (if possible) or update references to use a valid existing server
| + +--------------------------------- + +## Network & Infrastructure Configuration + +| Issue/Error Code | Description | Steps to Resolve | +|-----------------|-------------|------------------| +| **NetcfgSubnetRangeOutsideVnet** | Subnet IP range outside virtual network address space |
  • Ensure the subnet's IP address range falls within the virtual network's address space
  • Always validate that the subnet CIDR block is a subset of the VNet range
  • For Azure Bastion, the AzureBastionSubnet must be at least /27
  • Confirm that the AzureBastionSubnet is deployed inside the VNet
| +| **DisableExport_PublicNetworkAccessMustBeDisabled** | Public network access must be disabled when export is disabled |
  • **Check container source:** Confirm whether the deployment is using a Docker image or Azure Container Registry (ACR)
  • **Verify ACR configuration:** If ACR is included, review its settings to ensure they comply with Azure requirements
  • **Check export settings:** If export is disabled in ACR, make sure public network access is also disabled
  • **Redeploy after fix:** Correct the configuration and redeploy. This will prevent the Conflict error during deployment
  • For more information refer [ACR Data Loss Prevention](https://learn.microsoft.com/en-us/azure/container-registry/data-loss-prevention) document
| -- Attempt: 1st (EXP deployment) For the Error: 503 Service Temporarily Unavailable: If you encounter this error during EXP deployment, first verify whether your deployment completed successfully. If the deployment failed, review the activity logs or error messages for more details about the failure. Address any identified issues, then proceed to start a fresh deployment. +--------------------------------- -![alt text](./images/troubleshooting/503.png) +## Configuration & Property Errors -Begin a new deployment attempt: +| Issue/Error Code | Description | Steps to Resolve | +|-----------------|-------------|------------------| +| **InvalidRequestContent** | Deployment contains unrecognized or missing required values |
  • The deployment values either include values that aren't recognized, or required values are missing. Confirm the values for your resource type
  • You can refer [Invalid Request Content error](https://learn.microsoft.com/en-us/azure/azure-resource-manager/troubleshooting/common-deployment-errors#:~:text=InvalidRequestContent,Template%20reference) documentation
| +| **Conflict - Cannot use the SKU Basic with File Change Audit for site** | File Change Audit not supported on Basic SKU |
  • This error happens because File Change Audit logs aren't supported on Basic SKU App Service Plans
  • Fix it by upgrading to a Premium/Isolated SKU (supports File Change Audit), or
  • Disabling File Change Audit in Diagnostic Settings if you must stay on Basic
  • Always cross-check the [supported log types](https://aka.ms/supported-log-types) before adding diagnostic logs to your Bicep templates
| +| **AccountPropertyCannotBeUpdated** | Read-only property cannot be modified after creation | The property **`isHnsEnabled`** (Hierarchical Namespace for Data Lake Gen2) is **read-only** and can only be set during **storage account creation**. Once a storage account is created, this property **cannot be updated**. Trying to update it via ARM template, Bicep, CLI, or Portal will fail.

**Resolution:**
  • Create a **new storage account** with `isHnsEnabled=true` if you require hierarchical namespace
  • Migration may be needed if you already have data
  • Refer to [Storage Account Update Restrictions](https://aka.ms/storageaccountupdate) for more details
| +| **Conflict - Local authentication is disabled** | App Configuration store has local authentication disabled but application is using local auth mode | This error occurs when your Azure App Configuration store has **local authentication disabled** (`disableLocalAuth: true`) but your application is trying to access it using **connection strings or access keys** instead of **Azure AD/Managed Identity authentication**.

**Example error message:**
`The operation cannot be performed because the configuration store is using local authentication mode and local authentication is disabled. To enable access to data plane resources while local authentication is disabled, please use pass-through authentication mode.`

**Common causes:**
  • App Configuration store deployed with `disableLocalAuth: true` for security compliance
  • Application code using connection strings instead of Managed Identity
  • SDK client initialized with access keys rather than `DefaultAzureCredential`

**Resolution:**
  • **Option 1: Update application to use Managed Identity (Recommended)**
    ```python
    from azure.identity import DefaultAzureCredential
    from azure.appconfiguration import AzureAppConfigurationClient

    credential = DefaultAzureCredential()
    client = AzureAppConfigurationClient(
        endpoint="https://your-appconfig.azconfig.io",
        credential=credential
    )
    ```
  • **Option 2: Re-enable local authentication (Not recommended for production)**
    Set `disableLocalAuth: false` in your Bicep/ARM template
  • **Ensure proper RBAC assignment:** Verify that the Managed Identity has `App Configuration Data Reader` or `App Configuration Data Owner` role assigned

**Reference:**
  • [Disable local authentication in Azure App Configuration](https://learn.microsoft.com/en-us/azure/azure-app-configuration/howto-disable-access-key-authentication)
  • [Use Managed Identities to access App Configuration](https://learn.microsoft.com/en-us/azure/azure-app-configuration/howto-integrate-azure-managed-service-identity)
| -- Attempt 2 and 3 (EXP deployment). If none of the files were uploaded after running the sample command and all uploads failed, follow these -![alt text](./images/troubleshooting/503_1.png) +---------------------------------- -![alt text](./images/troubleshooting/503_2.png) +## Resource State & Provisioning -![alt text](./images/troubleshooting/503_3.png) +| Issue/Error Code | Description | Steps to Resolve | +|-----------------|-------------|------------------| +| **AccountProvisioningStateInvalid** | Resource used before provisioning completed |
  • The AccountProvisioningStateInvalid error occurs when you try to use resources while they are still in the Accepted provisioning state
  • This means the deployment has not yet fully completed
  • To avoid this error, wait until the provisioning state changes to Succeeded
  • Only use the resources once the deployment is fully completed
| +| **BadRequest - DatabaseAccount is in a failed provisioning state because the previous attempt to create it was not successful** | Database account failed to provision previously |
  • This error occurs when a user attempts to redeploy a resource that previously failed to provision
  • To resolve the issue, delete the failed deployment first, then start a new deployment
  • For guidance on deleting a resource from a Resource Group, refer to the following link: [Delete an Azure Cosmos DB account](https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/manage-with-powershell#delete-account:~:text=%3A%24enableMultiMaster-,Delete%20an%20Azure%20Cosmos%20DB%20account,-This%20command%20deletes)
| +| **ServiceDeleting** | Cannot provision service because deletion is still in progress | This error occurs when you attempt to create an Azure Search service with the same name as one that is currently being deleted. Azure Search services have a **soft-delete period** during which the service name remains reserved.

**Common causes:**
  • Deleting a Search service and immediately trying to recreate it with the same name
  • Rapid redeployments using the same service name in Bicep/ARM templates
  • The deletion operation is asynchronous and takes several minutes to complete

**Resolution:**
  • **Wait for deletion to complete** (10-15 minutes) before redeploying
  • **Use a different service name** - append timestamp or unique identifier to the name
  • **Implement retry logic** with exponential backoff as suggested in the error message
  • **Check deletion status** before recreating:
    `az search service show --name <service-name> --resource-group <resource-group-name>`
  • For Bicep deployments, ensure your naming strategy includes unique suffixes to avoid conflicts
  • For more details, refer to [Azure Search service limits](https://learn.microsoft.com/en-us/azure/search/search-limits-quotas-capacity)
| +| **FailedIdentityOperation / ManagedEnvironmentScheduledForDelete** | Identity operation failed due to pending delete or resource conflict | This error occurs when you attempt to create or update an Azure Container Apps Managed Environment while it has a **pending delete operation** or the resource already exists in a conflicting state.

**Example error messages:**
`FailedIdentityOperation: Identity operation for resource failed with error 'Failed to perform resource identity operation. Status: 'Conflict'. Response: 'Request specified that resource is new, but resource already exists. This may be due to a pending delete operation, try again later.'`

`ManagedEnvironmentScheduledForDelete: The environment 'cae-xxx' is under deletion. Please retry the creation with new name or wait for the deletion completed.`

**Common causes:**
  • Deleting a Container Apps Environment and immediately trying to recreate it with the same name
  • Rapid redeployments using `azd up` without waiting for previous cleanup
  • Resource group deletion in progress while attempting to redeploy
  • Previous deployment failed or was canceled, leaving resources in an inconsistent state
  • Concurrent deployments targeting the same resources

**Resolution:**
  • **Wait for deletion to complete** (5-15 minutes) before redeploying:
    `az containerapp env show --name <environment-name> --resource-group <resource-group-name> --query "properties.provisioningState"`
  • **Check environment status:** If status is `ScheduledForDelete` or `Deleting`, wait for it to complete
  • **Use a new environment name:** Create a new environment with a different name or use a new resource group:
    `azd env new <environment-name>`
    `azd up`
  • **Force delete and wait:** If the environment is stuck, try force deletion:
    `az containerapp env delete --name <environment-name> --resource-group <resource-group-name> --yes`
    Wait for deletion to complete before redeploying
  • **Delete associated Container Apps first:** If the environment has apps, delete them before the environment:
    `az containerapp list --environment <environment-name> --resource-group <resource-group-name> -o table`
    `az containerapp delete --name <app-name> --resource-group <resource-group-name> --yes`
  • **Use unique naming:** Implement timestamp or unique suffix in your naming strategy to avoid conflicts

**Reference:**
  • [Azure Container Apps troubleshooting](https://learn.microsoft.com/en-us/azure/container-apps/troubleshooting)
  • [Manage Container Apps environments](https://learn.microsoft.com/en-us/azure/container-apps/environment)
| +| **BadRequest - Parent account does not provision correctly** | Parent AI Services/Cognitive Services account failed to provision | This error occurs when a **child resource** (such as an AI project, model deployment, or other dependent resource) attempts to be created on a **parent Cognitive Services/AI Services account** that has **failed to provision** or is in an incomplete state.

**Example error message:**
`Parent account does not provision correctly, please retry creating the account.`

**Common causes:**
  • Parent AI Services account provisioning failed due to quota, region, or configuration issues
  • Using `restore: true` flag when no soft-deleted resource exists to restore
  • Network or transient errors during parent account creation
  • Invalid configuration on the parent account (e.g., invalid SKU, unsupported region)
  • Previous deployment of the parent account was interrupted or canceled

**Resolution:**
  • **Check parent account status:**
    `az cognitiveservices account show --name <account-name> --resource-group <resource-group-name> --query "properties.provisioningState"`
  • **Delete failed parent account and redeploy:**
    `az cognitiveservices account delete --name <account-name> --resource-group <resource-group-name>`
    Then run: `azd up`
  • **If using restore flag incorrectly:** Ensure `restore: false` in your Bicep template unless you specifically need to restore a soft-deleted resource
  • **Check for soft-deleted resources:**
    `az cognitiveservices account list-deleted`
  • **Purge soft-deleted resources if needed:**
    `az cognitiveservices account purge --name <account-name> --resource-group <resource-group-name> --location <location>`
  • **Verify quota and region availability:** Ensure you have sufficient quota and the service is available in your selected region

**Reference:**
  • [Manage Cognitive Services accounts](https://learn.microsoft.com/en-us/azure/ai-services/manage-resources)
  • [Recover deleted Cognitive Services resources](https://learn.microsoft.com/en-us/azure/ai-services/recover-purge-resources)
| +--------------------------------- -- Troubleshooting steps: +## Miscellaneous - - Review the error messages to identify the cause of the upload failures. - - Check the status of the resource group and confirm whether AKS is running or stopped. - - If AKS is stopped, try restarting the AKS service. - - Attempt the file upload process again using your script. - - If uploads continue to fail after these steps, proceed to start a completely new deployment. +| Issue/Error Code | Description | Steps to Resolve | +|-------------|-------------|------------------| +| **DeploymentModelNotSupported/
ServiceModelDeprecated/
InvalidResourceProperties** | Model not supported or deprecated in selected region |
  • The updated model may not be supported in the selected region. Please verify its availability in the [Azure AI Foundry models](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions) document
| +| **FlagMustBeSetForRestore/
NameUnavailable/
CustomDomainInUse** | Soft-deleted resource requires restore flag or purge | This error occurs when you try to deploy a Cognitive Services resource that was **soft-deleted** earlier. Azure requires you to explicitly set the **`restore` flag** to `true` if you want to recover the soft-deleted resource. If you don't want to restore the resource, you must **purge the deleted resource** first before redeploying.

**Example causes:**
  • Trying to redeploy a Cognitive Services account with the same name as a previously deleted one
  • The deleted resource still exists in a **soft-delete retention state**

**How to fix:**
  • If you want to restore → add `"restore": true` in your template properties
  • If you want a fresh deployment → purge the resource using:
    `az cognitiveservices account purge --name <account-name> --resource-group <resource-group-name> --location <location>`
  • For more details, refer to [Soft delete and resource restore](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/delete-resource-group?tabs=azure-powershell)
| +| **LinkedAuthorizationFailed** | Service principal lacks permission to use a linked resource required for deployment | This error occurs when a service principal doesn't have permission to perform an action on a linked resource that is required for the operation (e.g., cluster creation).

**Common causes:**
  • The service principal has permission on the primary resource but lacks permission on the linked scope
  • Missing role assignment for operations like `Microsoft.Network/ddosProtectionPlans/join/action`

**Resolution:**
  • Identify the **service principal**, **resource**, and **operation** from the error message
  • Grant the service principal the required permissions on the linked resource
  • Use [Assign Azure roles using the Azure portal](https://learn.microsoft.com/en-us/azure/role-based-access-control/role-assignments-portal) to add the role assignment
  • For more details, refer to [LinkedAuthorizationFailed error](https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/error-codes/linkedauthorizationfailed-error)
| +| **ContainerOperationFailure** | Container image or storage resource does not exist | This error occurs when an operation fails because the **specified container resource does not exist**. This can happen with Azure Container Registry images or Azure Storage blob containers.

**Example error message:**
`ContainerOperationFailure: The specified resource does not exist. RequestId:xxxxx Time:xxxxx`

**Common causes:**
  • **Invalid container image tag:** The specified image tag does not exist in the container registry
  • **Non-existent container registry:** The container registry endpoint is incorrect or inaccessible
  • **Missing blob container:** The storage blob container referenced by the application does not exist
  • **Incorrect storage account URL:** The storage account endpoint is misconfigured
  • **Permission issues:** The managed identity lacks permissions to access the container registry or storage account

**Resolution:**
  • **Verify container image exists:**
    `az acr repository show-tags --name <registry-name> --repository <repository-name>`
  • **Check image tag in deployment:** Ensure the `imageTag` parameter matches an existing tag in the registry
  • **Verify storage containers exist:**
    `az storage container list --account-name <storage-account-name> --auth-mode login`
  • **Check role assignments:** Ensure the Container App's managed identity has `AcrPull` role on the container registry and `Storage Blob Data Contributor` role on the storage account
  • **Verify storage account URL:** Ensure `APP_STORAGE_BLOB_URL` and `APP_STORAGE_QUEUE_URL` in App Configuration point to the correct storage account

**Reference:**
  • [Azure Container Registry troubleshooting](https://learn.microsoft.com/en-us/azure/container-registry/container-registry-troubleshoot-login)
  • [Azure Storage troubleshooting](https://learn.microsoft.com/en-us/azure/storage/common/storage-troubleshoot-common-errors)
| - ![alt text](./images/troubleshooting/503_4.png) -
+--------------------------------- 💡 Note: If you encounter any other issues, you can refer to the [Common Deployment Errors](https://learn.microsoft.com/en-us/azure/azure-resource-manager/troubleshooting/common-deployment-errors) documentation. -If the problem persists, you can also raise an bug in our [Github Issues](https://github.com/microsoft/Document-Knowledge-Mining-Solution-Accelerator/issues) for further support. \ No newline at end of file +If the problem persists, you can also raise an bug in our [Document Knowledge Generation Github Issues](https://github.com/microsoft/Document-Knowledge-Mining-Solution-Accelerator/issues) for further support. diff --git a/docs/images/AzureHomePage.png b/docs/images/AzureHomePage.png new file mode 100644 index 00000000..801510ec Binary files /dev/null and b/docs/images/AzureHomePage.png differ diff --git a/docs/images/resourcegroup1.png b/docs/images/resourcegroup1.png new file mode 100644 index 00000000..b184d8c7 Binary files /dev/null and b/docs/images/resourcegroup1.png differ diff --git a/infra/main.parameters.json b/infra/main.parameters.json index a649cdda..0e313833 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -8,6 +8,9 @@ "location": { "value": "${AZURE_LOCATION}" }, + "aiDeploymentsLocation": { + "value": "${AZURE_ENV_OPENAI_LOCATION}" + }, "gptModelDeploymentType": { "value": "${AZURE_ENV_MODEL_DEPLOYMENT_TYPE}" }, diff --git a/infra/main.waf.parameters.json b/infra/main.waf.parameters.json index 6700f98f..337be5fd 100644 --- a/infra/main.waf.parameters.json +++ b/infra/main.waf.parameters.json @@ -8,6 +8,9 @@ "location": { "value": "${AZURE_LOCATION}" }, + "aiDeploymentsLocation": { + "value": "${AZURE_ENV_OPENAI_LOCATION}" + }, "gptModelDeploymentType": { "value": "${AZURE_ENV_MODEL_DEPLOYMENT_TYPE}" }, diff --git a/tests/e2e-test/pytest.ini b/tests/e2e-test/pytest.ini index 76eb64fc..a18b0949 100644 --- a/tests/e2e-test/pytest.ini +++ b/tests/e2e-test/pytest.ini @@ -4,3 +4,6 @@ 
log_cli_level = INFO log_file = logs/tests.log log_file_level = INFO addopts = -p no:warnings + +markers = + goldenpath: Golden Path tests \ No newline at end of file diff --git a/tests/e2e-test/tests/test_dkm_functional.py b/tests/e2e-test/tests/test_dkm_functional.py index fd068d76..0bad5f00 100644 --- a/tests/e2e-test/tests/test_dkm_functional.py +++ b/tests/e2e-test/tests/test_dkm_functional.py @@ -42,7 +42,7 @@ def capture_screenshot(page, step_name, test_prefix="test"): pass -@pytest.mark.smoke +@pytest.mark.goldenpath def test_golden_path_dkm(login_logout, request): """ Test Case 10591: Golden Path-DKM-test golden path demo script works properly @@ -283,7 +283,7 @@ def test_golden_path_dkm(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_upload_default_github_data(login_logout, request): """ Test Case 10661: DKM-Upload default GitHub repo sample data @@ -350,7 +350,7 @@ def test_upload_default_github_data(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_search_functionality(login_logout, request): """ Test Case 10671: DKM-Verify the search functionality @@ -416,7 +416,7 @@ def test_search_functionality(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_chat_selected_document(login_logout, request): """ Test Case 10704: DKM-Test chat selected document @@ -496,7 +496,7 @@ def test_chat_selected_document(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_chat_multiple_selected_documents(login_logout, request): """ Test Case 10705: DKM-Test chat multiple selected documents @@ -575,7 +575,7 @@ def test_chat_multiple_selected_documents(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_chat_all_documents(login_logout, request): """ Test Case 10706: DKM-Test chat all documents @@ -634,7 +634,7 @@ def test_chat_all_documents(login_logout, request): logger.removeHandler(handler) 
-@pytest.mark.smoke + def test_jailbreak_questions(login_logout, request): """ Test Case 10707: DKM-Test questions to jailbreak @@ -702,7 +702,7 @@ def test_jailbreak_questions(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_web_knowledge_questions(login_logout, request): """ Test Case 10708: DKM-Test questions to ask web knowledge @@ -761,7 +761,7 @@ def test_web_knowledge_questions(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_send_button_disabled_by_default(login_logout, request): """ Test Case 14111: Bug-13861-DKM - Send prompt icon should be disabled by default @@ -827,7 +827,7 @@ def test_send_button_disabled_by_default(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_validate_empty_spaces_chat_input(login_logout, request): """ Test Case 26217: DKM - Validate chat input handling for Empty / only-spaces @@ -899,7 +899,7 @@ def test_validate_empty_spaces_chat_input(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_upload_different_file_types(login_logout, request): """ Test Case 10664: DKM-Upload one file of each supported filetype @@ -964,7 +964,7 @@ def test_upload_different_file_types(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_upload_large_file(login_logout, request): """ Test Case 10665: OOS_DKM-Upload very large file size @@ -1030,7 +1030,7 @@ def test_upload_large_file(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_upload_zero_byte_file(login_logout, request): """ Test Case 10666: DKM-Upload zero byte file @@ -1085,7 +1085,7 @@ def test_upload_zero_byte_file(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_upload_unsupported_file(login_logout, request): """ Test Case 10667: DKM-Upload unsupported file @@ -1140,7 +1140,7 @@ def test_upload_unsupported_file(login_logout, request): 
logger.removeHandler(handler) -@pytest.mark.smoke + def test_documents_scrolling_pagination(login_logout, request): """ Test Case 10670: DKM-test documents section scrolling and pagination @@ -1201,7 +1201,7 @@ def test_documents_scrolling_pagination(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_search_with_time_filter(login_logout, request): """ Test Case 10672: DKM-Test search documents with time filter @@ -1272,7 +1272,7 @@ def test_search_with_time_filter(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_left_pane_filters(login_logout, request): """ Test Case 10700: DKM-Test left pane filters @@ -1341,7 +1341,7 @@ def test_left_pane_filters(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_left_pane_and_search_filters(login_logout, request): """ Test Case 10702: DKM-Test left pane filters collision with search filters @@ -1412,7 +1412,7 @@ def test_left_pane_and_search_filters(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_document_details_preview(login_logout, request): """ Test Case 10703: DKM-Test document details preview @@ -1492,7 +1492,7 @@ def test_document_details_preview(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_confirm_references_citations(login_logout, request): """ Test Case 10710: DKM-Confirm references or citations in response @@ -1564,7 +1564,7 @@ def test_confirm_references_citations(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_bug_sensitive_question_stuck(login_logout, request): """ Test Case 13539: Bug 12794 - Response Not Generated for Sensitive Question @@ -1630,7 +1630,7 @@ def test_bug_sensitive_question_stuck(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_bug_chat_session_cleared(login_logout, request): """ Test Case 14704: Bug-13797-DKM-Chat session cleared when switch tabs 
@@ -1706,7 +1706,7 @@ def test_bug_chat_session_cleared(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_bug_text_file_download(login_logout, request): """ Test Case 16787: Bug 16600 - Text file getting downloaded on click @@ -1768,7 +1768,7 @@ def test_bug_text_file_download(login_logout, request): logger.removeHandler(handler) -@pytest.mark.smoke + def test_bug_clear_all_button(login_logout, request): """ Test Case 16788: Bug 16599 - Clear All Button should reset search box