From 217ceb7e0d950b0861b1f31bf1d22117cb404d44 Mon Sep 17 00:00:00 2001 From: HeidiSteen Date: Thu, 7 Mar 2024 16:54:22 -0800 Subject: [PATCH 1/3] Deleted obsolete AI enrichment notebook --- README.md | 2 +- ...honTutorial-AzureSearch-AIEnrichment.ipynb | 373 ------------------ Tutorial-AI-Enrichment/README.md | 50 --- 3 files changed, 1 insertion(+), 424 deletions(-) delete mode 100644 Tutorial-AI-Enrichment/PythonTutorial-AzureSearch-AIEnrichment.ipynb delete mode 100644 Tutorial-AI-Enrichment/README.md diff --git a/README.md b/README.md index 7ab1d008b..1f59566c9 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ This repository contains Python code samples used in Azure AI Search documentati |--------|-------------| | quickstart | "Day One" introduction to the fundamental tasks of working with a search index: create, load, and query. This sample is a Jupyter Python3 .ipynb file. The index is modeled on a subset of the Hotels dataset, widely used in Azure AI Search samples, but reduced here for readability and comprehension. | | quickstart-semantic-search | Extends the quickstart through modifications that invoke semantic search. This notebook adds a semantic configuration to the index and semantic query options that formulate the query and response. | -| tutorial-ai-enrichment | This sample is a Jupyter Python3 .ipynb file used in the [Python Tutorial: Call Azure AI Services APIs in an Azure AI Search indexing pipeline](https://docs.microsoft.com/azure/search/cognitive-search-tutorial-blob-python). This sample demonstrates Azure AI functionality, adding AI enrichments from Azure AI services to extract, detect, and analyze information from image files or large unstructured document files. | + | search-website-functions-v4 | Shows how to create, load, and query a search index in Python using the Azure.Search.Documents library in the Azure SDK for Python. 
It also includes application code and sample data so that you can see search integration in the context of a full app. The data is from [https://github.com/zygmuntz/goodbooks-10k](https://github.com/zygmuntz/goodbooks-10k). The app is an Azure Static Web app, using the React library for user interaction, and Azure Function to handle the query requests and responses in the application layer. | ## Archived samples diff --git a/Tutorial-AI-Enrichment/PythonTutorial-AzureSearch-AIEnrichment.ipynb b/Tutorial-AI-Enrichment/PythonTutorial-AzureSearch-AIEnrichment.ipynb deleted file mode 100644 index 6dbec6eb2..000000000 --- a/Tutorial-AI-Enrichment/PythonTutorial-AzureSearch-AIEnrichment.ipynb +++ /dev/null @@ -1,373 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Enrichment sample in Python for Azure AI Search\n", - "\n", - "In this Jupyter Notebook, create and run enrichment steps to unlock searchable content in Azure blobs. It performs operations over mixed content in Azure Storage, such as images and application files, using a skillset that analyzes and extracts text information that becomes searchable in Azure AI Search. The full documentation for this sample can be found at [Tutorial: Use Python and AI to generate searchable content from Azure blobs](https://docs.microsoft.com/azure/search/cognitive-search-tutorial-blob-python).\n", - "\n", - "This sample creates the following objects on your search service:\n", - "\n", - "+ search index\n", - "+ data source\n", - "+ skillset\n", - "+ indexer\n", - "\n", - "In the last step, you'll run queries against the search index to explore the text output that was generated for each blob.\n", - "\n", - "This notebook calls the [Search REST APIs](https://docs.microsoft.com/rest/api/searchservice/), but you can also use the Azure.Search.Documents client library in the Azure SDK for Python to perform the same steps. 
See this [Python quickstart](https://docs.microsoft.com/azure/search/search-get-started-python) for details.\n", - "\n", - "To run this sample, satisfy all [prerequisites](https://docs.microsoft.com/azure/search/cognitive-search-tutorial-blob-python#prerequisites) stated in the tutorial and upload the sample data to a blob container in Azure Storage account. In this notebook, replace the placeholders for the search service endpoint, the admin API key, Azure Storage connection string, and blob container. Once you've provided all four values, you can run all cells, but the query won't return results until the indexer is finished and the search index is loaded. \n", - "\n", - "We recommend running each step and making sure it completes before moving on." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import requests\n", - "from pprint import pprint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Define the names for the data source, skillset, index and indexer\n", - "datasource_name = \"cogsrch-py-datasource\"\n", - "skillset_name = \"cogsrch-py-skillset\"\n", - "index_name = \"cogsrch-py-index\"\n", - "indexer_name = \"cogsrch-py-indexer\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Setup the endpoint\n", - "endpoint = 'https://.search.windows.net/'\n", - "headers = {'Content-Type': 'application/json',\n", - " 'api-key': ''}\n", - "params = {\n", - " 'api-version': '2023-11-01'\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a data source\n", - "# This data source points to your Azure Storage account.\n", - "# You should already have a blob container that contains the sample data\n", - "datasourceConnectionString = \"\"\n", - "datasource_payload = {\n", - 
" \"name\": datasource_name,\n", - " \"description\": \"Demo files to demonstrate search capabilities.\",\n", - " \"type\": \"azureblob\",\n", - " \"credentials\": {\n", - " \"connectionString\": datasourceConnectionString\n", - " },\n", - " \"container\": {\n", - " \"name\": \"\"\n", - " }\n", - "}\n", - "r = requests.put(endpoint + \"/datasources/\" + datasource_name,\n", - " data=json.dumps(datasource_payload), headers=headers, params=params)\n", - "print(r.status_code)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a skillset\n", - "skillset_payload = {\n", - " \"name\": skillset_name,\n", - " \"description\":\n", - " \"Extract entities, detect language and extract key-phrases\",\n", - " \"skills\":\n", - " [\n", - " {\n", - " \"@odata.type\": \"#Microsoft.Skills.Text.V3.EntityRecognitionSkill\",\n", - " \"categories\": [\"Organization\"],\n", - " \"defaultLanguageCode\": \"en\",\n", - " \"inputs\": [\n", - " {\n", - " \"name\": \"text\", \n", - " \"source\": \"/document/content\"\n", - " }\n", - " ],\n", - " \"outputs\": [\n", - " {\n", - " \"name\": \"organizations\", \n", - " \"targetName\": \"organizations\"\n", - " }\n", - " ]\n", - " },\n", - " {\n", - " \"@odata.type\": \"#Microsoft.Skills.Text.LanguageDetectionSkill\",\n", - " \"inputs\": [\n", - " {\n", - " \"name\": \"text\", \n", - " \"source\": \"/document/content\"\n", - " }\n", - " ],\n", - " \"outputs\": [\n", - " {\n", - " \"name\": \"languageCode\",\n", - " \"targetName\": \"languageCode\"\n", - " }\n", - " ]\n", - " },\n", - " {\n", - " \"@odata.type\": \"#Microsoft.Skills.Text.SplitSkill\",\n", - " \"textSplitMode\": \"pages\",\n", - " \"maximumPageLength\": 4000,\n", - " \"inputs\": [\n", - " {\n", - " \"name\": \"text\",\n", - " \"source\": \"/document/content\"\n", - " },\n", - " {\n", - " \"name\": \"languageCode\",\n", - " \"source\": \"/document/languageCode\"\n", - " }\n", - " ],\n", - " \"outputs\": [\n", - 
" {\n", - " \"name\": \"textItems\",\n", - " \"targetName\": \"pages\"\n", - " }\n", - " ]\n", - " },\n", - " {\n", - " \"@odata.type\": \"#Microsoft.Skills.Text.KeyPhraseExtractionSkill\",\n", - " \"context\": \"/document/pages/*\",\n", - " \"inputs\": [\n", - " {\n", - " \"name\": \"text\", \n", - " \"source\": \"/document/pages/*\"\n", - " },\n", - " {\n", - " \"name\": \"languageCode\", \n", - " \"source\": \"/document/languageCode\"\n", - " }\n", - " ],\n", - " \"outputs\": [\n", - " {\n", - " \"name\": \"keyPhrases\",\n", - " \"targetName\": \"keyPhrases\"\n", - " }\n", - " ]\n", - " }\n", - " ]\n", - "}\n", - "\n", - "r = requests.put(endpoint + \"/skillsets/\" + skillset_name,\n", - " data=json.dumps(skillset_payload), headers=headers, params=params)\n", - "print(r.status_code)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create an index\n", - "# Queries operate over the searchable fields and filterable fields in the index\n", - "index_payload = {\n", - " \"name\": index_name,\n", - " \"fields\": [\n", - " {\n", - " \"name\": \"id\",\n", - " \"type\": \"Edm.String\",\n", - " \"key\": \"true\",\n", - " \"searchable\": \"true\",\n", - " \"filterable\": \"false\",\n", - " \"facetable\": \"false\",\n", - " \"sortable\": \"true\"\n", - " },\n", - " {\n", - " \"name\": \"content\",\n", - " \"type\": \"Edm.String\",\n", - " \"sortable\": \"false\",\n", - " \"searchable\": \"true\",\n", - " \"filterable\": \"false\",\n", - " \"facetable\": \"false\"\n", - " },\n", - " {\n", - " \"name\": \"languageCode\",\n", - " \"type\": \"Edm.String\",\n", - " \"searchable\": \"true\",\n", - " \"filterable\": \"false\",\n", - " \"facetable\": \"false\"\n", - " },\n", - " {\n", - " \"name\": \"keyPhrases\",\n", - " \"type\": \"Collection(Edm.String)\",\n", - " \"searchable\": \"true\",\n", - " \"filterable\": \"false\",\n", - " \"facetable\": \"false\"\n", - " },\n", - " {\n", - " \"name\": 
\"organizations\",\n", - " \"type\": \"Collection(Edm.String)\",\n", - " \"searchable\": \"true\",\n", - " \"sortable\": \"false\",\n", - " \"filterable\": \"false\",\n", - " \"facetable\": \"false\"\n", - " }\n", - " ]\n", - "}\n", - "\n", - "r = requests.put(endpoint + \"/indexes/\" + index_name,\n", - " data=json.dumps(index_payload), headers=headers, params=params)\n", - "print(r.status_code)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create an indexer\n", - "indexer_payload = {\n", - " \"name\": indexer_name,\n", - " \"dataSourceName\": datasource_name,\n", - " \"targetIndexName\": index_name,\n", - " \"skillsetName\": skillset_name,\n", - " \"fieldMappings\": [\n", - " {\n", - " \"sourceFieldName\": \"metadata_storage_path\",\n", - " \"targetFieldName\": \"id\",\n", - " \"mappingFunction\":\n", - " {\"name\": \"base64Encode\"}\n", - " },\n", - " {\n", - " \"sourceFieldName\": \"content\",\n", - " \"targetFieldName\": \"content\"\n", - " }\n", - " ],\n", - " \"outputFieldMappings\":\n", - " [\n", - " {\n", - " \"sourceFieldName\": \"/document/organizations\",\n", - " \"targetFieldName\": \"organizations\"\n", - " },\n", - " {\n", - " \"sourceFieldName\": \"/document/pages/*/keyPhrases/*\",\n", - " \"targetFieldName\": \"keyPhrases\"\n", - " },\n", - " {\n", - " \"sourceFieldName\": \"/document/languageCode\",\n", - " \"targetFieldName\": \"languageCode\"\n", - " }\n", - " ],\n", - " \"parameters\":\n", - " {\n", - " \"maxFailedItems\": -1,\n", - " \"maxFailedItemsPerBatch\": -1,\n", - " \"configuration\":\n", - " {\n", - " \"dataToExtract\": \"contentAndMetadata\",\n", - " \"imageAction\": \"generateNormalizedImages\"\n", - " }\n", - " }\n", - "}\n", - "\n", - "r = requests.put(endpoint + \"/indexers/\" + indexer_name,\n", - " data=json.dumps(indexer_payload), headers=headers, params=params)\n", - "print(r.status_code)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], - "source": [ - "# Optionally, get indexer status to confirm that it's running\n", - "r = requests.get(endpoint + \"/indexers/\" + indexer_name +\n", - " \"/status\", headers=headers, params=params)\n", - "pprint(json.dumps(r.json(), indent=1))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Query the service for the index definition\n", - "# Query responses can be verbose. If you get \"Output exceeds the size limit. Open the full output data in a text editor\", open the output in an editor.\n", - "r = requests.get(endpoint + \"/indexes/\" + index_name,\n", - " headers=headers, params=params)\n", - "pprint(json.dumps(r.json(), indent=1))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Query the index to return the contents of \"organizations\", created through Entity Recognition during enrichment\n", - "# For keyword search, replace the asterisk with comma-separated query terms: search=microsoft,azure\n", - "r = requests.get(endpoint + \"/indexes/\" + index_name +\n", - " \"/docs?&search=*&$select=organizations\", headers=headers, params=params)\n", - "pprint(json.dumps(r.json(), indent=1))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This step concludes the walkthrough, showing you how to invoke skills that extract searchable content and information from image and application files in Azure Storage. The processes run on Azure AI Search, and the resulting index can be used in client apps to provide a search experience. For more information, visit the [official docs](https://docs.microsoft.com/azure/search/)." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.9.5 64-bit", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.5" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "9ff083f0c83558f9261023d47a77b9b3eb892c62cdbe066d046abcad1a5edb5c" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Tutorial-AI-Enrichment/README.md b/Tutorial-AI-Enrichment/README.md deleted file mode 100644 index ac7b7723d..000000000 --- a/Tutorial-AI-Enrichment/README.md +++ /dev/null @@ -1,50 +0,0 @@ ---- -page_type: sample -languages: - - python -products: - - azure - - azure-cognitive-search -name: AI enrichment tutorial - Jupyter Notebook -description: | - Create an AI enrichment pipeline in Azure AI Search to extract text, structure, and information from raw content, including images and unstructured text. -urlFragment: python-tutorial-cognitive-search ---- - -# Get started with skillsets and AI enrichment in Azure AI Search - -Demonstrates AI enrichment by building a [skillset](https://docs.microsoft.com/azure/search/cognitive-search-working-with-skillsets) that detects and extracts text and text representations of images and scanned documents stored as blobs in Azure Blob storage. This sample leverages cognitive skills that are based on the Azure AI Services APIs, such as entity recognition and language detection. It uses the REST APIs to make calls to Azure AI Search, including index definition, data ingestion and AI enrichment, and query execution. - -This Python sample is in a notebook. For an explanation of each step, see [Python Tutorial: Call Azure AI Services APIs in an enrichment pipeline](https://docs.microsoft.com/azure/search/cognitive-search-tutorial-blob-python). 
- -## Contents - -| File/folder | Description | -|-------------|-------------| -| `PythonTutorial-AzureSearch-AIEnrichment.ipynb` | Notebook containing the Python code for this sample | -| `.gitignore` | Define what to ignore at commit time. | -| `CONTRIBUTING.md` | Guidelines for contributing to the sample. | -| `README.md` | This README file. | -| `LICENSE` | The license for the sample. | - -## Prerequisites - -- [Anaconda 3.x](https://www.anaconda.com/distribution/#download-section) providing Python 3.x and Jupyter Notebooks -- [Sample file set (mixed content types)](https://github.com/Azure-Samples/azure-search-sample-data/tree/master/mixedContent) -- [Azure Storage account](https://docs.microsoft.com/azure/storage/common/storage-quickstart-create-account) -- [Azure AI Search service](https://docs.microsoft.com/en-us/azure/search/search-create-service-portal) - -## Setup - -1. Clone or download this sample repository. -2. Extract contents if the download is a zip file. Make sure the files are read-write. - -## Running the sample - -1. On the Windows Start menu, select Anaconda3, and then select Jupyter Notebook. -2. Open the PythonTutorial-AzureSearch-AIEnrichment.ipynb file in Jupyter Notebook. -3. Replace and with the service and api-key details of your search service. -4. Replace with a connection string to an Azure Blob storage resource that you created, and to which you uploaded [content files](https://github.com/Azure-Samples/azure-search-sample-data/tree/master/mixedContent) of various file types. -5. Run each step individually. - -By sequentially executing each step, you can verify the printed response status or response output appears before continuing to the next step. The step that creates the indexer, in particular, may take a few minutes to complete. See the tutorial for more details. 
\ No newline at end of file From 5545043ca3f3412523b6755948c54bd7c702b975 Mon Sep 17 00:00:00 2001 From: HeidiSteen Date: Mon, 11 Mar 2024 20:06:34 -0700 Subject: [PATCH 2/3] Updated python quickstart and semantic quickstart --- .../semantic-search-quickstart.ipynb | 200 +++++++---------- Quickstart/v11/azure-search-quickstart.ipynb | 211 ++++++++---------- README.md | 3 +- 3 files changed, 168 insertions(+), 246 deletions(-) diff --git a/Quickstart-Semantic-Search/semantic-search-quickstart.ipynb b/Quickstart-Semantic-Search/semantic-search-quickstart.ipynb index 887542b9d..cf9cb28db 100644 --- a/Quickstart-Semantic-Search/semantic-search-quickstart.ipynb +++ b/Quickstart-Semantic-Search/semantic-search-quickstart.ipynb @@ -5,7 +5,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Semantic search using the azure.search.documents library in the Azure SDK for Python" + "# Semantic ranking using the azure.search.documents library in the Azure SDK for Python" ] }, { @@ -13,44 +13,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This Jupyter Notebook adds semantic search, using pre-trained models from Microsoft to re-rank results based on a semantic match to the query. " + "This notebook demonstrates a semantic configuration in a search index and the semantic query syntax for reranking search results." 
] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Import the libraries needed to create a search index, upload documents, and query the index\n", - "%pip install azure-search-documents\n", - "%pip show azure-search-documents\n", - "%pip install python-dotenv\n", - "\n", - "import os\n", - "from azure.core.credentials import AzureKeyCredential\n", - "from azure.search.documents.indexes import SearchIndexClient \n", - "from azure.search.documents import SearchClient\n", - "from azure.search.documents.indexes.models import ( \n", - " SearchIndex, \n", - " SearchFieldDataType, \n", - " SimpleField, \n", - " SearchableField,\n", - " ComplexField,\n", - " SearchIndex, \n", - " SemanticConfiguration, \n", - " PrioritizedFields, \n", - " SemanticField, \n", - " SemanticSettings, \n", - ")" - ] - }, - { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "In this step, initialize the search client used to make each request. Provide the name and admin API key of your search service. If you get ConnectionError \"Failed to establish a new connection\", verify that the api-key is a primary or secondary admin key, and not a query key." 
+ "## Install packages and set variables" ] }, { @@ -59,30 +29,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Set the service endpoint and API key from the environment\n", - "\n", - "service_name = \"\"\n", - "admin_key = \"\"\n", - "\n", - "index_name = \"hotels-quickstart\"\n", - "\n", - "# Create an SDK client\n", - "endpoint = \"https://{}.search.windows.net/\".format(service_name)\n", - "admin_client = SearchIndexClient(endpoint=endpoint,\n", - " index_name=index_name,\n", - " credential=AzureKeyCredential(admin_key))\n", - "\n", - "search_client = SearchClient(endpoint=endpoint,\n", - " index_name=index_name,\n", - " credential=AzureKeyCredential(admin_key))\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the next cell, the index \"hotels-quickstart\" will be deleted if it previously existed. This step allows you to reuse the index name." + "! pip install azure-search-documents==11.6.0b1 --quiet\n", + "! pip install azure-identity --quiet\n", + "! pip install python-dotenv --quiet" ] }, { @@ -91,12 +40,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Delete the index if it exists\n", - "try:\n", - " result = admin_client.delete_index(index_name)\n", - " print ('Index', index_name, 'Deleted')\n", - "except Exception as ex:\n", - " print (ex)\n" + "# Provide variables\n", + "search_endpoint: str = \"PUT-YOUR-SEARCH-ENDPOINT-HERE\"\n", + "search_api_key: str = \"PUT-YOUR-SEARCH-API-KEY-HERE\"\n", + "index_name: str = \"hotels-quickstart\"" ] }, { @@ -104,7 +51,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Specify the index definition, including the fields that define each search document. This schema adds a semantic configuration that specifies how to use search fields during semantic ranking." 
+ "## Create an index" ] }, { @@ -113,15 +60,34 @@ "metadata": {}, "outputs": [], "source": [ - "# Specify the index schema\n", - "name = index_name\n", + "from azure.core.credentials import AzureKeyCredential\n", + "\n", + "credential = AzureKeyCredential(search_api_key)\n", + "\n", + "from azure.search.documents.indexes import SearchIndexClient\n", + "from azure.search.documents import SearchClient\n", + "from azure.search.documents.indexes.models import (\n", + " ComplexField,\n", + " SimpleField,\n", + " SearchFieldDataType,\n", + " SearchableField,\n", + " SearchIndex,\n", + " SemanticConfiguration,\n", + " SemanticField,\n", + " SemanticPrioritizedFields,\n", + " SemanticSearch\n", + ")\n", + "\n", + "# Create a search schema\n", + "index_client = SearchIndexClient(\n", + " endpoint=search_endpoint, credential=credential)\n", "fields = [\n", " SimpleField(name=\"HotelId\", type=SearchFieldDataType.String, key=True),\n", " SearchableField(name=\"HotelName\", type=SearchFieldDataType.String, sortable=True),\n", " SearchableField(name=\"Description\", type=SearchFieldDataType.String, analyzer_name=\"en.lucene\"),\n", " SearchableField(name=\"Description_fr\", type=SearchFieldDataType.String, analyzer_name=\"fr.lucene\"),\n", " SearchableField(name=\"Category\", type=SearchFieldDataType.String, facetable=True, filterable=True, sortable=True),\n", - " \n", + "\n", " SearchableField(name=\"Tags\", collection=True, type=SearchFieldDataType.String, facetable=True, filterable=True),\n", "\n", " SimpleField(name=\"ParkingIncluded\", type=SearchFieldDataType.Boolean, facetable=True, filterable=True, sortable=True),\n", @@ -136,46 +102,27 @@ " SearchableField(name=\"Country\", type=SearchFieldDataType.String, facetable=True, filterable=True, sortable=True),\n", " ])\n", " ]\n", + "\n", "semantic_config = SemanticConfiguration(\n", " name=\"my-semantic-config\",\n", - " prioritized_fields=PrioritizedFields(\n", + " prioritized_fields=SemanticPrioritizedFields(\n", " 
title_field=SemanticField(field_name=\"HotelName\"),\n", - " prioritized_keywords_fields=[SemanticField(field_name=\"Category\")],\n", - " prioritized_content_fields=[SemanticField(field_name=\"Description\")]\n", + " keywords_fields=[SemanticField(field_name=\"Category\")],\n", + " content_fields=[SemanticField(field_name=\"Description\")]\n", " )\n", ")\n", "\n", - "semantic_settings = SemanticSettings(configurations=[semantic_config])\n", + "# Create the semantic settings with the configuration\n", + "semantic_search = SemanticSearch(configurations=[semantic_config])\n", + "\n", + "semantic_settings = SemanticSearch(configurations=[semantic_config])\n", "scoring_profiles = []\n", - "suggester = [{'name': 'sg', 'source_fields': ['Tags', 'Address/City', 'Address/Country']}]\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Formulate the create_index request. This request targets the indexes collection of your search service and creates an index using the index schema from the previous cell." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "index = SearchIndex(\n", - " name=name,\n", - " fields=fields,\n", - " semantic_settings=semantic_settings,\n", - " scoring_profiles=scoring_profiles,\n", - " suggesters = suggester)\n", + "suggester = [{'name': 'sg', 'source_fields': ['Tags', 'Address/City', 'Address/Country']}]\n", "\n", - "try:\n", - " result = admin_client.create_index(index)\n", - " print ('Index', result.name, 'created')\n", - "except Exception as ex:\n", - " print (ex)" + "# Create the search index with the semantic settings\n", + "index = SearchIndex(name=index_name, fields=fields, suggesters=suggester, scoring_profiles=scoring_profiles, semantic_search=semantic_search)\n", + "result = index_client.create_or_update_index(index)\n", + "print(f' {result.name} created')" ] }, { @@ -183,7 +130,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Next, set up documents to include four hotel documents conforming to the schema." + "## Create a documents payload" ] }, { @@ -192,6 +139,7 @@ "metadata": {}, "outputs": [], "source": [ + "# Create a documents payload\n", "documents = [\n", " {\n", " \"@search.action\": \"upload\",\n", @@ -277,7 +225,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Formulate the request. This upload_documents request targets the docs collection of the hotels-quickstart index and pushes the documents from the previous step into the search index." 
+ "## Upload documents" ] }, { @@ -286,11 +234,19 @@ "metadata": {}, "outputs": [], "source": [ + "# Upload documents to the index\n", + "search_client = SearchClient(endpoint=search_endpoint,\n", + " index_name=index_name,\n", + " credential=credential)\n", "try:\n", " result = search_client.upload_documents(documents=documents)\n", " print(\"Upload of new document succeeded: {}\".format(result[0].succeeded))\n", "except Exception as ex:\n", - " print (ex.message)" + " print (ex.message)\n", + "\n", + "\n", + " index_client = SearchIndexClient(\n", + " endpoint=search_endpoint, credential=credential)" ] }, { @@ -298,11 +254,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "You're now ready to run some queries. For this operation, use search_client. \n", - "\n", - "### Empty query with unscored results\n", - "\n", - "The next cell contains a query expression that executes an empty search (`search=*`), returning an unranked list (search score = 1.0) of arbitrary documents. Because there is no criteria, all documents are included in results. This query prints fields from each document. It also adds `include_total_count=True` to get a count of all documents (4) in the results." + "## Run your first query" ] }, { @@ -311,6 +263,7 @@ "metadata": {}, "outputs": [], "source": [ + "# Run an empty query (returns selected fields, all documents)\n", "results = search_client.search(query_type='simple',\n", " search_text=\"*\" ,\n", " select='HotelName,Description',\n", @@ -328,9 +281,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Full text search with BM25 ranking\n", - "\n", - "The previous query used an empty search string, which bypasses the search engine. In this query, search for \"what hotel has a good restaurant on site\". The query string undergoes lexical analysis and tokenization. The search engine scans for matches and assigns a search score based on term frequency and proximity. Higher scoring matches are returned first. 
In this query for \"what hotel has a good restaurant on site\", Sublime Cliff Hotel comes out on top because it's description includes \"site\". Terms that occur infrequently raise the search score of the document." + "## Run a term query" ] }, { @@ -339,10 +290,12 @@ "metadata": {}, "outputs": [], "source": [ + "# Run a text query (returns a BM25-scored result set)\n", "results = search_client.search(query_type='simple',\n", " search_text=\"what hotel has a good restaurant on site\" ,\n", - " select='HotelName,HotelId,Description')\n", - "\n", + " select='HotelName,HotelId,Description',\n", + " include_total_count=True)\n", + " \n", "for result in results:\n", " print(result[\"@search.score\"])\n", " print(result[\"HotelName\"])\n", @@ -354,9 +307,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Semantic search with captions\n", - "\n", - "Here's the same query, but with semantic ranking. Notice that the semantic ranker correctly identifies Triple Landscape Hotel as a more relevant result given the initial query. This query also returns captions generated by the models. The inputs are too minimal in this sample to create interesting captions, but the example succeeds in demonstrating the syntax." 
+ "## Run a semantic query" ] }, { @@ -365,6 +316,7 @@ "metadata": {}, "outputs": [], "source": [ + "# Runs a semantic query (runs a BM25-ranked query and promotes the most relevant matches to the top)\n", "results = search_client.search(query_type='semantic', semantic_configuration_name='my-semantic-config',\n", " search_text=\"what hotel has a good restaurant on site\", \n", " select='HotelName,Description,Category', query_caption='extractive')\n", @@ -373,7 +325,7 @@ " print(result[\"@search.reranker_score\"])\n", " print(result[\"HotelName\"])\n", " print(f\"Description: {result['Description']}\")\n", - " \n", + "\n", " captions = result[\"@search.captions\"]\n", " if captions:\n", " caption = captions[0]\n", @@ -384,13 +336,10 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "### Add semantic answers\n", - "\n", - "Semantic search can generate answers to a query string that has the characteristics of a question. The generated answer is extracted verbatim from your content. To get a semantic answer, the question and answer must be closely aligned, and the model must find content that clearly answers the question. If potential answers don't have a high enough confidence score, the model won't return an answer. For demonstration purposes, the question in this example is designed to get a response so that you can see the syntax." 
+ "## Return semantic answers" ] }, { @@ -399,9 +348,10 @@ "metadata": {}, "outputs": [], "source": [ + "# Run a semantic query that returns semantic answers \n", "results = search_client.search(query_type='semantic', semantic_configuration_name='my-semantic-config',\n", - " search_text=\"what hotel stands out for its gastronomic excellence\", \n", - " select='HotelName,Description,Category', query_caption='extractive', query_answer=\"extractive\",)\n", + " search_text=\"what hotel is in a historic building\",\n", + " select='HotelName,Description,Category', query_caption='extractive', query_answer=\"extractive\",)\n", "\n", "semantic_answers = results.get_answers()\n", "for answer in semantic_answers:\n", @@ -415,7 +365,7 @@ " print(result[\"@search.reranker_score\"])\n", " print(result[\"HotelName\"])\n", " print(f\"Description: {result['Description']}\")\n", - " \n", + "\n", " captions = result[\"@search.captions\"]\n", " if captions:\n", " caption = captions[0]\n", @@ -448,7 +398,7 @@ "outputs": [], "source": [ "try:\n", - " result = admin_client.delete_index(index_name)\n", + " result = index_client.delete_index(index_name)\n", " print ('Index', index_name, 'Deleted')\n", "except Exception as ex:\n", " print (ex)" @@ -469,7 +419,7 @@ "outputs": [], "source": [ "try:\n", - " result = admin_client.get_index(index_name)\n", + " result = index_client.get_index(index_name)\n", " print (result)\n", "except Exception as ex:\n", " print (ex)\n" diff --git a/Quickstart/v11/azure-search-quickstart.ipynb b/Quickstart/v11/azure-search-quickstart.ipynb index 3e629e771..751751410 100644 --- a/Quickstart/v11/azure-search-quickstart.ipynb +++ b/Quickstart/v11/azure-search-quickstart.ipynb @@ -13,39 +13,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This Jupyter Notebook steps through creating, loading, and querying an index in Azure AI Search index by calling the azure-search-documents library in the Azure SDK for Python. 
" + "This notebook steps through creating, loading, and querying an index in Azure AI Search index by calling the azure-search-documents library in the Azure SDK for Python. " ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Import the libraies used in this notebook\n", - "%pip show azure-search-documents\n", - "\n", - "import os\n", - "from azure.core.credentials import AzureKeyCredential\n", - "from azure.search.documents.indexes import SearchIndexClient \n", - "from azure.search.documents import SearchClient\n", - "from azure.search.documents.indexes.models import (\n", - " ComplexField,\n", - " CorsOptions,\n", - " SearchIndex,\n", - " ScoringProfile,\n", - " SearchFieldDataType,\n", - " SimpleField,\n", - " SearchableField\n", - ")" - ] - }, - { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "In this step, initialize the search client used to make each request. Provide the name and admin API key of your search service. If you get ConnectionError \"Failed to establish a new connection\", verify that the api-key is a primary or secondary admin key, and not a query key." 
+ "## Install packages and set variables" ] }, { @@ -54,30 +29,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Set the service endpoint and API key from the environment\n", - "\n", - "service_name = \"YOUR-SEARCH-SERVICE-NAME\"\n", - "admin_key = \"YOUR-SEARCH-SERVICE-ADMIN-API-KEY\"\n", - "\n", - "index_name = \"hotels-quickstart\"\n", - "\n", - "# Create an SDK client\n", - "endpoint = \"https://{}.search.windows.net/\".format(service_name)\n", - "admin_client = SearchIndexClient(endpoint=endpoint,\n", - " index_name=index_name,\n", - " credential=AzureKeyCredential(admin_key))\n", - "\n", - "search_client = SearchClient(endpoint=endpoint,\n", - " index_name=index_name,\n", - " credential=AzureKeyCredential(admin_key))\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the next step, the index \"hotels-quickstart\" is deleted, assuming it already exists on your search service. This action frees up index space and an allows you to reuse the index name. " + "! pip install azure-search-documents==11.6.0b1 --quiet\n", + "! pip install azure-identity --quiet\n", + "! pip install python-dotenv --quiet" ] }, { @@ -86,20 +40,17 @@ "metadata": {}, "outputs": [], "source": [ - "# Delete the index if it exists\n", - "try:\n", - " result = admin_client.delete_index(index_name)\n", - " print ('Index', index_name, 'Deleted')\n", - "except Exception as ex:\n", - " print (ex)\n" + "# Provide variables\n", + "search_endpoint: str = \"PUT-YOUR-SEARCH-ENDPOINT-HERE\"\n", + "search_api_key: str = \"PUT-YOUR-SEARCH-API-KEY-HERE\"\n", + "index_name: str = \"hotels-quickstart\"" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "Specify the index definition, including the fields that define each search document. Fields have a name, type, and attributes that determine how you can use the field. 
For example, \"searchable\" enables full text search on the field, \"retrievable\" means it can be returned in results, and \"filterable\" allows the field to be used in a filter expression." + "## Create an index" ] }, { @@ -108,15 +59,30 @@ "metadata": {}, "outputs": [], "source": [ - "# Specify the index schema\n", - "name = index_name\n", + "from azure.core.credentials import AzureKeyCredential\n", + "\n", + "credential = AzureKeyCredential(search_api_key)\n", + "\n", + "from azure.search.documents.indexes import SearchIndexClient\n", + "from azure.search.documents import SearchClient\n", + "from azure.search.documents.indexes.models import (\n", + " ComplexField,\n", + " SimpleField,\n", + " SearchFieldDataType,\n", + " SearchableField,\n", + " SearchIndex\n", + ")\n", + "\n", + "# Create a search schema\n", + "index_client = SearchIndexClient(\n", + " endpoint=search_endpoint, credential=credential)\n", "fields = [\n", " SimpleField(name=\"HotelId\", type=SearchFieldDataType.String, key=True),\n", " SearchableField(name=\"HotelName\", type=SearchFieldDataType.String, sortable=True),\n", " SearchableField(name=\"Description\", type=SearchFieldDataType.String, analyzer_name=\"en.lucene\"),\n", " SearchableField(name=\"Description_fr\", type=SearchFieldDataType.String, analyzer_name=\"fr.lucene\"),\n", " SearchableField(name=\"Category\", type=SearchFieldDataType.String, facetable=True, filterable=True, sortable=True),\n", - " \n", + "\n", " SearchableField(name=\"Tags\", collection=True, type=SearchFieldDataType.String, facetable=True, filterable=True),\n", "\n", " SimpleField(name=\"ParkingIncluded\", type=SearchFieldDataType.Boolean, facetable=True, filterable=True, sortable=True),\n", @@ -131,37 +97,14 @@ " SearchableField(name=\"Country\", type=SearchFieldDataType.String, facetable=True, filterable=True, sortable=True),\n", " ])\n", " ]\n", - "cors_options = CorsOptions(allowed_origins=[\"*\"], max_age_in_seconds=60)\n", + "\n", "scoring_profiles = []\n", - 
"suggester = [{'name': 'sg', 'source_fields': ['Tags', 'Address/City', 'Address/Country']}]\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the following cell, formulate the request. This create_index request targets the indexes collection of your search service and creates an index based on the index schema you provided in the previous cell." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "index = SearchIndex(\n", - " name=name,\n", - " fields=fields,\n", - " scoring_profiles=scoring_profiles,\n", - " suggesters = suggester,\n", - " cors_options=cors_options)\n", + "suggester = [{'name': 'sg', 'source_fields': ['Tags', 'Address/City', 'Address/Country']}]\n", "\n", - "try:\n", - " result = admin_client.create_index(index)\n", - " print ('Index', result.name, 'created')\n", - "except Exception as ex:\n", - " print (ex)" + "# Create the search index=\n", + "index = SearchIndex(name=index_name, fields=fields, suggesters=suggester, scoring_profiles=scoring_profiles)\n", + "result = index_client.create_or_update_index(index)\n", + "print(f' {result.name} created')" ] }, { @@ -169,7 +112,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Next, provide four documents that conform to the index schema. Specify an upload action for each document." + "## Create a documents payload" ] }, { @@ -178,6 +121,7 @@ "metadata": {}, "outputs": [], "source": [ + "# Create a documents payload\n", "documents = [\n", " {\n", " \"@search.action\": \"upload\",\n", @@ -259,11 +203,10 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "Formulate the request. This upload_documents request targets the docs collection of the hotels-quickstart index and pushes the documents provided in the previous step into the search index." 
+ "## Upload documents" ] }, { @@ -272,11 +215,17 @@ "metadata": {}, "outputs": [], "source": [ + "search_client = SearchClient(endpoint=search_endpoint,\n", + " index_name=index_name,\n", + " credential=credential)\n", "try:\n", " result = search_client.upload_documents(documents=documents)\n", " print(\"Upload of new document succeeded: {}\".format(result[0].succeeded))\n", "except Exception as ex:\n", - " print (ex.message)" + " print (ex.message)\n", + "\n", + " index_client = SearchIndexClient(\n", + " endpoint=search_endpoint, credential=credential)" ] }, { @@ -284,9 +233,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "You are now ready to run some queries. For this operation, use search_client. \n", - "\n", - "The next cell contains a query expression that executes an empty search (`search=*`), returning an unranked list (search score = 1.0) of arbitrary documents. Because there is no criteria, all documents are included in results. This query prints just two of the fields in each document. It also adds `include_total_count=True` to get a count of all documents (4) in the results." + "## Run your first query" ] }, { @@ -295,11 +242,17 @@ "metadata": {}, "outputs": [], "source": [ - "results = search_client.search(search_text=\"*\", include_total_count=True)\n", + "# Run an empty query (returns selected fields, all documents)\n", + "results = search_client.search(query_type='simple',\n", + " search_text=\"*\" ,\n", + " select='HotelName,Description',\n", + " include_total_count=True)\n", "\n", "print ('Total Documents Matching Query:', results.get_count())\n", "for result in results:\n", - " print(\"{}: {}\".format(result[\"HotelId\"], result[\"HotelName\"]))\n" + " print(result[\"@search.score\"])\n", + " print(result[\"HotelName\"])\n", + " print(f\"Description: {result['Description']}\")\n" ] }, { @@ -307,7 +260,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The next query adds whole terms to the search expression (\"wifi\"). 
This query specifies that results contain only those fields in the select statement. Limiting the fields that come back minimizes the amount of data sent back over the wire and reduces search latency." + "## Run a term query" ] }, { @@ -316,11 +269,17 @@ "metadata": {}, "outputs": [], "source": [ - "results = search_client.search(search_text=\"wifi\", include_total_count=True, select='HotelId,HotelName,Tags')\n", + "# Run a term query\n", + "results = search_client.search(query_type='simple',\n", + " search_text=\"wifi\" ,\n", + " select='HotelName,Description,Tags',\n", + " include_total_count=True)\n", "\n", "print ('Total Documents Matching Query:', results.get_count())\n", "for result in results:\n", - " print(\"{}: {}: {}\".format(result[\"HotelId\"], result[\"HotelName\"], result[\"Tags\"]))\n" + " print(result[\"@search.score\"])\n", + " print(result[\"HotelName\"])\n", + " print(f\"Description: {result['Description']}\")" ] }, { @@ -328,7 +287,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This query adds a filter expression, returning only those hotels with a rating greater than 4, sorted in descending order." + "## Add a filter" ] }, { @@ -337,7 +296,12 @@ "metadata": {}, "outputs": [], "source": [ - "results = search_client.search(search_text=\"hotels\", select='HotelId,HotelName,Rating', filter='Rating gt 4', order_by='Rating desc')\n", + "# Add a filter\n", + "results = search_client.search(\n", + " search_text=\"hotels\", \n", + " select='HotelId,HotelName,Rating', \n", + " filter='Rating gt 4', \n", + " order_by='Rating desc')\n", "\n", "for result in results:\n", " print(\"{}: {} - {} rating\".format(result[\"HotelId\"], result[\"HotelName\"], result[\"Rating\"]))" @@ -348,7 +312,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In the next query, add search_fields to scope query matching to a single field. In newer versions of the client library, search_fields is an array. 
" + "## Scope a query to specific searchable fields" ] }, { @@ -357,7 +321,10 @@ "metadata": {}, "outputs": [], "source": [ - "results = search_client.search(search_text=\"sublime\", search_fields=['HotelName'], select='HotelId,HotelName')\n", + "results = search_client.search(\n", + " search_text=\"sublime\", \n", + " search_fields=['HotelName'], \n", + " select='HotelId,HotelName')\n", "\n", "for result in results:\n", " print(\"{}: {}\".format(result[\"HotelId\"], result[\"HotelName\"]))" @@ -368,7 +335,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Facets are labels that can be used to compose facet navigation structure. This query returns facets and counts for Category." + "## Return facets" ] }, { @@ -377,7 +344,8 @@ "metadata": {}, "outputs": [], "source": [ - "results = search_client.search(search_text=\"*\", facets=[\"Category\"])\n", + "# Return facets\n", + "results = search_client.search(search_text=\"*\", facets=[\"Category\"])\n", "\n", "facets = results.get_facets()\n", "\n", @@ -390,7 +358,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In this example, look up a specific document based on its key. You would typically want to return a document when a user clicks on a document in a search result." + "## Look up a document " ] }, { @@ -399,12 +367,13 @@ "metadata": {}, "outputs": [], "source": [ + "# Look up a specific document by ID\n", "result = search_client.get_document(key=\"3\")\n", "\n", "print(\"Details for hotel '3' are:\")\n", - "print(\" Name: {}\".format(result[\"HotelName\"]))\n", - "print(\" Rating: {}\".format(result[\"Rating\"]))\n", - "print(\" Category: {}\".format(result[\"Category\"]))" + "print(\"Name: {}\".format(result[\"HotelName\"]))\n", + "print(\"Rating: {}\".format(result[\"Rating\"]))\n", + "print(\"Category: {}\".format(result[\"Category\"]))" ] }, { @@ -412,9 +381,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In the final example, try out the autocomplete function. 
This is typically used in a search box to complete partial query terms as the user types into the search box.\n", - "\n", - "When the index was created, a suggester named \"sg\" was also created. Part of the suggester definition includes fields that can be used in autocomplete queries. To simulate user input, assume that the letters \"sa\" represent characters typed by the user. The results include potential term matches for the \"sa\" input." + "## Autocomplete a query" ] }, { @@ -423,8 +390,12 @@ "metadata": {}, "outputs": [], "source": [ + "# Autocomplete a query\n", "search_suggestion = 'sa'\n", - "results = search_client.autocomplete(search_text=search_suggestion, suggester_name=\"sg\", mode='twoTerms')\n", + "results = search_client.autocomplete(\n", + " search_text=search_suggestion, \n", + " suggester_name=\"sg\",\n", + " mode='twoTerms')\n", "\n", "print(\"Autocomplete for:\", search_suggestion)\n", "for result in results:\n", @@ -436,6 +407,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "## Clean up\n", + "\n", "If you are finished with this index, you can delete it by running the following lines. Deleting unnecessary indexes frees up space for stepping through more quickstarts and tutorials." 
] }, @@ -446,7 +419,7 @@ "outputs": [], "source": [ "try:\n", - " result = admin_client.delete_index(index_name)\n", + " result = index_client.delete_index(index_name)\n", " print ('Index', index_name, 'Deleted')\n", "except Exception as ex:\n", " print (ex)" @@ -467,7 +440,7 @@ "outputs": [], "source": [ "try:\n", - " result = admin_client.get_index(index_name)\n", + " result = index_client.get_index(index_name)\n", " print (result)\n", "except Exception as ex:\n", " print (ex)\n" diff --git a/README.md b/README.md index 1f59566c9..71125f928 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,8 @@ This repository contains Python code samples used in Azure AI Search documentati | Sample | Description | |--------|-------------| -| quickstart | "Day One" introduction to the fundamental tasks of working with a search index: create, load, and query. This sample is a Jupyter Python3 .ipynb file. The index is modeled on a subset of the Hotels dataset, widely used in Azure AI Search samples, but reduced here for readability and comprehension. | +| quickstart | "Day One" introduction to the fundamental tasks of working with a search index: create, load, and query. This sample is a notebook .ipynb file. The index is modeled on a subset of the Hotels dataset, widely used in Azure AI Search samples, but reduced here for readability and comprehension. | | quickstart-semantic-search | Extends the quickstart through modifications that invoke semantic search. This notebook adds a semantic configuration to the index and semantic query options that formulate the query and response. | - | search-website-functions-v4 | Shows how to create, load, and query a search index in Python using the Azure.Search.Documents library in the Azure SDK for Python. It also includes application code and sample data so that you can see search integration in the context of a full app. The data is from [https://github.com/zygmuntz/goodbooks-10k](https://github.com/zygmuntz/goodbooks-10k). 
The app is an Azure Static Web app, using the React library for user interaction, and Azure Function to handle the query requests and responses in the application layer. | ## Archived samples From 041f3def6a7bad374564b049ebc332994a1f5949 Mon Sep 17 00:00:00 2001 From: HeidiSteen Date: Mon, 11 Mar 2024 20:11:38 -0700 Subject: [PATCH 3/3] Removed obsolete REST example --- Quickstart/{v11 => }/README.md | 4 +- Quickstart/REST/azure-search-quickstart.ipynb | 404 ------------------ .../{v11 => }/azure-search-quickstart.ipynb | 0 3 files changed, 1 insertion(+), 407 deletions(-) rename Quickstart/{v11 => }/README.md (93%) delete mode 100644 Quickstart/REST/azure-search-quickstart.ipynb rename Quickstart/{v11 => }/azure-search-quickstart.ipynb (100%) diff --git a/Quickstart/v11/README.md b/Quickstart/README.md similarity index 93% rename from Quickstart/v11/README.md rename to Quickstart/README.md index 83bc15965..7bb18b9df 100644 --- a/Quickstart/v11/README.md +++ b/Quickstart/README.md @@ -21,7 +21,7 @@ This sample is a Jupyter Python3 .ipynb file to perform the actions against the ## Prerequisites -* Visual Studio Code with the Python extension (or equivalent tool), with Python 3.7 or later +* Visual Studio Code with the Python extension (or equivalent tool), with Python 3.10 or later * [azure-search-documents package](https://pypi.org/project/azure-search-documents/) from the Azure SDK for Python @@ -35,8 +35,6 @@ This sample is a Jupyter Python3 .ipynb file to perform the actions against the 1. Open the azure-search-quickstart.ipynb file in Visual Studio Code. -1. Open an integrated terminal and run `pip install azure-search-documents`. - 1. 
Set the service endpoint and API key for your search service: * service_name = "YOUR-SEARCH-SERVICE-NAME" diff --git a/Quickstart/REST/azure-search-quickstart.ipynb b/Quickstart/REST/azure-search-quickstart.ipynb deleted file mode 100644 index d0b40029b..000000000 --- a/Quickstart/REST/azure-search-quickstart.ipynb +++ /dev/null @@ -1,404 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Create a search index using REST APIs and Python" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This Jupyter Notebook demonstrates index creation, data ingestion, and queries of an Azure AI Search index by calling the REST APIs from Python code. This notebook is a companion document to this [Python quickstart](https://docs.microsoft.com/azure/search/search-get-started-python). \n", - "\n", - "\n", - "As a first step, load the libraries used for working with JSON and formulating HTTP requests." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import requests\n", - "from pprint import pprint" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the second cell, input the request elements that will be constants on every request. Replace the search service name (YOUR-SEARCH-SERVICE-NAME) and admin API key (YOUR-ADMIN-API-KEY) with valid values. If you get ConnectionError \"Failed to establish a new connection\", verify that the api-key is a primary or secondary admin key, and that all leading and trailing characters (? and /) are in place." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "endpoint = 'https://.search.windows.net/'\n", - "api_version = '?api-version=2019-05-06'\n", - "headers = {'Content-Type': 'application/json',\n", - " 'api-key': '' }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the third cell, formulate the request. This GET request targets the indexes collection of your search service and selects the name property of existing indexes so that you can see which indexes already exist. Index names must be unique. Check the list to make sure \"hotels-quickstart\" isn't listed.." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "url = endpoint + \"indexes\" + api_version + \"&$select=name\"\n", - "response = requests.get(url, headers=headers)\n", - "index_list = response.json()\n", - "pprint(index_list)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Specify the index definition, including the fields that define each search document. Fields have a name type, and attributes that determine how you can use the field. For example, \"searchable\" enables full text search on the field, \"retrievable\" means it can be returned in results, and \"filterable\" allows the field to be used in a filter expression." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "index_schema = {\n", - " \"name\": \"hotels-quickstart\", \n", - " \"fields\": [\n", - " {\"name\": \"HotelId\", \"type\": \"Edm.String\", \"key\": \"true\", \"filterable\": \"true\"},\n", - " {\"name\": \"HotelName\", \"type\": \"Edm.String\", \"searchable\": \"true\", \"filterable\": \"false\", \"sortable\": \"true\", \"facetable\": \"false\"},\n", - " {\"name\": \"Description\", \"type\": \"Edm.String\", \"searchable\": \"true\", \"filterable\": \"false\", \"sortable\": \"false\", \"facetable\": \"false\", \"analyzer\": \"en.lucene\"},\n", - " {\"name\": \"Description_fr\", \"type\": \"Edm.String\", \"searchable\": \"true\", \"filterable\": \"false\", \"sortable\": \"false\", \"facetable\": \"false\", \"analyzer\": \"fr.lucene\"},\n", - " {\"name\": \"Category\", \"type\": \"Edm.String\", \"searchable\": \"true\", \"filterable\": \"true\", \"sortable\": \"true\", \"facetable\": \"true\"},\n", - " {\"name\": \"Tags\", \"type\": \"Collection(Edm.String)\", \"searchable\": \"true\", \"filterable\": \"true\", \"sortable\": \"false\", \"facetable\": \"true\"},\n", - " {\"name\": \"ParkingIncluded\", \"type\": \"Edm.Boolean\", \"filterable\": \"true\", \"sortable\": \"true\", \"facetable\": \"true\"},\n", - " {\"name\": \"LastRenovationDate\", \"type\": \"Edm.DateTimeOffset\", \"filterable\": \"true\", \"sortable\": \"true\", \"facetable\": \"true\"},\n", - " {\"name\": \"Rating\", \"type\": \"Edm.Double\", \"filterable\": \"true\", \"sortable\": \"true\", \"facetable\": \"true\"},\n", - " {\"name\": \"Address\", \"type\": \"Edm.ComplexType\", \n", - " \"fields\": [\n", - " {\"name\": \"StreetAddress\", \"type\": \"Edm.String\", \"filterable\": \"false\", \"sortable\": \"false\", \"facetable\": \"false\", \"searchable\": \"true\"},\n", - " {\"name\": \"City\", \"type\": \"Edm.String\", \"searchable\": \"true\", \"filterable\": \"true\", \"sortable\": 
\"true\", \"facetable\": \"true\"},\n", - " {\"name\": \"StateProvince\", \"type\": \"Edm.String\", \"searchable\": \"true\", \"filterable\": \"true\", \"sortable\": \"true\", \"facetable\": \"true\"},\n", - " {\"name\": \"PostalCode\", \"type\": \"Edm.String\", \"searchable\": \"true\", \"filterable\": \"true\", \"sortable\": \"true\", \"facetable\": \"true\"},\n", - " {\"name\": \"Country\", \"type\": \"Edm.String\", \"searchable\": \"true\", \"filterable\": \"true\", \"sortable\": \"true\", \"facetable\": \"true\"}\n", - " ]\n", - " }\n", - " ]\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the following cell, formulate the request. This POST request targets the indexes collection of your search service and creates an index based on the index schema you provided in the previous cell." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "url = endpoint + \"indexes\" + api_version\n", - "response = requests.post(url, headers=headers, json=index_schema)\n", - "index = response.json()\n", - "pprint(index)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, provide four documents that conform to the index schema. Specify an upload action for each document." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "documents = {\n", - " \"value\": [\n", - " {\n", - " \"@search.action\": \"upload\",\n", - " \"HotelId\": \"1\",\n", - " \"HotelName\": \"Secret Point Motel\",\n", - " \"Description\": \"The hotel is ideally located on the main commercial artery of the city in the heart of New York. 
A few minutes away is Time's Square and the historic centre of the city, as well as other places of interest that make New York one of America's most attractive and cosmopolitan cities.\",\n", - " \"Description_fr\": \"L'hôtel est idéalement situé sur la principale artère commerciale de la ville en plein cœur de New York. A quelques minutes se trouve la place du temps et le centre historique de la ville, ainsi que d'autres lieux d'intérêt qui font de New York l'une des villes les plus attractives et cosmopolites de l'Amérique.\",\n", - " \"Category\": \"Boutique\",\n", - " \"Tags\": [ \"pool\", \"air conditioning\", \"concierge\" ],\n", - " \"ParkingIncluded\": \"false\",\n", - " \"LastRenovationDate\": \"1970-01-18T00:00:00Z\",\n", - " \"Rating\": 3.60,\n", - " \"Address\": {\n", - " \"StreetAddress\": \"677 5th Ave\",\n", - " \"City\": \"New York\",\n", - " \"StateProvince\": \"NY\",\n", - " \"PostalCode\": \"10022\",\n", - " \"Country\": \"USA\"\n", - " }\n", - " },\n", - " {\n", - " \"@search.action\": \"upload\",\n", - " \"HotelId\": \"2\",\n", - " \"HotelName\": \"Twin Dome Motel\",\n", - " \"Description\": \"The hotel is situated in a nineteenth century plaza, which has been expanded and renovated to the highest architectural standards to create a modern, functional, and first-class hotel in which art and unique historical elements coexist with the most modern comforts.\",\n", - " \"Description_fr\": \"L'hôtel est situé dans une place du XIXe siècle, qui a été agrandie et rénovée aux plus hautes normes architecturales pour créer un hôtel moderne, fonctionnel et de première classe dans lequel l'art et les éléments historiques uniques coexistent avec le confort le plus moderne.\",\n", - " \"Category\": \"Boutique\",\n", - " \"Tags\": [ \"pool\", \"free wifi\", \"concierge\" ],\n", - " \"ParkingIncluded\": \"false\",\n", - " \"LastRenovationDate\": \"1979-02-18T00:00:00Z\",\n", - " \"Rating\": 3.60,\n", - " \"Address\": {\n", - " \"StreetAddress\": \"140 
University Town Center Dr\",\n", - " \"City\": \"Sarasota\",\n", - " \"StateProvince\": \"FL\",\n", - " \"PostalCode\": \"34243\",\n", - " \"Country\": \"USA\"\n", - " }\n", - " },\n", - " {\n", - " \"@search.action\": \"upload\",\n", - " \"HotelId\": \"3\",\n", - " \"HotelName\": \"Triple Landscape Hotel\",\n", - " \"Description\": \"The Hotel stands out for its gastronomic excellence under the management of William Dough, who advises on and oversees all of the Hotel’s restaurant services.\",\n", - " \"Description_fr\": \"L'hôtel est situé dans une place du XIXe siècle, qui a été agrandie et rénovée aux plus hautes normes architecturales pour créer un hôtel moderne, fonctionnel et de première classe dans lequel l'art et les éléments historiques uniques coexistent avec le confort le plus moderne.\",\n", - " \"Category\": \"Resort and Spa\",\n", - " \"Tags\": [ \"air conditioning\", \"bar\", \"continental breakfast\" ],\n", - " \"ParkingIncluded\": \"true\",\n", - " \"LastRenovationDate\": \"2015-09-20T00:00:00Z\",\n", - " \"Rating\": 4.80,\n", - " \"Address\": {\n", - " \"StreetAddress\": \"3393 Peachtree Rd\",\n", - " \"City\": \"Atlanta\",\n", - " \"StateProvince\": \"GA\",\n", - " \"PostalCode\": \"30326\",\n", - " \"Country\": \"USA\"\n", - " }\n", - " },\n", - " {\n", - " \"@search.action\": \"upload\",\n", - " \"HotelId\": \"4\",\n", - " \"HotelName\": \"Sublime Cliff Hotel\",\n", - " \"Description\": \"Sublime Cliff Hotel is located in the heart of the historic center of Sublime in an extremely vibrant and lively area within short walking distance to the sites and landmarks of the city and is surrounded by the extraordinary beauty of churches, buildings, shops and monuments. 
Sublime Cliff is part of a lovingly restored 1800 palace.\",\n", - " \"Description_fr\": \"Le sublime Cliff Hotel est situé au coeur du centre historique de sublime dans un quartier extrêmement animé et vivant, à courte distance de marche des sites et monuments de la ville et est entouré par l'extraordinaire beauté des églises, des bâtiments, des commerces et Monuments. Sublime Cliff fait partie d'un Palace 1800 restauré avec amour.\",\n", - " \"Category\": \"Boutique\",\n", - " \"Tags\": [ \"concierge\", \"view\", \"24-hour front desk service\" ],\n", - " \"ParkingIncluded\": \"true\",\n", - " \"LastRenovationDate\": \"1960-02-06T00:00:00Z\",\n", - " \"Rating\": 4.60,\n", - " \"Address\": {\n", - " \"StreetAddress\": \"7400 San Pedro Ave\",\n", - " \"City\": \"San Antonio\",\n", - " \"StateProvince\": \"TX\",\n", - " \"PostalCode\": \"78216\",\n", - " \"Country\": \"USA\"\n", - " }\n", - " }\n", - "]\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Formulate the request. This POST request targets the docs collection of the hotels-quickstart index and pushes the documents provided in the previous step." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "url = endpoint + \"indexes/hotels-quickstart/docs/index\" + api_version\n", - "response = requests.post(url, headers=headers, json=documents)\n", - "index_content = response.json()\n", - "pprint(index_content)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You are now ready to run some queries. The next cell contains a query expression that executes an empty search (search=*), returning an unranked list (search score = 1.0) of arbitrary documents. By default, Azure AI Search returns 50 matches at a time. As structured, this query returns an entire document structure and values. Add $count=true to get a count of all documents (4) in the results." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "searchstring = '&search=*&$count=true'\n", - "\n", - "url = endpoint + \"indexes/hotels-quickstart/docs\" + api_version + searchstring\n", - "response = requests.get(url, headers=headers, json=searchstring)\n", - "query = response.json()\n", - "pprint(query)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The next query adds whole terms to the search expression (\"hotels\" and \"wifi\") and selects just a few fields to return in the results." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "searchstring = '&search=hotels wifi&$count=true&$select=HotelId,HotelName'\n", - "\n", - "url = endpoint + \"indexes/hotels-quickstart/docs\" + api_version + searchstring\n", - "response = requests.get(url, headers=headers, json=searchstring)\n", - "query = response.json()\n", - "pprint(query)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This query adds a $filter expression, returning only those hotels with a rating greater than 4." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "searchstring = '&search=*&$filter=Rating gt 4&$select=HotelId,HotelName,Description'\n", - "\n", - "url = endpoint + \"indexes/hotels-quickstart/docs\" + api_version + searchstring\n", - "response = requests.get(url, headers=headers, json=searchstring)\n", - "query = response.json()\n", - "pprint(query)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "By default, the search engine returns the top 50 documents but you can use top and skip to add pagination and choose how many documents in each result. This query returns two documents in each result set." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "searchstring = '&search=boutique&$top=2&$select=HotelId,HotelName,Description'\n", - "\n", - "url = endpoint + \"indexes/hotels-quickstart/docs\" + api_version + searchstring\n", - "response = requests.get(url, headers=headers, json=searchstring)\n", - "query = response.json()\n", - "pprint(query)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this last example, use $orderby to sort results by city. This example includes fields from the Address collection." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "searchstring = '&search=pool&$orderby=Address/City&$select=HotelId, HotelName, Address/City, Address/StateProvince'\n", - "\n", - "url = endpoint + \"indexes/hotels-quickstart/docs\" + api_version + searchstring\n", - "response = requests.get(url, headers=headers, json=searchstring)\n", - "query = response.json()\n", - "pprint(query)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you are finished with this index, you can delete it by running the following lines. Deleting unnecessary indexes frees up space for steeping through more quickstarts and tutorials." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "url = endpoint + \"indexes/hotels-quickstart\" + api_version\n", - "response = requests.delete(url, headers=headers)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Confirm the index deletion by running the following script that lists all of the indexes on your search service. If hotels-quickstart is not listed, you've successfully deleted the index and have completed this quickstart." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "url = endpoint + \"indexes\" + api_version + \"&$select=name\"\n", - "\n", - "response = requests.get(url, headers=headers)\n", - "index_list = response.json()\n", - "pprint(index_list)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Quickstart/v11/azure-search-quickstart.ipynb b/Quickstart/azure-search-quickstart.ipynb similarity index 100% rename from Quickstart/v11/azure-search-quickstart.ipynb rename to Quickstart/azure-search-quickstart.ipynb