diff --git a/.github/workflows/docs-freeze.yml b/.github/workflows/docs-freeze.yml
new file mode 100644
index 00000000..3506c0f8
--- /dev/null
+++ b/.github/workflows/docs-freeze.yml
@@ -0,0 +1,65 @@
+on:
+  workflow_dispatch:  # manual trigger only
+
+name: Docs - Update freeze cache
+
+env:
+  UV_VERSION: "0.4.x"
+  PYTHON_VERSION: "3.12"  # quoted so YAML keeps a string; an unquoted 3.10 would load as the float 3.1
+
+permissions:
+  contents: write  # the final step pushes a commit back to the repository
+
+jobs:
+  docs-freeze:
+    runs-on: ubuntu-latest
+
+    env:  # job-level, so these keys are visible to every step below
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # 0 = fetch full history instead of a shallow clone
+
+      - name: 🔵 Set up Quarto
+        uses: quarto-dev/quarto-actions/setup@v2
+        with:
+          version: 1.9.37  # same Quarto version this change sets in docs-publish.yml
+
+      - name: 🚀 Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          version: ${{ env.UV_VERSION }}
+
+      - name: 🐍 Set up Python ${{ env.PYTHON_VERSION }}
+        run: uv python install ${{ env.PYTHON_VERSION }}
+
+      - name: 📦 Install chatlas and dependencies
+        run: uv sync --python ${{ env.PYTHON_VERSION }} --all-extras
+
+      - name: 🔌 Activate venv
+        run: |
+          source .venv/bin/activate  # sets VIRTUAL_ENV, which the next two lines export
+          echo "$VIRTUAL_ENV/bin" >> "$GITHUB_PATH"  # later steps get the venv's bin dir on PATH
+          echo "VIRTUAL_ENV=$VIRTUAL_ENV" >> "$GITHUB_ENV"  # later steps see VIRTUAL_ENV too
+
+      - name: Run quartodoc
+        run: make quartodoc
+
+      - name: 🧊 Render docs to populate freeze cache
+        run: quarto render docs
+
+      - name: 💾 Commit updated freeze cache
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add docs/_freeze/  # stage only the freeze cache, nothing else the render produced
+          if git diff --staged --quiet; then  # exit status 0 means nothing is staged
+            echo "No freeze cache changes to commit."
+ else + git commit -m "docs: update freeze cache [skip ci]" + git push origin HEAD + fi diff --git a/.github/workflows/docs-publish.yml b/.github/workflows/docs-publish.yml index b468f9e5..d52f0078 100644 --- a/.github/workflows/docs-publish.yml +++ b/.github/workflows/docs-publish.yml @@ -34,7 +34,7 @@ jobs: - name: 🔵 Set up Quarto uses: quarto-dev/quarto-actions/setup@v2 with: - version: 1.6.26 + version: 1.9.37 - name: 🚀 Install uv uses: astral-sh/setup-uv@v3 diff --git a/docs/.gitignore b/docs/.gitignore index 12384f0a..aec0dc7d 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -8,3 +8,7 @@ objects.txt /reference CHANGELOG.md +CHANGELOG.html +/CHANGELOG_files/ + +**/*.quarto_ipynb diff --git a/docs/_freeze/site_libs/clipboard/clipboard.min.js b/docs/_freeze/site_libs/clipboard/clipboard.min.js new file mode 100644 index 00000000..1103f811 --- /dev/null +++ b/docs/_freeze/site_libs/clipboard/clipboard.min.js @@ -0,0 +1,7 @@ +/*! + * clipboard.js v2.0.11 + * https://clipboardjs.com/ + * + * Licensed MIT © Zeno Rocha + */ +!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return b}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),r=n.n(e);function c(t){try{return document.execCommand(t)}catch(t){return}}var a=function(t){t=r()(t);return c("cut"),t};function o(t,e){var n,o,t=(n=t,o="rtl"===document.documentElement.getAttribute("dir"),(t=document.createElement("textarea")).style.fontSize="12pt",t.style.border="0",t.style.padding="0",t.style.margin="0",t.style.position="absolute",t.style[o?"right":"left"]="-9999px",o=window.pageYOffset||document.documentElement.scrollTop,t.style.top="".concat(o,"px"),t.setAttribute("readonly",""),t.value=n,t);return e.container.appendChild(t),e=r()(t),c("copy"),t.remove(),e}var 
f=function(t){var e=1\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namescore
0Technology0.94
1Business0.04
2Other0.02
\n\n```\n:::\n:::\n\n\n", + "supporting": [ + "classification_files" + ], + "filters": [], + "includes": { + "include-in-header": [ + "\n\n\n" + ] + } + } +} \ No newline at end of file diff --git a/docs/_freeze/structured-data/entity-recognition/execute-results/html.json b/docs/_freeze/structured-data/entity-recognition/execute-results/html.json new file mode 100644 index 00000000..4ee1a853 --- /dev/null +++ b/docs/_freeze/structured-data/entity-recognition/execute-results/html.json @@ -0,0 +1,16 @@ +{ + "hash": "99534743ffcbbb600a2d7fa317246f3d", + "result": { + "engine": "jupyter", + "markdown": "---\ntitle: Entity recognition\ncallout-appearance: simple\n---\n\nThe following example, which is [closely inspired by the Claude documentation](https://github.com/anthropics/anthropic-cookbook/blob/main/tool_use/extracting_structured_json.ipynb), shows how `.chat_structured()` can be used to perform entity recognition.\n\n::: {#8a46015f .cell execution_count=1}\n``` {.python .cell-code}\nfrom chatlas import ChatOpenAI\nfrom pydantic import BaseModel, Field\nimport pandas as pd\n\n# | warning: false\ntext = \"John works at Google in New York. He met with Sarah, the CEO of Acme Inc., last week in San Francisco.\"\n\n\nclass NamedEntity(BaseModel):\n \"\"\"Named entity in the text.\"\"\"\n\n name: str = Field(description=\"The extracted entity name\")\n\n type_: str = Field(description=\"The entity type, e.g. 'person', 'location', 'organization'\")\n\n context: str = Field(description=\"The context in which the entity appears in the text.\")\n\n\nclass NamedEntities(BaseModel):\n \"\"\"Named entities in the text.\"\"\"\n\n entities: list[NamedEntity] = Field(description=\"Array of named entities\")\n\n\nchat = ChatOpenAI()\ndata = chat.chat_structured(text, data_model=NamedEntities)\npd.DataFrame([e.model_dump() for e in data.entities])\n```\n\n::: {.cell-output .cell-output-display execution_count=1}\n```{=html}\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
nametype_context
0JohnpersonJohn works at Google in New York.
1GoogleorganizationJohn works at Google in New York.
2New YorklocationJohn works at Google in New York.
3SarahpersonHe met with Sarah, the CEO of Acme Inc., last ...
4Acme Inc.organizationHe met with Sarah, the CEO of Acme Inc., last ...
5San FranciscolocationHe met with Sarah, the CEO of Acme Inc., last ...
\n
\n```\n:::\n:::\n\n\n", + "supporting": [ + "entity-recognition_files" + ], + "filters": [], + "includes": { + "include-in-header": [ + "\n\n\n" + ] + } + } +} \ No newline at end of file diff --git a/docs/_freeze/structured-data/multi-modal/execute-results/html.json b/docs/_freeze/structured-data/multi-modal/execute-results/html.json new file mode 100644 index 00000000..3f8277b4 --- /dev/null +++ b/docs/_freeze/structured-data/multi-modal/execute-results/html.json @@ -0,0 +1,16 @@ +{ + "hash": "a659d118cf15f9282fe0fc69d7a2160b", + "result": { + "engine": "jupyter", + "markdown": "---\ntitle: Multi-modal input\ncallout-appearance: simple\n---\n\n### PDFs\n\nThis example comes from [Google's cookbook](https://github.com/google-gemini/cookbook/blob/main/examples/Pdf_structured_outputs_on_invoices_and_forms.ipynb) and extracts structured data from [a PDF invoice](https://storage.googleapis.com/generativeai-downloads/data/pdf_structured_outputs/invoice.pdf). The goal is to extract the invoice number, date, and all list items with description, quantity, and gross worth, as well as the total gross worth.\n\n```python\nimport chatlas as ctl\nfrom pydantic import BaseModel, Field\n\n\nclass Item(BaseModel):\n description: str = Field(description=\"The description of the item\")\n quantity: float = Field(description=\"The Qty of the item\")\n gross_worth: float = Field(description=\"The gross worth of the item\")\n\n\nclass Invoice(BaseModel):\n \"\"\"Extract the invoice number, date and all list items with description, quantity and gross worth and the total gross worth.\"\"\"\n\n invoice_number: str = Field(description=\"The invoice number e.g. 1234567890\")\n date: str = Field(description=\"The date of the invoice e.g. 
10/09/2012\")\n items: list[Item] = Field(\n description=\"The list of items with description, quantity and gross worth\"\n )\n total_gross_worth: float = Field(description=\"The total gross worth of the invoice\")\n\n\n_ = Invoice.model_rebuild()\n\nchat = ctl.ChatOpenAI()\nres = chat.chat_structured(\n \"https://storage.googleapis.com/generativeai-downloads/data/pdf_structured_outputs/invoice.pdf\",\n data_model=Invoice,\n)\nres.model_dump_json(indent=2)\n```\n\n::: chatlas-response-container\n\n```python\n{\n 'invoice_number': 'INV-123456789',\n 'date': '09/10/2023',\n 'items': [\n {'description': 'Laptop', 'quantity': 2, 'gross_worth': 2000},\n {'description': 'Smartphone', 'quantity': 5, 'gross_worth': 3500},\n {'description': 'Tablet', 'quantity': 3, 'gross_worth': 1200}\n ],\n 'total_gross_worth': 6700\n}\n```\n\n:::\n\n\n\n### Images\n\nThis example comes from [Dan Nguyen](https://gist.github.com/dannguyen/faaa56cebf30ad51108a9fe4f8db36d8) (you can see other interesting applications at that link). The goal is to extract structured data from this screenshot:\n\n![Screenshot of schedule A: a table showing assets and \"unearned\" income](/congressional-assets.png)\n\nEven without any descriptions, ChatGPT does pretty well:\n\n::: {#d6d3b701 .cell execution_count=1}\n``` {.python .cell-code}\nimport chatlas as ctl\nfrom pydantic import BaseModel, Field\nimport pandas as pd\n\nclass Asset(BaseModel):\n assert_name: str\n owner: str\n location: str\n asset_value_low: int\n asset_value_high: int\n income_type: str\n income_low: int\n income_high: int\n tx_gt_1000: bool\n\nclass DisclosureReport(BaseModel):\n assets: list[Asset]\n\nchat = ctl.ChatOpenAI()\ndata = chat.chat_structured(\n ctl.content_image_file(\"../images/congressional-assets.png\"),\n data_model=DisclosureReport,\n)\npd.DataFrame([c.model_dump() for c in data.assets])\n```\n\n::: {.cell-output .cell-output-display execution_count=1}\n```{=html}\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
assert_nameownerlocationasset_value_lowasset_value_highincome_typeincome_lowincome_hightx_gt_1000
011 Zinfandel Lane - Home & Vineyard [RP]JTSt. Helena/Napa, CA, US500000125000000Grape Sales1000011000000False
125 Point Lobos - Commercial Property [RP]SPSan Francisco/San Francisco, CA, US500000125000000Rent1000011000000False
\n
\n```\n:::\n:::\n\n\n", + "supporting": [ + "multi-modal_files" + ], + "filters": [], + "includes": { + "include-in-header": [ + "\n\n\n" + ] + } + } +} \ No newline at end of file diff --git a/docs/_quarto.yml b/docs/_quarto.yml index e77eff7e..e466b5d1 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -1,5 +1,16 @@ project: type: website + render: + - "*.qmd" + - "CHANGELOG.md" + - "get-started/*.qmd" + - "misc/*.qmd" + - "structured-data/*.qmd" + - "tool-calling/*.qmd" + - "dev/*.qmd" + - "reference/*.qmd" + - "scale.qmd" + - "why-chatlas.qmd" resources: - objects.txt - images/ @@ -30,7 +41,14 @@ website: title: chatlas page-navigation: true site-url: https://posit-dev.github.io/chatlas/ + plausible-analytics: | + + description: Your friendly guide to building LLM chat apps in Python with less effort and more clarity. + llms-txt: true bread-crumbs: true open-graph: true @@ -103,10 +121,12 @@ website: - misc/evals.qmd +execute: + freeze: auto + format: html: include-in-header: - - "include-in-header.html" - text: | theme: diff --git a/docs/_sidebar.yml b/docs/_sidebar.yml index e32c8dad..08355ed2 100644 --- a/docs/_sidebar.yml +++ b/docs/_sidebar.yml @@ -40,16 +40,26 @@ website: - reference/ToolRejectError.qmd section: Tool calling - contents: + - reference/tool_web_search.qmd + - reference/tool_web_fetch.qmd + section: Built-in tools + - contents: + - reference/parallel_chat.qmd + - reference/parallel_chat_text.qmd + - reference/parallel_chat_structured.qmd - reference/batch_chat.qmd - reference/batch_chat_text.qmd - reference/batch_chat_structured.qmd - reference/batch_chat_completed.qmd - section: Batch chat + section: Parallel and batch chat - contents: - reference/interpolate.qmd - reference/interpolate_file.qmd section: Prompt interpolation - contents: + - reference/AssistantTurn.qmd + - reference/UserTurn.qmd + - reference/SystemTurn.qmd - reference/Turn.qmd section: Turns - contents: @@ -67,6 +77,10 @@ website: - reference/types.ContentText.qmd - 
reference/types.ContentToolRequest.qmd - reference/types.ContentToolResult.qmd + - reference/types.ContentToolRequestSearch.qmd + - reference/types.ContentToolResponseSearch.qmd + - reference/types.ContentToolRequestFetch.qmd + - reference/types.ContentToolResponseFetch.qmd - reference/types.ChatResponse.qmd - reference/types.ChatResponseAsync.qmd - reference/types.ImageContentTypes.qmd diff --git a/docs/include-in-header.html b/docs/include-in-header.html deleted file mode 100644 index e76b403c..00000000 --- a/docs/include-in-header.html +++ /dev/null @@ -1,6 +0,0 @@ - - - diff --git a/pyproject.toml b/pyproject.toml index c4829287..9b9ab8d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,7 @@ dev = [ "tenacity" ] docs = [ - "griffe>=1", + "griffe>=1.3.2,<2.0.0", "quartodoc>=0.7", # Quarto requires... "ipykernel",