diff --git a/LICENSE b/LICENSE index 9b45062..627571f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,190 +1,201 @@ -Apache License -Version 2.0, January 2004 -http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. 
Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, 
in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- -END OF TERMS AND CONDITIONS - -Copyright 2025 TABStack - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2025 Mozilla + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index a751456..40a3ce6 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# TABStack AI Python SDK +# Tabstack Python SDK [![PyPI version](https://badge.fury.io/py/tabstack.svg)](https://badge.fury.io/py/tabstack) [![Python Versions](https://img.shields.io/pypi/pyversions/tabstack.svg)](https://pypi.org/project/tabstack/) @@ -6,7 +6,10 @@ [![Tests](https://github.com/Mozilla-Ocho/tabstack-python/workflows/Tests/badge.svg)](https://github.com/Mozilla-Ocho/tabstack-python/actions) [![codecov](https://codecov.io/gh/Mozilla-Ocho/tabstack-python/branch/main/graph/badge.svg)](https://codecov.io/gh/Mozilla-Ocho/tabstack-python) -Python SDK for [TABStack AI](https://tabstack.ai) - Extract, Generate, and Automate web content using AI. +> [!WARNING] +> **Early Release**: This SDK is in early development. The API may change in future releases as we refine and improve the library based on user feedback. + +Python SDK for [Tabstack](https://tabstack.ai) - Extract, Generate, and Automate web content using AI. 
## Features @@ -58,11 +61,11 @@ pip install -e ".[dev]" ```python import asyncio import os -from tabstack import TABStack +from tabstack import Tabstack async def main(): # Initialize the client with connection pooling - async with TABStack( + async with Tabstack( api_key=os.getenv('TABSTACK_API_KEY'), max_connections=100, max_keepalive_connections=20 @@ -124,7 +127,7 @@ async def main(): ) # Automate web tasks (streaming) - async for event in tabs.automate.execute( + async for event in tabs.agent.automate( task="Find the top 3 trending repositories and extract their details", url="https://github.com/trending" ): @@ -144,9 +147,9 @@ All methods are async and should be awaited. The client supports async context m ### Client Initialization ```python -from tabstack import TABStack +from tabstack import Tabstack -async with TABStack( +async with Tabstack( api_key="your-api-key", base_url="https://api.tabstack.ai/", # optional max_connections=100, # optional @@ -159,7 +162,7 @@ async with TABStack( ``` **Parameters:** -- `api_key` (str, required): Your TABStack API key +- `api_key` (str, required): Your Tabstack API key - `base_url` (str, optional): API base URL. Default: `https://api.tabstack.ai/` - `max_connections` (int, optional): Maximum concurrent connections. Default: `100` - `max_keepalive_connections` (int, optional): Maximum idle connections to keep alive. Default: `20` @@ -191,27 +194,6 @@ print(result.content) print(result.metadata.title) ``` -#### `extract.schema(url, instructions, nocache=False)` - -Generate a JSON Schema by analyzing the structure of a webpage. - -**Parameters:** -- `url` (str): URL to analyze -- `instructions` (str): Instructions for what data to extract (max 1000 characters) -- `nocache` (bool): Bypass cache. 
Default: `False` - -**Returns:** `SchemaResponse` with generated `schema` dict - -**Example:** -```python -result = await tabs.extract.schema( - url="https://example.com/products", - instructions="Extract product listings with name, price, and availability" -) -# Use the schema for extraction -data = await tabs.extract.json(url="https://example.com/products", schema=result.schema) -``` - #### `extract.json(url, schema, nocache=False)` Extract structured JSON data from a URL using a schema. @@ -269,11 +251,11 @@ result = await tabs.generate.json( ) ``` -### Automate Operator +### Agent Client -The Automate operator executes complex web automation tasks using natural language. +The Agent client executes complex web automation tasks using natural language. -#### `automate.execute(task, url=None, schema=None)` +#### `agent.automate(task, url=None, schema=None)` Execute an AI-powered browser automation task (returns async iterator for Server-Sent Events). @@ -305,7 +287,7 @@ schema = { } } -async for event in tabs.automate.execute( +async for event in tabs.agent.automate( task="Find trending repositories and extract their names and star counts", url="https://github.com/trending", schema=schema @@ -318,7 +300,7 @@ async for event in tabs.automate.execute( ## Working with JSON Schemas -TABStack uses standard JSON Schema for defining data structures. Here are common patterns: +Tabstack uses standard JSON Schema for defining data structures. 
Here are common patterns: ### Basic Object ```python @@ -400,7 +382,7 @@ The SDK provides specific exception classes for different error scenarios: ```python import asyncio -from tabstack import TABStack +from tabstack import Tabstack from tabstack.exceptions import ( BadRequestError, UnauthorizedError, @@ -410,7 +392,7 @@ from tabstack.exceptions import ( ) async def main(): - async with TABStack(api_key="your-api-key") as tabs: + async with Tabstack(api_key="your-api-key") as tabs: try: result = await tabs.extract.markdown(url="https://example.com") except UnauthorizedError: @@ -476,7 +458,7 @@ mypy tabstack/ ``` tests/ ├── conftest.py # Shared pytest fixtures -├── test_client.py # TABStack client tests +├── test_client.py # Tabstack client tests ├── test_extract.py # Extract operator tests ├── test_generate.py # Generate operator tests ├── test_automate.py # Automate operator tests diff --git a/examples/basic_usage.py b/examples/basic_usage.py index 89a82fe..f05c89f 100644 --- a/examples/basic_usage.py +++ b/examples/basic_usage.py @@ -1,15 +1,15 @@ -"""Example usage of TABStack AI SDK.""" +"""Example usage of Tabstack SDK.""" import asyncio import os -from tabstack import TABStack +from tabstack import Tabstack async def main(): """Run all examples.""" # Initialize the client with connection pooling - async with TABStack( + async with Tabstack( api_key=os.getenv("TABSTACK_API_KEY", "your-api-key-here"), max_connections=50, max_keepalive_connections=10, @@ -29,27 +29,8 @@ async def main(): print("\n") - # Example 2: Generate schema from URL - print("Example 2: Generate Schema") - print("-" * 50) - try: - result = await tabs.extract.schema( - url="https://news.ycombinator.com", - instructions="extract top stories with title, points, and author", - ) - # result.schema is a JSON Schema dict that can be used directly - print(f"Generated schema: {result.schema}") - # You can now use this schema directly with extract.json() - # data = await tabs.extract.json( - # 
url="https://news.ycombinator.com", schema=result.schema - # ) - except Exception as e: - print(f"Error: {e}") - - print("\n") - - # Example 3: Extract structured JSON data - print("Example 3: Extract Structured JSON") + # Example 2: Extract structured JSON data + print("Example 2: Extract Structured JSON") print("-" * 50) try: schema = { @@ -76,8 +57,8 @@ async def main(): print("\n") - # Example 4: Generate transformed content with AI - print("Example 4: Generate Transformed Content") + # Example 3: Generate transformed content with AI + print("Example 3: Generate Transformed Content") print("-" * 50) try: summary_schema = { @@ -109,15 +90,13 @@ async def main(): print("\n") - # Example 5: Automate web tasks (streaming) - print("Example 5: Web Automation (Streaming)") + # Example 4: Automate web tasks (streaming) + print("Example 4: Web Automation (Streaming)") print("-" * 50) try: - async for event in tabs.automate.execute( + async for event in tabs.agent.automate( task="Find the top 3 trending repositories and extract their details", url="https://github.com/trending", - guardrails="browse and extract only, don't interact with repositories", - max_iterations=20, ): if event.type == "task:completed": print(f"✓ Task completed: {event.data.get('finalAnswer', 'N/A')}") diff --git a/pyproject.toml b/pyproject.toml index fc531cc..3823e13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,12 +5,12 @@ build-backend = "setuptools.build_meta" [project] name = "tabstack" version = "1.0.0" -description = "Python SDK for TABStack AI - Extract, Generate, and Automate web content" +description = "Python SDK for Tabstack - Extract, Generate, and Automate web content" readme = "README.md" requires-python = ">=3.10" license = {text = "Apache-2.0"} authors = [ - {name = "TABStack", email = "support@tabstack.ai"} + {name = "Tabstack", email = "support@tabstack.ai"} ] keywords = ["web-scraping", "ai", "automation", "data-extraction", "web-automation"] classifiers = [ diff --git 
a/setup.cfg b/setup.cfg index 64ce4b8..b4c4361 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,11 +1,11 @@ [metadata] name = tabstack-ai version = 1.0.0 -description = Python SDK for TABStack AI - Extract, Generate, and Automate web content +description = Python SDK for Tabstack - Extract, Generate, and Automate web content long_description = file: README.md long_description_content_type = text/markdown url = https://github.com/Mozilla-Ocho/tabstack-python -author = TABStack +author = Tabstack author_email = support@tabstack.ai -license = MIT +license = Apache-2.0 classifiers = diff --git a/setup.py b/setup.py index 1e1a11f..b94b704 100644 --- a/setup.py +++ b/setup.py @@ -7,14 +7,14 @@ with open("README.md", encoding="utf-8") as f: long_description = f.read() except FileNotFoundError: - long_description = "Python SDK for TABStack AI" + long_description = "Python SDK for Tabstack" setup( name="tabstack-ai", version="1.0.0", - author="TABStack", + author="Tabstack", author_email="support@tabstack.ai", - description="Python SDK for TABStack AI - Extract, Generate, and Automate web content", + description="Python SDK for Tabstack - Extract, Generate, and Automate web content", long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/Mozilla-Ocho/tabstack-python", diff --git a/tabstack/__init__.py b/tabstack/__init__.py index b8a0b2b..394f51f 100644 --- a/tabstack/__init__.py +++ b/tabstack/__init__.py @@ -1,6 +1,6 @@ -"""TABStack AI Python SDK. +"""Tabstack Python SDK. -This SDK provides a Python interface to the TABStack AI API for web content +This SDK provides a Python interface to the Tabstack API for web content extraction, AI-powered content generation, and browser automation. 
The SDK provides three main operators: @@ -9,15 +9,15 @@ - **Generate**: Transform and enhance web data using AI - **Automate**: Execute complex browser automation tasks with natural language -The SDK supports both async (TABStack) and sync (TABStackSync) clients: +The SDK supports both async (Tabstack) and sync (TabstackSync) clients: Async Example: >>> import asyncio >>> import os - >>> from tabstack import TABStack + >>> from tabstack import Tabstack >>> >>> async def main(): - ... async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs: + ... async with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs: ... # Extract markdown from a URL ... result = await tabs.extract.markdown(url="https://example.com") ... print(result.content) @@ -26,32 +26,16 @@ Sync Example: >>> import os - >>> from tabstack import TABStackSync + >>> from tabstack import TabstackSync >>> - >>> with TABStackSync(api_key=os.getenv('TABSTACK_API_KEY')) as tabs: + >>> with TabstackSync(api_key=os.getenv('TABSTACK_API_KEY')) as tabs: ... # Extract markdown from a URL (no async/await needed) ... result = tabs.extract.markdown(url="https://example.com") ... print(result.content) -Workflow: Schema Generation → Data Extraction - >>> async def extract_with_generated_schema(): - ... async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs: - ... # First, generate a schema from the content - ... schema_result = await tabs.extract.schema( - ... url="https://news.ycombinator.com", - ... instructions="extract top stories with title, points, and author" - ... ) - ... - ... # Then use the generated schema to extract structured data - ... data = await tabs.extract.json( - ... url="https://news.ycombinator.com", - ... schema=schema_result.schema - ... ) - ... print(data.data) - Workflow: Extract → Transform >>> async def extract_and_transform(): - ... async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs: + ... 
async with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs: ... # Define schema for transformed output ... summary_schema = { ... "type": "object", @@ -80,28 +64,27 @@ Workflow: Browser Automation >>> async def automate_task(): - ... async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs: + ... async with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs: ... # Execute complex web automation tasks - ... async for event in tabs.automate.execute( + ... async for event in tabs.agent.automate( ... task="Extract the top 5 trending repositories", - ... url="https://github.com/trending", - ... guardrails="browse and extract only, do not click stars or forks" + ... url="https://github.com/trending" ... ): ... if event.type == "task:completed": ... print(f"Task complete: {event.data.get('finalAnswer')}") """ -from .automate import Automate -from .automate_sync import AutomateSync -from .client import TABStack -from .client_sync import TABStackSync +from .agent import Agent +from .agent_sync import AgentSync +from .client import Tabstack +from .client_sync import TabstackSync from .exceptions import ( APIError, BadRequestError, InvalidURLError, ServerError, ServiceUnavailableError, - TABStackError, + TabstackError, UnauthorizedError, ) from .extract import Extract @@ -114,31 +97,29 @@ JsonResponse, MarkdownResponse, Metadata, - SchemaResponse, ) __version__ = "1.0.0" __all__ = [ # Main clients - "TABStack", # Async client - "TABStackSync", # Sync client + "Tabstack", # Async client + "TabstackSync", # Sync client # Async operators "Extract", "Generate", - "Automate", + "Agent", # Sync operators "ExtractSync", "GenerateSync", - "AutomateSync", + "AgentSync", # Response types "MarkdownResponse", - "SchemaResponse", "JsonResponse", "Metadata", "AutomateEvent", "EventData", # Exceptions - "TABStackError", + "TabstackError", "BadRequestError", "UnauthorizedError", "InvalidURLError", diff --git a/tabstack/_http_client.py b/tabstack/_http_client.py index 
dfc5471..27764d2 100644 --- a/tabstack/_http_client.py +++ b/tabstack/_http_client.py @@ -1,4 +1,4 @@ -"""Internal HTTP client for TABStack AI SDK.""" +"""Internal HTTP client for Tabstack SDK.""" from typing import Any, AsyncIterator, Dict, Optional @@ -8,15 +8,15 @@ class HTTPClient: - """Internal async HTTP client for TABStack API requests. + """Internal async HTTP client for Tabstack API requests. - Handles HTTP communication with the TABStack API, including: + Handles HTTP communication with the Tabstack API, including: - Connection pooling and keepalive for performance - Request authentication with API keys - Error response parsing and exception mapping - Server-Sent Events (SSE) streaming for automate endpoint - This is an internal class. Users should use the TABStack client instead. + This is an internal class. Users should use the Tabstack client instead. """ def __init__( @@ -92,7 +92,7 @@ async def post(self, path: str, data: Optional[Dict[str, Any]] = None) -> Dict[s Response data as dictionary Raises: - TABStackError: On API errors + TabstackError: On API errors """ client = await self._get_client() headers = get_http_headers(self.api_key) @@ -127,7 +127,7 @@ async def post_stream( Lines from the streaming response Raises: - TABStackError: On API errors + TabstackError: On API errors """ client = await self._get_client() headers = get_http_headers(self.api_key) diff --git a/tabstack/_http_client_sync.py b/tabstack/_http_client_sync.py index 9c2d530..ea2556d 100644 --- a/tabstack/_http_client_sync.py +++ b/tabstack/_http_client_sync.py @@ -1,4 +1,4 @@ -"""Synchronous HTTP client for TABStack AI SDK.""" +"""Synchronous HTTP client for Tabstack SDK.""" from typing import Any, Dict, Iterator, Optional @@ -8,15 +8,15 @@ class HTTPClientSync: - """Synchronous HTTP client for TABStack API requests. + """Synchronous HTTP client for Tabstack API requests. 
- Handles HTTP communication with the TABStack API, including: + Handles HTTP communication with the Tabstack API, including: - Connection pooling and keepalive for performance - Request authentication with API keys - Error response parsing and exception mapping - Server-Sent Events (SSE) streaming for automate endpoint - This is an internal class. Users should use the TABStackSync client instead. + This is an internal class. Users should use the TabstackSync client instead. """ def __init__( @@ -92,7 +92,7 @@ def post(self, path: str, data: Optional[Dict[str, Any]] = None) -> Dict[str, An Response data as dictionary Raises: - TABStackError: On API errors + TabstackError: On API errors """ client = self._get_client() headers = get_http_headers(self.api_key) @@ -125,7 +125,7 @@ def post_stream(self, path: str, data: Optional[Dict[str, Any]] = None) -> Itera Lines from the streaming response Raises: - TABStackError: On API errors + TabstackError: On API errors """ client = self._get_client() headers = get_http_headers(self.api_key) diff --git a/tabstack/_shared.py b/tabstack/_shared.py index 2d1fcde..0f4db95 100644 --- a/tabstack/_shared.py +++ b/tabstack/_shared.py @@ -143,7 +143,7 @@ def handle_error_response(status: int, body: bytes) -> None: body: Response body Raises: - TABStackError: Appropriate exception based on status code + TabstackError: Appropriate exception based on status code """ from .exceptions import ( APIError, diff --git a/tabstack/automate.py b/tabstack/agent.py similarity index 92% rename from tabstack/automate.py rename to tabstack/agent.py index 80ca800..f873b70 100644 --- a/tabstack/automate.py +++ b/tabstack/agent.py @@ -1,4 +1,4 @@ -"""Automate operator for TABStack AI SDK.""" +"""Agent client for Tabstack SDK.""" from typing import Any, AsyncIterator, Dict, Optional @@ -8,10 +8,10 @@ from .utils import validate_json_schema -class Automate: - """Automate operator for AI-powered browser automation. 
+class Agent: + """Agent client for AI-powered browser automation. - The Automate operator enables complex, multi-step web automation tasks using + The Agent client enables complex, multi-step web automation tasks using natural language instructions. An AI agent navigates a real browser, performing actions like clicking, filling forms, and extracting data. @@ -26,14 +26,14 @@ class Automate: """ def __init__(self, http_client: HTTPClient) -> None: - """Initialize Automate operator. + """Initialize Agent client. Args: http_client: HTTP client for making API requests """ self._http = http_client - async def execute( + async def automate( self, task: str, url: Optional[str] = None, @@ -60,8 +60,8 @@ async def execute( ServiceUnavailableError: If automate service is not available Example: - >>> async with TABStack(api_key="your-key") as tabs: - ... async for event in tabs.automate.execute( + >>> async with Tabstack(api_key="your-key") as tabs: + ... async for event in tabs.agent.automate( ... task="Find the top 3 trending repositories", ... url="https://github.com/trending" ... ): diff --git a/tabstack/automate_sync.py b/tabstack/agent_sync.py similarity index 92% rename from tabstack/automate_sync.py rename to tabstack/agent_sync.py index 76ce97f..a1c47f9 100644 --- a/tabstack/automate_sync.py +++ b/tabstack/agent_sync.py @@ -1,4 +1,4 @@ -"""Synchronous Automate operator for TABStack AI SDK.""" +"""Synchronous Agent client for Tabstack SDK.""" from typing import Any, Dict, Iterator, Optional @@ -8,10 +8,10 @@ from .utils import validate_json_schema -class AutomateSync: - """Synchronous Automate operator for AI-powered browser automation. +class AgentSync: + """Synchronous Agent client for AI-powered browser automation. - The Automate operator enables complex, multi-step web automation tasks using + The Agent client enables complex, multi-step web automation tasks using natural language instructions. 
An AI agent navigates a real browser, performing actions like clicking, filling forms, and extracting data. @@ -26,14 +26,14 @@ class AutomateSync: """ def __init__(self, http_client: HTTPClientSync) -> None: - """Initialize Automate operator. + """Initialize Agent client. Args: http_client: Sync HTTP client for making API requests """ self._http = http_client - def execute( + def automate( self, task: str, url: Optional[str] = None, @@ -60,8 +60,8 @@ def execute( ServiceUnavailableError: If automate service is not available Example: - >>> with TABStackSync(api_key="your-key") as tabs: - ... for event in tabs.automate.execute( + >>> with TabstackSync(api_key="your-key") as tabs: + ... for event in tabs.agent.automate( ... task="Find the top 3 trending repositories", ... url="https://github.com/trending" ... ): diff --git a/tabstack/client.py b/tabstack/client.py index 71fa6fa..5c94736 100644 --- a/tabstack/client.py +++ b/tabstack/client.py @@ -1,27 +1,27 @@ -"""Main client for TABStack AI SDK.""" +"""Main client for Tabstack SDK.""" from typing import Any from ._http_client import HTTPClient -from .automate import Automate +from .agent import Agent from .extract import Extract from .generate import Generate -class TABStack: - """TABStack AI async client for web content extraction, generation, and automation. +class Tabstack: + """Tabstack async client for web content extraction, generation, and automation. - This is the main entry point for the TABStack AI SDK. Initialize it with your + This is the main entry point for the Tabstack SDK. Initialize it with your API key to access the extract, generate, and automate operators. All operations are async and support connection pooling for efficient resource usage. Example: >>> import asyncio >>> import os - >>> from tabstack import TABStack + >>> from tabstack import Tabstack >>> >>> async def main(): - ... async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs: + ... 
async with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs: ... result = await tabs.extract.markdown(url="https://example.com") ... print(result.content) >>> @@ -37,11 +37,11 @@ def __init__( keepalive_expiry: float = 30.0, # API's connection timeout is ~30s timeout: float = 60.0, # Web scraping/AI operations can take time ) -> None: - """Initialize TABStack async client with connection pooling. + """Initialize Tabstack async client with connection pooling. Args: - api_key: Your TABStack API key for authentication - base_url: Base URL for the TABStack API (default: https://api.tabstack.ai/) + api_key: Your Tabstack API key for authentication + base_url: Base URL for the Tabstack API (default: https://api.tabstack.ai/) max_connections: Maximum number of connections in the pool (default: 100) max_keepalive_connections: Maximum idle connections to keep alive (default: 20) keepalive_expiry: Time in seconds to keep idle connections alive (default: 30.0) @@ -51,7 +51,7 @@ def __init__( ValueError: If api_key is empty or None Example: - >>> async with TABStack( + >>> async with Tabstack( ... api_key="your-api-key-here", ... max_connections=50, ... max_keepalive_connections=10 @@ -74,13 +74,13 @@ def __init__( # Initialize operators (each shares the same HTTP client for connection reuse) self.extract = Extract(self._http_client) self.generate = Generate(self._http_client) - self.automate = Automate(self._http_client) + self.agent = Agent(self._http_client) async def close(self) -> None: """Close the HTTP client and release all connections. Example: - >>> tabs = TABStack(api_key="your-key") + >>> tabs = Tabstack(api_key="your-key") >>> try: ... result = await tabs.extract.markdown(url="https://example.com") ... finally: @@ -88,11 +88,11 @@ async def close(self) -> None: """ await self._http_client.close() - async def __aenter__(self) -> "TABStack": + async def __aenter__(self) -> "Tabstack": """Async context manager entry. 
Example: - >>> async with TABStack(api_key="your-key") as tabs: + >>> async with Tabstack(api_key="your-key") as tabs: ... result = await tabs.extract.markdown(url="https://example.com") """ return self @@ -103,4 +103,4 @@ async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: def __repr__(self) -> str: """String representation of the client.""" - return f"TABStack(base_url='{self._http_client.base_url}')" + return f"Tabstack(base_url='{self._http_client.base_url}')" diff --git a/tabstack/client_sync.py b/tabstack/client_sync.py index 3c2e541..77eaa28 100644 --- a/tabstack/client_sync.py +++ b/tabstack/client_sync.py @@ -1,26 +1,26 @@ -"""Synchronous main client for TABStack AI SDK.""" +"""Synchronous main client for Tabstack SDK.""" from typing import Any from ._http_client_sync import HTTPClientSync -from .automate_sync import AutomateSync +from .agent_sync import AgentSync from .extract_sync import ExtractSync from .generate_sync import GenerateSync -class TABStackSync: - """TABStack AI synchronous client for web content extraction, generation, and automation. +class TabstackSync: + """Tabstack synchronous client for web content extraction, generation, and automation. - This is the synchronous version of the TABStack AI SDK. Use this when you don't need - async/await support. For async support, use the `TABStack` class instead. + This is the synchronous version of the Tabstack SDK. Use this when you don't need + async/await support. For async support, use the `Tabstack` class instead. All operations are synchronous and support connection pooling for efficient resource usage. Example: >>> import os - >>> from tabstack import TABStackSync + >>> from tabstack import TabstackSync >>> - >>> with TABStackSync(api_key=os.getenv('TABSTACK_API_KEY')) as tabs: + >>> with TabstackSync(api_key=os.getenv('TABSTACK_API_KEY')) as tabs: ... result = tabs.extract.markdown(url="https://example.com") ... 
print(result.content) """ @@ -34,11 +34,11 @@ def __init__( keepalive_expiry: float = 30.0, # API's connection timeout is ~30s timeout: float = 60.0, # Web scraping/AI operations can take time ) -> None: - """Initialize TABStack synchronous client with connection pooling. + """Initialize Tabstack synchronous client with connection pooling. Args: - api_key: Your TABStack API key for authentication - base_url: Base URL for the TABStack API (default: https://api.tabstack.ai/) + api_key: Your Tabstack API key for authentication + base_url: Base URL for the Tabstack API (default: https://api.tabstack.ai/) max_connections: Maximum number of connections in the pool (default: 100) max_keepalive_connections: Maximum idle connections to keep alive (default: 20) keepalive_expiry: Time in seconds to keep idle connections alive (default: 30.0) @@ -48,7 +48,7 @@ def __init__( ValueError: If api_key is empty or None Example: - >>> with TABStackSync( + >>> with TabstackSync( ... api_key="your-api-key-here", ... max_connections=50, ... max_keepalive_connections=10 @@ -71,13 +71,13 @@ def __init__( # Initialize operators (each shares the same HTTP client for connection reuse) self.extract = ExtractSync(self._http_client) self.generate = GenerateSync(self._http_client) - self.automate = AutomateSync(self._http_client) + self.agent = AgentSync(self._http_client) def close(self) -> None: """Close the HTTP client and release all connections. Example: - >>> tabs = TABStackSync(api_key="your-key") + >>> tabs = TabstackSync(api_key="your-key") >>> try: ... result = tabs.extract.markdown(url="https://example.com") ... finally: @@ -85,11 +85,11 @@ def close(self) -> None: """ self._http_client.close() - def __enter__(self) -> "TABStackSync": + def __enter__(self) -> "TabstackSync": """Sync context manager entry. Example: - >>> with TABStackSync(api_key="your-key") as tabs: + >>> with TabstackSync(api_key="your-key") as tabs: ... 
result = tabs.extract.markdown(url="https://example.com") """ return self @@ -100,4 +100,4 @@ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: def __repr__(self) -> str: """String representation of the client.""" - return f"TABStackSync(base_url='{self._http_client.base_url}')" + return f"TabstackSync(base_url='{self._http_client.base_url}')" diff --git a/tabstack/exceptions.py b/tabstack/exceptions.py index 21e259c..d0e6338 100644 --- a/tabstack/exceptions.py +++ b/tabstack/exceptions.py @@ -1,10 +1,10 @@ -"""Custom exceptions for TABStack AI SDK.""" +"""Custom exceptions for Tabstack SDK.""" from typing import Optional -class TABStackError(Exception): - """Base exception for all TABStack AI errors.""" +class TabstackError(Exception): + """Base exception for all Tabstack errors.""" def __init__(self, message: str, status_code: Optional[int] = None) -> None: """Initialize error. @@ -18,7 +18,7 @@ def __init__(self, message: str, status_code: Optional[int] = None) -> None: super().__init__(self.message) -class BadRequestError(TABStackError): +class BadRequestError(TabstackError): """Exception for 400 Bad Request errors. Raised when the request is malformed or missing required fields @@ -36,7 +36,7 @@ def __init__(self, message: str) -> None: super().__init__(message, status_code=400) -class UnauthorizedError(TABStackError): +class UnauthorizedError(TabstackError): """Exception for 401 Unauthorized errors. Raised when the API key is invalid or missing. Verify your API key @@ -55,7 +55,7 @@ def __init__(self, message: str = "Unauthorized - Invalid or missing API key") - super().__init__(message, status_code=401) -class InvalidURLError(TABStackError): +class InvalidURLError(TabstackError): """Exception for 422 Unprocessable Entity errors related to URLs. 
Raised when the provided URL is invalid, inaccessible, or returns an error @@ -74,7 +74,7 @@ def __init__(self, message: str = "Invalid or inaccessible URL") -> None: super().__init__(message, status_code=422) -class ServerError(TABStackError): +class ServerError(TabstackError): """Exception for 500 Internal Server Error. Raised when the server encounters an error processing the request. @@ -93,7 +93,7 @@ def __init__(self, message: str = "Internal server error") -> None: super().__init__(message, status_code=500) -class ServiceUnavailableError(TABStackError): +class ServiceUnavailableError(TabstackError): """Exception for 503 Service Unavailable errors. Raised when a service (e.g., automate) is temporarily unavailable, @@ -112,7 +112,7 @@ def __init__(self, message: str = "Service unavailable") -> None: super().__init__(message, status_code=503) -class APIError(TABStackError): +class APIError(TabstackError): """Generic API error for unexpected status codes.""" def __init__(self, message: str, status_code: int) -> None: diff --git a/tabstack/extract.py b/tabstack/extract.py index f4a46dc..7993975 100644 --- a/tabstack/extract.py +++ b/tabstack/extract.py @@ -1,10 +1,10 @@ -"""Extract operator for TABStack AI SDK.""" +"""Extract operator for Tabstack SDK.""" -from typing import Any, Dict, Optional +from typing import Any, Dict from ._http_client import HTTPClient -from ._shared import build_json_extract_request, build_markdown_request, build_schema_request -from .types import JsonResponse, MarkdownResponse, SchemaResponse +from ._shared import build_json_extract_request, build_markdown_request +from .types import JsonResponse, MarkdownResponse from .utils import validate_json_schema @@ -14,7 +14,6 @@ class Extract: The Extract operator converts web content into structured formats without AI transformation. 
Use Extract when you want to: - Convert HTML to clean Markdown - - Discover data structure automatically with schema generation - Extract structured data that exists in the page (no summarization/transformation) For AI-powered transformation of content, use the Generate operator instead. @@ -54,7 +53,7 @@ async def markdown( ServerError: If server encounters an error Example: - >>> async with TABStack(api_key="your-key") as tabs: + >>> async with Tabstack(api_key="your-key") as tabs: ... result = await tabs.extract.markdown( ... url="https://example.com/blog/article", ... metadata=True @@ -66,48 +65,6 @@ async def markdown( response = await self._http.post("v1/extract/markdown", request_data) return MarkdownResponse.from_dict(response) - async def schema( - self, url: str, instructions: Optional[str] = None, nocache: bool = False - ) -> SchemaResponse: - """Generate JSON Schema from URL content using AI. - - Analyzes the structure of content on a page and generates a JSON Schema - that describes it. The generated schema can then be used with extract.json() - to extract data from similar pages. - - Instructions help guide the AI to focus on specific data. Keep instructions - under 1000 characters for best results. - - Args: - url: URL to analyze and extract schema from - instructions: Optional guidance for schema generation (max 1000 characters). - Example: "extract top stories with title, points, and author" - nocache: Bypass cache and force fresh data retrieval - - Returns: - SchemaResponse containing the generated JSON Schema dict - - Raises: - BadRequestError: If URL is missing or instructions exceed 1000 characters - UnauthorizedError: If API key is invalid - InvalidURLError: If URL is invalid or inaccessible - ServerError: If server encounters an error - - Example: - >>> async with TABStack(api_key="your-key") as tabs: - ... result = await tabs.extract.schema( - ... url="https://news.ycombinator.com", - ... 
instructions="extract top stories with title, points, and author" - ... ) - ... data = await tabs.extract.json( - ... url="https://news.ycombinator.com", - ... schema=result.schema - ... ) - """ - request_data = build_schema_request(url, instructions, nocache) - response = await self._http.post("v1/extract/json/schema", request_data) - return SchemaResponse.from_dict(response) - async def json(self, url: str, schema: Dict[str, Any], nocache: bool = False) -> JsonResponse: """Extract structured JSON data from URL content. @@ -134,7 +91,7 @@ async def json(self, url: str, schema: Dict[str, Any], nocache: bool = False) -> ServerError: If server encounters an error Example: - >>> async with TABStack(api_key="your-key") as tabs: + >>> async with Tabstack(api_key="your-key") as tabs: ... schema = { ... "type": "object", ... "properties": { diff --git a/tabstack/extract_sync.py b/tabstack/extract_sync.py index 222a9aa..70eb8f0 100644 --- a/tabstack/extract_sync.py +++ b/tabstack/extract_sync.py @@ -1,10 +1,10 @@ -"""Synchronous Extract operator for TABStack AI SDK.""" +"""Synchronous Extract operator for Tabstack SDK.""" -from typing import Any, Dict, Optional +from typing import Any, Dict from ._http_client_sync import HTTPClientSync -from ._shared import build_json_extract_request, build_markdown_request, build_schema_request -from .types import JsonResponse, MarkdownResponse, SchemaResponse +from ._shared import build_json_extract_request, build_markdown_request +from .types import JsonResponse, MarkdownResponse from .utils import validate_json_schema @@ -14,7 +14,6 @@ class ExtractSync: The Extract operator converts web content into structured formats without AI transformation. Use Extract when you want to: - Convert HTML to clean Markdown - - Discover data structure automatically with schema generation - Extract structured data that exists in the page (no summarization/transformation) For AI-powered transformation of content, use the Generate operator instead. 
@@ -52,7 +51,7 @@ def markdown(self, url: str, metadata: bool = False, nocache: bool = False) -> M ServerError: If server encounters an error Example: - >>> with TABStackSync(api_key="your-key") as tabs: + >>> with TabstackSync(api_key="your-key") as tabs: ... result = tabs.extract.markdown( ... url="https://example.com/blog/article", ... metadata=True @@ -64,48 +63,6 @@ def markdown(self, url: str, metadata: bool = False, nocache: bool = False) -> M response = self._http.post("v1/extract/markdown", request_data) return MarkdownResponse.from_dict(response) - def schema( - self, url: str, instructions: Optional[str] = None, nocache: bool = False - ) -> SchemaResponse: - """Generate JSON Schema from URL content using AI. - - Analyzes the structure of content on a page and generates a JSON Schema - that describes it. The generated schema can then be used with extract.json() - to extract data from similar pages. - - Instructions help guide the AI to focus on specific data. Keep instructions - under 1000 characters for best results. - - Args: - url: URL to analyze and extract schema from - instructions: Optional guidance for schema generation (max 1000 characters). - Example: "extract top stories with title, points, and author" - nocache: Bypass cache and force fresh data retrieval - - Returns: - SchemaResponse containing the generated JSON Schema dict - - Raises: - BadRequestError: If URL is missing or instructions exceed 1000 characters - UnauthorizedError: If API key is invalid - InvalidURLError: If URL is invalid or inaccessible - ServerError: If server encounters an error - - Example: - >>> with TABStackSync(api_key="your-key") as tabs: - ... result = tabs.extract.schema( - ... url="https://news.ycombinator.com", - ... instructions="extract top stories with title, points, and author" - ... ) - ... data = tabs.extract.json( - ... url="https://news.ycombinator.com", - ... schema=result.schema - ... 
) - """ - request_data = build_schema_request(url, instructions, nocache) - response = self._http.post("v1/extract/json/schema", request_data) - return SchemaResponse.from_dict(response) - def json(self, url: str, schema: Dict[str, Any], nocache: bool = False) -> JsonResponse: """Extract structured JSON data from URL content. @@ -132,7 +89,7 @@ def json(self, url: str, schema: Dict[str, Any], nocache: bool = False) -> JsonR ServerError: If server encounters an error Example: - >>> with TABStackSync(api_key="your-key") as tabs: + >>> with TabstackSync(api_key="your-key") as tabs: ... schema = { ... "type": "object", ... "properties": { diff --git a/tabstack/generate.py b/tabstack/generate.py index f2cea41..ca7dfc8 100644 --- a/tabstack/generate.py +++ b/tabstack/generate.py @@ -1,4 +1,4 @@ -"""Generate operator for TABStack AI SDK.""" +"""Generate operator for Tabstack SDK.""" from typing import Any, Dict @@ -62,7 +62,7 @@ async def json( ServerError: If server encounters an error Example: - >>> async with TABStack(api_key="your-key") as tabs: + >>> async with Tabstack(api_key="your-key") as tabs: ... schema = { ... "type": "object", ... "properties": { diff --git a/tabstack/generate_sync.py b/tabstack/generate_sync.py index 7536a17..45aa231 100644 --- a/tabstack/generate_sync.py +++ b/tabstack/generate_sync.py @@ -1,4 +1,4 @@ -"""Synchronous Generate operator for TABStack AI SDK.""" +"""Synchronous Generate operator for Tabstack SDK.""" from typing import Any, Dict @@ -62,7 +62,7 @@ def json( ServerError: If server encounters an error Example: - >>> with TABStackSync(api_key="your-key") as tabs: + >>> with TabstackSync(api_key="your-key") as tabs: ... schema = { ... "type": "object", ... 
"properties": { diff --git a/tabstack/types.py b/tabstack/types.py index cf5c7b3..17bca6c 100644 --- a/tabstack/types.py +++ b/tabstack/types.py @@ -1,4 +1,4 @@ -"""Type definitions and response models for TABStack AI SDK.""" +"""Type definitions and response models for Tabstack SDK.""" from typing import Any, Dict, Optional diff --git a/tests/__init__.py b/tests/__init__.py index 2242973..f1570bb 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1 +1 @@ -"""Tests for TABStack Python SDK.""" +"""Tests for Tabstack Python SDK.""" diff --git a/tests/conftest.py b/tests/conftest.py index c0ab793..0c03dac 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,4 @@ -"""Shared pytest fixtures for TABStack SDK tests. +"""Shared pytest fixtures for Tabstack SDK tests. Provides fixtures for mocking HTTP responses and creating test clients. """ diff --git a/tests/test_automate.py b/tests/test_automate.py deleted file mode 100644 index bafda9d..0000000 --- a/tests/test_automate.py +++ /dev/null @@ -1,195 +0,0 @@ -"""Tests for Automate operator.""" - -from typing import Any - -import pytest - -from tabstack.automate import Automate -from tabstack.types import AutomateEvent - - -class TestAutomateExecute: - """Tests for automate execution.""" - - async def test_execute_streaming(self, mocker: Any, mock_automate_events: list[str]) -> None: - """Test automate execute with streaming events.""" - mock_http = mocker.Mock() - - # Mock the streaming response - async def mock_stream(path, data): # type: ignore - for event_line in mock_automate_events: - yield event_line - - # Use Mock instead of AsyncMock for post_stream - mock_http.post_stream = mocker.Mock(side_effect=mock_stream) - - automate = Automate(mock_http) - events = [] - async for event in automate.execute(task="Extract data", url="https://example.com"): - events.append(event) - - # Should have received all events - assert len(events) == 4 - assert all(isinstance(e, AutomateEvent) for e in events) - - # 
Check event types - assert events[0].type == "start" - assert events[1].type == "agent:navigating" - assert events[2].type == "agent:extracted" - assert events[3].type == "task:completed" - - # Verify API was called correctly - mock_http.post_stream.assert_called_once_with( - "v1/automate", - { - "task": "Extract data", - "url": "https://example.com", - }, - ) - - async def test_execute_with_schema(self, mocker: Any, json_schema: dict[str, Any]) -> None: - """Test automate execute with JSON schema.""" - mock_http = mocker.Mock() - - async def mock_stream(path, data): # type: ignore - yield "event: task:completed" - yield 'data: {"finalAnswer": "Done", "success": true}' - yield "" # Empty line completes the event - - # Use Mock instead of AsyncMock for post_stream - mock_http.post_stream = mocker.Mock(side_effect=mock_stream) - - automate = Automate(mock_http) - events = [] - async for event in automate.execute( - task="Extract data", url="https://example.com", schema=json_schema - ): - events.append(event) - - # Verify schema was passed - assert len(events) > 0 - call_args = mock_http.post_stream.call_args - assert call_args[0][1]["schema"] == json_schema - - async def test_execute_validates_schema(self, mocker: Any) -> None: - """Test automate validates schema before sending.""" - mock_http = mocker.AsyncMock() - automate = Automate(mock_http) - - # Invalid schema should raise ValueError - invalid_schema = {"missing": "type"} - with pytest.raises(ValueError, match="Schema must have a 'type' field"): - async for _ in automate.execute( - task="Test", url="https://example.com", schema=invalid_schema - ): - pass - - async def test_execute_parses_event_data(self, mocker: Any) -> None: - """Test automate correctly parses event data.""" - mock_http = mocker.Mock() - - async def mock_stream(path, data): # type: ignore - yield "event: agent:extracted" - yield 'data: {"extractedData": {"title": "Test Title", "count": 42}}' - yield "" # Empty line completes the event - - # 
Use Mock instead of AsyncMock for post_stream - mock_http.post_stream = mocker.Mock(side_effect=mock_stream) - - automate = Automate(mock_http) - events = [] - async for event in automate.execute(task="Test", url="https://example.com"): - events.append(event) - - assert len(events) == 1 - event = events[0] - assert event.type == "agent:extracted" - # Access via snake_case (converted from camelCase) - assert event.data.extracted_data["title"] == "Test Title" - assert event.data.extracted_data["count"] == 42 - - async def test_execute_handles_malformed_sse(self, mocker: Any) -> None: - """Test automate handles malformed SSE gracefully.""" - mock_http = mocker.Mock() - - async def mock_stream(path, data): # type: ignore - yield "event: start" # Event without data - yield "" # Complete the event (will have no data) - yield "data: not-json" # Data without event (invalid) - yield "event: valid" # Valid event - yield 'data: {"message": "ok"}' - yield "" # Complete the event - - # Use Mock instead of AsyncMock for post_stream - mock_http.post_stream = mocker.Mock(side_effect=mock_stream) - - automate = Automate(mock_http) - events = [] - async for event in automate.execute(task="Test", url="https://example.com"): - events.append(event) - - # Should have at least parsed the valid event - # (implementation may vary on how it handles malformed events) - assert len(events) >= 1 - - async def test_execute_with_empty_task(self, mocker: Any) -> None: - """Test automate with empty task string.""" - mock_http = mocker.Mock() - - async def mock_stream(path, data): # type: ignore - yield "event: task:completed" - yield 'data: {"finalAnswer": "Done"}' - yield "" # Empty line completes the event - - # Use Mock instead of AsyncMock for post_stream - mock_http.post_stream = mocker.Mock(side_effect=mock_stream) - - automate = Automate(mock_http) - events = [] - async for event in automate.execute(task="", url="https://example.com"): - events.append(event) - - # Should still call API (API 
will validate) - mock_http.post_stream.assert_called_once() - - async def test_execute_event_types(self, mocker: Any) -> None: - """Test various event types are parsed correctly.""" - mock_http = mocker.Mock() - - async def mock_stream(path, data): # type: ignore - # Various event types from the API - yield "event: start" - yield 'data: {"message": "Starting"}' - yield "" - yield "event: agent:navigating" - yield 'data: {"url": "https://test.com"}' - yield "" - yield "event: agent:thinking" - yield 'data: {"thought": "Analyzing page"}' - yield "" - yield "event: agent:extracted" - yield 'data: {"extractedData": {}}' - yield "" - yield "event: agent:action" - yield 'data: {"action": "click", "selector": "button"}' - yield "" - yield "event: task:completed" - yield 'data: {"finalAnswer": "Done", "success": true}' - yield "" - - # Use Mock instead of AsyncMock for post_stream - mock_http.post_stream = mocker.Mock(side_effect=mock_stream) - - automate = Automate(mock_http) - events = [] - async for event in automate.execute(task="Test", url="https://example.com"): - events.append(event) - - assert len(events) == 6 - event_types = [e.type for e in events] - assert "start" in event_types - assert "agent:navigating" in event_types - assert "agent:thinking" in event_types - assert "agent:extracted" in event_types - assert "agent:action" in event_types - assert "task:completed" in event_types diff --git a/tests/test_automate_sync.py b/tests/test_automate_sync.py deleted file mode 100644 index b8c2007..0000000 --- a/tests/test_automate_sync.py +++ /dev/null @@ -1,195 +0,0 @@ -"""Tests for AutomateSync operator.""" - -from typing import Any - -import pytest - -from tabstack.automate_sync import AutomateSync -from tabstack.types import AutomateEvent - - -class TestAutomateSyncExecute: - """Tests for automate execution.""" - - def test_execute_streaming(self, mocker: Any, mock_automate_events: list[str]) -> None: - """Test automate execute with streaming events.""" - mock_http 
= mocker.Mock() - - # Mock the streaming response - def mock_stream(path, data): # type: ignore - for event_line in mock_automate_events: - yield event_line - - # Use Mock instead of AsyncMock for post_stream - mock_http.post_stream = mocker.Mock(side_effect=mock_stream) - - automate = AutomateSync(mock_http) - events = [] - for event in automate.execute(task="Extract data", url="https://example.com"): - events.append(event) - - # Should have received all events - assert len(events) == 4 - assert all(isinstance(e, AutomateEvent) for e in events) - - # Check event types - assert events[0].type == "start" - assert events[1].type == "agent:navigating" - assert events[2].type == "agent:extracted" - assert events[3].type == "task:completed" - - # Verify API was called correctly - mock_http.post_stream.assert_called_once_with( - "v1/automate", - { - "task": "Extract data", - "url": "https://example.com", - }, - ) - - def test_execute_with_schema(self, mocker: Any, json_schema: dict[str, Any]) -> None: - """Test automate execute with JSON schema.""" - mock_http = mocker.Mock() - - def mock_stream(path, data): # type: ignore - yield "event: task:completed" - yield 'data: {"finalAnswer": "Done", "success": true}' - yield "" # Empty line completes the event - - # Use Mock instead of AsyncMock for post_stream - mock_http.post_stream = mocker.Mock(side_effect=mock_stream) - - automate = AutomateSync(mock_http) - events = [] - for event in automate.execute( - task="Extract data", url="https://example.com", schema=json_schema - ): - events.append(event) - - # Verify schema was passed - assert len(events) > 0 - call_args = mock_http.post_stream.call_args - assert call_args[0][1]["schema"] == json_schema - - def test_execute_validates_schema(self, mocker: Any) -> None: - """Test automate validates schema before sending.""" - mock_http = mocker.Mock() - automate = AutomateSync(mock_http) - - # Invalid schema should raise ValueError - invalid_schema = {"missing": "type"} - with 
pytest.raises(ValueError, match="Schema must have a 'type' field"): - for _ in automate.execute( - task="Test", url="https://example.com", schema=invalid_schema - ): - pass - - def test_execute_parses_event_data(self, mocker: Any) -> None: - """Test automate correctly parses event data.""" - mock_http = mocker.Mock() - - def mock_stream(path, data): # type: ignore - yield "event: agent:extracted" - yield 'data: {"extractedData": {"title": "Test Title", "count": 42}}' - yield "" # Empty line completes the event - - # Use Mock instead of AsyncMock for post_stream - mock_http.post_stream = mocker.Mock(side_effect=mock_stream) - - automate = AutomateSync(mock_http) - events = [] - for event in automate.execute(task="Test", url="https://example.com"): - events.append(event) - - assert len(events) == 1 - event = events[0] - assert event.type == "agent:extracted" - # Access via snake_case (converted from camelCase) - assert event.data.extracted_data["title"] == "Test Title" - assert event.data.extracted_data["count"] == 42 - - def test_execute_handles_malformed_sse(self, mocker: Any) -> None: - """Test automate handles malformed SSE gracefully.""" - mock_http = mocker.Mock() - - def mock_stream(path, data): # type: ignore - yield "event: start" # Event without data - yield "" # Complete the event (will have no data) - yield "data: not-json" # Data without event (invalid) - yield "event: valid" # Valid event - yield 'data: {"message": "ok"}' - yield "" # Complete the event - - # Use Mock instead of AsyncMock for post_stream - mock_http.post_stream = mocker.Mock(side_effect=mock_stream) - - automate = AutomateSync(mock_http) - events = [] - for event in automate.execute(task="Test", url="https://example.com"): - events.append(event) - - # Should have at least parsed the valid event - # (implementation may vary on how it handles malformed events) - assert len(events) >= 1 - - def test_execute_with_empty_task(self, mocker: Any) -> None: - """Test automate with empty task 
string.""" - mock_http = mocker.Mock() - - def mock_stream(path, data): # type: ignore - yield "event: task:completed" - yield 'data: {"finalAnswer": "Done"}' - yield "" # Empty line completes the event - - # Use Mock instead of AsyncMock for post_stream - mock_http.post_stream = mocker.Mock(side_effect=mock_stream) - - automate = AutomateSync(mock_http) - events = [] - for event in automate.execute(task="", url="https://example.com"): - events.append(event) - - # Should still call API (API will validate) - mock_http.post_stream.assert_called_once() - - def test_execute_event_types(self, mocker: Any) -> None: - """Test various event types are parsed correctly.""" - mock_http = mocker.Mock() - - def mock_stream(path, data): # type: ignore - # Various event types from the API - yield "event: start" - yield 'data: {"message": "Starting"}' - yield "" - yield "event: agent:navigating" - yield 'data: {"url": "https://test.com"}' - yield "" - yield "event: agent:thinking" - yield 'data: {"thought": "Analyzing page"}' - yield "" - yield "event: agent:extracted" - yield 'data: {"extractedData": {}}' - yield "" - yield "event: agent:action" - yield 'data: {"action": "click", "selector": "button"}' - yield "" - yield "event: task:completed" - yield 'data: {"finalAnswer": "Done", "success": true}' - yield "" - - # Use Mock instead of AsyncMock for post_stream - mock_http.post_stream = mocker.Mock(side_effect=mock_stream) - - automate = AutomateSync(mock_http) - events = [] - for event in automate.execute(task="Test", url="https://example.com"): - events.append(event) - - assert len(events) == 6 - event_types = [e.type for e in events] - assert "start" in event_types - assert "agent:navigating" in event_types - assert "agent:thinking" in event_types - assert "agent:extracted" in event_types - assert "agent:action" in event_types - assert "task:completed" in event_types diff --git a/tests/test_client.py b/tests/test_client.py index c7541da..4f99c8c 100644 --- 
a/tests/test_client.py +++ b/tests/test_client.py @@ -1,63 +1,63 @@ -"""Tests for main TABStack client.""" +"""Tests for main Tabstack client.""" from typing import Any import pytest -from tabstack import TABStack -from tabstack.automate import Automate +from tabstack import Tabstack +from tabstack.agent import Agent from tabstack.extract import Extract from tabstack.generate import Generate -class TestTABStackInitialization: - """Tests for TABStack client initialization.""" +class TestTabstackInitialization: + """Tests for Tabstack client initialization.""" def test_initialization_with_api_key(self) -> None: """Test client initialization with API key.""" - client = TABStack(api_key="test_key_123") + client = Tabstack(api_key="test_key_123") assert client._http_client.api_key == "test_key_123" def test_initialization_with_custom_base_url(self) -> None: """Test client initialization with custom base URL.""" - client = TABStack(api_key="test_key", base_url="https://custom.api.com") + client = Tabstack(api_key="test_key", base_url="https://custom.api.com") assert client._http_client.base_url == "https://custom.api.com" def test_initialization_missing_api_key(self) -> None: """Test initialization without API key raises error.""" with pytest.raises(TypeError): - TABStack() # type: ignore + Tabstack() # type: ignore def test_operators_are_initialized(self) -> None: """Test all operators are properly initialized.""" - client = TABStack(api_key="test_key") + client = Tabstack(api_key="test_key") assert isinstance(client.extract, Extract) assert isinstance(client.generate, Generate) - assert isinstance(client.automate, Automate) + assert isinstance(client.agent, Agent) def test_operators_share_http_client(self) -> None: """Test all operators share the same HTTP client.""" - client = TABStack(api_key="test_key") + client = Tabstack(api_key="test_key") # All operators should use the same HTTP client instance assert client.extract._http is client._http_client assert 
client.generate._http is client._http_client - assert client.automate._http is client._http_client + assert client.agent._http is client._http_client -class TestTABStackContextManager: +class TestTabstackContextManager: """Tests for async context manager support.""" async def test_context_manager_usage(self) -> None: - """Test using TABStack as async context manager.""" - async with TABStack(api_key="test_key") as client: - assert isinstance(client, TABStack) + """Test using Tabstack as async context manager.""" + async with Tabstack(api_key="test_key") as client: + assert isinstance(client, Tabstack) assert isinstance(client.extract, Extract) # Client should be closed after context async def test_context_manager_closes_http_client(self, mocker: Any) -> None: """Test context manager closes HTTP client.""" - client = TABStack(api_key="test_key") + client = Tabstack(api_key="test_key") # Mock the close method mock_close = mocker.AsyncMock() @@ -70,7 +70,7 @@ async def test_context_manager_closes_http_client(self, mocker: Any) -> None: async def test_manual_close(self, mocker: Any) -> None: """Test manually closing the client.""" - client = TABStack(api_key="test_key") + client = Tabstack(api_key="test_key") mock_close = mocker.AsyncMock() client._http_client.close = mock_close @@ -80,8 +80,8 @@ async def test_manual_close(self, mocker: Any) -> None: mock_close.assert_called_once() -class TestTABStackIntegration: - """Integration tests using TABStack client.""" +class TestTabstackIntegration: + """Integration tests using Tabstack client.""" async def test_extract_markdown_integration(self, mocker: Any) -> None: """Test complete flow for extracting markdown.""" @@ -97,7 +97,7 @@ async def test_extract_markdown_integration(self, mocker: Any) -> None: mock_httpx_client = mocker.AsyncMock() mock_httpx_client.post.return_value = mock_response - client = TABStack(api_key="test_key") + client = Tabstack(api_key="test_key") client._http_client._client = mock_httpx_client 
result = await client.extract.markdown(url="https://example.com") @@ -116,7 +116,7 @@ async def test_generate_json_integration(self, mocker: Any) -> None: mock_httpx_client = mocker.AsyncMock() mock_httpx_client.post.return_value = mock_response - client = TABStack(api_key="test_key") + client = Tabstack(api_key="test_key") client._http_client._client = mock_httpx_client schema = {"type": "object", "properties": {"summary": {"type": "string"}}} @@ -146,11 +146,11 @@ async def mock_aiter_bytes(chunk_size: int): # type: ignore mock_httpx_client = mocker.AsyncMock() mock_httpx_client.stream = mocker.MagicMock(return_value=mock_stream_cm) - client = TABStack(api_key="test_key") + client = Tabstack(api_key="test_key") client._http_client._client = mock_httpx_client events = [] - async for event in client.automate.execute(task="Test", url="https://example.com"): + async for event in client.agent.automate(task="Test", url="https://example.com"): events.append(event) assert len(events) >= 1 diff --git a/tests/test_client_sync.py b/tests/test_client_sync.py index 02899b4..6c6ba70 100644 --- a/tests/test_client_sync.py +++ b/tests/test_client_sync.py @@ -1,61 +1,61 @@ -"""Tests for TABStackSync client.""" +"""Tests for TabstackSync client.""" from typing import Any import pytest -from tabstack import TABStackSync -from tabstack.automate_sync import AutomateSync +from tabstack import TabstackSync +from tabstack.agent_sync import AgentSync from tabstack.extract_sync import ExtractSync from tabstack.generate_sync import GenerateSync -class TestTABStackSyncInitialization: - """Tests for TABStackSync client initialization.""" +class TestTabstackSyncInitialization: + """Tests for TabstackSync client initialization.""" def test_initialization_with_api_key(self) -> None: """Test client initialization with API key.""" - client = TABStackSync(api_key="test_key_123") + client = TabstackSync(api_key="test_key_123") assert client._http_client.api_key == "test_key_123" def 
test_initialization_with_custom_base_url(self) -> None: """Test client initialization with custom base URL.""" - client = TABStackSync(api_key="test_key", base_url="https://custom.api.com") + client = TabstackSync(api_key="test_key", base_url="https://custom.api.com") assert client._http_client.base_url == "https://custom.api.com" def test_initialization_missing_api_key(self) -> None: """Test initialization without API key raises error.""" with pytest.raises(TypeError): - TABStackSync() # type: ignore + TabstackSync() # type: ignore def test_operators_are_initialized(self) -> None: """Test all operators are properly initialized.""" - client = TABStackSync(api_key="test_key") + client = TabstackSync(api_key="test_key") assert isinstance(client.extract, ExtractSync) assert isinstance(client.generate, GenerateSync) - assert isinstance(client.automate, AutomateSync) + assert isinstance(client.agent, AgentSync) def test_operators_share_http_client(self) -> None: """Test all operators share the same HTTP client.""" - client = TABStackSync(api_key="test_key") + client = TabstackSync(api_key="test_key") # All operators should use the same HTTP client instance assert client.extract._http is client._http_client assert client.generate._http is client._http_client - assert client.automate._http is client._http_client + assert client.agent._http is client._http_client -class TestTABStackSyncContextManager: +class TestTabstackSyncContextManager: """Tests for sync context manager support.""" def test_context_manager_usage(self) -> None: - """Test using TABStackSync as context manager.""" - with TABStackSync(api_key="test_key") as client: - assert isinstance(client, TABStackSync) + """Test using TabstackSync as context manager.""" + with TabstackSync(api_key="test_key") as client: + assert isinstance(client, TabstackSync) assert isinstance(client.extract, ExtractSync) def test_context_manager_closes_http_client(self, mocker: Any) -> None: """Test context manager closes HTTP 
client.""" - client = TABStackSync(api_key="test_key") + client = TabstackSync(api_key="test_key") # Mock the close method mock_close = mocker.Mock() @@ -68,7 +68,7 @@ def test_context_manager_closes_http_client(self, mocker: Any) -> None: def test_manual_close(self, mocker: Any) -> None: """Test manually closing the client.""" - client = TABStackSync(api_key="test_key") + client = TabstackSync(api_key="test_key") mock_close = mocker.Mock() client._http_client.close = mock_close diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index 4f428ef..a3e1010 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -6,24 +6,24 @@ InvalidURLError, ServerError, ServiceUnavailableError, - TABStackError, + TabstackError, UnauthorizedError, ) -class TestTABStackError: - """Tests for base TABStackError.""" +class TestTabstackError: + """Tests for base TabstackError.""" def test_error_with_message_and_status(self) -> None: """Test error initialization with message and status code.""" - error = TABStackError("Test error", status_code=418) + error = TabstackError("Test error", status_code=418) assert str(error) == "Test error" assert error.message == "Test error" assert error.status_code == 418 def test_error_with_message_only(self) -> None: """Test error initialization with message only.""" - error = TABStackError("Test error") + error = TabstackError("Test error") assert str(error) == "Test error" assert error.message == "Test error" assert error.status_code is None @@ -40,9 +40,9 @@ def test_error_initialization(self) -> None: assert error.status_code == 400 def test_inherits_from_tabstack_error(self) -> None: - """Test BadRequestError inherits from TABStackError.""" + """Test BadRequestError inherits from TabstackError.""" error = BadRequestError("Test") - assert isinstance(error, TABStackError) + assert isinstance(error, TabstackError) class TestUnauthorizedError: @@ -61,9 +61,9 @@ def test_error_with_default_message(self) -> None: assert 
error.status_code == 401 def test_inherits_from_tabstack_error(self) -> None: - """Test UnauthorizedError inherits from TABStackError.""" + """Test UnauthorizedError inherits from TabstackError.""" error = UnauthorizedError() - assert isinstance(error, TABStackError) + assert isinstance(error, TabstackError) class TestInvalidURLError: @@ -82,9 +82,9 @@ def test_error_with_default_message(self) -> None: assert error.status_code == 422 def test_inherits_from_tabstack_error(self) -> None: - """Test InvalidURLError inherits from TABStackError.""" + """Test InvalidURLError inherits from TabstackError.""" error = InvalidURLError() - assert isinstance(error, TABStackError) + assert isinstance(error, TabstackError) class TestServerError: @@ -103,9 +103,9 @@ def test_error_with_default_message(self) -> None: assert error.status_code == 500 def test_inherits_from_tabstack_error(self) -> None: - """Test ServerError inherits from TABStackError.""" + """Test ServerError inherits from TabstackError.""" error = ServerError() - assert isinstance(error, TABStackError) + assert isinstance(error, TabstackError) class TestServiceUnavailableError: @@ -124,9 +124,9 @@ def test_error_with_default_message(self) -> None: assert error.status_code == 503 def test_inherits_from_tabstack_error(self) -> None: - """Test ServiceUnavailableError inherits from TABStackError.""" + """Test ServiceUnavailableError inherits from TabstackError.""" error = ServiceUnavailableError() - assert isinstance(error, TABStackError) + assert isinstance(error, TabstackError) class TestAPIError: @@ -140,6 +140,6 @@ def test_error_with_custom_status(self) -> None: assert error.status_code == 429 def test_inherits_from_tabstack_error(self) -> None: - """Test APIError inherits from TABStackError.""" + """Test APIError inherits from TabstackError.""" error = APIError("Test", 418) - assert isinstance(error, TABStackError) + assert isinstance(error, TabstackError) diff --git a/tests/test_extract.py b/tests/test_extract.py 
index f41d862..13ce6ce 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -5,7 +5,7 @@ import pytest from tabstack.extract import Extract -from tabstack.types import JsonResponse, MarkdownResponse, SchemaResponse +from tabstack.types import JsonResponse, MarkdownResponse class TestExtractMarkdown: @@ -68,42 +68,6 @@ async def test_markdown_with_nocache(self, mocker: Any) -> None: ) -class TestExtractSchema: - """Tests for schema generation.""" - - async def test_schema_generation( - self, mocker: Any, mock_schema_response: dict[str, Any] - ) -> None: - """Test schema generation from URL.""" - mock_http = mocker.AsyncMock() - mock_http.post.return_value = mock_schema_response - - extract = Extract(mock_http) - result = await extract.schema(url="https://example.com", instructions="Extract products") - - assert isinstance(result, SchemaResponse) - assert result.schema == mock_schema_response - assert "properties" in result.schema - mock_http.post.assert_called_once_with( - "v1/extract/json/schema", - { - "url": "https://example.com", - "instructions": "Extract products", - }, - ) - - async def test_schema_with_nocache(self, mocker: Any) -> None: - """Test schema generation with nocache flag.""" - mock_http = mocker.AsyncMock() - mock_http.post.return_value = {"type": "object", "properties": {}} - - extract = Extract(mock_http) - await extract.schema(url="https://example.com", instructions="Test", nocache=True) - - call_args = mock_http.post.call_args - assert call_args[0][1]["nocache"] is True - - class TestExtractJson: """Tests for JSON extraction.""" diff --git a/tests/test_extract_sync.py b/tests/test_extract_sync.py index 144810f..536caf2 100644 --- a/tests/test_extract_sync.py +++ b/tests/test_extract_sync.py @@ -5,7 +5,7 @@ import pytest from tabstack.extract_sync import ExtractSync -from tabstack.types import JsonResponse, MarkdownResponse, SchemaResponse +from tabstack.types import JsonResponse, MarkdownResponse class TestExtractSyncMarkdown: @@ 
-68,40 +68,6 @@ def test_markdown_with_nocache(self, mocker: Any) -> None: ) -class TestExtractSyncSchema: - """Tests for schema generation.""" - - def test_schema_generation(self, mocker: Any, mock_schema_response: dict[str, Any]) -> None: - """Test schema generation from URL.""" - mock_http = mocker.Mock() - mock_http.post.return_value = mock_schema_response - - extract = ExtractSync(mock_http) - result = extract.schema(url="https://example.com", instructions="Extract products") - - assert isinstance(result, SchemaResponse) - assert result.schema == mock_schema_response - assert "properties" in result.schema - mock_http.post.assert_called_once_with( - "v1/extract/json/schema", - { - "url": "https://example.com", - "instructions": "Extract products", - }, - ) - - def test_schema_with_nocache(self, mocker: Any) -> None: - """Test schema generation with nocache flag.""" - mock_http = mocker.Mock() - mock_http.post.return_value = {"type": "object", "properties": {}} - - extract = ExtractSync(mock_http) - extract.schema(url="https://example.com", instructions="Test", nocache=True) - - call_args = mock_http.post.call_args - assert call_args[0][1]["nocache"] is True - - class TestExtractSyncJson: """Tests for JSON extraction.""" diff --git a/tests/test_integration.py b/tests/test_integration.py index 4332256..b99621f 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -4,66 +4,7 @@ import pytest -from tabstack import TABStack - - -class TestSchemaGenerationToExtraction: - """Test workflow: schema generation → data extraction.""" - - async def test_generate_schema_then_extract_data(self, mocker: Any) -> None: - """Test generating a schema and then using it to extract data.""" - # Mock HTTP responses - mock_response_1 = mocker.Mock() - mock_response_1.status_code = 200 - # Schema generation response - schema = { - "type": "object", - "properties": { - "items": { - "type": "array", - "items": { - "type": "object", - "properties": { - "title": {"type": 
"string"}, - "price": {"type": "number"}, - }, - }, - } - }, - } - mock_response_1.json.return_value = schema - mock_response_1.content = b"{}" - - # Extraction response - mock_response_2 = mocker.Mock() - mock_response_2.status_code = 200 - extracted_data = { - "items": [ - {"title": "Product 1", "price": 19.99}, - {"title": "Product 2", "price": 29.99}, - ] - } - mock_response_2.json.return_value = extracted_data - mock_response_2.content = b"{}" - - mock_httpx_client = mocker.AsyncMock() - mock_httpx_client.post.side_effect = [mock_response_1, mock_response_2] - - async with TABStack(api_key="test_key") as tabs: - tabs._http_client._client = mock_httpx_client - - # Step 1: Generate schema - schema_result = await tabs.extract.schema( - url="https://example.com/products", instructions="Extract product list" - ) - - # Step 2: Use schema to extract data - data_result = await tabs.extract.json( - url="https://example.com/products", schema=schema_result.schema - ) - - assert len(data_result.data["items"]) == 2 - assert data_result.data["items"][0]["title"] == "Product 1" +from tabstack import Tabstack class TestExtractTransformWorkflow: @@ -92,7 +33,7 @@ async def test_extract_markdown_then_transform(self, mocker: Any) -> None: mock_httpx_client = mocker.AsyncMock() mock_httpx_client.post.side_effect = [mock_response_1, mock_response_2] - async with TABStack(api_key="test_key") as tabs: + async with Tabstack(api_key="test_key") as tabs: tabs._http_client._client = mock_httpx_client # Step 1: Extract markdown (just to test the workflow) @@ -147,7 +88,7 @@ async def mock_aiter_bytes(chunk_size: int): # type: ignore mock_httpx_client = mocker.AsyncMock() mock_httpx_client.stream = mocker.MagicMock(return_value=mock_stream_cm) - async with TABStack(api_key="test_key") as tabs: + async with Tabstack(api_key="test_key") as tabs: tabs._http_client._client = mock_httpx_client schema = { @@ -156,7 +97,7 @@ async def mock_aiter_bytes(chunk_size: int): # type: ignore } events = 
[] - async for event in tabs.automate.execute( + async for event in tabs.agent.automate( task="Find and extract results", url="https://example.com", schema=schema, @@ -186,7 +127,7 @@ async def test_invalid_url_handling(self, mocker: Any) -> None: mock_httpx_client = mocker.AsyncMock() mock_httpx_client.post.return_value = mock_response - async with TABStack(api_key="test_key") as tabs: + async with Tabstack(api_key="test_key") as tabs: tabs._http_client._client = mock_httpx_client with pytest.raises(InvalidURLError, match="URL not found"): @@ -203,7 +144,7 @@ async def test_unauthorized_handling(self, mocker: Any) -> None: mock_httpx_client = mocker.AsyncMock() mock_httpx_client.post.return_value = mock_response - async with TABStack(api_key="bad_key") as tabs: + async with Tabstack(api_key="bad_key") as tabs: tabs._http_client._client = mock_httpx_client with pytest.raises(UnauthorizedError, match="Invalid API key"): @@ -220,7 +161,7 @@ async def test_server_error_handling(self, mocker: Any) -> None: mock_httpx_client = mocker.AsyncMock() mock_httpx_client.post.return_value = mock_response - async with TABStack(api_key="test_key") as tabs: + async with Tabstack(api_key="test_key") as tabs: tabs._http_client._client = mock_httpx_client with pytest.raises(ServerError, match="Internal server error"): @@ -240,7 +181,7 @@ async def test_multiple_extractions(self, mocker: Any) -> None: mock_httpx_client = mocker.AsyncMock() mock_httpx_client.post.return_value = mock_response - async with TABStack(api_key="test_key") as tabs: + async with Tabstack(api_key="test_key") as tabs: tabs._http_client._client = mock_httpx_client # Perform multiple operations