Skip to content

Commit ab1f275

Browse files
✨ add support for Client V2 (#194)
1 parent d0bc809 commit ab1f275

File tree

314 files changed

+3279
-712
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

314 files changed

+3279
-712
lines changed

.github/workflows/_test-code-samples.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,4 @@ jobs:
3232
env:
3333
MINDEE_LOG_LEVEL: DEBUG
3434
run: |
35-
./spec/test_code_samples.sh ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} ${{ secrets.MINDEE_API_KEY_SE_TESTS }}
35+
./spec/test_code_samples.sh ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} ${{ secrets.MINDEE_API_KEY_SE_TESTS }} ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }}

.github/workflows/_test-integrations.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ jobs:
5656
env:
5757
MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }}
5858
WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }}
59+
MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }}
60+
MINDEE_V2_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }}
61+
MINDEE_V2_SE_TESTS_BLANK_PDF_URL: ${{ secrets.MINDEE_V2_SE_TESTS_BLANK_PDF_URL }}
5962
MINDEE_LOG_LEVEL: DEBUG
6063
run: |
6164
bundle exec rake integration

.github/workflows/pull-request.yml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,22 @@ name: Pull Request
33
on:
44
pull_request:
55

6+
permissions:
7+
contents: read
8+
pull-requests: read
9+
610
jobs:
711
static_analysis:
8-
uses: mindee/mindee-api-ruby/.github/workflows/_static-analysis.yml@main
12+
uses: ./.github/workflows/_static-analysis.yml
913
test_units:
10-
uses: mindee/mindee-api-ruby/.github/workflows/_test-units.yml@main
14+
uses: ./.github/workflows/_test-units.yml
1115
needs: static_analysis
1216
secrets: inherit
1317
test_integrations:
14-
uses: mindee/mindee-api-ruby/.github/workflows/_test-integrations.yml@main
18+
uses: ./.github/workflows/_test-integrations.yml
1519
needs: test_units
1620
secrets: inherit
1721
test_code_samples:
18-
uses: mindee/mindee-api-ruby/.github/workflows/_test-code-samples.yml@main
22+
uses: ./.github/workflows/_test-code-samples.yml
1923
needs: test_units
2024
secrets: inherit

docs/code_samples/default_v2.txt

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
require 'mindee'
2+
3+
input_path = '/path/to/the/file.ext'
4+
api_key = 'MY_API_KEY'
5+
model_id = 'MY_MODEL_ID'
6+
7+
# Init a new client
8+
mindee_client = Mindee::ClientV2.new(api_key: api_key)
9+
10+
# Set inference parameters
11+
params = Mindee::Input::InferenceParameters.new(
12+
# ID of the model, required.
13+
model_id,
14+
# If set to `true`, enables Retrieval-Augmented Generation (RAG).
15+
rag: false,
16+
)
17+
18+
# Load a file from disk
19+
input_source = Mindee::Input::Source::PathInputSource.new(input_path)
20+
21+
# Send for processing
22+
response = mindee_client.enqueue_and_get_inference(
23+
input_source,
24+
params # Note: this parameter can also be provided as a Hash.
25+
)
26+
27+
# Print a brief summary of the parsed data
28+
puts response.inference

lib/mindee.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# frozen_string_literal: true
22

33
require 'mindee/client'
4+
require 'mindee/client_v2'
5+
require 'mindee/page_options'
46
require 'mindee/logging'
57

68
module Mindee
@@ -54,6 +56,10 @@ module Standard
5456
# Universal fields and functions.
5557
module Universal
5658
end
59+
60+
# V2-specific module.
61+
module V2
62+
end
5763
end
5864

5965
# Document input-related internals.

lib/mindee/client.rb

Lines changed: 11 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,38 +2,17 @@
22

33
require_relative 'input'
44
require_relative 'http'
5-
require_relative 'product'
5+
require_relative 'logging'
6+
require_relative 'page_options'
67
require_relative 'parsing/common/api_response'
78
require_relative 'parsing/common/job'
89
require_relative 'parsing/common/workflow_response'
9-
require_relative 'logging'
10+
require_relative 'product'
1011

1112
# Default owner for products.
1213
OTS_OWNER = 'mindee'
1314

1415
module Mindee
15-
# Class for page options in parse calls.
16-
#
17-
# @!attribute page_indexes [Array[Integer]] Zero-based list of page indexes.
18-
# @!attribute operation [:KEEP_ONLY, :REMOVE] Operation to apply on the document, given the specified page indexes:
19-
# * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
20-
# * `:REMOVE` - remove the specified pages, and keep all others.
21-
# @!attribute on_min_pages [Integer, nil] Apply the operation only if the document has at least this many pages.
22-
class PageOptions
23-
attr_accessor :page_indexes, :operation, :on_min_pages
24-
25-
def initialize(params: {})
26-
params ||= {}
27-
params = params.transform_keys(&:to_sym)
28-
@page_indexes = params.fetch(
29-
:page_indexes,
30-
[] # : Array[Integer]
31-
)
32-
@operation = params.fetch(:operation, :KEEP_ONLY)
33-
@on_min_pages = params.fetch(:on_min_pages, nil)
34-
end
35-
end
36-
3716
# Class for configuration options in parse calls.
3817
#
3918
# @!attribute all_words [bool] Whether to include the full text for each page.
@@ -89,8 +68,9 @@ def initialize(params: {})
8968
# * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
9069
# * `:REMOVE` - remove the specified pages, and keep all others.
9170
# * `:on_min_pages` Apply the operation only if the document has at least this many pages.
71+
# @!attribute close_file [bool, nil] Whether to close the file after sending it. Defaults to true.
9272
class WorkflowOptions
93-
attr_accessor :document_alias, :priority, :full_text, :public_url, :page_options, :rag
73+
attr_accessor :document_alias, :priority, :full_text, :public_url, :page_options, :rag, :close_file
9474

9575
def initialize(params: {})
9676
params = params.transform_keys(&:to_sym)
@@ -102,6 +82,7 @@ def initialize(params: {})
10282
raw_page_options = params.fetch(:page_options, nil)
10383
raw_page_options = PageOptions.new(params: raw_page_options) unless raw_page_options.is_a?(PageOptions)
10484
@page_options = raw_page_options
85+
@close_file = params.fetch(:close_file, true)
10586
end
10687
end
10788

@@ -326,7 +307,7 @@ def execute_workflow(input_source, workflow_id, options: {})
326307
process_pdf_if_required(input_source, opts)
327308
end
328309

329-
workflow_endpoint = Mindee::HTTP::WorkflowEndpoint.new(workflow_id, api_key: @api_key)
310+
workflow_endpoint = Mindee::HTTP::WorkflowEndpoint.new(workflow_id, api_key: @api_key.to_s)
330311
logger.debug("Sending document to workflow '#{workflow_id}'")
331312

332313
prediction, raw_http = workflow_endpoint.execute_workflow(
@@ -455,11 +436,11 @@ def initialize_endpoint(product_class, endpoint_name: '', account_name: '', vers
455436
account_name = fix_account_name(account_name)
456437
version = fix_version(product_class, version)
457438

458-
HTTP::Endpoint.new(account_name, endpoint_name, version, api_key: @api_key)
439+
HTTP::Endpoint.new(account_name, endpoint_name, version, api_key: @api_key.to_s)
459440
end
460441

461442
def fix_endpoint_name(product_class, endpoint_name)
462-
endpoint_name.nil? || endpoint_name.empty? ? product_class.endpoint_name : endpoint_name
443+
endpoint_name.nil? || endpoint_name.empty? ? product_class.endpoint_name.to_s : endpoint_name.to_s
463444
end
464445

465446
def fix_account_name(account_name)
@@ -474,11 +455,11 @@ def fix_account_name(account_name)
474455
def fix_version(product_class, version)
475456
return version unless version.nil? || version.empty?
476457

477-
if product_class.endpoint_version.nil? || product_class.endpoint_version.empty?
458+
if product_class.endpoint_version.nil? || product_class.endpoint_version.to_s.empty?
478459
logger.debug('No version provided for a custom build, will attempt to poll version 1 by default.')
479460
return '1'
480461
end
481-
product_class.endpoint_version
462+
product_class.endpoint_version || ''
482463
end
483464

484465
# If needed, converts the parsing options provided as a hash into a proper ParseOptions object.

lib/mindee/client_v2.rb

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# frozen_string_literal: true
2+
3+
require_relative 'input'
4+
require_relative 'http'
5+
require_relative 'product'
6+
require_relative 'parsing/common/api_response'
7+
require_relative 'parsing/common/job'
8+
require_relative 'parsing/common/workflow_response'
9+
require_relative 'logging'
10+
11+
module Mindee
12+
# Mindee V2 API Client.
13+
class ClientV2
14+
# @return [HTTP::MindeeApiV2]
15+
private attr_reader :mindee_api
16+
17+
# @param api_key [String]
18+
def initialize(api_key: '')
19+
@mindee_api = Mindee::HTTP::MindeeApiV2.new(api_key: api_key)
20+
end
21+
22+
# Retrieves an inference.
23+
# @param inference_id [String]
24+
# @return [Mindee::Parsing::V2::InferenceResponse]
25+
def get_inference(inference_id)
26+
@mindee_api.req_get_inference(inference_id)
27+
end
28+
29+
# Retrieves an inference.
30+
# @param job_id [String]
31+
# @return [Mindee::Parsing::V2::JobResponse]
32+
def get_job(job_id)
33+
@mindee_api.req_get_job(job_id)
34+
end
35+
36+
# Enqueue a document for async parsing.
37+
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
38+
# The source of the input document (local file or URL).
39+
# @param params [Hash, InferenceParameters]
40+
# @return [Mindee::Parsing::V2::JobResponse]
41+
def enqueue_inference(input_source, params)
42+
normalized_params = normalize_inference_parameters(params)
43+
logger.debug("Enqueueing document to model '#{normalized_params.model_id}'.")
44+
45+
@mindee_api.req_post_inference_enqueue(input_source, normalized_params)
46+
end
47+
48+
# Enqueue a document for async parsing and automatically try to retrieve it.
49+
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
50+
# The source of the input document (local file or URL).
51+
# @param params [Hash, InferenceParameters] Parameters for the inference.
52+
# @return [Mindee::Parsing::V2::InferenceResponse]
53+
def enqueue_and_get_inference(input_source, params)
54+
normalized_params = normalize_inference_parameters(params)
55+
normalized_params.validate_async_params
56+
enqueue_response = enqueue_inference(input_source, normalized_params)
57+
58+
if enqueue_response.job.id.nil? || enqueue_response.job.id.empty?
59+
logger.error("Failed enqueueing:\n#{enqueue_response.raw_http}")
60+
raise Mindee::Errors::MindeeError, 'Enqueueing of the document failed.'
61+
end
62+
63+
job_id = enqueue_response.job.id
64+
logger.debug("Successfully enqueued document with job id: #{job_id}.")
65+
66+
sleep(normalized_params.polling_options.initial_delay_sec)
67+
retry_counter = 1
68+
poll_results = get_job(job_id)
69+
70+
while retry_counter < normalized_params.polling_options.max_retries
71+
if poll_results.job.status == 'Failed'
72+
break
73+
elsif poll_results.job.status == 'Processed'
74+
return get_inference(poll_results.job.id)
75+
end
76+
77+
logger.debug(
78+
"Successfully enqueued inference with job id: #{job_id}.\n" \
79+
"Attempt n°#{retry_counter}/#{normalized_params.polling_options.max_retries}.\n" \
80+
"Job status: #{poll_results.job.status}."
81+
)
82+
83+
sleep(normalized_params.polling_options.delay_sec)
84+
poll_results = get_job(job_id)
85+
retry_counter += 1
86+
end
87+
88+
error = poll_results.job.error
89+
unless error.nil?
90+
err_to_raise = Mindee::Errors::MindeeHTTPErrorV2.new(error)
91+
# NOTE: purposefully decoupled from the line above, otherwise rubocop thinks `error` is a `message` param.
92+
raise err_to_raise
93+
end
94+
95+
sec_count = normalized_params.polling_options.delay_sec * retry_counter
96+
raise Mindee::Errors::MindeeError,
97+
"Asynchronous parsing request timed out after #{sec_count} seconds"
98+
end
99+
100+
# If needed, converts the parsing options provided as a hash into a proper InferenceParameters object.
101+
# @param params [Hash, InferenceParameters] Params.
102+
# @return [InferenceParameters]
103+
def normalize_inference_parameters(params)
104+
return params if params.is_a?(Input::InferenceParameters)
105+
106+
Input::InferenceParameters.from_hash(params: params)
107+
end
108+
end
109+
end

lib/mindee/errors.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@
22

33
require_relative 'errors/mindee_error'
44
require_relative 'errors/mindee_http_error'
5+
require_relative 'errors/mindee_http_error_v2'
56
require_relative 'errors/mindee_input_error'
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# frozen_string_literal: true
2+
3+
require_relative 'mindee_error'
4+
5+
module Mindee
  module Errors
    # API V2 HttpError
    class MindeeHTTPErrorV2 < MindeeError
      # @return [Integer]
      attr_reader :status
      # @return [String]
      attr_reader :detail

      # Builds the error from either a parsed error response or a raw hash.
      #
      # @param http_error [Hash, Parsing::V2::ErrorResponse] Error payload. When a Hash is given,
      #   the `status` and `detail` entries are read; keys may be Strings or Symbols.
      def initialize(http_error)
        if http_error.is_a?(Parsing::V2::ErrorResponse)
          @status = http_error.status
          @detail = http_error.detail
        else
          # Normalize keys so that symbol-keyed hashes do not silently produce nil fields.
          fields = http_error.transform_keys(&:to_s)
          @status = fields['status']
          @detail = fields['detail']
        end
        super("HTTP error: #{@status} - #{@detail}")
      end
    end
  end
end

lib/mindee/geometry/point.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ module Geometry
66
# A relative set of coordinates (X, Y) on the document.
77
class Point
88
# @return [Float]
9-
attr_accessor :x
9+
attr_reader :x
1010
# @return [Float]
11-
attr_accessor :y
11+
attr_reader :y
1212

1313
# rubocop:disable Naming/MethodParameterName
1414

0 commit comments

Comments
 (0)