Skip to content

Commit 8ea8f52

Browse files
authored
✨ add optional features (#203)
1 parent 625d428 commit 8ea8f52

File tree

8 files changed

+132
-48
lines changed

8 files changed

+132
-48
lines changed

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,6 @@
33
<!--- Why is this change required? What problem does it solve? -->
44

55

6-
## Related Issue
7-
<!--- If suggesting a new feature or change, please discuss it in an issue first -->
8-
<!--- If fixing a bug, there should be an issue describing it with steps to reproduce -->
9-
<!--- Please link to the issue here: -->
10-
11-
12-
## How Has This Been Tested
13-
<!--- Please describe in detail how you tested your changes. -->
14-
<!--- Include details of your testing environment, and the tests you ran to -->
15-
<!--- see how your change affects other areas of the code, etc. -->
16-
17-
186
## Types of changes
197
<!--- What types of changes does your code introduce? Put an `x` in all the boxes that apply: -->
208

.github/workflows/_test-integrations.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,6 @@ jobs:
2020
- "macos-latest"
2121
ruby:
2222
- "3.0"
23-
- "3.1"
24-
- "3.2"
25-
- "3.3"
2623
- "3.4"
2724
steps:
2825
- uses: actions/checkout@v4

docs/code_samples/default_v2.txt

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,18 @@ mindee_client = Mindee::ClientV2.new(api_key: api_key)
1111
inference_params = Mindee::Input::InferenceParameters.new(
1212
# ID of the model, required.
1313
model_id,
14-
# If set to `true`, will enable Retrieval-Augmented Generation.
15-
rag: false,
14+
15+
# Options: set to `true` or `false` to override defaults
16+
17+
# Enhance extraction accuracy with Retrieval-Augmented Generation.
18+
rag: nil,
19+
# Extract the full text content from the document as strings.
20+
raw_text: nil,
21+
# Calculate bounding box polygons for all fields.
22+
polygon: nil,
23+
# Boost the precision and accuracy of all extractions.
24+
# Calculate confidence scores for all fields.
25+
confidence: nil,
1626
)
1727

1828
# Load a file from disk

lib/mindee/http/mindee_api_v2.rb

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,14 @@ def enqueue(input_source, params)
123123
file_data, file_metadata = input_source.read_contents(close: params.close_file)
124124
[['file', file_data, file_metadata]] # : Array[untyped]
125125
end
126-
form_data.push ['model_id', params.model_id]
127-
form_data.push ['rag', 'true'] if params.rag
126+
form_data.push(['model_id', params.model_id])
127+
128+
# deal with optional features
129+
form_data.push(['rag', params.rag.to_s]) unless params.rag.nil?
130+
form_data.push(['raw_text', params.raw_text.to_s]) unless params.raw_text.nil?
131+
form_data.push(['polygon', params.polygon.to_s]) unless params.polygon.nil?
132+
form_data.push(['confidence', params.confidence.to_s]) unless params.confidence.nil?
133+
128134
form_data.push ['file_alias', params.file_alias] if params.file_alias
129135
unless params.webhook_ids.nil? || params.webhook_ids.empty?
130136
form_data.push ['webhook_ids', params.webhook_ids.join(',')]

lib/mindee/input/inference_parameters.rb

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,21 @@ class InferenceParameters
77
# @return [String] ID of the model (required).
88
attr_reader :model_id
99

10-
# @return [Boolean, nil] Enable Retrieval-Augmented Generation.
10+
# @return [Boolean, nil] Enhance extraction accuracy with Retrieval-Augmented Generation.
1111
attr_reader :rag
1212

13+
# @return [Boolean, nil] Extract the full text content from the document as strings,
14+
# and fill the `raw_text` attribute.
15+
attr_reader :raw_text
16+
17+
# @return [Boolean, nil] Calculate bounding box polygons for all fields,
18+
# and fill their `locations` attribute.
19+
attr_reader :polygon
20+
21+
# @return [Boolean, nil] Boost the precision and accuracy of all extractions.
22+
# Calculate confidence scores for all fields, and fill their `confidence` attribute.
23+
attr_reader :confidence
24+
1325
# @return [String, nil] Optional alias for the file.
1426
attr_reader :file_alias
1527

@@ -22,21 +34,39 @@ class InferenceParameters
2234
# @return [Boolean, nil] Whether to close the file after parsing.
2335
attr_reader :close_file
2436

37+
# rubocop:disable Metrics/ParameterLists
2538
# @param [String] model_id ID of the model
26-
# @param [FalseClass] rag Whether to enable rag.
39+
# @param [Boolean, nil] rag Whether to enable RAG.
40+
# @param [Boolean, nil] raw_text Whether to enable raw text.
41+
# @param [Boolean, nil] polygon Whether to enable polygons.
42+
# @param [Boolean, nil] confidence Whether to enable confidence scores.
2743
# @param [nil] file_alias File alias, if applicable.
2844
# @param [nil] webhook_ids
2945
# @param [nil] polling_options
3046
# @param [TrueClass] close_file
31-
def initialize(model_id, rag: false, file_alias: nil, webhook_ids: nil, polling_options: nil, close_file: true)
47+
def initialize(
48+
model_id,
49+
rag: nil,
50+
raw_text: nil,
51+
polygon: nil,
52+
confidence: nil,
53+
file_alias: nil,
54+
webhook_ids: nil,
55+
polling_options: nil,
56+
close_file: true
57+
)
3258
raise Errors::MindeeInputError, 'Model ID is required.' if model_id.empty? || model_id.nil?
3359

3460
@model_id = model_id
35-
@rag = rag || false
61+
@rag = rag
62+
@raw_text = raw_text
63+
@polygon = polygon
64+
@confidence = confidence
3665
@file_alias = file_alias
3766
@webhook_ids = webhook_ids || []
3867
@polling_options = get_clean_polling_options(polling_options)
3968
@close_file = close_file.nil? || close_file
69+
# rubocop:enable Metrics/ParameterLists
4070
end
4171

4272
# Validates the parameters for async auto-polling
@@ -70,7 +100,10 @@ def self.from_hash(params: {})
70100
end
71101

72102
model_id = params.fetch(:model_id)
73-
rag = params.fetch(:rag, false)
103+
rag = params.fetch(:rag, nil)
104+
raw_text = params.fetch(:raw_text, nil)
105+
polygon = params.fetch(:polygon, nil)
106+
confidence = params.fetch(:confidence, nil)
74107
file_alias = params.fetch(:file_alias, nil)
75108
webhook_ids = params.fetch(:webhook_ids, [])
76109
polling_options_input = params.fetch(:page_options, PollingOptions.new)
@@ -83,8 +116,8 @@ def self.from_hash(params: {})
83116
)
84117
end
85118
close_file = params.fetch(:close_file, true)
86-
InferenceParameters.new(model_id, rag: rag, file_alias: file_alias, webhook_ids: webhook_ids,
87-
close_file: close_file)
119+
InferenceParameters.new(model_id, rag: rag, raw_text: raw_text, polygon: polygon, confidence: confidence,
120+
file_alias: file_alias, webhook_ids: webhook_ids, close_file: close_file)
88121
end
89122

90123
private

sig/mindee/input/inference_parameters.rbs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,21 @@ module Mindee
33
module Input
44
class InferenceParameters
55
attr_reader close_file: bool
6+
attr_reader confidence: bool?
67
attr_reader file_alias: String?
78
attr_reader model_id: String
89
attr_reader polling_options: PollingOptions
10+
attr_reader polygon: bool?
911
attr_reader rag: bool?
12+
attr_reader raw_text: bool?
1013
attr_reader webhook_ids: Array[String]?
1114

1215
def initialize: (
1316
String,
1417
?rag: bool?,
18+
?raw_text: bool?,
19+
?polygon: bool?,
20+
?confidence: bool?,
1521
?file_alias: String?,
1622
?webhook_ids: Array[String]?,
1723
?polling_options: Hash[Symbol | String, untyped] | PollingOptions?,

spec/client_v2_integration.rb

Lines changed: 62 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,49 +17,90 @@
1717
max_retries: 80
1818
)
1919

20-
params = Mindee::Input::InferenceParameters.new(model_id,
21-
rag: false,
22-
file_alias: 'ruby-integration-test',
23-
polling_options: polling)
20+
params = Mindee::Input::InferenceParameters.new(
21+
model_id,
22+
rag: false,
23+
raw_text: true,
24+
polygon: false,
25+
confidence: false,
26+
file_alias: 'ruby-integration-test',
27+
polling_options: polling
28+
)
2429

2530
response = client.enqueue_and_get_inference(input, params)
2631

2732
expect(response).not_to be_nil
2833
expect(response.inference).not_to be_nil
2934

30-
expect(response.inference.file).not_to be_nil
31-
expect(response.inference.file.name).to eq('multipage_cut-2.pdf')
35+
file = response.inference.file
36+
expect(file).not_to be_nil
37+
expect(file).to be_a(Mindee::Parsing::V2::InferenceFile)
38+
expect(file.name).to eq('multipage_cut-2.pdf')
39+
expect(file.page_count).to eq(2)
40+
41+
model = response.inference.model
42+
expect(model).not_to be_nil
43+
expect(model).to be_a(Mindee::Parsing::V2::InferenceModel)
44+
expect(model.id).to eq(model_id)
3245

33-
expect(response.inference.model).not_to be_nil
34-
expect(response.inference.model.id).to eq(model_id)
46+
active_options = response.inference.active_options
47+
expect(active_options).not_to be_nil
48+
expect(active_options).to be_a(Mindee::Parsing::V2::InferenceActiveOptions)
49+
expect(active_options.raw_text).to eq(true)
50+
expect(active_options.polygon).to eq(false)
51+
expect(active_options.confidence).to eq(false)
52+
expect(active_options.rag).to eq(false)
3553

36-
expect(response.inference.active_options).not_to be_nil
54+
result = response.inference.result
55+
expect(result).not_to be_nil
3756

38-
expect(response.inference.result).not_to be_nil
39-
expect(response.inference.result.raw_text).to be_nil
40-
expect(response.inference.result.fields).not_to be_nil
57+
expect(result.raw_text).not_to be_nil
58+
expect(result.raw_text.pages.length).to eq(2)
59+
60+
expect(result.fields).not_to be_nil
4161
end
4262

4363
it 'parses a filled single-page image successfully' do
4464
src_path = File.join(__dir__ || './', 'data', 'products', 'financial_document', 'default_sample.jpg')
4565
input = Mindee::Input::Source::FileInputSource.new(File.open(src_path, 'rb'), 'default_sample.jpg')
4666

47-
params = Mindee::Input::InferenceParameters.new(model_id,
48-
rag: false,
49-
file_alias: 'ruby-integration-test')
67+
params = Mindee::Input::InferenceParameters.new(
68+
model_id,
69+
raw_text: false,
70+
polygon: false,
71+
confidence: false,
72+
rag: false,
73+
file_alias: 'ruby-integration-test'
74+
)
5075

5176
response = client.enqueue_and_get_inference(input, params)
5277
expect(response).not_to be_nil
5378

54-
expect(response.inference).not_to be_nil
55-
expect(response.inference.file.name).to eq('default_sample.jpg')
79+
file = response.inference.file
80+
expect(file).not_to be_nil
81+
expect(file).to be_a(Mindee::Parsing::V2::InferenceFile)
82+
expect(file.name).to eq('default_sample.jpg')
83+
expect(file.page_count).to eq(1)
84+
85+
model = response.inference.model
86+
expect(model).not_to be_nil
87+
expect(model).to be_a(Mindee::Parsing::V2::InferenceModel)
88+
expect(model.id).to eq(model_id)
89+
90+
active_options = response.inference.active_options
91+
expect(active_options).not_to be_nil
92+
expect(active_options).to be_a(Mindee::Parsing::V2::InferenceActiveOptions)
93+
expect(active_options.raw_text).to eq(false)
94+
expect(active_options.polygon).to eq(false)
95+
expect(active_options.confidence).to eq(false)
96+
expect(active_options.rag).to eq(false)
5697

57-
expect(response.inference.model).not_to be_nil
58-
expect(response.inference.model.id).to eq(model_id)
98+
result = response.inference.result
99+
expect(result).not_to be_nil
59100

60-
expect(response.inference.active_options).not_to be_nil
101+
expect(result.raw_text).to be_nil
61102

62-
fields = response.inference.result.fields
103+
fields = result.fields
63104
expect(fields).not_to be_nil
64105
expect(fields['supplier_name']).not_to be_nil
65106
expect(fields['supplier_name'].value).to eq('John Smith')

spec/parsing/v2/inference_spec.rb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,12 @@ def load_v2_inference(resource_path)
2727
describe 'simple' do
2828
it 'loads a blank inference with valid properties' do
2929
response = load_v2_inference(blank_path)
30-
fields = response.inference.result.fields
3130

31+
fields = response.inference.result.fields
3232
expect(fields).not_to be_empty
33+
expect(fields).to be_a(Mindee::Parsing::V2::Field::InferenceFields)
3334
expect(fields.size).to eq(21)
35+
3436
expect(fields).to have_key('taxes')
3537
expect(fields['taxes']).not_to be_nil
3638
expect(fields['taxes']).to be_a(list_field)
@@ -180,6 +182,7 @@ def load_v2_inference(resource_path)
180182
expect(active_options.raw_text).to eq(true)
181183

182184
fields = response.inference.result.fields
185+
expect(fields).to be_a(Mindee::Parsing::V2::Field::InferenceFields)
183186

184187
expect(fields['field_simple_string']).to be_a(simple_field)
185188
expect(fields['field_simple_string'].value).to eq('field_simple_string-value')

0 commit comments

Comments
 (0)