Skip to content

Commit 34af90a

Browse files
committed
fix(file): use operation for get blocks
1 parent 25b7e70 commit 34af90a

3 files changed

Lines changed: 123 additions & 59 deletions

File tree

intezer_sdk/_api.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -515,6 +515,11 @@ def get_code_reuse_by_code_block(self, sha256: str):
515515
'POST', f'/files/{sha256}/code-reuse-by-code-block'
516516
)
517517

518+
if response.status_code == HTTPStatus.NOT_FOUND:
519+
raise errors.HashDoesNotExistError(response)
520+
if response.status_code == HTTPStatus.CONFLICT:
521+
raise ValueError('sha256 is not a code item')
522+
518523
raise_for_status(response)
519524

520525
return response.json()['result_url']

intezer_sdk/file.py

Lines changed: 22 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,16 @@
1+
import datetime
12
from dataclasses import dataclass
2-
from http import HTTPStatus
3-
from time import sleep
43
from typing import IO
5-
from typing import Optional
64
from typing import List
5+
from typing import Optional
76
from typing import Union
87

8+
from intezer_sdk import _operation
99
from intezer_sdk import consts
10-
from intezer_sdk import errors
10+
from intezer_sdk import operation
1111
from intezer_sdk._api import IntezerApi
12-
from intezer_sdk._api import raise_for_status
13-
from intezer_sdk.api import get_global_api
1412
from intezer_sdk.api import IntezerApiClient
13+
from intezer_sdk.api import get_global_api
1514
from intezer_sdk.index import Index
1615

1716

@@ -25,6 +24,7 @@ class Block:
2524
def is_common(self):
2625
return self.software_type == 'common'
2726

27+
2828
class File:
2929
"""
3030
File is a class for file-related operations including indexing and downloading.
@@ -49,6 +49,7 @@ def __init__(self,
4949
self._sha256 = sha256
5050
self._api = IntezerApi(api or get_global_api())
5151
self._index: Optional[Index] = None
52+
self._operations = {}
5253

5354
@property
5455
def sha256(self) -> str:
@@ -154,40 +155,25 @@ def download(self,
154155

155156
self._api.download_file_by_sha256(self._sha256, path, output_stream, password_protection)
156157

157-
def _get_result_from_task(self, result_url: str, sleep_time: int):
158-
response = self._api.api.request_with_refresh_expired_access_token(
159-
'GET', result_url)
160-
161-
if response.status_code == HTTPStatus.NOT_FOUND:
162-
raise errors.HashDoesNotExistError(response)
163-
if response.status_code == HTTPStatus.CONFLICT:
164-
raise ValueError('sha256 is not a code item')
165-
166-
while response.status_code == HTTPStatus.ACCEPTED:
167-
sleep(sleep_time)
168-
response = self._api.api.request_with_refresh_expired_access_token(
169-
'GET', result_url)
170-
raise_for_status(response)
171-
return response.json()['result']
172-
173-
def get_code_blocks(self, interval: int = consts.CHECK_STATUS_INTERVAL) -> List[Block]:
174-
'''
158+
def get_code_blocks(self,
159+
wait: Union[bool, int] = False,
160+
wait_timeout: Optional[datetime.timedelta] = None) -> operation.Operation:
161+
"""
175162
Retrieves a report containing information about reused code blocks for the given SHA-256 hash.
176163
177-
:param interval: The interval to wait between checks.
164+
:param wait: Should wait until the operation completes.
165+
:param wait_timeout: Maximum duration to wait for operation completion.
178166
179167
Returns:
180-
List[Block]: A sorted list of Block objects representing the code blocks found in the analysis.
181-
'''
168+
operation.Operation: An operation object that will contain the code blocks result.
169+
"""
182170
if not self._sha256:
183171
raise ValueError('Code block report is only supported for sha256-based files')
184-
172+
185173
result_url = self._api.get_code_reuse_by_code_block(self._sha256)
186-
# This endpoint acts different. We don't get a status and instead have to use
187-
# the HTTP status code to wait for the report.
188-
result = self._get_result_from_task(result_url, interval)
189-
blocks: list[Block] = []
190-
for address, block in result['blocks'].items():
191-
blocks.append(
192-
Block(int(address), block['software_type'], block['code_reuse']))
193-
return sorted(blocks, key=lambda b: b.address)
174+
return _operation.handle_operation(self._operations,
175+
self._api,
176+
'Code blocks',
177+
result_url,
178+
wait,
179+
wait_timeout)

tests/unit/test_file.py

Lines changed: 96 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import datetime
22
import io
33
from http import HTTPStatus
4-
import json
5-
import os
64
from unittest.mock import mock_open
75
from unittest.mock import patch
86

@@ -281,29 +279,104 @@ def test_download_to_path_succeeds(self):
281279
mock_file.assert_called_once()
282280
mock_file().write.assert_called_with(b'file_content')
283281

284-
def test_code_reuse_by_block(self):
285-
TEST_HASH = '73c677dd3b264e7eb80e26e78ac9df1dba30915b5ce3b1bc1c83db52b9c6b30e'
286-
287-
def load_response_json(file_name: str) -> dict:
288-
path_to_file = os.path.join(os.path.dirname(__file__), '..', 'resources', file_name)
289-
with open(path_to_file, 'rb') as file:
290-
return json.load(file)
291-
282+
def test_get_code_blocks_returns_operation_when_wait_is_true(self):
283+
# Arrange
284+
test_hash = '73c677dd3b264e7eb80e26e78ac9df1dba30915b5ce3b1bc1c83db52b9c6b30e'
285+
result_url = f'/analyses/51ea282b-0542-4578-a44a-e60fdfb0d3ec/code-reuse-by-code-block'
286+
result = {
287+
'blocks': {
288+
'101220': {
289+
'code_reuse': ['Common'],
290+
'software_type': 'common'
291+
},
292+
'101244': {
293+
'code_reuse': ['WannaCry'],
294+
'software_type': 'malware'
295+
}
296+
}
297+
}
298+
292299
with responses.RequestsMock() as mock:
293300
mock.post(
294-
url=consts.ANALYZE_URL +
295-
f'/api/v2-0/files/{TEST_HASH}/code-reuse-by-code-block',
296-
status=HTTPStatus.OK,
297-
json=load_response_json('code_reuse_block_response.json'))
301+
url=f'{self.full_url}/files/{test_hash}/code-reuse-by-code-block',
302+
status=HTTPStatus.OK,
303+
json={'result_url': result_url})
298304
mock.get(
299-
url=consts.ANALYZE_URL +
300-
'/api/v2-0/analyses/51ea282b-0542-4578-a44a-e60fdfb0d3ec/code-reuse-by-code-block',
301-
status=HTTPStatus.OK,
302-
json=load_response_json('code_reuse_block_report.json'))
305+
url=f'{self.full_url}{result_url}',
306+
status=HTTPStatus.OK,
307+
json={'status': 'succeeded', 'result': result})
308+
309+
file_object = File(sha256=test_hash)
310+
311+
# Act
312+
operation = file_object.get_code_blocks(wait=True)
313+
314+
# Assert
315+
self.assertEqual(operation.status, consts.AnalysisStatusCode.FINISHED)
316+
self.assertIsNotNone(operation.result)
317+
self.assertIn('blocks', operation.result)
318+
self.assertEqual(len(operation.result['blocks']), 2)
319+
self.assertEqual(operation.result['blocks']['101220']['software_type'], 'common')
320+
self.assertEqual(operation.result['blocks']['101244']['software_type'], 'malware')
321+
322+
def test_get_code_blocks_for_file_path_raises_value_error(self):
323+
# Arrange
324+
file_obj = File(file_path='/path/to/file')
325+
326+
# Act + Assert
327+
with self.assertRaises(ValueError):
328+
file_obj.get_code_blocks()
329+
330+
def test_get_code_blocks_without_wait_returns_operation_in_progress(self):
331+
# Arrange
332+
test_hash = 'a' * 64
333+
result_url = f'/analyses/test-id/code-reuse-by-code-block'
334+
335+
with responses.RequestsMock() as mock:
336+
mock.post(
337+
url=f'{self.full_url}/files/{test_hash}/code-reuse-by-code-block',
338+
status=HTTPStatus.OK,
339+
json={'result_url': result_url})
303340

304-
file_object = File(sha256=TEST_HASH)
305-
report = file_object.get_code_blocks()
341+
file_object = File(sha256=test_hash)
306342

307-
self.assertEqual(len(report), 2527)
308-
self.assertEqual(len([x for x in report if x.is_common]), 1371)
309-
self.assertEqual(len([x for x in report if x.software_type == 'malware']), 171)
343+
# Act
344+
operation = file_object.get_code_blocks()
345+
346+
# Assert
347+
self.assertEqual(operation.status, consts.AnalysisStatusCode.IN_PROGRESS)
348+
self.assertIsNone(operation.result)
349+
350+
def test_get_code_blocks_raises_hash_does_not_exist_error_when_file_not_found(self):
351+
# Arrange
352+
test_hash = 'a' * 64
353+
354+
with responses.RequestsMock() as mock:
355+
mock.post(
356+
url=f'{self.full_url}/files/{test_hash}/code-reuse-by-code-block',
357+
status=HTTPStatus.NOT_FOUND,
358+
json={'error': 'File not found'})
359+
360+
file_object = File(sha256=test_hash)
361+
362+
# Act + Assert
363+
with self.assertRaises(errors.HashDoesNotExistError):
364+
file_object.get_code_blocks()
365+
366+
def test_get_code_blocks_raises_value_error_when_file_is_not_code_item(self):
367+
# Arrange
368+
test_hash = 'a' * 64
369+
370+
with responses.RequestsMock() as mock:
371+
mock.post(
372+
url=f'{self.full_url}/files/{test_hash}/code-reuse-by-code-block',
373+
status=HTTPStatus.CONFLICT,
374+
json={'error': 'Not a code item'})
375+
376+
file_object = File(sha256=test_hash)
377+
378+
# Act + Assert
379+
with self.assertRaises(ValueError) as context:
380+
file_object.get_code_blocks()
381+
382+
self.assertEqual(str(context.exception), 'sha256 is not a code item')

0 commit comments

Comments
 (0)