diff --git a/docs/How-to-run-CLI-Usage.md b/docs/How-to-run-CLI-Usage.md index 06e062e..83a2b2a 100644 --- a/docs/How-to-run-CLI-Usage.md +++ b/docs/How-to-run-CLI-Usage.md @@ -22,7 +22,8 @@ The following main commands are currently implemented: - [`discovery`](./How-to-run-discover-measured-patterns.md): discover measured patterns within a project source code - [`manual-discovery`](./How-to-run-manual-discovery.md): execute discovery rules (normally associated to patterns) within a project source code - reporting: create reports about SAST measurement and/or pattern discovery (**CONTINUE**) - - [`sastreport`](./How-to-run-sastreport.md): fetch last SAST measurements for tools against patterns and aggregate in a common csv file + - [`sastreport`](./How-to-run-sastreport.md): fetch last SAST measurements for tools against patterns and aggregate in a common csv file +- [`patternrepair`](./How-to-run-patternrepair.md): Can repair a pattern in your pattern library, i.e. checks the JSON file, creates a README file etc. The following are under-investigation: diff --git a/docs/How-to-run-patternrepair.md b/docs/How-to-run-patternrepair.md new file mode 100644 index 0000000..4ec989f --- /dev/null +++ b/docs/How-to-run-patternrepair.md @@ -0,0 +1,70 @@ +# How to run: Pattern repair + +## Overview + +This command can be used to repair a pattern in your library. At the moment this is only supported for PHP. + +## Command line + +To repair a pattern use: + +```text +usage: tpframework [OPTIONS] COMMAND patternrepair [-h] -l LANGUAGE (-p PATTERN_ID [PATTERN_ID ...] 
| --pattern-range RANGE_START-RANGE_END | -a) [--tp-lib TP_LIB_DIR] + [--output-dir OUTPUT_DIR] [--masking-file MASKING_FILE] [--measurement-results MEASUREMENT_DIR] + [--checkdiscoveryrules-results CHECKDISCOVERYRULES_FILE] [--skip-readme] + +options: + -h, --help show this help message and exit + -l LANGUAGE, --language LANGUAGE + Programming language targeted + -p PATTERN_ID [PATTERN_ID ...], --patterns PATTERN_ID [PATTERN_ID ...] + Specify pattern(s) ID(s) to test for discovery + --pattern-range RANGE_START-RANGE_END + Specify pattern ID range separated by `-` (ex. 10-50) + -a, --all-patterns Test discovery for all available patterns + --tp-lib TP_LIB_DIR Absolute path to alternative pattern library, default resolves to `./testability_patterns` + --output-dir OUTPUT_DIR + Absolute path to the folder where outcomes (e.g., log file, export file if any) will be stored, default resolves to `./out` + --masking-file MASKING_FILE + Absolute path to a json file, that contains a mapping, if the name for some measurement tools should be kept secret, default is None + --measurement-results MEASUREMENT_DIR + Absolute path to the folder where measurement results are stored, default resolves to `./measurements` + --checkdiscoveryrules-results CHECKDISCOVERYRULES_FILE + Absolute path to the csv file, where the results of the `checkdiscoveryrules` command are stored, default resolves to `./checkdiscoveryrules.csv` + --skip-readme If set, the README generation is skipped. +``` + +By default, the `patternrepair` will create a README file for a pattern, where an overview of the pattern is presented together with some measurement results, if available. +For the generation of the README, a few files are mandatory: +First of all, there has to be a csv file, that contains the results of the `checkdiscoveryrules` command for the patterns, that should be repaired. +Second, the results of the `measurement` command in a directory, structured similarly to the pattern library. 
+Additionally you can provide a masking file, that can be used to mask the names of tools used for `measurement`. +The masking file should be a JSON file of the format `{: }`. + +If `--skip-readme` is set, none of the files is required and no new README file will be generated. + +## Example + +`tpframework patternrepair -l php -p 1 --skip-readme` + +This command will take a look at PHP pattern 1 and tries to repair it, without generating a new README file. +During that process it might provide you some feedback about files, that need manual review. +The tool checks for the following things: + +- make sure a pattern JSON file exists +- ensure all relative links are correct +- collect all instances within the pattern path (an instance is identified by a directory, that contains a JSON file in the instance format) +- make sure the pattern name is correct (therefore the pattern name is derived from the directory name) +- check the description field and warn if there is no description +- check the given tags +- validates the pattern JSON against the pattern JSON schema +- for each instance, repairing means: + - ensuring an instance JSON file with the required keys is available + - ensures all relative links exist + - check the scala rule if it exists and if necessary adjust the variable names + - check the description and again warn if there is no description provided + - checks that the field `expectation:expectation` is the opposite of `properties:negative_test_case` + - validates the instance JSON against the instance JSON schema + - for PHP patterns: + - generates new opcode for each php file + - changes source line and sink line in the pattern JSON, according to the comments `// source`, `// sink` in the php file diff --git a/qualitytests/cli/test_interface.py b/qualitytests/cli/test_interface.py index af6fa88..095101a 100644 --- a/qualitytests/cli/test_interface.py +++ b/qualitytests/cli/test_interface.py @@ -1,5 +1,6 @@ from pathlib import Path from typing import 
Dict +from unittest.mock import patch, call import json import sys @@ -13,10 +14,9 @@ from qualitytests.qualitytests_utils import join_resources_path, create_mock_cpg, \ get_result_output_dir, get_logfile_path, in_logfile, init_measure_test, \ - init_sastreport_test, init_test + init_sastreport_test, init_test, create_pattern -@pytest.mark.asyncio class TestInterface: @@ -253,3 +253,45 @@ def test_check_discovery_rules_3(self, tmp_path, capsys, mocker): logfile = get_logfile_path(captured_out_lines) assert logfile and logfile.is_file() + + def test_repair_patterns_not_including_readme(self): + sample_tp_lib = join_resources_path("sample_patlib") + test_pattern = create_pattern() + with patch("core.pattern.Pattern.init_from_id_and_language") as init_pattern_mock, \ + patch("core.pattern.Pattern.repair") as patternrepair_mock, \ + patch("core.utils.check_file_exist") as check_file_exists_mock, \ + patch("core.utils.check_measurement_results_exist") as measurement_result_exist_mock, \ + patch("pathlib.Path.mkdir") as mkdir_mock: + init_pattern_mock.return_value = test_pattern + interface.repair_patterns("JS", [1,2,3], None, True, Path("measurements"), Path("dr_results.csv"), Path("out"), sample_tp_lib) + + patternrepair_mock.assert_called_with(False, + discovery_rule_results=Path("dr_results.csv"), + measurement_results=Path("measurements"), + masking_file=None) + expected_calls = [call(1, "JS", sample_tp_lib), call(2, "JS", sample_tp_lib), call(3, "JS", sample_tp_lib)] + init_pattern_mock.assert_has_calls(expected_calls) + check_file_exists_mock.assert_not_called() + measurement_result_exist_mock.assert_not_called() + mkdir_mock.assert_called() + + def test_repair_patterns_including_readme(self): + sample_tp_lib = join_resources_path("sample_patlib") + test_pattern = create_pattern() + with patch("core.pattern.Pattern.init_from_id_and_language") as init_pattern_mock, \ + patch("core.pattern.Pattern.repair") as patternrepair_mock, \ 
patch("core.utils.check_file_exist") as check_file_exists_mock, \ + patch("core.utils.check_measurement_results_exist") as measurement_result_exist_mock, \ + patch("pathlib.Path.mkdir") as mkdir_mock: + init_pattern_mock.return_value = test_pattern + interface.repair_patterns("JS", [1,2,3], None, False, Path("measurements"), Path("dr_results.csv"), Path("out"), sample_tp_lib) + + patternrepair_mock.assert_called_with(True, + discovery_rule_results=Path("dr_results.csv"), + measurement_results=Path("measurements"), + masking_file=None) + expected_calls = [call(1, "JS", sample_tp_lib), call(2, "JS", sample_tp_lib), call(3, "JS", sample_tp_lib)] + init_pattern_mock.assert_has_calls(expected_calls) + check_file_exists_mock.assert_called() + measurement_result_exist_mock.assert_called_once() + mkdir_mock.assert_called() diff --git a/qualitytests/cli/test_main.py b/qualitytests/cli/test_main.py index 3c1a832..cecb54a 100644 --- a/qualitytests/cli/test_main.py +++ b/qualitytests/cli/test_main.py @@ -7,16 +7,19 @@ from qualitytests.qualitytests_utils import pyexe, join_resources_path from cli import main +from pathlib import Path + class TestMain: testdir = Path(__file__).parent.parent.resolve() tpf = testdir.parent / "tp_framework/cli/main.py" + sample_tp_lib = str(join_resources_path("sample_patlib")) def test_cli_help_1(self): # process call cmd = pyexe + " {0} -h".format(self.tpf) - pr = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pr = subprocess.Popen(cmd.split(" "), shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) (output, errdata) = pr.communicate() output = output.decode("utf-8") print(output) @@ -122,7 +125,7 @@ def test_cli_measure_4(self, tmp_path, mocker): main.main(['measure', '-p', self.tp1, self.tp2, '--tools', self.tool1, 'whatever', '-l', self.test_lang, - '--tp-lib', str(tmp_path)]) + '--tp-lib', TestMain.sample_tp_lib]) def test_cli_measure_5(self, tmp_path, mocker): @@ -131,7 +134,7 @@ def 
test_cli_measure_5(self, tmp_path, mocker): main.main(['measure', '-p', self.tp1, self.tp2, '--tools', self.tool1, self.tool2, '-l', self.test_lang, - '--tp-lib', str(tmp_path)]) + '--tp-lib', TestMain.sample_tp_lib]) def _init_cli_report(self, mocker): @@ -156,7 +159,7 @@ def test_cli_report_2(self, tmp_path, mocker): '--print', '-p', self.tp1, self.tp2, '--tools', self.tool1, self.tool2, '-l', self.test_lang, - '--tp-lib', str(tmp_path)]) + '--tp-lib', TestMain.sample_tp_lib]) def test_cli_report_3(self, tmp_path, mocker): @@ -188,11 +191,13 @@ def test_cli_report_4(self, tmp_path, mocker): def test_cli_report_5(self, tmp_path, mocker): self._init_cli_report(mocker) # Test: valid params, no tools i.e., get all measurements + test_tp_lib_path = join_resources_path("sample_patlib") main.main(['sastreport', '--export', 'whatever.csv', '-a', '-l', self.test_lang, - '--output-dir', str(tmp_path) + '--output-dir', str(tmp_path), + '--tp-lib', str(test_tp_lib_path) # '--output-dir', str(tmp_path), # '--only-last-measurement' ]) @@ -206,9 +211,11 @@ def _init_cli_check_discovery_rules_1(self, mocker): def test_cli_check_discovery_rules_1(self, tmp_path, mocker): self._init_cli_check_discovery_rules_1(mocker) # Test: valid params + test_tp_lib_path = join_resources_path("sample_patlib") main.main(['checkdiscoveryrules', '--export', 'whatever.csv', '-a', '-l', self.test_lang, - '--output-dir', str(tmp_path) + '--output-dir', str(tmp_path), + '--tp-lib', str(test_tp_lib_path) ]) \ No newline at end of file diff --git a/qualitytests/cli/test_tpf_commands.py b/qualitytests/cli/test_tpf_commands.py index b34ca12..128e955 100644 --- a/qualitytests/cli/test_tpf_commands.py +++ b/qualitytests/cli/test_tpf_commands.py @@ -51,9 +51,9 @@ def test_parse_patterns(self): tp_ids = tpf_commands.parse_patterns(False, tp_range, [], test_tp_lib_path, test_lang) assert tp_ids == [2, 3] # one and only one mutual exclusion params: pattern ids - itp_ids = [1,2,5,10] + itp_ids = [1,3] tp_ids = 
tpf_commands.parse_patterns(False, "", itp_ids, test_tp_lib_path, test_lang) assert tp_ids == itp_ids # one and only one mutual exclusion params: all tp_ids = tpf_commands.parse_patterns(True, "", [], test_tp_lib_path, test_lang) - assert tp_ids == [1,2,3] + assert tp_ids == [1,2,3,4] diff --git a/qualitytests/core/test_discovery.py b/qualitytests/core/test_discovery.py index 7fc1bda..bb633a1 100644 --- a/qualitytests/core/test_discovery.py +++ b/qualitytests/core/test_discovery.py @@ -7,9 +7,9 @@ from pytest_mock import MockerFixture import config -from core import utils, discovery, instance, pattern -from core.exceptions import MeasurementNotFound, CPGGenerationError -from qualitytests.qualitytests_utils import join_resources_path, get_result_output_dir +from core import utils, discovery +from core.exceptions import CPGGenerationError +from qualitytests.qualitytests_utils import join_resources_path, create_instance class TestDiscovery: @@ -253,12 +253,8 @@ def test_patch_PHP_discovery_rule_2(self, tmp_path): assert str(tmp_path) in str(pdr) def test_dicovery_with_empty_rule(self): - with open(join_resources_path("sample_patlib/PHP/4_empty_pattern/4_empty_pattern.json"), "r") as json_file: - pattern_dict = json.load(json_file) - test_pattern = pattern.pattern_from_dict(pattern_dict, "PHP", 4) - with open(join_resources_path("sample_patlib/PHP/4_empty_pattern/1_instance_4_empty_pattern/1_instance_4_empty_pattern.json"), "r") as json_file: - instance_dict = json.load(json_file) - tpi_instance = instance.instance_from_dict(instance_dict, test_pattern, "PHP", 1) + tpi_instance = create_instance() + tpi_instance.discovery_rule = None assert not tpi_instance.discovery_rule, "The test case is broken, instance 1 of PHP pattern 4 is not supposed to have a discovery rule" expected = dict.fromkeys(["rule_path", "method", "rule_name", "rule_accuracy", "rule_hash", "rule_name", "results", "rule_already_executed"], None) actual = discovery.discovery_for_tpi(tpi_instance, None, 
None, None) diff --git a/qualitytests/core/test_instance.py b/qualitytests/core/test_instance.py new file mode 100644 index 0000000..7b208ec --- /dev/null +++ b/qualitytests/core/test_instance.py @@ -0,0 +1,157 @@ +import json +import pytest +from pathlib import Path +from unittest.mock import patch, mock_open + +from core.instance import Instance +from core.exceptions import InstanceInvalid +from qualitytests.qualitytests_utils import join_resources_path, create_instance, example_tpi_dict + + +class TestInstance: + sample_tp_lib: Path = join_resources_path("sample_patlib") + + invalid_instances = [ + (Path("./test_instance.json"), False, {}, "The provided instance path 'test_instance.json' does not exist."), + (Path("./1_instance_test_pattern.json"), True, {}, "Could not get id from ''."), + (Path("./1_instance_test_pattern/1_instance_test_pattern.json"), True, {}, "Pattern 1 - Instance 1 - Please check ") + ] + + @pytest.mark.parametrize("json_file_path, is_file_return, read_json_return, expected_error", invalid_instances) + def test_init_invalid_instance_from_json_path(self, + json_file_path: Path, + is_file_return: bool, + read_json_return: dict, + expected_error: str): + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch('core.utils.read_json') as read_json_mock, \ + pytest.raises(InstanceInvalid) as e_info: + is_file_mock.return_value = is_file_return + read_json_mock.return_value = read_json_return + Instance.init_from_json_path(json_file_path, 1, "js", TestInstance.sample_tp_lib) + is_file_mock.assert_called_once() + assert expected_error in str(e_info.value) + + def test_init_valid_instance_from_json_path(self): + with patch('core.utils.read_json') as read_json_mock, \ + patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = example_tpi_dict + test_instance = 
Instance.init_from_json_path(Path("/1_instance_test_pattern/1_instance_test_pattern.json"), 1, "js", TestInstance.sample_tp_lib) + + read_json_mock.assert_called_once() + is_file_mock.assert_called() + is_dir_mock.assert_called() + assert Path("/1_instance_test_pattern/") == test_instance.path + assert Path("/1_instance_test_pattern/1_instance_test_pattern.json") == test_instance.json_path + assert 1 == test_instance.instance_id + assert Path("/1_instance_test_pattern/", "") == test_instance.code_path + assert "Some description" == test_instance.description + assert test_instance.code_injection_skeleton_broken + assert "xss" == test_instance.expectation_type + assert Path("/1_instance_test_pattern/", "") == test_instance.expectation_sink_file + assert 5 == test_instance.expectation_sink_line + assert Path("/1_instance_test_pattern/", "") == test_instance.expectation_source_file + assert 9 == test_instance.expectation_source_line + assert test_instance.expectation_expectation + assert None == test_instance.compile_binary + assert test_instance.compile_instruction is None + assert test_instance.compile_dependencies is None + assert Path("/1_instance_test_pattern/", "") == test_instance.discovery_rule + assert "joern" == test_instance.discovery_method + assert "Perfect" == test_instance.discovery_rule_accuracy + assert "Some notes" == test_instance.discovery_notes + assert "S0" == test_instance.properties_category + assert "FEATURE" == test_instance.properties_feature_vs_internal_api + assert not test_instance.properties_input_sanitizer + assert not test_instance.properties_source_and_sink + assert not test_instance.properties_negative_test_case + assert "./docs/remediation_notes.md" == test_instance.remediation_notes + assert test_instance.remediation_transformation is None + assert test_instance.remediation_modeling_rule is None + + def test_copy_to_tp_lib(self): + test_instance = create_instance() + with patch("pathlib.Path.mkdir") as mkdir_mock, \ + 
patch("core.utils.copy_dir_content") as copy_mock: + + new_tp_lib_path = Path("/test_path") + old_path = test_instance.path + test_instance.copy_to_tplib(new_tp_lib_path) + + mkdir_mock.assert_called_once() + expected_new_instance_path = new_tp_lib_path / old_path.name + copy_mock.assert_called_once_with(old_path, expected_new_instance_path) + assert expected_new_instance_path == test_instance.path + + def test_set_new_instance_path(self): + test_instance = create_instance() + new_path = Path("/test_path") + with patch("shutil.move") as move_mock: + test_instance.set_new_instance_path(new_path) + move_mock.assert_called_once() + assert new_path == test_instance.path + + def test_to_dict(self): + test_instance = create_instance() + with patch("core.utils.get_relative_paths") as rel_path_mock: + rel_path_mock.return_value = None + actual = test_instance.to_dict() + path_to_instance_json = test_instance.json_path + with open(path_to_instance_json, "r") as jfile: + expected = json.load(jfile) + expected["code"]["path"] = None + expected["discovery"]["rule"] = None + expected["compile"]["binary"] = None + expected["expectation"]["sink_file"] = None + expected["description"] = None + expected["expectation"]["source_file"] = None + assert expected == actual + + def test_get_description_from_file(self): + test_pattern = create_instance() + test_pattern.description = "file.md" + expected_description = "Some description in a file\nTest description.\n\n" + with patch("builtins.open", mock_open(read_data=expected_description), create=True), \ + patch("pathlib.Path.is_file") as isfile_mock: + + isfile_mock.return_value = True + + is_file, actual = test_pattern.get_description() + assert is_file + assert expected_description.strip() == actual + + def test_get_description_(self): + test_pattern = create_instance() + expected_description = "Some description in a file\nTest description." 
+ test_pattern.description = expected_description + with patch("pathlib.Path.is_file") as isfile_mock: + isfile_mock.return_value = False + + is_file, actual = test_pattern.get_description() + assert not is_file + assert expected_description.strip() == actual + + path_properties_testcases = [ + (Path("/test")), Path("../tplib"), Path("/tpframework/tplib") + ] + + @pytest.mark.parametrize("new_path", path_properties_testcases) + def test_path_properties_are_relative_and_resolve_to_path_when_called(self, new_path: Path): + test_instance = create_instance() + test_instance.json_path = Path("./my_awesome_json.json") + test_instance.code_path = Path("./awesome_js_code.js") + test_instance.expectation_sink_file = Path("./awesome_js_code.js") + test_instance.expectation_source_file = Path("./awesome_js_code.js") + test_instance.compile_binary = None + test_instance.discovery_rule = Path("../test_scala.sc") + + test_instance.path = new_path + assert Path(new_path / "my_awesome_json.json").resolve() == test_instance.json_path + assert Path(new_path / "awesome_js_code.js").resolve() == test_instance.code_path + assert Path(new_path / "awesome_js_code.js").resolve() == test_instance.expectation_sink_file + assert Path(new_path / "awesome_js_code.js").resolve() == test_instance.expectation_source_file + assert test_instance.compile_binary is None + assert Path(new_path / "../test_scala.sc").resolve() == test_instance.discovery_rule \ No newline at end of file diff --git a/qualitytests/core/test_instance_readme_generation.py b/qualitytests/core/test_instance_readme_generation.py new file mode 100644 index 0000000..78e9298 --- /dev/null +++ b/qualitytests/core/test_instance_readme_generation.py @@ -0,0 +1,302 @@ +import pytest +from copy import deepcopy +from pathlib import Path +from unittest.mock import patch, mock_open + +from core.repair.readme_generator import InstanceREADMEGenerator +from core.repair.readme_markdown_elements import * +from qualitytests.qualitytests_utils 
import create_pattern + +class TestInstanceREADMEGenerator: + def _get_instance_readme_generator(self): + test_pattern = create_pattern() + instance_readme_gen = InstanceREADMEGenerator(test_pattern, None) + instance_readme_gen.current_instance = instance_readme_gen.pattern.instances[0] + return instance_readme_gen + + def test_instance_name(self): + instance_readme_gen = self._get_instance_readme_generator() + actual = instance_readme_gen._instance_name() + assert isinstance(actual, list) + assert 1 == len(actual) + assert isinstance(actual[0], MarkdownHeading) + assert 2 == actual[0].level + assert "1 Instance" == actual[0].content + + def test_instance_description(self): + instance_readme_gen = self._get_instance_readme_generator() + instance_readme_gen.current_instance.description = None + actual1 = instance_readme_gen._instance_description() + assert [] == actual1 + + instance_readme_gen.current_instance.description = "some description" + actual2 = instance_readme_gen._instance_description() + assert isinstance(actual2, list) + assert 1 == len(actual2) + assert isinstance(actual2[0], MarkdownString) + + def test_instance_code_same_source_and_sink(self): + instance_readme_gen = self._get_instance_readme_generator() + expected_code = instance_readme_gen.current_instance.code_path + instance_readme_gen.current_instance.expectation_source_file = "code_file" + instance_readme_gen.current_instance.expectation_sink_file = "code_file" + with patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + file_content_mock.return_value = "x = 1" + + actual1 = instance_readme_gen._instance_code() + file_content_mock.assert_called_once_with(expected_code) + assert isinstance(actual1, list) + assert 2 == len(actual1) + assert isinstance(actual1[0], MarkdownHeading) + assert 3 == actual1[0].level + assert "Code" == actual1[0].content + assert isinstance(actual1[1], MarkdownCode) + + with 
patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + file_content_mock.return_value = None + + actual2 = instance_readme_gen._instance_code() + assert [] == actual2 + + def test_instance_code_different_source_and_sink(self): + instance_readme_gen = self._get_instance_readme_generator() + expected_code = instance_readme_gen.current_instance.code_path + instance_readme_gen.current_instance.expectation_source_file = "code_file_source" + instance_readme_gen.current_instance.expectation_sink_file = "code_file_sink" + with patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + file_content_mock.return_value = "x = 1" + + actual1 = instance_readme_gen._instance_code() + file_content_mock.assert_called() + assert isinstance(actual1, list) + assert 5 == len(actual1) + assert isinstance(actual1[0], MarkdownHeading) + assert 3 == actual1[0].level + assert "Code" == actual1[0].content + assert isinstance(actual1[1], MarkdownHeading) + assert 4 == actual1[1].level + assert "Source File" == actual1[1].content + assert isinstance(actual1[2], MarkdownCode) + assert isinstance(actual1[3], MarkdownHeading) + assert 4 == actual1[3].level + assert "Sink File" == actual1[3].content + assert isinstance(actual1[4], MarkdownCode) + + with patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + file_content_mock.reset_mock() + file_content_mock.return_value = None + + actual2 = instance_readme_gen._instance_code() + file_content_mock.assert_called() + assert [] == actual2 + + def test_instance_properties(self): + instance_readme_gen = self._get_instance_readme_generator() + actual = instance_readme_gen._instance_properties() + assert isinstance(actual, list) + assert 2 == len(actual) + assert isinstance(actual[0], MarkdownHeading) + assert 3 == actual[0].level + assert "Instance Properties" == actual[0].content + 
assert isinstance(actual[1], MarkdownTable) + + def test_instance_more(self): + instance_readme_gen = self._get_instance_readme_generator() + actual = instance_readme_gen._instance_more() + + assert isinstance(actual, list) + assert 1 == len(actual) + assert isinstance(actual[0], MarkdownCollapsible) + + def test_compile(self): + instance_readme_gen = self._get_instance_readme_generator() + with patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + file_content_mock.return_value = "binary" + actual1 = instance_readme_gen._compile() + + file_content_mock.assert_called_once() + assert isinstance(actual1, list) + assert 1 == len(actual1) + assert isinstance(actual1[0], MarkdownCollapsible) + + with patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + file_content_mock.return_value = "" + actual2 = instance_readme_gen._compile() + + file_content_mock.assert_called_once() + assert [] == actual2 + + discovery_rule_example1 = """@main def main(name : String): Unit = { + importCpg(name) + val x1 = (name, "1_static_variables_iall", cpg.call(".*BIND_STATIC.*").location.toJson); + println(x1) + delete; + } """ + discovery_rule_example2 = """@main def main(name : String): Unit = { + importCpg(name) + // TODO: replace line below with your detection query + val x2 = (name, "ID_pattern_name_i1", cpg.method.l)}; + println(x2) + delete; + } + + + """ + expected_discovery_rule_example1 = discovery_rule_example1.split("\n")[2].strip() + expected_discovery_rule_example2 = "\n".join([l.strip() for l in discovery_rule_example2.split("\n")[2:4]]) + + discovery_rule_testcases = [ + (discovery_rule_example1, expected_discovery_rule_example1, "./discovery_rule1.sc", "Here some description", "Here some description", MarkdownCode), + (discovery_rule_example2, expected_discovery_rule_example2, "./discovery_rule2.sc", "", "", MarkdownCode), + ("", "No discovery rule yet.", 
None, None, "", MarkdownString), + ("print('Hello World')", "print('Hello World')", "./discovery_rule.py", "This is a python rule\n", "This is a python rule", MarkdownCode) + ] + + @pytest.mark.parametrize("dr_return, expected_dr, rule_path, desc, expected_desc, code_or_str", discovery_rule_testcases) + def test_discovery_rule_exists(self, dr_return, expected_dr, rule_path, desc, expected_desc, code_or_str): + instance_readme_gen = self._get_instance_readme_generator() + instance_readme_gen.current_instance.discovery_rule = rule_path + instance_readme_gen.current_instance.discovery_notes = desc + with patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + file_content_mock.side_effect = [desc, dr_return] + actual = instance_readme_gen._discovery() + file_content_mock.assert_called() + assert isinstance(actual, list) + assert 1 == len(actual) + assert isinstance(actual[0], MarkdownCollapsible) + assert isinstance(actual[0].content, list) + assert 3 == len(actual[0].content) + assert isinstance(actual[0].content[0], MarkdownString) + assert expected_desc == actual[0].content[0].content + assert isinstance(actual[0].content[1], code_or_str) + assert expected_dr == actual[0].content[1].content + assert isinstance(actual[0].content[2], MarkdownTable) + assert isinstance(actual[0].heading, MarkdownHeading) + assert "Discovery" == actual[0].heading.content + assert 3 == actual[0].heading.level + + measurement_dict = { + "date": "1970-01-01 00:00:01", + "result": False, + "tool": "tool1", + "version": "saas", + "instance": "./JS/1_unset_element_array/1_instance_1_unset_element_array/1_instance_1_unset_element_array.json", + "pattern_id": 1, + "instance_id": 1, + "language": "JS" + } + invalid_test_measurement = deepcopy(measurement_dict) + invalid_test_measurement.pop("result") + no_measurements_and_invalid_measurements_testcases = [ + (None, None), + (Path("/"), [invalid_test_measurement]) + ] + + 
@pytest.mark.parametrize("measurement_paths, measurement_res", no_measurements_and_invalid_measurements_testcases) + def test_measurement_no_measurements_and_invalid_measurements(self, measurement_paths, measurement_res): + instance_readme_gen = self._get_instance_readme_generator() + instance_readme_gen.measurements = measurement_paths + with patch("core.utils.list_files") as list_files_mock, \ + patch("core.utils.read_json") as read_json_mock: + list_files_mock.return_value = ["file.json"] + actual = instance_readme_gen._measurement() + read_json_mock.return_value = measurement_res + + assert [] == actual + + + measure_testcases = [ + ({"tool1": "maskedTool1"}, [measurement_dict]), + ({}, [measurement_dict]), + ({}, [measurement_dict] + [measurement_dict]) + ] + + @pytest.mark.parametrize("mask, meas_results", measure_testcases) + def test_measurement(self, mask, meas_results): + instance_readme_gen = self._get_instance_readme_generator() + instance_readme_gen.measurements = Path("/") + instance_readme_gen.mask_dict = mask + with patch("core.utils.list_files") as list_files_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("pathlib.Path.exists") as exist_mock: + exist_mock.return_value = True + list_files_mock.return_value = ["file1.json"] + read_json_mock.return_value = meas_results + + actual = instance_readme_gen._measurement() + + list_files_mock.assert_called_once() + read_json_mock.assert_called_once_with("file1.json") + + assert isinstance(actual, list) + assert 1 == len(actual) + assert isinstance(actual[0], MarkdownCollapsible) + assert actual[0].is_open + assert isinstance(actual[0].heading, MarkdownHeading) + assert "Measurement" == actual[0].heading.content + assert isinstance(actual[0].content, list) + assert 1 == len(actual[0].content) + assert isinstance(actual[0].content[0], MarkdownTable) + if mask: + assert "tool1" not in actual[0].content[0].to_markdown() + else: + assert "tool1" in actual[0].content[0].to_markdown() + + 
default_note = "Can you think of a transformation, that makes this tarpit less challenging for SAST tools?" + remediation_testcases = [ + (["", "", ""], [], []), + (["", "", "rule"], [MarkdownString, MarkdownHeading, MarkdownString], [default_note, "Modeling Rule", "rule"]), + (["", "transformation", ""], [MarkdownString, MarkdownHeading, MarkdownString], [default_note, "Transformation", "transformation"]), + (["", "transformation", "rule"], [MarkdownString, MarkdownHeading, MarkdownString, MarkdownHeading, MarkdownString], [default_note, "Transformation", "transformation", "Modeling Rule", "rule"]), + (["note", "", ""], [MarkdownString], ["note"]), + (["note", "", "rule"], [MarkdownString, MarkdownHeading, MarkdownString], ["note", "Modeling Rule", "rule"]), + (["note", "transformation", ""], [MarkdownString, MarkdownHeading, MarkdownString], ["note", "Transformation", "transformation"]), + (["note", "transformation", "rule"], [MarkdownString, MarkdownHeading, MarkdownString, MarkdownHeading, MarkdownString], ["note", "Transformation", "transformation", "Modeling Rule", "rule"]) + ] + + @pytest.mark.parametrize("get_file_content_ret, expected_classes, expected_content", remediation_testcases) + def test_remediation(self, get_file_content_ret: list, expected_classes: list, expected_content: list): + instance_readme_gen = self._get_instance_readme_generator() + with patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + file_content_mock.side_effect = get_file_content_ret + + actual = instance_readme_gen._remediation() + + file_content_mock.assert_called() + if not expected_classes: + assert [] == actual + return + assert isinstance(actual, list) + assert 1 == len(actual) + assert isinstance(actual[0], MarkdownCollapsible) + assert isinstance(actual[0].heading, MarkdownHeading) + assert "Remediation" == actual[0].heading.content + assert isinstance(actual[0].content, list) + assert len(expected_classes) == 
len(actual[0].content) + assert expected_classes == [type(c) for c in actual[0].content] + assert expected_content == [c.content for c in actual[0].content] + + get_file_content_if_exists_testcases = [ + ("description", "description", None, False), + ("", None, None, False), + ("", "", "", False), + ("description in file", "file.md", "description in file", True), + ("description in file", "file2.md", "description in file\n\n", True) + ] + + @pytest.mark.parametrize("expected, file_path, file_content, is_file_ret", get_file_content_if_exists_testcases) + def test_get_file_content_if_exists(self, expected: str, file_path: str, file_content: str, is_file_ret: bool): + instance_readme_gen = self._get_instance_readme_generator() + with patch("builtins.open", mock_open(read_data=file_content), create=True), \ + patch("pathlib.Path.is_file") as is_file_mock: + is_file_mock.return_value = is_file_ret + actual = instance_readme_gen._get_file_content_if_exists(file_path) + assert expected == actual + + def test_mask(self): + instance_readme_gen = self._get_instance_readme_generator() + instance_readme_gen.mask_dict = {"tool2": "masked_tool2"} + assert "tool1" == instance_readme_gen._mask("tool1") + assert "masked_tool2" == instance_readme_gen._mask("tool2") + instance_readme_gen.mask_dict = {} + assert "tool2" == instance_readme_gen._mask("tool2") diff --git a/qualitytests/core/test_instance_repair.py b/qualitytests/core/test_instance_repair.py new file mode 100644 index 0000000..bfac1ee --- /dev/null +++ b/qualitytests/core/test_instance_repair.py @@ -0,0 +1,181 @@ +import pytest +from pathlib import Path +from unittest.mock import patch, mock_open + +from core.repair.instance_repair import InstanceRepair +from core.exceptions import PatternRepairError +from qualitytests.qualitytests_utils import create_instance, create_pattern + +class TestInstanceRepair: + template_json_dict = { + "description": "", + "code": { + "path": "./pattern_src_code.js|php|java", + 
"injection_skeleton_broken": False + }, + "expectation": { + "type": "xss", + "sink_file": "./pattern_src_code.js|php|java", + "sink_line": 0, + "source_file": "./pattern_src_code.js|php|java", + "source_line": 0, + "expectation": True + }, + "compile": { + "binary": None, + "instruction": None, + "dependencies": None + }, + "discovery": { + "rule": "./pattern_discovery_rule.sc", + "method": "joern", + "rule_accuracy": "FN|FP|FPFN|Perfect", + "notes": None + }, + "properties": { + "category": "S0|D1|D2|D3", + "feature_vs_internal_api": "FEATURE", + "input_sanitizer": False, + "source_and_sink": False, + "negative_test_case": False + }, + "remediation": { + "notes": "", + "transformation": None, + "modeling_rule": None + } + } + def _get_instance_repair(self) -> InstanceRepair: + test_instance = create_instance() + test_pattern = create_pattern() + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.repair.instance_repair.globals") as global_mock: + is_file_mock.return_value = True + read_json_mock.return_value = TestInstanceRepair.template_json_dict + + repair_tool = InstanceRepair(test_instance, test_pattern) + + global_mock.assert_called_once() + read_json_mock.assert_called() + is_file_mock.assert_called() + return repair_tool + + def test_init_instance_repair_with_wrong_language(self): + test_instance = create_instance() + test_instance.language = "TEST" + test_pattern = create_pattern() + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.repair.instance_repair.logger.error") as logger_error, \ + pytest.raises(PatternRepairError) as e_info: + + is_file_mock.return_value = True + InstanceRepair(test_instance, test_pattern) + is_file_mock.assert_called() + read_json_mock.assert_called() + logger_error.assert_called_once_with("InstanceRepairTEST could not be found, maybe it is not imported?") + assert "Could not instantiate 
language specific instance repair" in str(e_info) + + example_rule = """@main def main(name : String): Unit = { + importCpg(name) + // TODO: replace line below with your detection query + val x2 = (name, "ID_pattern_name_i1", cpg.method.l); + println(x2) + delete; + }\n\n + """ + + discovery_rule_test_cases = [ + # one instance, remove "delete;" from scala rule and test if the right warn log message is exposed + ([None], "", None, "delete;", 'Could not find "delete;" in'), + # one instance, remove 2 and test if the right warn log message is provided + ([None], "", None, "2", 'Could not find the pattern id in'), + # one instance, dr is not in instance directory (as in samplepatlib) + ([None], 'Changed lines in Scala rule for instance JS - p1:1:\n[\'val x1 = (name, "1_unset_element_array_iall", cpg.method.l);\', \'println(x1)\']', None, "", ""), + # two instances, dr is not in instance directory (as in samplepatlib) + ([None, None], 'Changed lines in Scala rule for instance JS - p1:1:\n[\'val x1 = (name, "1_unset_element_array_iall", cpg.method.l);\', \'println(x1)\']', None, "", ""), + # two instance, dr is in instance directory + ([None, None], 'Changed lines in Scala rule for instance JS - p1:1:\n[\'val x1 = (name, "1_unset_element_array_i1", cpg.method.l);\', \'println(x1)\']', Path("dr_rule.sc"), "", "") + ] + + @pytest.mark.parametrize("instances, expected_info, dr_rule_path, dr_rule_replace, warn_logger_msg", discovery_rule_test_cases) + def test_adjust_variable_number_in_discovery_works(self, instances, expected_info, dr_rule_path, dr_rule_replace, warn_logger_msg): + test_instance_repair = self._get_instance_repair() + + test_instance_repair.pattern.instances = instances + test_instance_repair.to_repair.path = Path("/1_unset_element_array/1_instance_1_unset_element_array") + test_instance_repair.pattern.path = Path("/1_unset_element_array") + if dr_rule_path: + test_instance_repair.to_repair.discovery_rule = dr_rule_path + dr_rule = 
TestInstanceRepair.example_rule.replace(dr_rule_replace, "") + with patch("builtins.open", mock_open(read_data=dr_rule), create=True), \ + patch("core.repair.instance_repair.logger.info") as info_logger, \ + patch("core.repair.instance_repair.logger.warning") as warn_logger: + test_instance_repair._adjust_variable_number_in_discovery_rule() + + if dr_rule_replace: + warn_logger.assert_called_once_with(f"{warn_logger_msg} {test_instance_repair.to_repair.discovery_rule}") + info_logger.assert_not_called() + else: + info_logger.assert_called_once_with(expected_info) + + def test_check_rule_accuracy_given(self): + test_instance_repair = self._get_instance_repair() + + test_instance_repair.to_repair.discovery_rule_accuracy = "FP" + with patch("core.repair.instance_repair.logger.warning") as warn_logger: + test_instance_repair._check_rule_accuracy() + warn_logger.assert_not_called() + + test_instance_repair.to_repair.discovery_rule_accuracy = "" + with patch("core.repair.instance_repair.logger.warning") as warn_logger: + test_instance_repair._check_rule_accuracy() + warn_logger.assert_called_once_with("PatternRepair (JS - p1:1) Discovery rule given, but no rule accuracy.") + + repair_scala_rules_testcases = [ + # no discovery rule given + (None, True, "PatternRepair (JS - p1:1) Could not find rule for JS - p1:1, skipping...", None), + # discovery rule, but it is not a file + (Path("discovery_rule.sc"), False, "PatternRepair (JS - p1:1) Could not find rule for JS - p1:1, skipping...", None), + # discovery_rule, but has wrong suffix + (Path("discovery_rule.py"), True, None, "PatternRepair (JS - p1:1) Found a rule, but it is no scala rule, don't know how to repair this, skipping..."), + # everything is alright + (Path("discovery_rule.sc"), True, None, None), + ] + + @pytest.mark.parametrize("dr_rule, is_file_return, warn, info", repair_scala_rules_testcases) + def test_repair_scala_rule(self, dr_rule, is_file_return, warn, info): + test_instance_repair = 
self._get_instance_repair() + test_instance_repair.to_repair.discovery_rule = dr_rule + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.repair.instance_repair.InstanceRepair._adjust_variable_number_in_discovery_rule") as adjust_mock, \ + patch("core.repair.instance_repair.InstanceRepair._check_rule_accuracy") as check_rule_mock, \ + patch("core.repair.instance_repair.logger.warning") as logger_warn_mock, \ + patch("core.repair.instance_repair.logger.info") as logger_info_mock: + is_file_mock.return_value = is_file_return + + test_instance_repair._repair_scala_rule() + + if warn: + logger_warn_mock.assert_called_once_with(warn) + logger_info_mock.assert_not_called() + if info: + logger_info_mock.assert_called_once_with(info) + logger_warn_mock.assert_not_called() + if not warn and not info: + logger_info_mock.assert_not_called() + logger_warn_mock.assert_not_called() + + check_rule_mock.assert_called_once() + adjust_mock.assert_called_once() + + def test_repair(self): + test_instance_repair = self._get_instance_repair() + with patch("core.repair.instance_repair.InstanceRepair._ensure_json_file_exists") as func1_mock, \ + patch("core.repair.instance_repair.InstanceRepair._repair_scala_rule") as func2_mock, \ + patch("core.repair.instance_repair.RepairTool.to_json") as func3_mock: + test_instance_repair.repair() + func1_mock.assert_called_once() + func2_mock.assert_called_once() + func3_mock.assert_called_once() diff --git a/qualitytests/core/test_instance_repair_php.py b/qualitytests/core/test_instance_repair_php.py new file mode 100644 index 0000000..cd73199 --- /dev/null +++ b/qualitytests/core/test_instance_repair_php.py @@ -0,0 +1,106 @@ +import pytest +from pathlib import Path +from unittest.mock import patch, mock_open + +from core.repair.instance_repair import InstanceRepairPHP +from qualitytests.qualitytests_utils import create_instance_php, join_resources_path + +class TestInstanceRepairPHP: + + def _get_instance_repair(self): + 
test_instance = create_instance_php() + return InstanceRepairPHP(test_instance) + + def test_get_source_and_sink_for_file(self): + test_instance_php_repair = self._get_instance_repair() + code = """ {expected} 1>/dev/null") + mask_line_mock.assert_called_once() + + def test_repair_opcode(self): + test_instance_php_repair = self._get_instance_repair() + with patch("core.repair.instance_repair.InstanceRepairPHP._remove_bash_files") as bash_file_remove_mock, \ + patch("core.repair.instance_repair.InstanceRepairPHP._make_opcode_from_php_file") as make_opcode_mock, \ + patch("core.utils.list_files") as list_files_mock: + + list_files_mock.return_value = ["file1"] + + test_instance_php_repair._repair_opcode() + bash_file_remove_mock.assert_called_once() + make_opcode_mock.assert_called_once() + list_files_mock.assert_called() + + repair_source_sink_testcases = [ + ((None, None), True, 99, 99), + ((1, None), True, 1, 99), + ((None, 1), True, 99, 1), + ((42, 24), False, 42, 24) + ] + + @pytest.mark.parametrize("source_sink_ret, warning, exp_source, exp_sink", repair_source_sink_testcases) + def test_repair_source_line_sink_line(self, source_sink_ret, warning, exp_source, exp_sink): + test_instance_php_repair = self._get_instance_repair() + expected_file = test_instance_php_repair.instance.expectation_sink_file + test_instance_php_repair.instance.expectation_sink_line = 99 + test_instance_php_repair.instance.expectation_source_line = 99 + with patch("core.repair.instance_repair.InstanceRepairPHP._get_source_and_sink_for_file") as source_sink_mock, \ + patch("core.repair.instance_repair.logger.warning") as warn_logger: + + source_sink_mock.return_value = source_sink_ret + test_instance_php_repair._repair_source_line_sink_line() + + source_sink_mock.assert_called_with(expected_file) + if warning: + warn_logger.assert_called() + + assert exp_source == test_instance_php_repair.instance.expectation_source_line + assert exp_sink == 
test_instance_php_repair.instance.expectation_sink_line diff --git a/qualitytests/core/test_markdown_elements.py b/qualitytests/core/test_markdown_elements.py new file mode 100644 index 0000000..ee1cd12 --- /dev/null +++ b/qualitytests/core/test_markdown_elements.py @@ -0,0 +1,43 @@ +from core.repair.readme_markdown_elements import * + + +class TestMarkdownElements: + + def test_markdown_code(self): + code = MarkdownCode('\n\nMore\n\n\nHello\n\n\n' == coll.to_markdown() + + def test_markdown_string(self): + s = MarkdownString("Test") + assert "\nTest\n" == s.to_markdown() + + def test_markdown_link(self): + link = MarkdownLink("Test", MarkdownHeading("Heading 1", 3)) + assert "[Test](#heading-1)" == link.to_markdown() + + def test_markdown_table(self): + test_content = {"0::column1": ["value1", "value1.1"], "column2": ["value2"]} + tab = MarkdownTable(test_content) + expected_tab = "\n| column1 | column2 |\n" + expected_tab += "|-----------|-----------|\n" + expected_tab += "| value1 | value2 |\n" + expected_tab += "| value1.1 | |\n" + assert expected_tab == tab.to_markdown() + + def test_markdown_document(self): + coll = MarkdownCollapsible([MarkdownString("Hello")], MarkdownString("More")) + doc = MarkdownDocument([coll]) + assert '
\n\nMore\n\nHello\n\n
\n' == doc.to_markdown() + diff --git a/qualitytests/core/test_pattern.py b/qualitytests/core/test_pattern.py index f7da7f4..100fb0d 100644 --- a/qualitytests/core/test_pattern.py +++ b/qualitytests/core/test_pattern.py @@ -1,174 +1,162 @@ -from typing import Dict - import pytest -import json +from copy import deepcopy from pathlib import Path +from unittest.mock import patch, mock_open -from core.exceptions import PatternDoesNotExists, PatternValueError -from core.pattern import Pattern, pattern_from_dict, get_pattern_path_by_pattern_id - - -def setup_three_pattern(tmp_path: Path): - language: str = "PHP" - tmp_tp_path: Path = tmp_path / language - tmp_tp_path.mkdir() - p1 = tmp_tp_path / "1_pattern_one" - p2 = tmp_tp_path / "2_pattern_two" - p3 = tmp_tp_path / "3_pattern_three" - p1.mkdir() - p2.mkdir() - p3.mkdir() - - return language, tmp_tp_path, p1, p2, p3 - +from core.pattern import Pattern +from core.exceptions import PatternDoesNotExists, PatternInvalid, InstanceDoesNotExists +from qualitytests.qualitytests_utils import join_resources_path, create_pattern, example_tp_dict class TestPattern: - - def test_pattern_init_with_id(self): - pattern = Pattern("TestName", "PHP", [], "FAMILY", "TestDesc", [], 1) - assert pattern.pattern_id == 1 - assert pattern.name == "TestName" - assert pattern.description == "TestDesc" - assert pattern.family == "FAMILY" - assert len(pattern.tags) == 0 - assert len(pattern.instances) == 0 - - def test_pattern_init_without_id(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - pattern = Pattern("TestName", language, [], "FAMILY", "TestDesc", [], pattern_dir=tmp_path) - assert pattern.pattern_id == 4 - assert pattern.name == "TestName" - assert pattern.description == "TestDesc" - assert pattern.family == "FAMILY" - assert len(pattern.tags) == 0 - assert len(pattern.instances) == 0 - - def test_pattern_init_without_id_and_empty_tp_library(self, tmp_path): - language: str = "PHP" - tmp_tp_path: Path 
= tmp_path / language - tmp_tp_path.mkdir() - pattern = Pattern("TestName", language, [], "FAMILY", "TestDesc", [], pattern_dir=tmp_path) - assert pattern.pattern_id == 1 - assert pattern.name == "TestName" - assert pattern.description == "TestDesc" - assert pattern.family == "FAMILY" - assert len(pattern.tags) == 0 - assert len(pattern.instances) == 0 - - def test_pattern_non_existing_language(self, tmp_path): - pattern: Pattern = Pattern("TestName", "JS", [], "FAMILY", "TestDesc", [], pattern_dir=tmp_path) - assert pattern.pattern_id == 1 - - def test_get_pattern_path_by_pattern_id(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - assert p3 == get_pattern_path_by_pattern_id(language, 3, tmp_path) - - def test_get_pattern_path_by_pattern_id_non_exist(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - with pytest.raises(PatternDoesNotExists): - get_pattern_path_by_pattern_id(language, 5, tmp_path) - - # TODO: to be fixed - @pytest.mark.skip() - def test_add_pattern_to_tp_library(self, tmp_path): - language: str = "PHP" - tmp_tp_path: Path = tmp_path / language - tmp_tp_path.mkdir() - p1 = tmp_tp_path / "1_pattern_one" - p1.mkdir() - - pattern: Pattern = Pattern("Pattern Two", language, [], "FAMILY", "TestDesc", [], pattern_dir=tmp_path) - pattern.add_pattern_to_tp_library(language, tmp_path, tmp_path) - - expected_new_pattern_path: Path = tmp_tp_path / "2_pattern_two" - expected_new_pattern_json_path: Path = expected_new_pattern_path / "2_pattern_two.json" - with open(expected_new_pattern_json_path) as json_file: - pattern_from_tp_lib = json.load(json_file) - - assert pattern.name == pattern_from_tp_lib["name"] - assert pattern.description == pattern_from_tp_lib["definition"] - assert len(pattern.instances) == len(pattern_from_tp_lib["instances"]) - - # TODO: to be fixed - @pytest.mark.skip() - def test_add_pattern_to_tp_library_new_language(self, tmp_path): - language: str = "JS" - 
tmp_tp_path: Path = tmp_path / language - - pattern: Pattern = Pattern("Pattern One JS", language, [], "FAMILY", "TestDesc", [], pattern_dir=tmp_path) - pattern.add_pattern_to_tp_library(language, tmp_path) - - expected_new_pattern_path: Path = tmp_tp_path / "1_pattern_one_js" - expected_new_pattern_json_path: Path = expected_new_pattern_path / "1_pattern_one_js.json" - with open(expected_new_pattern_json_path) as json_file: - pattern_from_tp_lib = json.load(json_file) - - assert pattern.name == pattern_from_tp_lib["name"] - assert pattern.description == pattern_from_tp_lib["definition"] - assert len(pattern.instances) == len(pattern_from_tp_lib["instances"]) - - # TODO: to be fixed - @pytest.mark.skip() - def test_add_new_instance_reference(self, tmp_path): - language: str = "JS" - tmp_tp_path: Path = tmp_path / language - - pattern: Pattern = Pattern("Pattern One JS", language, [], "FAMILY", "TestDesc", [], pattern_dir=tmp_path) - pattern.add_pattern_to_tp_library(language, tmp_path) - - pattern.add_new_instance_reference(language, tmp_path, "./new_instance_test") - - expected_new_pattern_path: Path = tmp_tp_path / "1_pattern_one_js" - expected_new_pattern_json_path: Path = expected_new_pattern_path / "1_pattern_one_js.json" - with open(expected_new_pattern_json_path) as json_file: - pattern_from_tp_lib = json.load(json_file) - - assert ["./new_instance_test"] == pattern_from_tp_lib["instances"] - - - def test_pattern_from_dict(self): - pattern_dict: Dict = { - "name": "Try Catch Finally", - "description": "", - "family": "None", - "tags": [], - "instances": [ - "./1_instance_52_try_catch_finally/1_instance_52_try_catch_finally.json", - "./2_instance_52_try_catch_finally/2_instance_52_try_catch_finally.json" - ] - } - pattern = pattern_from_dict(pattern_dict, "PHP", 1) - assert pattern.name == pattern_dict["name"] - assert pattern.pattern_id == 1 - assert pattern.language == "PHP" - assert pattern.instances == pattern_dict["instances"] - - - def 
test_pattern_from_dict_missing_non_mand_field(self): - pattern_dict: Dict = { - "name": "Try Catch Finally", - "instances": [ - "./1_instance_52_try_catch_finally/1_instance_52_try_catch_finally.json", - "./2_instance_52_try_catch_finally/2_instance_52_try_catch_finally.json" - ] - } - pattern = pattern_from_dict(pattern_dict, "PHP", 1) - assert pattern.name == pattern_dict["name"] - assert pattern.pattern_id == 1 - assert pattern.language == "PHP" - assert pattern.instances == pattern_dict["instances"] - - - def test_pattern_from_dict_missing_mandatory_field(self): - # name is a mandatory field - pattern_dict: Dict = { - # "name": "Try Catch Finally", - "description": "", - "tags": [], - "instances": [ - "./1_instance_52_try_catch_finally/1_instance_52_try_catch_finally.json", - "./2_instance_52_try_catch_finally/2_instance_52_try_catch_finally.json" - ] - } - with pytest.raises(PatternValueError): - pattern_from_dict(pattern_dict, "PHP", 1) + sample_tp_lib: Path = join_resources_path("sample_patlib") + + example_tp_dict = { + "name": "Test Pattern", + "description": "./docs/description.md", + "family": "test_pattern", + "tags": ["sast", "language"], + "instances": [ + "./1_instance_1_test_pattern/1_instance_1_test_pattern.json" + ] + } + + not_existing_patterns = [(1000, "php"), (1000, "js"), (1000, "java")] + invalid_patterns = [ + (3, "php", {}, "The pattern needs a valid JSON file."), + (3, "php", {"name": "test_instances_key_in_json_missing"}, "Pattern 3 (PHP) - Pattern JSON file needs an 'instances' key with valid relative links."), + (3, "php", {"instances": ["test_instances_invalid_relative_path"]}, "Pattern 3 (PHP) - The instance path 'test_instances_invalid_relative_path' is not valid.") + ] + valid_patterns = [ + (1, "php", example_tp_dict), + (1, "js", example_tp_dict) + ] + + valid_patterns_without_id = [ + (Path("path_to_json_file"), "php", Path("pattern_path"), 5), + (Path("path_to_json_file"), "js", Path("pattern_path"), 3) + ] + + 
@pytest.mark.parametrize("pattern_id, language", not_existing_patterns) + def test_not_exising_pattern_init_from_id_and_language(self, pattern_id: int, language: str): + with pytest.raises(PatternDoesNotExists) as e_info: + Pattern.init_from_id_and_language(pattern_id, language, TestPattern.sample_tp_lib) + assert f"Specified Pattern `{pattern_id}` does not exists." == str(e_info.value) + + @pytest.mark.parametrize("pattern_id, language, read_json_return, expected_assertion_error", invalid_patterns) + def test_init_invalid_pattern_from_id_and_language(self, + pattern_id: int, language: str, + read_json_return: dict, + expected_assertion_error: str): + with patch('core.utils.read_json') as read_json_mock, \ + pytest.raises(PatternInvalid) as e_info: + + read_json_mock.return_value = read_json_return + Pattern.init_from_id_and_language(pattern_id, language, TestPattern.sample_tp_lib) + + read_json_mock.assert_called_once() + assert f"{expected_assertion_error} Pattern is invalid." == str(e_info.value) + + @pytest.mark.parametrize("path_to_json, language, pattern_path, expected_id", valid_patterns_without_id) + def test_init_from_json_file_without_pattern_id(self, path_to_json: Path, language: str, pattern_path: Path, expected_id: int): + with patch('core.utils.read_json') as read_json_mock, \ + patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock, \ + patch("core.pattern.isinstance") as isinstance_mock, \ + patch('core.instance.Instance.init_from_json_path') as instance_init_mock: + + is_dir_mock.return_value = True + is_file_mock.return_value = True + isinstance_mock.return_value = True + read_json_mock.return_value = TestPattern.example_tp_dict + pattern = Pattern.init_from_json_file_without_pattern_id(path_to_json, language, pattern_path, TestPattern.sample_tp_lib) + read_json_mock.assert_called_once() + is_file_mock.assert_called() + is_dir_mock.assert_called() + isinstance_mock.assert_called() + 
instance_init_mock.assert_called_once() + assert expected_id == pattern.pattern_id + assert path_to_json == pattern.json_path + assert pattern_path == pattern.path + assert language.upper() == pattern.language + + + @pytest.mark.parametrize("pattern_id, language, read_json_return", valid_patterns) + def test_init_valid_pattern_from_id_and_language(self, pattern_id: int, language: str, + read_json_return: dict): + with patch('core.utils.read_json') as read_json_mock, \ + patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock, \ + patch("core.pattern.isinstance") as isinstance_mock, \ + patch('core.instance.Instance.init_from_json_path') as instance_init_mock: + + is_dir_mock.return_value = True + is_file_mock.return_value = True + isinstance_mock.return_value = True + read_json_mock.return_value = read_json_return + test_pattern = Pattern.init_from_id_and_language(pattern_id, language, TestPattern.sample_tp_lib) + + read_json_mock.assert_called_once() + is_file_mock.assert_called() + is_dir_mock.assert_called() + isinstance_mock.assert_called() + instance_init_mock.assert_called_once() + assert "Test Pattern" == test_pattern.name + assert "./docs/description.md" == test_pattern.description + assert "test_pattern" == test_pattern.family + assert ["sast", "language"] == test_pattern.tags + + copy_to_tp_lib_testcases = [(1, "1_unset_element_array"), (None, "1_1_unset_element_array")] + + @pytest.mark.parametrize("ret_pattern_id, expected_name", copy_to_tp_lib_testcases) + def test_copy_to_tp_lib(self, ret_pattern_id, expected_name): + test_pattern = create_pattern() + new_tplib_path = Path("/tp_lib") + with patch("core.instance.Instance.copy_to_tplib") as copy_instance_mock, \ + patch("core.utils.copy_dir_content") as copy_dir_mock, \ + patch("core.utils.get_id_from_name") as get_id_mock: + get_id_mock.return_value = ret_pattern_id + test_pattern.tp_lib_path = new_tplib_path + test_pattern.copy_to_tplib() + 
copy_instance_mock.assert_called_once() + copy_dir_mock.assert_called_once() + expected_pattern_path = new_tplib_path / "JS" / expected_name + assert expected_pattern_path == test_pattern.path + + def test_to_dict(self): + test_pattern = create_pattern() + with patch("core.utils.get_relative_paths") as rel_path_mock: + rel_path_mock.return_value = None + + actual = test_pattern.to_dict() + expected = deepcopy(example_tp_dict) + expected["instances"] = [None] + assert expected == actual + + def test_get_instance_by_id(self): + test_pattern = create_pattern() + instance = test_pattern.get_instance_by_id(1) + assert test_pattern.instances[0] == instance + + with pytest.raises(InstanceDoesNotExists) as e_info: + test_pattern.get_instance_by_id(2) + assert "Specified Pattern Instance `2` does not exists." in str(e_info) + + get_description_testcases = [ + ("Some description\n", None, "Some description", False), + ("file.md", "Some description inside a file\nTest description. ", "Some description inside a file\nTest description.", True), + (None, None, "", False) + ] + + + @pytest.mark.parametrize("file_path, description, expected_desc, is_file", get_description_testcases) + def test_get_description_from_file(self, file_path, description, expected_desc, is_file): + test_pattern = create_pattern() + test_pattern.description = file_path + with patch("builtins.open", mock_open(read_data=description), create=True), \ + patch("pathlib.Path.is_file") as isfile_mock: + + isfile_mock.return_value = is_file + + actual_is_file, actual = test_pattern.get_description() + assert is_file == actual_is_file + assert expected_desc == actual diff --git a/qualitytests/core/test_pattern_operations.py b/qualitytests/core/test_pattern_operations.py index d25997a..c0e68db 100644 --- a/qualitytests/core/test_pattern_operations.py +++ b/qualitytests/core/test_pattern_operations.py @@ -1,349 +1,77 @@ -import json from datetime import datetime -from json import JSONDecodeError -from pathlib import 
Path, WindowsPath -from typing import Dict +from pathlib import Path +from unittest.mock import patch, mock_open import pytest -from freezegun import freeze_time - from core import pattern_operations -from core.exceptions import PatternValueError -from core.instance import PatternCategory, FeatureVsInternalApi, Instance from core.measurement import Measurement -from core.pattern import Pattern - - -def setup_three_pattern(tmp_path: Path): - language: str = "PHP" - tmp_tp_path: Path = tmp_path / language - tmp_tp_path.mkdir() - p1 = tmp_tp_path / "1_pattern_one" - p2 = tmp_tp_path / "2_pattern_two" - p3 = tmp_tp_path / "3_pattern_three" - p1.mkdir() - p2.mkdir() - p3.mkdir() - - pattern1: Dict = { - "name": "Pattern One", - "description": "", - "family": "None", - "tags": [], - "instances": [ - "./1_instance_1_pattern_one/1_instance_1_pattern_one.json", - "./2_instance_1_pattern_one/2_instance_1_pattern_one.json" - ] - } - with open(p1 / (p1.name + ".json"), "w") as pattern_json_file: - json.dump(pattern1, pattern_json_file, indent=4) - - pattern2: Dict = { - "name": "Pattern Two", - "description": "", - "family": "None", - "tags": [], - "instances": [ - "./1_instance_2_pattern_two/1_instance_2_pattern_two.json", - "./2_instance_2_pattern_two/2_instance_2_pattern_two.json" - ] - } - with open(p2 / (p2.name + ".json"), "w") as pattern_json_file: - json.dump(pattern2, pattern_json_file, indent=4) - - pattern3: Dict = { - "name": "Pattern Three", - "description": "", - "family": "None", - "tags": [], - "instances": [ - "./1_instance_3_pattern_three/1_instance_3_pattern_three.json", - "./2_instance_3_pattern_three/2_instance_3_pattern_three.json" - ] - } - with open(p3 / (p3.name + ".json"), "w") as pattern_json_file: - json.dump(pattern3, pattern_json_file, indent=4) - - return language, tmp_tp_path, p1, p2, p3 - - -def setup_two_instances(p_path: Path): - pi1_path = p_path / ("1_instance_" + p_path.name) - pi2_path = p_path / ("2_instance_" + p_path.name) - 
pi1_path.mkdir() - pi2_path.mkdir() - - instance_dict: Dict = { - "code": "./instance_one.php", - "discovery": { - "rule": "./instance_one.sc", - "method": None, - "rule_accuracy": None - }, - "transformation": "", - "version": "1", - "compile": { - "binary": "./instance_one.bash", - "instruction": None - }, - "expectation": { - "type": "xss", - "sink_file": "./instance_one.php", - "sink_line": 18, - "source_file": "./instance_one.php", - "source_line": 17, - "expectation": True - }, - "properties": { - "category": "D2", - "feature_vs_internal_api": "FEATURE", - "input_sanitizer": False, - "source_and_sink": False, - "negative_test_case": False - }, - "measurements": [] - } - - with open(pi1_path / (pi1_path.name + ".json"), "w") as instance_json_file: - json.dump(instance_dict, instance_json_file, indent=4) - - with open(pi2_path / (pi2_path.name + ".json"), "w") as instance_json_file: - json.dump(instance_dict, instance_json_file, indent=4) - return pi1_path, pi2_path +from qualitytests.qualitytests_utils import create_pattern, join_resources_path class TestPatternOperations: - - # TODO: most of these tests need to be updated and do not work - - def test_add_testability_pattern_to_lib(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - pattern: Dict = { - "name": "Try Catch Finally", - "description": "", - "family": "None", - "tags": [], - "instances": [ - "./1_instance_52_try_catch_finally/1_instance_52_try_catch_finally.json", - "./2_instance_52_try_catch_finally/2_instance_52_try_catch_finally.json" - ] - } - - pattern_operations.add_testability_pattern_to_lib(language, pattern, None, tmp_path) - - expected_new_pattern_path: Path = tmp_tp_path / "4_try_catch_finally" - expected_new_pattern_json_path: Path = expected_new_pattern_path / "4_try_catch_finally.json" - with open(expected_new_pattern_json_path) as json_file: - pattern_from_tp_lib = json.load(json_file) - - assert pattern["name"] == pattern_from_tp_lib["name"] - 
assert pattern_from_tp_lib["instances"] == [] - - def test_add_testability_pattern_to_lib_with_value_error(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - pattern: Dict = { - "description": "", - "family": "None", - "tags": [], - "instances": [ - "./1_instance_52_try_catch_finally/1_instance_52_try_catch_finally.json", - "./2_instance_52_try_catch_finally/2_instance_52_try_catch_finally.json" - ] - } - - with pytest.raises(PatternValueError): - pattern_operations.add_testability_pattern_to_lib(language, pattern, None, tmp_path) - - def test_add_testability_pattern_to_lib_from_json(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - json_path: Path = ( - Path(__file__).resolve().parent / "testing_samples" / "sample_pattern" / "try_catch_finally.json" - ) - - pattern_operations.add_testability_pattern_to_lib_from_json(language, json_path, json_path.parent, tmp_path) - - actual_pattern_path: Path = tmp_tp_path / "4_try_catch_finally" - actual_pattern_json_path: Path = actual_pattern_path / "4_try_catch_finally.json" - with open(actual_pattern_json_path) as json_file: - actual_pattern = json.load(json_file) - - with open(json_path) as json_file: - expected_pattern = json.load(json_file) - - assert expected_pattern["name"] == actual_pattern["name"] - assert actual_pattern["instances"] == [ - './1_instance_4_try_catch_finally/1_instance_4_try_catch_finally.json', - './2_instance_4_try_catch_finally/2_instance_4_try_catch_finally.json' - ] - - def test_add_testability_pattern_to_lib_from_json_bad_encoding(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - json_path: Path = ( - Path(__file__).resolve().parent / "testing_samples" / "sample_broken_pattern" / "try_catch_finally_broken.json" - ) - - with pytest.raises(JSONDecodeError): - pattern_operations.add_testability_pattern_to_lib_from_json(language, json_path, json_path.parent, tmp_path) - - def 
test_add_testability_pattern_to_lib_from_json_with_missing_field(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - json_path: Path = ( - Path(__file__).resolve().parent / "testing_samples" / "sample_broken_pattern" / "try_catch_finally.json" - ) - - with pytest.raises(PatternValueError): - pattern_operations.add_testability_pattern_to_lib_from_json(language, json_path, json_path.parent, tmp_path) - - def test_add_tp_instance_to_lib(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - pi1 = p2 / ("1_instance" + p2.name) - pi2 = p2 / ("2_instance" + p2.name) - pi1.mkdir() - pi2.mkdir() - - pattern = Pattern("Try Catch Finally", language, [], "FAMILY", "TestDesc", [], pattern_id=4) - - source_path: Path = Path(__file__).resolve().parent / "testing_samples" / "sample_pattern" - - exp_instance: Dict = { - "code": "./instance_one.php", - "discovery": { - "rule": "./instance_one.sc", - "method": None, - "rule_accuracy": None - }, - "transformation": "", - "version": "1", - "compile": { - "binary": "./instance_one.bash", - "instruction": None - }, - "expectation": { - "type": "xss", - "sink_file": "./instance_one.php", - "sink_line": 18, - "source_file": "./instance_one.php", - "source_line": 17, - "expectation": True - }, - "properties": { - "category": "D2", - "feature_vs_internal_api": "FEATURE", - "input_sanitizer": False, - "source_and_sink": False, - "negative_test_case": False - }, - "measurements": [] - } - - pattern_operations.add_tp_instance_to_lib(language, pattern, exp_instance, "instance_one", source_path, - tmp_path) - - actual_instance_path: Path = tmp_tp_path / "4_try_catch_finally" / "1_instance_4_try_catch_finally" - actual_pattern_json_path: Path = actual_instance_path / "1_instance_4_try_catch_finally.json" - with open(actual_pattern_json_path) as act_json_file: - actual_instance = json.load(act_json_file) - - assert exp_instance["expectation"]["type"] == 
actual_instance["expectation"]["type"] - assert actual_instance["code"] == "./instance_one.php" - assert actual_instance["compile"]["binary"] == "./instance_one.bash" - assert actual_instance["expectation"]["sink_file"] == "./instance_one.php" - assert actual_instance["expectation"]["source_file"] == "./instance_one.php" - assert actual_instance["discovery"]["rule"] == "./instance_one.sc" - - @freeze_time(datetime.now()) - async def test_add_measurement_for_pattern(self, tmp_path, mocker): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - pi1 = p2 / ("1_instance_" + p2.name) - pi1.mkdir() - instance_dict: Dict = { - "code": "./instance_one.php", - "discovery": { - "rule": "./instance_one.sc", - "method": None, - "rule_accuracy": None - }, - "transformation": "", - "version": "1", - "compile": { - "binary": "./instance_one.bash", - "instruction": None - }, - "expectation": { - "type": "xss", - "sink_file": "./instance_one.php", - "sink_line": 18, - "source_file": "./instance_one.php", - "source_line": 17, - "expectation": True - }, - "properties": { - "category": "D2", - "feature_vs_internal_api": "FEATURE", - "input_sanitizer": False, - "source_and_sink": False, - "negative_test_case": False - }, - "measurements": [] - } - - with open(pi1 / (pi1.name + ".json"), "w") as instance_json_file: - json.dump(instance_dict, instance_json_file, indent=4) - - pi1_meas: Path = tmp_path / "measurements" / language / "2_pattern_two/1_instance_2_pattern_two" - - current_time: datetime = datetime.now() - date_time_str_file = current_time.strftime("%Y-%m-%d_%H-%M-%S") - date_time_str = current_time.strftime("%Y-%m-%d %H:%M:%S") - exp_instance1: Instance = Instance( - name='Pattern Two', - definition='What happens for the variables inside the function when the function finish simply they die! and if we run the function again, we will have new variables. But if we want to keep the variable life, we have to use static. 
At the same time, static variables are challenges for the scanners, because the scanner has to record the last value for the variable with the last call for the function.', - family="", - tags=[], - instances=[Path('./1_instance_2_pattern_two/1_instance_2_pattern_two.json')], - language='PHP', - pattern_id=2, - code=Path('1_instance_2_pattern_two.php'), - compile_binary=Path('1_instance_2_pattern_two.bash'), - version='1', - properties_category=PatternCategory.S0, - properties_negative_test_case=False, - properties_source_and_sink=False, - properties_input_sanitizer=False, - properties_feature_vs_internal_api=FeatureVsInternalApi.FEATURE, - expectation=True, - discovery_rule=Path('1_instance_2_pattern_two.sc'), - discovery_method="", - discovery_rule_accuracy="", - expectation_type='xss', - expectation_sink_file=Path('1_instance_2_pattern_two.php'), - expectation_sink_line=5, - expectation_source_file=Path('1_instance_2_pattern_two.php'), - expectation_source_line=9, - instance_id=1, - ) - - exp_measurements: list[Measurement] = [ - Measurement( - date=date_time_str, - result=True, - expected_result=True, - tool="dummyTool", - version="1", - instance=exp_instance1, - - ) - ] - - mocker.patch("core.pattern_operations.analysis.analyze_pattern_instance", return_value=exp_measurements) - sast_tools: Dict = { - "name": "dummyTool", - "version": "1" + def test_add_testability_pattern_to_lib(self): + test_pattern = create_pattern() + json_path = test_pattern.json_path + pattern_dir = test_pattern.path + tp_lib_dest = Path("/tp_framework/tp_lib") + with patch("core.pattern.Pattern.init_from_json_file_without_pattern_id") as init_pattern_mock, \ + patch("core.pattern.Pattern.copy_to_tplib") as copy_mock, \ + patch("core.pattern_operations.logger.info") as logger_info_mock: + init_pattern_mock.return_value = test_pattern + + pattern_operations.add_testability_pattern_to_lib_from_json("js", json_path, pattern_dir, tp_lib_dest) + + 
init_pattern_mock.assert_called_once_with(json_path, "js", pattern_dir, tp_lib_dest) + copy_mock.assert_called_once() + logger_info_mock.assert_called_once_with(f"The pattern has been copied to {pattern_dir}, You might need to adjust relative path links.") + + @pytest.mark.asyncio + async def test_add_measurement_for_pattern(self): + sample_tp_lib: Path = join_resources_path("sample_patlib") + test_pattern = create_pattern() + now = datetime.now() + with patch("core.pattern.Pattern.init_from_id_and_language") as pattern_init_mock, \ + patch("core.pattern_operations.logger.warning") as warn_logger_mock, \ + patch("core.analysis.analyze_pattern_instance") as analyze_mock: + pattern_init_mock.return_value = test_pattern + await pattern_operations.start_add_measurement_for_pattern("js", [{"dummyTool": "saas"}], 1, now, sample_tp_lib, Path("non_existing_dir")) + + pattern_init_mock.assert_called_once_with(1, "js", sample_tp_lib) + warn_logger_mock.assert_not_called() + analyze_mock.assert_awaited_once_with(test_pattern.instances[0], [{"dummyTool": "saas"}], "js", now, Path("non_existing_dir")) + + @pytest.mark.asyncio + async def test_save_measurement_for_pattern(self): + test_pattern = create_pattern() + fake_measurement = Measurement(datetime.now(), False, True, "some_tool", "saas", test_pattern.instances[0]) + open_mock = mock_open() + with patch("core.pattern_operations.job_list_to_dict") as job_list_to_dict_mock, \ + patch("core.analysis.inspect_analysis_results") as inspect_analysis_results_mock, \ + patch("core.pattern_operations.meas_list_to_tp_dict") as meas_list_to_tp_dict_mock, \ + patch("core.pattern.Pattern.init_from_id_and_language") as pattern_init_mock, \ + patch("core.utils.get_measurement_dir_for_language") as measurement_dir_for_lang_mock, \ + patch("pathlib.Path.mkdir") as mkdir_mock, \ + patch("builtins.open", open_mock, create=True), \ + patch("json.dump") as json_dump_mock: + + meas_list_to_tp_dict_mock.return_value = {1: {1: [fake_measurement]}} 
+ measurement_dir_for_lang_mock.return_value = Path("/") + pattern_init_mock.return_value = test_pattern + await pattern_operations.save_measurement_for_patterns("js", datetime.now(), ["list_of_sast_jobs"], Path("samplelib")) + + job_list_to_dict_mock.assert_called_once_with(["list_of_sast_jobs"]) + inspect_analysis_results_mock.assert_called_once_with(job_list_to_dict_mock.return_value, "js") + meas_list_to_tp_dict_mock.assert_called_with(inspect_analysis_results_mock.return_value) + pattern_init_mock.assert_called_once_with(1, "js", Path("samplelib")) + measurement_dir_for_lang_mock.assert_called_once_with(Path("samplelib"), "js") + mkdir_mock.assert_called_once() + d_tpi_meas_expected = { + "pattern_id": 1, + "instance_id": 1, + "language": "JS", + "instance": "keks" } - - await pattern_operations.start_add_measurement_for_pattern(language, [sast_tools], 2, tmp_path, tmp_path) - assert list(pi1_meas.iterdir())[0].name == "measurement-{}.json".format(date_time_str_file) - - with open(list(pi1_meas.iterdir())[0]) as meas_json: - assert len(json.load(meas_json)) == 1 + d_tpi_meas_expected.update(vars(fake_measurement)) + l_tpi_meas_expected = [d_tpi_meas_expected] + json_dump_mock.assert_called_once_with(l_tpi_meas_expected, open_mock.return_value, indent=4) diff --git a/qualitytests/core/test_pattern_repair.py b/qualitytests/core/test_pattern_repair.py new file mode 100644 index 0000000..9946d36 --- /dev/null +++ b/qualitytests/core/test_pattern_repair.py @@ -0,0 +1,144 @@ +import pytest +from unittest.mock import patch + +from core.repair.pattern_repair import PatternRepair +from qualitytests.qualitytests_utils import join_resources_path, create_pattern, create_instance2 + +class TestPatternRepair: + template_json_dict = { + "name": "Pattern Name", + "description": "", + "family": "code_pattern_LANG", + "tags": ["sast", "LANG"], + "instances": [ + "./IID_instance_ID_pattern_name/IID_instance_ID_pattern_name.json" + ], + "version": "v0.draft" + } + def 
_get_pattern_repair(self) -> PatternRepair: + test_pattern = create_pattern() + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock: + is_file_mock.return_value = True + read_json_mock.return_value = TestPatternRepair.template_json_dict + + repair_tool = PatternRepair(test_pattern) + + read_json_mock.assert_called() + is_file_mock.assert_called() + return repair_tool + + def test_complete_instances_no_new_instance0(self): + test_repair_tool = self._get_pattern_repair() + base_path = test_repair_tool.to_repair.path + instance_path = test_repair_tool.to_repair.instances[0].path + with patch("core.utils.list_directories") as listdir_mock, \ + patch("core.utils.get_json_file") as get_json_file_mock, \ + patch("core.instance.Instance.init_from_json_path") as instance_mock, \ + patch("core.instance.Instance.set_new_instance_path") as i_set_instance_path_mock: + + listdir_mock.return_value = [instance_path] + test_repair_tool._complete_instances() + listdir_mock.assert_called_once_with(base_path) + get_json_file_mock.assert_not_called() + instance_mock.assert_not_called() + i_set_instance_path_mock.assert_not_called() + + def test_complete_instances_no_new_instance1(self): + test_repair_tool = self._get_pattern_repair() + base_path = test_repair_tool.to_repair.path + instance_path = test_repair_tool.to_repair.instances[0].path + with patch("core.utils.list_directories") as listdir_mock, \ + patch("core.utils.get_json_file") as get_json_file_mock, \ + patch("core.instance.Instance.init_from_json_path") as instance_mock, \ + patch("core.instance.Instance.set_new_instance_path") as i_set_instance_path_mock: + + listdir_mock.return_value = [instance_path, base_path / "docs"] + get_json_file_mock.return_value = None + + test_repair_tool._complete_instances() + listdir_mock.assert_called_once_with(base_path) + get_json_file_mock.assert_called_once() + instance_mock.assert_not_called() + 
i_set_instance_path_mock.assert_not_called() + + def test_complete_instances_one_new_instance1(self): + sample_tp_lib = join_resources_path("sample_patlib") + test_repair_tool = self._get_pattern_repair() + test_instance = create_instance2() + base_path = test_repair_tool.to_repair.path + instance_path = test_repair_tool.to_repair.instances[0].path + with patch("core.utils.list_directories") as listdir_mock, \ + patch("core.utils.get_json_file") as get_json_file_mock, \ + patch("core.instance.Instance.init_from_json_path") as instance_mock, \ + patch("core.instance.Instance.set_new_instance_path") as i_set_instance_path_mock: + + listdir_mock.return_value = [instance_path, base_path / "2_instance_test_instance"] + get_json_file_mock.return_value = "some_path" + instance_mock.return_value = test_instance + + test_repair_tool._complete_instances() + listdir_mock.assert_called_once_with(base_path) + get_json_file_mock.assert_called_once() + instance_mock.assert_called_once_with("some_path", 1, "JS", sample_tp_lib) + i_set_instance_path_mock.assert_called_once_with(sample_tp_lib / "JS" / "2_uri" / "1_instance_1_unset_element_array") + + def test_repair_name(self): + test_repair_tool = self._get_pattern_repair() + test_repair_tool.to_repair.name = "Test" + test_repair_tool._repair_name() + assert "Unset Element Array" == test_repair_tool.to_repair.name + + repair_description_testcases = [ + ((True, ""), (True, ""), True, False, False), + ((True, "Some description in file"), (True, ""), False, False, False), + ((False, "Short description in JSON"), (False, ""), False, False, False), + ((False, "A"*141), (False, ""), False, True, True), + ((False, "A"*140), (False, ""), False, False, False), + ((False, "Same description"), (False, "Same description"), False, True, False) + ] + + @pytest.mark.parametrize("pattern_description_ret, instance_description_ret, should_warn, should_info, should_open", repair_description_testcases) + def test_repair_description(self, 
pattern_description_ret, instance_description_ret, should_warn, should_info, should_open): + test_repair_tool = self._get_pattern_repair() + + with patch("core.pattern.Pattern.get_description") as get_pattern_description_mock, \ + patch("core.instance.Instance.get_description") as get_instance_description_mock, \ + patch("core.repair.pattern_repair.logger.warn") as warn_logger, \ + patch("core.repair.pattern_repair.logger.info") as info_logger, \ + patch("pathlib.Path.mkdir") as mkdir_mock, \ + patch("builtins.open") as open_mock: + + get_pattern_description_mock.return_value = pattern_description_ret + get_instance_description_mock.return_value = instance_description_ret + + test_repair_tool._repair_description() + get_pattern_description_mock.assert_called_once() + get_instance_description_mock.assert_called() if not should_warn else get_instance_description_mock.assert_not_called() + open_mock.assert_called_once() if should_open else open_mock.assert_not_called() + mkdir_mock.assert_called_once() if should_open else mkdir_mock.assert_not_called() + warn_logger.assert_called_once() if should_warn else warn_logger.assert_not_called() + info_logger.assert_called() if should_info else info_logger.assert_not_called() + + def test_repair_tags(self): + test_repair_tool = self._get_pattern_repair() + + test_repair_tool.to_repair.tags = [] + test_repair_tool._repair_tags() + assert ["JS", "sast"] == test_repair_tool.to_repair.tags + + test_repair_tool.to_repair.tags = ["sast", "LANG"] + test_repair_tool._repair_tags() + assert ["JS", "sast"] == test_repair_tool.to_repair.tags + + test_repair_tool.to_repair.tags = ["sast", "js"] + test_repair_tool._repair_tags() + assert ["JS", "sast"] == test_repair_tool.to_repair.tags + + test_repair_tool.to_repair.tags = ["sast", "JS"] + test_repair_tool._repair_tags() + assert ["JS", "sast"] == test_repair_tool.to_repair.tags + + test_repair_tool.to_repair.tags = ["sast", "Js"] + test_repair_tool._repair_tags() + assert ["JS", "sast"] 
== test_repair_tool.to_repair.tags diff --git a/qualitytests/core/test_readme_generator.py b/qualitytests/core/test_readme_generator.py new file mode 100644 index 0000000..0a8f93f --- /dev/null +++ b/qualitytests/core/test_readme_generator.py @@ -0,0 +1,185 @@ +import pytest +from pathlib import Path +from unittest.mock import patch + +from core.repair.readme_generator import READMEGenerator +from core.repair.readme_markdown_elements import * +from qualitytests.qualitytests_utils import create_pattern, join_resources_path + +class TestREADMEGenerator: + + def _get_readme_generator(self): + test_pattern = create_pattern() + with patch("pathlib.Path.is_dir") as is_dir_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.utils.read_csv_to_dict") as csv_to_dict_mock: + is_dir_mock.return_value = True + read_json_mock.return_value = {} + csv_to_dict_mock.return_value = {"JS": {"1": {"1": "yes"}}} + + readme_generator = READMEGenerator(test_pattern, "discovery.csv", Path("dont_care"), "mask.json") + + is_dir_mock.assert_called_once() + read_json_mock.assert_called_once() + csv_to_dict_mock.assert_called_once() + return readme_generator + + + init_readme_generator_testcases = [ + # everyting alright + ("discovery.csv", {"JS": {"1": {"1": True}}}, True, "mask.json", None), + # Language "JS" not in discovery dict + ("discovery.csv", {"AWESOME": {"1": {"1": True}}}, True, "mask.json", "Generating README for JS - p1: Cannot find discovery rule results for language JS"), + # discovery dict of language is not of type dict + ("discovery.csv", {"JS": None}, True, "mask.json", "Generating README for JS - p1: Cannot find discovery rule results for language JS"), + # no measurement results + ("discovery.csv", {"JS": {"1": {"1": True}}}, False, "mask.json", "Generating README for JS - p1: Cannot locate `measurement_results` in 'dont_care'"), + ] + + @pytest.mark.parametrize("dr_file, dr_res, is_dir, mask_file, warn", init_readme_generator_testcases) + def 
test_init_readme_generator_discovery_results(self, dr_file, dr_res, is_dir, mask_file, warn): + test_pattern = create_pattern() + with patch("pathlib.Path.is_dir") as is_dir_mock, \ + patch("core.repair.readme_generator.logger.warning") as warn_logger, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.utils.read_csv_to_dict") as csv_to_dict_mock: + is_dir_mock.return_value = is_dir + csv_to_dict_mock.return_value = dr_res + + readme_generator = READMEGenerator(test_pattern, dr_file, Path("dont_care"), mask_file) + + is_dir_mock.assert_called_once() + csv_to_dict_mock.assert_called_once_with(dr_file) + if warn: + warn_logger.assert_called_once_with(warn) + else: + warn_logger.assert_not_called() + + read_json_mock.assert_called_once_with(mask_file) + assert read_json_mock.return_value == readme_generator.mask + + def test_comment(self): + test_readme_gen = self._get_readme_generator() + actual = test_readme_gen._comment() + assert isinstance(actual, list) + assert 1 == len(actual) + assert isinstance(actual[0], MarkdownComment) + + def test_heading(self): + test_readme_gen = self._get_readme_generator() + actual = test_readme_gen._heading() + assert isinstance(actual, list) + assert 1 == len(actual) + assert isinstance(actual[0], MarkdownHeading) + assert 1 == actual[0].level + assert "Test Pattern" == actual[0].content + + def test_description(self): + test_readme_gen = self._get_readme_generator() + with patch("core.pattern.Pattern.get_description") as get_description_mock: + get_description_mock.return_value = (True, "test") + actual = test_readme_gen._pattern_description() + get_description_mock.assert_called_once() + assert isinstance(actual, list) + assert 2 == len(actual) + assert isinstance(actual[0], MarkdownHeading) + assert 2 == actual[0].level + assert "Description" == actual[0].content + assert isinstance(actual[1], MarkdownString) + assert "test" == actual[1].content + + def test_tags(self): + test_readme_gen = 
self._get_readme_generator() + actual = test_readme_gen._tags() + assert isinstance(actual, list) + assert 2 == len(actual) + assert isinstance(actual[0], MarkdownString) + assert "Tags" in actual[0].content + assert isinstance(actual[1], MarkdownString) + assert "Version" in actual[1].content + + def test_pattern_metadata_including_discovery_rule_results(self): + test_readme_gen = self._get_readme_generator() + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.translate_bool") as translate_bool_mock: + actual = test_readme_gen._pattern_metadata() + assert isinstance(actual, list) + assert 2 == len(actual) + assert isinstance(actual[0], MarkdownHeading) + assert 2 == actual[0].level + assert "Overview" == actual[0].content + assert isinstance(actual[1], MarkdownTable) + assert "rule successfull" in actual[1].to_markdown() + + def test_pattern_metadata_without_discovery_rule_results(self): + test_readme_gen = self._get_readme_generator() + test_readme_gen.discovery_rule_results = None + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.translate_bool") as translate_bool_mock: + actual = test_readme_gen._pattern_metadata() + assert isinstance(actual, list) + assert 2 == len(actual) + assert isinstance(actual[0], MarkdownHeading) + assert 2 == actual[0].level + assert "Overview" == actual[0].content + assert isinstance(actual[1], MarkdownTable) + assert "rule successfull" not in actual[1].to_markdown() + + def test_instances(self): + test_readme_gen = self._get_readme_generator() + with patch("core.repair.readme_generator.InstanceREADMEGenerator.generate_md") as generate_md_mock: + actual = test_readme_gen._instances() + generate_md_mock.assert_called_once() + assert generate_md_mock.return_value == actual + + def test_generate_readme(self): + # Could actually assert the complete readme. 
+ # at the moment only assert, that the function works in general + test_readme_gen = self._get_readme_generator() + test_readme_gen.measurement_results = None + test_readme_gen.generate_README() + + # integration test + def test_generate_complete_readme(self): + from core.pattern import Pattern + from core.measurement import Measurement + sample_tp_lib = join_resources_path("sample_patlib") + test_pattern = Pattern.init_from_id_and_language(1, "php", sample_tp_lib) + + with patch("pathlib.Path.is_dir") as is_dir_mock, \ + patch("core.utils.read_json") as mask_json_mock, \ + patch("core.utils.read_csv_to_dict") as discovery_rule_results: + is_dir_mock.return_value = True + mask_json_mock.return_value = {"tool1": "masked_tool"} + discovery_rule_results.return_value = {"PHP": {"1": {"1": "yes", "2": "no"}}} + + readme_generator = READMEGenerator(test_pattern, "discovery.csv", Path("dont_care"), "mask.json") + + is_dir_mock.assert_called_once() + mask_json_mock.assert_called_once() + discovery_rule_results.assert_called_once() + + measurement1 = Measurement("1970-01-01 00:00:01", False, False, "tool1", "saas") + measurement2 = Measurement("1970-01-01 00:00:01", False, True, "tool2", "v2") + measurement3 = Measurement("2023-01-01 00:00:01", True, False, "tool1", "saas") + measurement4 = Measurement("2023-01-01 00:00:01", True, True, "tool2", "v2") + + with patch("core.utils.list_files") as list_files_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.measurement.Measurement.init_from_measurement_dict") as measurement_mock, \ + patch("pathlib.Path.exists") as path_exist_mock: + list_files_mock.return_value = ["file1.md", "file2.md"] + path_exist_mock.return_value = True + measurement_mock.side_effect = [measurement1, measurement2, measurement3, measurement4] + read_json_mock.return_value = [{}, {}] + + actual = readme_generator.generate_README() + + path_exist_mock.assert_called_once() + path_to_expected_readme = sample_tp_lib / "PHP" / 
"1_static_variables" / "README.md" + with open(path_to_expected_readme, "r") as fp: + expected = fp.read() + with open("tmp.md", "w") as f: + f.write(actual) + + assert expected == actual diff --git a/qualitytests/core/test_repair_tool.py b/qualitytests/core/test_repair_tool.py new file mode 100644 index 0000000..9a2d42d --- /dev/null +++ b/qualitytests/core/test_repair_tool.py @@ -0,0 +1,208 @@ +import pytest +from pathlib import Path +from unittest.mock import patch + +from core.pattern import Pattern +from core.repair.repair_tool import RepairTool +from core.exceptions import PatternRepairError +from qualitytests.qualitytests_utils import join_resources_path, create_pattern, create_instance + + +class TestRepairTool: + pattern = create_pattern() + tp_lib: Path = join_resources_path("sample_patlib") + template_json_dict = { + "name": "Pattern Name", + "description": "", + "family": "code_pattern_LANG", + "tags": ["sast", "LANG"], + "instances": [ + "./IID_instance_ID_pattern_name/IID_instance_ID_pattern_name.json" + ], + "version": "v0.draft" + } + + def test_init_pattern_repair0(self): + with patch("pathlib.Path.is_file") as is_file_mock, \ + pytest.raises(PatternRepairError) as e_info: + is_file_mock.return_value = False + + RepairTool(TestRepairTool.pattern, Path("."), Path(".")) + is_file_mock.assert_called_once() + # logger.assert_called_once() + assert "PatternRepair (JS - p1) No template JSON found in " in str(e_info) + + def test_init_pattern_repair1(self): + with patch("pathlib.Path.is_file") as is_file_mock, \ + pytest.raises(PatternRepairError) as e_info: + is_file_mock.side_effect = [True, False] + + RepairTool(TestRepairTool.pattern, Path("."), Path(".")) + is_file_mock.assert_called() + # logger.assert_called_once() + assert "PatternRepair (JS - p1) No schema JSON found in " in str(e_info) + + def test_init_pattern_repair2(self): + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + 
pytest.raises(PatternRepairError) as e_info: + is_file_mock.return_value = True + read_json_mock.return_value = {} + + RepairTool(TestRepairTool.pattern, Path("."), Path(".")) + is_file_mock.assert_called() + read_json_mock.assert_called_once() + assert "PatternRepair (JS - p1) The template JSON" in str(e_info) and " is empty" in str(e_info) + + def test_copy_template(self): + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.repair.repair_tool.logger.info") as logger, \ + patch("shutil.copy") as copy_file_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = TestRepairTool.template_json_dict + + RepairTool(TestRepairTool.pattern, Path("."), Path("."))._copy_template() + + logger.assert_called_once_with("PatternRepair (JS - p1) Copying template JSON.") + copy_file_mock.assert_called_once() + + ensure_json_file_exist_testcases = [ + (False, "test_pattern_path", {"name": "test"}, False, False), + (False, None, {"name": "test"}, True, False), + (True, "", {"name": "test"}, False, False), + (True, "", {"name": "test"}, False, True), + ] + + @pytest.mark.parametrize("is_file_mock_ret, get_pattern_json_ret, read_json_ret, should_call_copy, should_rename_json", ensure_json_file_exist_testcases) + def test_ensure_json_file_exists(self, is_file_mock_ret: bool, + get_pattern_json_ret: Path | None, + read_json_ret: dict | None, + should_call_copy: bool, + should_rename_json: bool): + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.repair.repair_tool.logger.info"), \ + patch("core.utils.get_json_file") as get_pattern_json_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.utils.write_json") as write_json_mock, \ + patch("shutil.copy") as copy_template_mock, \ + patch("shutil.move") as move_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = 
TestRepairTool.template_json_dict + + repair_tool = RepairTool(TestRepairTool.pattern, Path("."), Path(".")) + json_path = get_pattern_json_ret if get_pattern_json_ret else repair_tool.to_repair.json_path + is_file_mock.reset_mock() + is_file_mock.return_value = is_file_mock_ret + get_pattern_json_mock.return_value = get_pattern_json_ret + read_json_mock.return_value = read_json_ret + + if should_rename_json: + repair_tool.to_repair.json_path = repair_tool.to_repair.json_path.parent / "test_json.json" + json_path = repair_tool.to_repair.json_path.parent / "test_json.json" + + repair_tool._ensure_json_file_exists() + if should_call_copy: + copy_template_mock.assert_called_once() + if should_rename_json: + move_mock.assert_called_once() + else: + move_mock.assert_not_called() + is_file_mock.assert_called_once() + read_json_mock.assert_called_with(json_path) + write_json_mock.assert_called_once() + expected_dict = TestRepairTool.template_json_dict + expected_dict["name"] = "test" + assert expected_dict == write_json_mock.call_args.args[1] + + def test_to_json1(self): + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.pattern.Pattern.to_dict") as to_dict_mock, \ + patch("core.utils.write_json") as write_json_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = TestRepairTool.template_json_dict + + repair_tool = RepairTool(TestRepairTool.pattern, Path("."), Path(".")) + + read_json_mock.reset_mock() + read_json_mock.return_value = {} + to_dict_mock.return_value = {} + repair_tool.to_json() + read_json_mock.assert_called_once() + to_dict_mock.assert_called_once() + write_json_mock.assert_not_called() + + def test_to_json2(self): + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.pattern.Pattern.to_dict") as to_dict_mock, \ + patch("core.utils.write_json") as write_json_mock: + + is_file_mock.return_value = 
True + read_json_mock.return_value = TestRepairTool.template_json_dict + + repair_tool = RepairTool(TestRepairTool.pattern, Path("."), Path(".")) + + read_json_mock.reset_mock() + read_json_mock.return_value = {"name": "test"} + to_dict_mock.return_value = {} + repair_tool.to_json() + read_json_mock.assert_called_once() + to_dict_mock.assert_called_once() + write_json_mock.assert_called_once() + + def test_check_paths_pattern_exist_all_correct(self): + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.repair.repair_tool.logger.warning") as warn_logger_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = TestRepairTool.template_json_dict + + repair_tool_pattern = RepairTool(TestRepairTool.pattern, Path("."), Path(".")) + + repair_tool_pattern._check_paths_exists() + warn_logger_mock.assert_not_called() + + def check_path_instance_exist_all_correct(self): + test_instance = create_instance() + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.repair.repair_tool.logger.warning") as warn_logger_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = TestRepairTool.template_json_dict + + repair_tool_instance = RepairTool(test_instance, Path("."), Path(".")) + + repair_tool_instance._check_paths_exists() + warn_logger_mock.assert_not_called() + + def check_path_instance_exist_non_correct(self): + test_instance = create_instance() + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("pathlib.Path.exists") as exist_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.repair.repair_tool.logger.warning") as warn_logger_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = TestRepairTool.template_json_dict + exist_mock.return_value = False + + repair_tool_instance = RepairTool(test_instance, Path("."), Path(".")) + + 
repair_tool_instance._check_paths_exists() + warn_logger_mock.assert_called() + assert test_instance.code_path is None + assert test_instance.expectation_sink_file is None + assert test_instance.expectation_source_file is None + assert test_instance.compile_binary is None + assert test_instance.discovery_rule is None + + + diff --git a/qualitytests/core/test_utils.py b/qualitytests/core/test_utils.py index b86ccd1..3a5e096 100644 --- a/qualitytests/core/test_utils.py +++ b/qualitytests/core/test_utils.py @@ -5,6 +5,7 @@ import config from core import utils from core.exceptions import PatternDoesNotExists, TPLibDoesNotExist, LanguageTPLibDoesNotExist, DiscoveryMethodNotSupported +from unittest.mock import patch, mock_open import qualitytests.qualitytests_utils as qualitytests_utils def setup_three_pattern(tmp_path: Path): @@ -32,82 +33,6 @@ def test_check_tp_lib_2(self, tmp_path): utils.check_tp_lib(tmp_path) - def test_list_pattern_paths_for_language(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - path_list_expected = [p1, p2, p3] - - path_list = utils.list_pattern_paths_for_language(language, tmp_path) - assert sorted(path_list) == sorted(path_list_expected) - - - def test_list_pattern_paths_for_language_void_dir(self, tmp_path): - language: str = "PHP" - tmp_tp_path: Path = tmp_path / language - tmp_tp_path.mkdir() - - path_list_expected = [] - - path_list = utils.list_pattern_paths_for_language(language, tmp_path) - assert sorted(path_list) == sorted(path_list_expected) - - - def test_list_pattern_paths_for_non_existing_language(self, tmp_path): - language: str = "PHP" - with pytest.raises(LanguageTPLibDoesNotExist): - utils.list_pattern_paths_for_language(language, tmp_path) - - - # TODO: to be fixed, misses the json file - @pytest.mark.skip() - def test_list_pattern_instances_by_pattern_id(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - - pi1 = p2 / ("1_instance_" + p2.name) - pi2 = 
p2 / ("2_instance_" + p2.name) - pi3 = p2 / ("3_instance_" + p2.name) - pi1.mkdir() - pi2.mkdir() - pi3.mkdir() - - path_list_expected = [pi1, pi2, pi3] - path_list = utils.list_tpi_paths_by_tp_id(language, 2, tmp_path) - assert sorted(path_list) == sorted(path_list_expected) - - - def test_list_pattern_instances_by_pattern_id_with_non_existing_pattern(self, tmp_path): - language: str = "PHP" - tmp_tp_path: Path = tmp_path / language - tmp_tp_path.mkdir() - - with pytest.raises(PatternDoesNotExists): - utils.list_tpi_paths_by_tp_id(language, 5, tmp_path) - - - # TODO: to be fixed - @pytest.mark.skip() - def test_get_or_create_tp_lib_for_lang_existing_folder(self, tmp_path): - language: str = "PHP" - path_tp_language_exp = tmp_path / language - path_tp_language_exp.mkdir() - path_tp_language_act = utils.get_or_create_language_dir(language, tmp_path) - assert path_tp_language_exp.is_dir() == path_tp_language_act.is_dir() - - - def test_get_or_create_pattern_dir_existing_lang_dir(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - path_pattern_exp = tmp_tp_path / "4_pattern_four" - path_pattern_act = utils.get_or_create_pattern_dir(language, 4, "Pattern Four", tmp_path) - assert path_pattern_exp.is_dir() == path_pattern_act.is_dir() - - - def test_get_or_create_pattern_dir_non_existing_lang_dir(self, tmp_path): - language: str = "PHP" - tmp_tp_path: Path = tmp_path / language - path_pattern_exp = tmp_tp_path / "1_pattern_one" - path_pattern_act = utils.get_or_create_pattern_dir(language, 1, "Pattern One", tmp_path) - assert path_pattern_exp.is_dir() == path_pattern_act.is_dir() - - def test_get_last_measurement_for_pattern_instance(self, tmp_path): m1: Path = tmp_path / "measurement-2022-03-24_10-28-00.json" m2: Path = tmp_path / "measurement-2022-04-10_12-25-00.json" @@ -192,21 +117,96 @@ def test_get_pattern_dir_from_id(self): utils.get_pattern_dir_from_id(99, "PHP", tp_lib) - def test_get_instance_dir_from_id(self): - tp_path = 
qualitytests_utils.join_resources_path("sample_patlib") / "PHP" / "3_global_array" - assert utils.get_instance_dir_from_id(1, tp_path).name == "1_instance_3_global_array" - assert utils.get_instance_dir_from_id(2, tp_path).name == "2_instance_3_global_array" + next_free_pattern_id_test_cases = [ + ([Path('1_instance_test_pattern'), Path('2_instance_test_pattern')], 3, 1), + ([Path('1_instance_test_pattern'), Path('3_instance_test_pattern')], 2, 1), + ([Path('1_instance_test_pattern'), Path('3_instance_test_pattern')], 2, 2), + ] + + @pytest.mark.parametrize("list_dir_ret_value, expected_value, proposed_id", next_free_pattern_id_test_cases) + def test_get_next_free_pattern_id_for_language(self, list_dir_ret_value: list, expected_value: int, proposed_id: int): + tp_lib_path = qualitytests_utils.join_resources_path("sample_patlib") + with patch("core.utils.list_dirs_only") as list_dir_mock: + list_dir_mock.return_value = list_dir_ret_value + assert expected_value == utils.get_next_free_pattern_id_for_language("PHP", tp_lib_path) + + get_relative_paths_testcases = [ + (Path("/tp_framework/file.sc"), Path("/tp_framework"), "./file.sc"), + (Path("/tp_framework/file.sc"), Path("/tp_framework"), "./file.sc"), + (Path("/file.sc"), Path("/tp_framework/PHP"), Path("/file.sc")), + ] + + @pytest.mark.parametrize("file_path, base_path, expected", get_relative_paths_testcases) + def test_get_relative_paths_testcases(self, file_path, base_path, expected): + assert expected == utils.get_relative_paths(file_path, base_path) + + def test_get_id_from_name_error(self): + with pytest.raises(ValueError): + utils.get_id_from_name("name") + + assert 1 == utils.get_id_from_name("1_instance_85_test_pattern") + assert 42 == utils.get_id_from_name("42_test_pattern") + + def test_get_path_or_none(self): + assert utils.get_path_or_none("") is None + assert utils.get_path_or_none(None) is None + assert Path("file") == utils.get_path_or_none("file") + + def test_get_from_dict(self): + assert 
utils.get_from_dict({}, "key1", "key2") is None + assert utils.get_from_dict({"key1": 3}, "key1", "key2") is None + assert utils.get_from_dict({"key1": {"key3": 3}}, "key1", "key2") is None + assert 3 == utils.get_from_dict({"key1": {"key2": 3}}, "key1", "key2") + + get_json_file_testcases = [ + # special shortcut case to avoid warnings + (Path("./docs"), None, None, None), + # works as expected, only one possible JSON file + (Path("./1_instance"), Path("instance.json"), [Path("instance.json")], None), + # No JSON file at all + (Path("./1_instance"), None, [], "Could not find a JSON file in 1_instance"), + # multiple JSON files, none of them named as wanted + (Path("./1_instance"), None, ["instance.json", "insteresting.json"], "Could not determine the right pattern JSON file. Please name it _.json"), + # multiple JSON files, but one is named correctly + (Path("./1_instance"), Path("./1_instance/1_instance.json"), [Path("./1_instance/1_instance.json"), Path("./1_instance/interesting.json")], "Found multiple '.json' files for 1_instance"), + ] + + @pytest.mark.parametrize("path, expected, list_file_return, warn", get_json_file_testcases) + def test_get_json_file(self, path, expected, list_file_return, warn): + with patch("core.utils.logger.warning") as warn_logger, \ + patch("core.utils.list_files") as list_file_mock: + list_file_mock.return_value = list_file_return + + actual = utils.get_json_file(path) + + assert expected == actual + if warn: + warn_logger.assert_called_with(warn) + else: + warn_logger.assert_not_called() + + def test_read_csv_to_dict(self): + csv_data = """pattern_id,instance_id,instance_path,pattern_name,language,discovery_rule,successful + 1,1,,,JS,,no + 1,2,/some/path,Test Pattern,PHP,discovery_rule.sc,yes + """ + expected = { + "JS": { + "1": {"1": "no"} + }, + "PHP": { + "1": {"2": "yes"} + } + } + with patch("builtins.open", mock_open(read_data=csv_data), create=True): + actual = utils.read_csv_to_dict(Path("some_path")) + + assert expected 
== actual with pytest.raises(Exception): - utils.get_instance_dir_from_id(3, tp_path) - - - def test_get_tpi_id_from_jsonpath(self): - jp = qualitytests_utils.join_resources_path( - "sample_patlib") / "PHP" / "3_global_array" / "1_instance_3_global_array" / "1_instance_3_global_array.json" - assert utils.get_tpi_id_from_jsonpath(jp) == 1 - jp = qualitytests_utils.join_resources_path( - "sample_patlib") / "PHP" / "3_global_array" / "1_instance_3_global_array" / "111_instance_3_global_array.json" - assert utils.get_tpi_id_from_jsonpath(jp) == 1 - jp = qualitytests_utils.join_resources_path( - "sample_patlib") / "PHP" / "3_global_array" / "2_instance_3_global_array" / "111_instance_3_global_array.json" - assert utils.get_tpi_id_from_jsonpath(jp) == 2 \ No newline at end of file + actual["NOT_EXISTING_LANG"] + actual["PHP"]["5"] + actual["PHP"]["1"]["3"] + + def test_translate_bool(self): + assert "yes" == utils.translate_bool(True) + assert "no" == utils.translate_bool(False) diff --git a/qualitytests/qualitytests_utils.py b/qualitytests/qualitytests_utils.py index e850519..763d2c6 100644 --- a/qualitytests/qualitytests_utils.py +++ b/qualitytests/qualitytests_utils.py @@ -2,6 +2,7 @@ import subprocess from pathlib import Path from typing import Dict +from unittest.mock import patch import shutil pyexe = sys.executable @@ -13,6 +14,55 @@ resource_path = "resources" cpg_binary_rel_path = "sample_joern/cpg_binary.bin" +example_tpi_dict = { + "description": "Some description", + "code": { + "path": "", + "injection_skeleton_broken": True + }, + "discovery": { + "rule": "", + "method": "joern", + "rule_accuracy": "Perfect", + "notes": "Some notes" + }, + "remediation": { + "notes": "./docs/remediation_notes.md", + "transformation": None, + "modeling_rule": None + }, + "compile": { + "binary": "", + "dependencies": None, + "instruction": None + }, + "expectation": { + "type": "xss", + "sink_file": "", + "sink_line": 5, + "source_file": "", + "source_line": 9, + 
"expectation": True + }, + "properties": { + "category": "S0", + "feature_vs_internal_api": "FEATURE", + "input_sanitizer": False, + "source_and_sink": False, + "negative_test_case": False + } + } + +example_tp_dict = { + "name": "Test Pattern", + "description": "./docs/description.md", + "family": "test_pattern", + "tags": ["sast", "language"], + "instances": [ + "./1_instance_1_test_pattern/1_instance_1_test_pattern.json" + ], + "version": "v0.draft" + } def join_resources_path(relativepath): dirname = Path(__file__).parent.resolve() @@ -63,8 +113,9 @@ def init_test(init, language="PHP"): init["tp_lib_path"] = join_resources_path(temp_meas).resolve() try: shutil.copytree(join_resources_path("sample_patlib"), init["tp_lib_path"]) - except: + except Exception as e: pass + # assert False, f"stop your tests will fail {e}" init["patterns"] = [1,2,3] @@ -100,3 +151,77 @@ def init_sastreport_test(init, mocker): # "tool_interface": "qualitytests.core.sast_test.SastTest" # } # mocker.patch("core.utils.load_sast_specific_config", return_value=mocked_tool_interface) + +def create_instance(): + from core.instance import Instance + sample_tp_lib = join_resources_path("sample_patlib") + with patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock: + + is_file_mock.return_value = True + # read_json_mock.return_value = example_tpi_dict + json_path = sample_tp_lib / "JS" / "1_unset_element_array" / "1_instance_1_unset_element_array" / "1_instance_1_unset_element_array.json" + test_instance = Instance.init_from_json_path(json_path, 1, "js", sample_tp_lib) + + # read_json_mock.assert_called_once() + is_file_mock.assert_called() + is_dir_mock.assert_called() + return test_instance + + +def create_instance2(): + from core.instance import Instance + sample_tp_lib = join_resources_path("sample_patlib") + with patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock: + + is_file_mock.return_value = True + # 
read_json_mock.return_value = example_tpi_dict + json_path = sample_tp_lib / "JS" / "2_uri" / "1_instance_2_uri" / "1_instance_2_uri.json" + test_instance = Instance.init_from_json_path(json_path, 1, "js", sample_tp_lib) + + # read_json_mock.assert_called_once() + is_file_mock.assert_called() + is_dir_mock.assert_called() + return test_instance + + +def create_instance_php(): + from core.instance import Instance + sample_tp_lib = join_resources_path("sample_patlib") + with patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock: + + is_file_mock.return_value = True + # read_json_mock.return_value = example_tpi_dict + json_path = sample_tp_lib / "PHP" / "1_static_variables" / "1_instance_1_static_variables" / "1_instance_1_static_variables.json" + test_instance = Instance.init_from_json_path(json_path, 1, "php", sample_tp_lib) + + # read_json_mock.assert_called_once() + is_file_mock.assert_called() + is_dir_mock.assert_called() + return test_instance + +def create_pattern(): + from core.pattern import Pattern + sample_tp_lib = join_resources_path("sample_patlib") + test_instance = create_instance() + with patch('core.utils.read_json') as read_json_mock, \ + patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock, \ + patch("core.pattern.isinstance") as isinstance_mock, \ + patch('core.instance.Instance.init_from_json_path') as instance_init_mock: + + is_dir_mock.return_value = True + is_file_mock.return_value = True + isinstance_mock.return_value = True + read_json_mock.return_value = example_tp_dict + instance_init_mock.return_value = test_instance + test_tpi = Pattern.init_from_id_and_language(1, "JS", sample_tp_lib) + + read_json_mock.assert_called_once() + is_file_mock.assert_called() + is_dir_mock.assert_called() + isinstance_mock.assert_called() + instance_init_mock.assert_called_once() + return test_tpi diff --git 
a/qualitytests/resources/sample_patlib/PHP/1_static_variables/README.md b/qualitytests/resources/sample_patlib/PHP/1_static_variables/README.md index 08f4d24..260d762 100644 --- a/qualitytests/resources/sample_patlib/PHP/1_static_variables/README.md +++ b/qualitytests/resources/sample_patlib/PHP/1_static_variables/README.md @@ -1,45 +1,55 @@ -# Pattern: Static Variables +[//]: # (This file is automatically generated. If you wish to make any changes, please use the JSON files and regenerate this file using the tpframework.) -## Category +# Static Variables -Variables +Tags: sast, php, php_v7.4.9 -## Definition -What happens for the variables inside the function when the function finish simply they die! and if we run the function again, we will have new variables. But if we want to keep the variable life, we have to use static. At the same time, static variables are challenges for the scanners, because the scanner has to record the last value for the variable with the last call for the function. +Version: None -## Instances +## Description -### Instance 1 +This pattern targets `static` variables. When a function terminates, its local variables are destroyed. When we run that function again, new local variables will be allocated. If we want to keep one of those variables alive, we can use the `static` keyword. Static variables may be challenging for SAST tools: is a SAST tool considering a static variable used in a function as alive? -- CATEGORY: S0 -- FEATURE vs INTERNAL API: FEATURE -- INPUT SANITIZERS: NO -- SOURCES AND SINKS: NO -- NEGATIVE TEST CASES: NO -- CODE: +## Overview + +| Instances | has discovery rule | discovery method | rule successfull | +|---------------------------|----------------------|--------------------|--------------------| +| [1 Instance](#1-instance) | yes | joern | yes | + +## 1 Instance + +### Code ```php + +More + +
+ -| Tool | RIPS | phpSAFE | WAP | Progpilot | Comm_1 | Comm_2 | Correct | -| ------------- | ---- | ------- | ---- | --------- | ------- | --------- | ------- | -| Vulnerability | NO | NO | NO | NO | NO | NO | YES | -Measurements Date: 8 June 2021 +### Compile -- OPCODE: + ```bash $_main: ; (lines=13, args=0, vars=1, tmps=5) @@ -74,16 +84,50 @@ L8 (7): EXT_STMT L9 (7): RETURN null ``` -- DISCOVERY: +
-In this pattern, I focus on the static variables in functions not the static properties in objects nor static methods. To discover the static variables in opcode, I search for the opcode BIND_STATIC. -```bash -cpg.call(".*BIND_STATIC.*").location.l -``` -For regex, I can search for the keyword but I cannot distinguish between static variables and static properties. -- PRECONDITIONS: - 1. -- TRANSFORMATION: +
+ + +### Discovery + + + +The `BIND_STATIC` opcode is only for static variables that are normally used inside code blocks. The SAST tools may not be able to keep the proper values for these static variables. As such the discovery rule should be accurate as it is + +```scala +val start_line = (name, "1_static_variables_iall", cpg.call(".*BIND_STATIC.*").location.toJson); ``` -``` \ No newline at end of file +| discovery method | expected accuracy | +|--------------------|---------------------| +| joern | Perfect | + +
+ +
+ + +### Measurement + + + +| Tool | masked_tool | tool2 | Ground Truth | +|-------------|---------------|---------|----------------| +| 01 Jan 1970 | no | no | yes | +| 01 Jan 2023 | yes | yes | yes | + +
+ +
+ + +### Remediation + + + +Likely this tarpit should be solved at the SAST tool side. Transforming a static variable into a non-static one is unfeasible. It is unclear how to create a modeling rule for the static keyword. + +
+ + diff --git a/qualitytests/resources/sample_patlib/PHP/3_global_array/1_instance_3_global_array/__P@TCHED___1_instance_3_global_array.sc b/qualitytests/resources/sample_patlib/PHP/3_global_array/1_instance_3_global_array/__P@TCHED__1_instance_3_global_array.sc similarity index 100% rename from qualitytests/resources/sample_patlib/PHP/3_global_array/1_instance_3_global_array/__P@TCHED___1_instance_3_global_array.sc rename to qualitytests/resources/sample_patlib/PHP/3_global_array/1_instance_3_global_array/__P@TCHED__1_instance_3_global_array.sc diff --git a/requirements-dev.txt b/requirements-dev.txt index 4ad98ad..58498d3 100644 Binary files a/requirements-dev.txt and b/requirements-dev.txt differ diff --git a/requirements.txt b/requirements.txt index 4c05084..240f687 100644 Binary files a/requirements.txt and b/requirements.txt differ diff --git a/tp_framework/cli/interface.py b/tp_framework/cli/interface.py index ec53651..8bc3560 100644 --- a/tp_framework/cli/interface.py +++ b/tp_framework/cli/interface.py @@ -9,54 +9,45 @@ import config from core import utils, pattern_operations, discovery, measure, errors, report_for_sast -from core.exceptions import PatternValueError +from core.exceptions import PatternInvalid, AddPatternError +from core.pattern import Pattern # CRUD patterns # TODO - add_pattern: develop UPDATE, DELETE, READ (maybe this one we do not need)... 
## CREATE/ADD -def add_pattern(pattern_dir: str, language: str, measure: bool, tools: list[Dict], pattern_json: str = None, +def add_pattern(tp_dir: str, language: str, measure: bool, tools: list[Dict], tp_json: str = None, tp_lib_path: Path = Path(config.DEFAULT_TP_LIBRARY_ROOT_DIR).resolve()): # TODO - add_pattern: add some printing message for the user - pattern_dir_path: Path = Path(pattern_dir).resolve() - if not pattern_dir_path.is_dir(): - print(errors.patternFolderNotFound(pattern_dir_path)) + tp_dir_path: Path = Path(tp_dir).resolve() + if not tp_dir_path.is_dir(): + print(errors.patternFolderNotFound(tp_dir_path)) return - if not pattern_json: - # TODO - add_pattern: we could automatically find the json file - default_pattern_json = f"{pattern_dir_path.name}.json" - pattern_json_path: Path = pattern_dir_path / default_pattern_json - if not pattern_json_path.exists(): - print(errors.patternDefaultJSONNotFound(default_pattern_json)) - return - else: - # TODO - add_pattern: handle for both branches the case in which the json file does not exist? 
- pattern_json_path: Path = Path(pattern_json).resolve() - + tp_json_path = Path(tp_json) if tp_json else utils.get_json_file(tp_dir_path) + if not tp_json_path: + print(errors.patternDefaultJSONNotFound(tp_dir)) + return + tp_lib_path.mkdir(exist_ok=True, parents=True) try: - created_pattern_path: Path = pattern_operations.add_testability_pattern_to_lib_from_json( + created_tp: Pattern = pattern_operations.add_testability_pattern_to_lib_from_json( language, - pattern_json_path, - pattern_dir_path, + tp_json_path, + tp_dir_path, tp_lib_path ) - created_pattern_id: int = utils.get_id_from_name(created_pattern_path.name) - except PatternValueError as e: + except (PatternInvalid, AddPatternError) as e: print(e) raise - except json.JSONDecodeError as e: - print(errors.patternJSONDecodeError()) - raise except Exception as e: logger.exception(e) print(errors.unexpectedException(e)) raise if measure: - asyncio.run(measure_list_patterns([created_pattern_id], language, tools=tools, tp_lib_path=tp_lib_path)) + asyncio.run(measure_list_patterns([created_tp.pattern_id], language, tools=tools, tp_lib_path=tp_lib_path)) # Discovery @@ -183,4 +174,30 @@ def check_discovery_rules(language: str, pattern_ids: list[int], print(f" - num errors: {d_res['counters']['errors']}") if export_file: print(f"- csv file available here: {output_dir / export_file}") - print(f"- log file available here: {output_dir / config.logfile}") \ No newline at end of file + print(f"- log file available here: {output_dir / config.logfile}") + + +def repair_patterns(language: str, pattern_ids: list, + masking_file: Path, include_README: bool, + measurement_results: Path, checkdiscoveryrule_results: Path, + output_dir: Path, tp_lib_path: Path): + print("Pattern Repair started...") + should_include_readme = not include_README + utils.check_tp_lib(tp_lib_path) + if should_include_readme: + utils.check_file_exist(checkdiscoveryrule_results) + utils.check_file_exist(masking_file, ".json") if masking_file else None 
+ utils.check_measurement_results_exist(measurement_results) + output_dir.mkdir(exist_ok=True, parents=True) + utils.add_loggers(output_dir) + + for tp_id in pattern_ids: + try: + pattern = Pattern.init_from_id_and_language(tp_id, language, tp_lib_path) + except PatternInvalid as e: + print(f"Failed to init pattern: {tp_id} due to {e}") + continue + pattern.repair(should_include_readme, + discovery_rule_results=checkdiscoveryrule_results, + measurement_results=measurement_results, + masking_file=masking_file) \ No newline at end of file diff --git a/tp_framework/cli/main.py b/tp_framework/cli/main.py index 8ebf92c..e092be2 100644 --- a/tp_framework/cli/main.py +++ b/tp_framework/cli/main.py @@ -20,6 +20,7 @@ def main(args=None): manual_discovery_cmd = tpf_commands.ManualDiscovery() sastreport_cmd = tpf_commands.SASTReport() check_discovery_rules_cmd = tpf_commands.CheckDiscoveryRules() + pattern_repair_cmd = tpf_commands.PatternRepair() # Sub-parsers subparser = parser.add_subparsers(title="Commands", dest="command", metavar="") add_pattern_cmd.add_command_subparser(subparser) @@ -29,6 +30,7 @@ def main(args=None): manual_discovery_cmd.add_command_subparser(subparser) sastreport_cmd.add_command_subparser(subparser) check_discovery_rules_cmd.add_command_subparser(subparser) # TODO: in-progress, not tested + pattern_repair_cmd.add_command_subparser(subparser) # Parsing args: Namespace = parser.parse_args(args) @@ -50,6 +52,8 @@ def main(args=None): sastreport_cmd.execute_command(args) case "checkdiscoveryrules": check_discovery_rules_cmd.execute_command(args) + case "patternrepair": + pattern_repair_cmd.execute_command(args) case other: print("Command not valid...") exit(1) diff --git a/tp_framework/cli/tpf_commands.py b/tp_framework/cli/tpf_commands.py index febd519..e1674f9 100644 --- a/tp_framework/cli/tpf_commands.py +++ b/tp_framework/cli/tpf_commands.py @@ -9,6 +9,7 @@ from core.exceptions import InvalidSastTools from core.errors import invalidSastTools +from 
core.pattern import Pattern class Command(ABC): @@ -514,6 +515,93 @@ def execute_command(self, args): export_file=args.export, output_dir=output_dir) +class PatternRepair(Command): + + # overriding abstract method + def add_command_subparser(self, subparser): + pattern_repair_parser = subparser.add_parser("patternrepair", + help="Repair patterns in your catalogue, helps you keeping the structure of all patterns the same") + pattern_repair_parser_pattern_selection_mode = pattern_repair_parser.add_mutually_exclusive_group(required=True) + pattern_repair_parser.add_argument( + "-l", "--language", + metavar="LANGUAGE", + dest="language", + required=True, + help="Programming language targeted" + ) + pattern_repair_parser_pattern_selection_mode.add_argument( + "-p", "--patterns", + metavar="PATTERN_ID", + dest="patterns", + nargs="+", + type=int, + help="Specify pattern(s) ID(s) to test for discovery" + ) + pattern_repair_parser_pattern_selection_mode.add_argument( + "--pattern-range", + metavar="RANGE_START-RANGE_END", + dest="pattern_range", + type=str, + help="Specify pattern ID range separated by`-` (ex. 
10-50)" + ) + pattern_repair_parser_pattern_selection_mode.add_argument( + "-a", "--all-patterns", + dest="all_patterns", + action="store_true", + help="Test discovery for all available patterns" + ) + pattern_repair_parser.add_argument( + "--tp-lib", + metavar="TP_LIB_DIR", + dest="tp_lib", + help=f"Absolute path to alternative pattern library, default resolves to `./{config.TP_LIB_REL_DIR}`" + ) + pattern_repair_parser.add_argument( + "--output-dir", + metavar="OUTPUT_DIR", + dest="output_dir", + help=f"Absolute path to the folder where outcomes (e.g., log file, export file if any) will be stored, default resolves to `./{config.RESULT_REL_DIR}`" + ) + pattern_repair_parser.add_argument( + "--masking-file", + metavar="MASKING_FILE", + dest="masking_file", + help=f"Absolute path to a json file, that contains a mapping, if the name for some measurement tools should be kept secret, default is None" + ) + pattern_repair_parser.add_argument( + "--measurement-results", + metavar="MEASUREMENT_DIR", + dest="measurement_dir", + help=f"Absolute path to the folder where measurement results are stored, default resolves to `./{config.MEASUREMENT_REL_DIR}`" + ) + pattern_repair_parser.add_argument( + "--checkdiscoveryrules-results", + metavar="CHECKDISCOVERYRULES_FILE", + dest="checkdiscoveryrules_file", + help=f"Absolute path to the csv file, where the results of the `checkdiscoveryrules` command are stored, default resolves to `./checkdiscoveryrules.csv`" + ) + pattern_repair_parser.add_argument( + "--skip-readme", + dest="skip_readme", + action="store_true", + help="If set, the README generation is skipped." 
+ ) + # overriding abstract method + def execute_command(self, args): + language: str = args.language.upper() + tp_lib_path: str = parse_tp_lib(args.tp_lib) + l_pattern_id = sorted(parse_patterns(args.all_patterns, args.pattern_range, args.patterns, + tp_lib_path, language, init_patterns=False)) + output_dir: Path = parse_dir_or_file(args.output_dir, config.RESULT_DIR, "Output directory") + measurement_results: Path = parse_dir_or_file(args.measurement_dir, config.MEASUREMENT_REL_DIR, "Measurement directory") + checkdiscoveryrules_results: Path = parse_dir_or_file(args.checkdiscoveryrules_file, "checkdiscoveryrules.csv", "Checkdiscoveryrules csv file") + masking_file: Path or None = parse_dir_or_file(args.masking_file, "mask.json","Masking file") if args.masking_file else None + interface.repair_patterns(language=language, pattern_ids=l_pattern_id, + masking_file=masking_file, include_README=args.skip_readme, + measurement_results=measurement_results, checkdiscoveryrule_results=checkdiscoveryrules_results, + output_dir=output_dir, tp_lib_path=tp_lib_path) + + # class Template(Command): # # # overriding abstract method @@ -560,29 +648,49 @@ def parse_tool_list(tools: list[str]): exit(1) -def parse_patterns(all_patterns: bool, pattern_range: str, patterns, tp_lib_path: Path, language: str): +def parse_patterns(all_patterns: bool, pattern_range: str, patterns, tp_lib_path: Path, language: str, init_patterns: bool = True): + # is this necessary? 
Should be ensured by `.add_mutually_exclusive_group(required=True)` in the parser try: assert sum(bool(e) for e in [all_patterns, pattern_range, patterns]) == 1 # these elements are in mutual exclusion except Exception as e: print("The following parameters are in mutual exclusion: `--all-patterns`, `--pattern-range`, and `--patterns`") exit(1) + id_list: list[int] = [] if all_patterns: lang_tp_lib_path: Path = tp_lib_path / language utils.check_lang_tp_lib_path(lang_tp_lib_path) try: id_list: list[int] = list(map(lambda d: utils.get_id_from_name(d.name), utils.list_dirs_only(lang_tp_lib_path))) - return id_list except Exception as e: print("Some patterns could not be properly fetched from the pattern library.") + print(e) exit(1) - if pattern_range: + elif pattern_range: try: spattern_range: str = pattern_range.split("-") - pattern_id_list: list[int] = list(range(int(spattern_range[0]), int(spattern_range[1]) + 1)) - return pattern_id_list + id_list: list[int] = list(range(int(spattern_range[0]), int(spattern_range[1]) + 1)) except Exception as e: - print("Pattern range could not be properly parsed. ") + print("Pattern range could not be properly parsed.") + print(e) exit(1) - if patterns and len(patterns) > 0: - return patterns \ No newline at end of file + elif patterns and len(patterns) > 0: + id_list = patterns + # init a Pattern to make sure, all the patterns that should be used for the task are valid. 
+ # return only the pattern_id, to be compatible with current implementation + # Could refactor this to just use pattern and instance objects, main purpose is validation + return sorted([Pattern.init_from_id_and_language(idx, language, tp_lib_path).pattern_id \ + for idx in id_list]) if init_patterns else id_list + + +def parse_dir_or_file(path_to_file_or_dir: str, + default_path: str, + exception_prefix: str = "") -> Path: + if not path_to_file_or_dir: + path_to_file_or_dir: str = str(default_path) + try: + path_to_file_or_dir_as_path: Path = Path(path_to_file_or_dir).resolve() + return path_to_file_or_dir_as_path + except Exception as e: + print(f"{exception_prefix} does not exist: {path_to_file_or_dir}") + exit(1) diff --git a/tp_framework/core/analysis.py b/tp_framework/core/analysis.py index 254be9a..e0f7faa 100644 --- a/tp_framework/core/analysis.py +++ b/tp_framework/core/analysis.py @@ -7,7 +7,6 @@ from core import loggermgr logger = logging.getLogger(loggermgr.logger_name(__name__)) -import core.utils from core import utils from core.instance import Instance from core.measurement import Measurement @@ -15,7 +14,7 @@ from core.sast_job_runner import InQueue, OutQueue, SASTjob -async def analyze_pattern_instance(instance: Instance, instance_dir: Path, +async def analyze_pattern_instance(instance: Instance, tools: list[Dict], language: str, date: datetime, output_dir: Path) -> list[SASTjob]: logger.debug(f"SAST measurement - prepare SAST jobs for pattern {instance.pattern_id} instance {instance.instance_id} with {len(tools)} tools: started...") @@ -34,7 +33,7 @@ async def analyze_pattern_instance(instance: Instance, instance_dir: Path, tool_name: str = tool["name"] tool_version: str = tool["version"] - sast_config: Dict = core.utils.load_sast_specific_config(tool_name, tool_version) + sast_config: Dict = utils.load_sast_specific_config(tool_name, tool_version) sast_interface_class: str = sast_config["tool_interface"] sast_class = 
utils.get_class_from_str(sast_interface_class) @@ -45,7 +44,7 @@ async def analyze_pattern_instance(instance: Instance, instance_dir: Path, # TODO: what about using the sast_job object in the queue? InQueue().put_nowait((job_id, tool_name, tool_version, instance, date, - sast.launcher(instance_dir, language, output_dir, lib_dir=lib_dir, measurement=True))) + sast.launcher(instance.path, language, output_dir, lib_dir=lib_dir, measurement=True))) l_status_tpi.append(sast_job) except Exception as e: logger.warning(f"SAST measurement - failed for pattern {instance.pattern_id} instance {instance.instance_id} with tool {tool}. Instance will be ignored. Exception raised: {utils.get_exception_message(e)}") @@ -73,7 +72,7 @@ async def inspect_analysis_results(d_job: Dict, language) -> list[Measurement]: # if not csv_res, then the SAST job would have failed and no measurement in that case if csv_res: - sast_config: Dict = core.utils.load_sast_specific_config(tool_name, tool_version) + sast_config: Dict = utils.load_sast_specific_config(tool_name, tool_version) sast_interface_class: str = sast_config["tool_interface"] sast_class = utils.get_class_from_str(sast_interface_class) diff --git a/tp_framework/core/discovery.py b/tp_framework/core/discovery.py index bf9e0c8..0eb5ce3 100644 --- a/tp_framework/core/discovery.py +++ b/tp_framework/core/discovery.py @@ -17,8 +17,8 @@ CPGLanguageNotSupported, DiscoveryRuleError, DiscoveryRuleParsingResultError, InvalidSastTools from core.measurement import Measurement -from core.instance import Instance, instance_from_dict, load_instance_from_json -from core.pattern import get_pattern_by_pattern_id +from core.instance import Instance +from core.pattern import Pattern # mand_finding_joern_keys = ["filename", "methodFullName", "lineNumber"] mand_finding_joern_keys = ["filename", "lineNumber"] @@ -154,7 +154,7 @@ def patch_PHP_discovery_rule(discovery_rule: Path, language: str, output_dir: Pa newlines = [] changed = False for l in lines: - 
newl = l.replace(t_str, p_str) if re.match('\s*val x\d+ = \(name, "[^"]+", cpg\.call.*(\.location\.toJson)\);\s*', l) else l + newl = l.replace(t_str, p_str) if re.match(r'\s*val x\d+ = \(name, "[^"]+", cpg\.call.*(\.location\.toJson)\);\s*', l) else l newlines.append(newl) if newl != l: changed = True @@ -312,25 +312,21 @@ def discovery_under_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, ito } # l_not_measured_tp_id.append(tp_id) continue - l_tpi_jsonpath = utils.list_tpi_paths_by_tp_id(language, tp_id, tp_lib) # TODO: do we need this later? - d_tpi_id_path = {} - for jp in l_tpi_jsonpath: - d_tpi_id_path[utils.get_tpi_id_from_jsonpath(jp)] = jp + target_pattern = Pattern.init_from_id_and_language(tp_id, language, tp_lib) l_meas_tpi_path = utils.list_dirs_only(meas_tp_path) # computing not supported tp instances (tpi) to be discovered d_res_tpi = {} d_dr_executed = {} - for tpi_id in d_tpi_id_path: - msgpre = f"pattern {tp_id} instance {tpi_id} - " - tpi_json_path = d_tpi_id_path[tpi_id] + for tpi in target_pattern.instances: + msgpre = f"pattern {tp_id} instance {tpi.instance_id} - " try: - meas_tpi_path = utils.get_instance_dir_from_list(tpi_id, l_meas_tpi_path) + meas_tpi_path = utils.get_instance_dir_from_list(tpi.instance_id, l_meas_tpi_path) except: logger.warning( f"{msgpre}No measurements for this instance. {msgpost}") - d_res_tpi[tpi_id] = { + d_res_tpi[tpi.instance_id] = { "measurement": "not_found", - "jsonpath": tpi_json_path + "jsonpath": tpi.json_path } continue l_last_meas = measurement.load_measurements(utils.get_last_measurement_for_pattern_instance(meas_tpi_path), @@ -340,16 +336,16 @@ def discovery_under_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, ito if not meas_tpi_by_tools: logger.warning( f"{msgpre}No measurements of the tools specified ({[t['name'] + ':' + t['version'] for t in tools]}) for the instance. 
{msgpost}") - d_res_tpi[tpi_id] = { + d_res_tpi[tpi.instance_id] = { "measurement": "not_found", - "jsonpath": tpi_json_path + "jsonpath": tpi.json_path } continue - tpi_instance = meas_tpi_by_tools[0].instance + tpi = meas_tpi_by_tools[0].instance d_tpi = { - "instance": tpi_instance, + "instance": tpi, "measurement": "supported", - "jsonpath": tpi_json_path, + "jsonpath": tpi.json_path, "discovery": {} } # discovery continue iff at least one tool not supporting the tpi @@ -366,9 +362,9 @@ def discovery_under_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, ito d_tpi["measurement"] = "not_supported" # discovery per tpi measurement_stop: bool = d_tpi["measurement"] not in ["ignore", "not_supported"] - d_tpi["discovery"] = discovery_for_tpi(tpi_instance, tpi_json_path, cpg, disc_output_dir, + d_tpi["discovery"] = discovery_for_tpi(tpi, cpg, disc_output_dir, measurement_stop=measurement_stop, already_executed=d_dr_executed) - d_res_tpi[tpi_id] = d_tpi + d_res_tpi[tpi.instance_id] = d_tpi d_res[tp_id]["instances"] = d_res_tpi # post-process results and export them @@ -388,29 +384,15 @@ def discovery_ignore_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, # loop over testability patterns (tp) to be discovered for tp_id in l_tp_id: d_res[tp_id] = {"measurement_found": None} - l_tpi_jsonpath = utils.list_tpi_paths_by_tp_id(language, tp_id, tp_lib) # TODO: do we need this later? 
- d_tpi_id_path = {} - for jp in l_tpi_jsonpath: - d_tpi_id_path[utils.get_tpi_id_from_jsonpath(jp)] = jp + target_pattern = Pattern.init_from_id_and_language(tp_id, language, tp_lib) # loop over tp instances (tpi) to be discovered d_res_tpi = {} d_dr_executed = {} - for tpi_id in d_tpi_id_path: - tpi_json_path = d_tpi_id_path[tpi_id] - tpi_json_rel = os.path.relpath(tpi_json_path, start=tp_lib) - # get the instance - try: - tpi_instance = load_instance_from_json(tpi_json_rel, tp_lib, language) # TODO: this ignores the language - except: - # instance for which no discovery could be done as instance not properly loaded - logger.exception(f"Failed to decode the metadata associated to the instance `{tpi_json_rel}`...") - d_tpi = {"instance": None, "measurement": "ignored", "jsonpath": None} - d_res_tpi[tpi_id] = d_tpi - continue - d_tpi = {"instance": tpi_instance, "measurement": "ignored", "jsonpath": tpi_json_path, - "discovery": discovery_for_tpi(tpi_instance, tpi_json_path, cpg, disc_output_dir, + for instance in target_pattern.instances: + d_tpi = {"instance": instance, "measurement": "ignored", "jsonpath": instance.json_path, + "discovery": discovery_for_tpi(instance, cpg, disc_output_dir, measurement_stop=False, already_executed=d_dr_executed)} - d_res_tpi[tpi_id] = d_tpi + d_res_tpi[instance.instance_id] = d_tpi d_res[tp_id]["instances"] = d_res_tpi # post-process results and export them @@ -420,7 +402,7 @@ def discovery_ignore_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, return d_results -def discovery_for_tpi(tpi_instance: Instance, tpi_json_path: Path, cpg: Path, disc_output_dir: Path, +def discovery_for_tpi(tpi_instance: Instance, cpg: Path, disc_output_dir: Path, measurement_stop: bool = False, already_executed: dict = {}) -> Dict: msgpre = f"pattern {tpi_instance.pattern_id} instance {tpi_instance.instance_id} - " d_tpi_discovery = dict.fromkeys(["rule_path", "method", "rule_name", "rule_accuracy", "rule_hash", \ @@ -428,7 +410,7 @@ def 
discovery_for_tpi(tpi_instance: Instance, tpi_json_path: Path, cpg: Path, di # execute the discovery rule if not measurement_stop and tpi_instance.discovery_rule: # prepare and execute the discovery rule (if not done yet) - dr = (tpi_json_path.parent / tpi_instance.discovery_rule).resolve() + dr = tpi_instance.discovery_rule if not dr.exists(): d_tpi_discovery["rule_path"] = str(dr) logger.error(f"{msgpre}Discovery rule is specified in the instance, but corresponding file {dr} does not exists...") @@ -733,16 +715,14 @@ def get_check_discovery_rule_result_header(): ] -def get_check_discovery_rule_result(pattern_id, language, - instance_id=None, instance_path=None, pattern_name=None, - discovery_rule=None, successful="error") -> Dict: +def get_check_discovery_rule_result(pattern: Pattern, instance: Instance | None= None, successful="error") -> Dict: return { - "pattern_id": pattern_id, - "instance_id": instance_id, - "instance_path": instance_path, - "pattern_name": pattern_name, - "language": language, - "discovery_rule": discovery_rule, + "pattern_id": pattern.pattern_id, + "instance_id": instance.instance_id if instance else None, + "instance_path": instance.path if instance else None, + "pattern_name": pattern.name, + "language": pattern.language, + "discovery_rule": instance.discovery_rule if instance else None, "successful": successful } @@ -758,90 +738,79 @@ def check_discovery_rules(language: str, l_tp_id: list[int], unsuccess = 0 missing = 0 err = 0 + num_patterns = len(l_tp_id) for i, tp_id in enumerate(l_tp_id): logger.info(utils.get_tp_op_status_string( - (i + 1, len(l_tp_id), tp_id) # tp_info + (i + 1, num_patterns, tp_id) # tp_info )) try: - target_tp, p_dir = get_pattern_by_pattern_id(language, tp_id, tp_lib_path) - l_tpi_dir: list[Path] = utils.list_tpi_paths_by_tp_id( - language, tp_id, tp_lib_path - ) + target_tp = Pattern.init_from_id_and_language(tp_id, language, tp_lib_path) + num_instances = len(target_tp.instances) except Exception as e: + # 
should not happen at all! And should be removed and a list of patterns should be parsed to that function logger.warning( f"Either pattern id {tp_id} does not exist, or its file system structure is not valid, or its instances cannot be fetched. Exception raised: {utils.get_exception_message(e)}") - res = get_check_discovery_rule_result(tp_id, language) + res = get_check_discovery_rule_result(pattern=target_tp) results.append(res) err += 1 continue - for j, path in enumerate(l_tpi_dir): + instance: Instance + for j, instance in enumerate(target_tp.instances): try: - target_src = path.parent - # TODO: use a function to load an instance, in general it looks to me we are going a bit back and forth - # from json and file system. - # Also: this loading seems to be used in many other places (e.g., start_add_measurement_for_pattern)... - with open(path) as instance_json_file: - instance_json: Dict = json.load(instance_json_file) - - tpi_id = utils.get_id_from_name(path.name) + tpi_id = instance.instance_id logger.info(utils.get_tpi_op_status_string( - (i + 1, len(l_tp_id), tp_id), - t_tpi_info=(j + 1, len(l_tpi_dir), tpi_id) + (i + 1, num_patterns, tp_id), + t_tpi_info=(j + 1, num_instances, tpi_id) )) - target_instance: Instance = instance_from_dict(instance_json, target_tp, language, tpi_id) - if target_instance.discovery_rule: - dr_path = target_src / target_instance.discovery_rule + if instance.discovery_rule: + dr_path = instance.discovery_rule if not dr_path.is_file(): logger.warning( f"Instance {tpi_id} of pattern {tp_id}: the discovery rule {dr_path} does not exist") - res = get_check_discovery_rule_result(tp_id, language, instance_id=tpi_id, - instance_path=path, discovery_rule=dr_path) + res = get_check_discovery_rule_result(pattern=target_tp, instance=instance) results.append(res) err += 1 continue + target_src = instance.path + build_name, disc_output_dir = utils.get_operation_build_name_and_dir( "check_discovery_rules", target_src, language, output_dir) - 
d_results = manual_discovery(target_src, target_instance.discovery_method, [dr_path], language, + d_results = manual_discovery(target_src, instance.discovery_method, [dr_path], language, build_name, disc_output_dir, timeout_sec=timeout_sec) # Inspect the d_results if d_results["findings"] and any( f["result"] == discovery_result_strings["discovery"] for f in d_results["findings"]): - res = get_check_discovery_rule_result(tp_id, language, instance_id=tpi_id, - instance_path=path, pattern_name=target_tp.name, - discovery_rule=dr_path, successful="yes") + res = get_check_discovery_rule_result(pattern=target_tp, instance=instance, successful="yes") success += 1 else: - res = get_check_discovery_rule_result(tp_id, language, instance_id=tpi_id, - instance_path=path, pattern_name=target_tp.name, - discovery_rule=dr_path, successful="no") + res = get_check_discovery_rule_result(pattern=target_tp, instance=instance, successful="no") unsuccess += 1 results.append(res) else: logger.info( f"Instance {tpi_id} of pattern {tp_id}: the discovery rule is not provided for the pattern") - res = get_check_discovery_rule_result(tp_id, language, instance_id=tpi_id, - instance_path=path, successful="missing") + res = get_check_discovery_rule_result(pattern=target_tp, instance=instance, successful="missing") results.append(res) missing += 1 logger.info(utils.get_tpi_op_status_string( - (i + 1, len(l_tp_id), tp_id), - t_tpi_info=(j + 1, len(l_tpi_dir), tpi_id), + (i + 1, num_patterns, tp_id), + t_tpi_info=(j + 1, num_instances, tpi_id), status="done." )) except Exception as e: logger.warning( - f"Something went wrong for the instance at {path} of the pattern id {tp_id}. Exception raised: {utils.get_exception_message(e)}") - res = get_check_discovery_rule_result(tp_id, language, pattern_name=target_tp.name, instance_path=path) + f"Something went wrong for the instance at {instance.path} of the pattern id {tp_id}. 
Exception raised: {utils.get_exception_message(e)}") + res = get_check_discovery_rule_result(pattern=target_tp, instance=instance) results.append(res) err += 1 continue logger.info(utils.get_tp_op_status_string( - (i + 1, len(l_tp_id), tp_id), # tp_info + (i + 1, num_patterns, tp_id), # tp_info status="done." )) - logger.info(f"Check/Test discovery rules for {len(l_tp_id)} patterns: done") + logger.info(f"Check/Test discovery rules for {num_patterns} patterns: done") d_res = { "results": results, "counters": { diff --git a/tp_framework/core/errors.py b/tp_framework/core/errors.py index 8d0e99b..bedfdf8 100644 --- a/tp_framework/core/errors.py +++ b/tp_framework/core/errors.py @@ -2,9 +2,8 @@ def patternDoesNotExists(pattern_id): return f"Specified Pattern `{pattern_id}` does not exists." -def patternValueError(): - return f"Error during Pattern initialization." - +def patternInvalidError(e): + return f"{e} Pattern is invalid." def patternKeyError(e): return f"Key {e} was not found in pattern metadata." @@ -23,8 +22,8 @@ def instanceDoesNotExists(instance_id=None, ref_metadata=None): return message -def patternFolderNotFound(pattern_dir_path): - return f"`Pattern source folder {pattern_dir_path}` not found or is not a folder." +def instanceInvalidError(e): + return f"{e} Instance is invalid." def patternDefaultJSONNotFound(default_pattern_json): @@ -84,4 +83,16 @@ def discoveryRuleParsingResultError(): def unexpectedException(e): - return f"Unexpected exception triggered: {e}." \ No newline at end of file + return f"Unexpected exception triggered: {e}." + + +def measurementResultsDirDoesNotExist(): + return "The directory with the measurements does not exist." + + +def templateDirDoesNotExist(not_exisitng_dir_or_file): + return f"Your tplib does not have {not_exisitng_dir_or_file}." 
+ + +def addPatternFailed(exception: str): + return f"Adding the pattern to the tplib failed {exception}" \ No newline at end of file diff --git a/tp_framework/core/exceptions.py b/tp_framework/core/exceptions.py index 281221e..c5af8f3 100644 --- a/tp_framework/core/exceptions.py +++ b/tp_framework/core/exceptions.py @@ -1,6 +1,11 @@ from core import errors +class AddPatternError(Exception): + def __init__(self, message: str) -> None: + self.message = errors.addPatternFailed(message) + super().__init__() + class PatternDoesNotExists(Exception): def __init__(self, pattern_id): self.pattern_id = pattern_id @@ -8,6 +13,18 @@ def __init__(self, pattern_id): super().__init__(self.message) +class PatternInvalid(Exception): + def __init__(self, message: str) -> None: + self.message = errors.patternInvalidError(message) + super().__init__(self.message) + + +class PatternRepairError(Exception): + def __init__(self, message: str) -> None: + self.message = message + super().__init__(self.message) + + class InstanceDoesNotExists(Exception): def __init__(self, instance_id: int = None, ref_metadata: str = None): self.instance_id = instance_id @@ -16,9 +33,9 @@ def __init__(self, instance_id: int = None, ref_metadata: str = None): super().__init__(self.message) -class PatternFolderNotFound(Exception): - def __init__(self): - self.message = errors.patternFolderNotFound() +class InstanceInvalid(Exception): + def __init__(self, message: str) -> None: + self.message = errors.instanceInvalidError(message) super().__init__(self.message) @@ -85,15 +102,6 @@ def __init__(self, message=None, discovery_method=None): super().__init__(self.message) -class PatternValueError(Exception): - def __init__(self, message=None): - if message: - self.message = message - else: - self.message = errors.patternValueError() - super().__init__(self.message) - - class CPGGenerationError(Exception): def __init__(self, message=errors.cpgGenerationError()): self.message = message @@ -121,4 +129,24 @@ def 
__init__(self, stderr=None): self.message = stderr else: self.message = errors.discoveryRuleParsingResultError() - super().__init__(self.message) \ No newline at end of file + super().__init__(self.message) + +# Pattern Repair + +class MeasurementResultsDoNotExist(Exception): + def __init__(self, message=errors.measurementResultsDirDoesNotExist()): + self.message = message + super().__init__(self.message) + + +class MeasurementInvalid(Exception): + def __init__(self, message) -> None: + self.message = message + super().__init__(self.message) + + +class TemplateDoesNotExist(Exception): + def __init__(self, message=errors.templateDirDoesNotExist('template')) -> None: + self.message = message + super().__init__(self.message) + diff --git a/tp_framework/core/instance.py b/tp_framework/core/instance.py index e25da25..bc2b5fc 100644 --- a/tp_framework/core/instance.py +++ b/tp_framework/core/instance.py @@ -1,252 +1,190 @@ -import json -from enum import Enum -from pathlib import Path -from typing import Dict - -from core import utils -from core.exceptions import PatternDoesNotExists, InstanceDoesNotExists -from core.pattern import Pattern, get_pattern_path_by_pattern_id, get_pattern_by_pattern_id - -import logging -from core import loggermgr - -logger = logging.getLogger(loggermgr.logger_name(__name__)) - -class PatternCategory(str, Enum): - S0 = "S0" - D1 = "D1" - D2 = "D2" - D3 = "D3" - D4 = "D4" - - -class FeatureVsInternalApi(str, Enum): - FEATURE = "FEATURE" - INTERNAL_API = "INTERNAL_API" - - -class Instance(Pattern): - # TODO - pattern instance: update to current structure 09/2022 - ''' - ''' - - def __init__( - self, - code_path: Path, - code_injection_skeleton_broken: bool, - compile_dependencies: Path, # added 092022 - compile_binary: Path, - compile_instruction: str, # added 092022 - remediation_transformation: str, # added 092022 - remediation_modeling_rule: Path, # added 092022 - remediation_notes: str, # added 092022 - properties_category: PatternCategory, 
- properties_negative_test_case: bool, - properties_source_and_sink: bool, - properties_input_sanitizer: bool, - properties_feature_vs_internal_api: FeatureVsInternalApi, - discovery_rule: Path, - discovery_method: str, - discovery_rule_accuracy: str, - discovery_notes: str, - expectation: bool, - expectation_type: str, - expectation_sink_file: Path, - expectation_sink_line: int, - expectation_source_file: Path, - expectation_source_line: int, - name: str, - description: str, - family: str, - tags: list[str], - instances: list[Path], - language: str, - pattern_id: int = None, - instance_id: int = None, - pattern_dir: Path = None, - ) -> None: - if pattern_id is None: - super().__init__(name, language, instances, family, description, tags, pattern_dir=pattern_dir) - else: - super().__init__(name, language, instances, family, description, tags, pattern_id) - - self.code_injection_skeleton_broken = code_injection_skeleton_broken - self.compile_dependencies = compile_dependencies # added 092022 - self.compile_binary = compile_binary - self.compile_instruction = compile_instruction # added 092022 - self.remediation_transformation = remediation_transformation # added 092022 - self.remediation_modeling_rule = remediation_modeling_rule # added 092022 - self.remediation_notes = remediation_notes # added 092022 - self.properties_category = properties_category - self.properties_negative_test_case = properties_negative_test_case - self.properties_source_and_sink = properties_source_and_sink - self.properties_input_sanitizer = properties_input_sanitizer - self.properties_feature_vs_internal_api = properties_feature_vs_internal_api - self.expectation = expectation - self.discovery_rule = discovery_rule - self.discovery_method = discovery_method - self.discovery_rule_accuracy = discovery_rule_accuracy - self.discovery_notes = discovery_notes - self.expectation_type = expectation_type - self.expectation_sink_file = expectation_sink_file - self.expectation_sink_line = 
expectation_sink_line - self.expectation_source_file = expectation_source_file - self.expectation_source_line = expectation_source_line - self.instance_id = instance_id or self.define_instance_id(pattern_dir) - if code_path is None: - logger.warning("Instance without code snippet cannot even be measured by SAST tools: pattern {0}, instance {1}".format(name, instance_id)) - self.code_path = "" - else: - self.code_path = code_path - - - def define_instance_id(self, pattern_dir: Path) -> int: - try: - inst_list: list[Path] = utils.list_tpi_paths_by_tp_id( - self.language, self.pattern_id, pattern_dir) - id_list: list[int] = sorted(list(map(lambda x: int(str(x.name).split("_")[0]), inst_list))) - return id_list[-1] + 1 if len(id_list) > 0 else 1 - except PatternDoesNotExists: - return 1 - - def add_instance_to_pattern_id(self, language: str, pattern_src_dir: Path, pattern_dir: Path) -> None: - instance_dir_name: str = utils.get_instance_dir_name_from_pattern(pattern_src_dir.name, self.pattern_id, - self.instance_id) - pattern_dir_name: str = utils.get_pattern_dir_name_from_name(pattern_src_dir.name, self.pattern_id) - instance_dir: Path = pattern_dir / language / pattern_dir_name / instance_dir_name - instance_dir.mkdir(exist_ok=True, parents=True) - instance_json_file: Path = instance_dir / f"{instance_dir_name}.json" - - with open(instance_json_file, "w") as json_file: - instance_dict: Dict = { - "code": { - "path": utils.get_relative_path_str_or_none(self.code_path), - "injection_skeleton_broken": self.code_injection_skeleton_broken - }, - "remediation": { - "notes": self.remediation_notes, - "transformation": self.remediation_transformation, - "modeling_rule": utils.get_relative_path_str_or_none(self.remediation_modeling_rule) - }, - "discovery": { - "rule": utils.get_relative_path_str_or_none(self.discovery_rule), - "method": self.discovery_method, - "rule_accuracy": self.discovery_rule_accuracy, - "notes": self.discovery_notes - }, - "compile": { - "binary": 
utils.get_relative_path_str_or_none(self.compile_binary), - "dependencies": utils.get_relative_path_str_or_none(self.compile_dependencies), - "instruction": self.compile_instruction - }, - "expectation": { - "type": self.expectation_type, - "sink_file": utils.get_relative_path_str_or_none(self.expectation_sink_file), - "sink_line": self.expectation_sink_line, - "source_file": utils.get_relative_path_str_or_none(self.expectation_source_file), - "source_line": self.expectation_source_line, - "expectation": self.expectation - }, - "properties": { - "category": utils.get_enum_value_or_none(self.properties_category), - "feature_vs_internal_api": utils.get_enum_value_or_none(self.properties_feature_vs_internal_api), - "input_sanitizer": self.properties_input_sanitizer, - "source_and_sink": self.properties_source_and_sink, - "negative_test_case": self.properties_negative_test_case - } - } - json.dump(instance_dict, json_file, indent=4) - - -# TODO (old): Test this -def get_instance_by_instance_id(language: str, instance_id: int, pattern_id, pattern_dir: Path) -> Instance: - instance_dir: Path = get_instance_path_from_instance_id(language, pattern_id, instance_id, pattern_dir) - instance_json: Path = instance_dir / f"{instance_dir.name}.json" - with open(instance_json) as json_file: - pattern_from_json: Dict = json.load(json_file) - - pattern, p_dir = get_pattern_by_pattern_id(language, pattern_id, pattern_dir) - return instance_from_dict(pattern_from_json, pattern, language, pattern_id) - - -def get_instance_path_from_instance_id(language: str, pattern_id: int, instance_id: int, pattern_dir: Path) -> Path: - pattern_path: Path = get_pattern_path_by_pattern_id(language, pattern_id, pattern_dir) - filtered_res: list[str] = list(filter( - lambda x: int(x.split("_")[0]) == instance_id, - map(lambda y: y.name, utils.list_dirs_only(pattern_path)) - )) - if not filtered_res: - raise InstanceDoesNotExists(instance_id) - return pattern_path / filtered_res[0] - - -def 
instance_from_dict(instance_dict: Dict, pattern: Pattern, language: str, instance_id: int) -> Instance: - return Instance( - utils.get_path_or_none(utils.get_from_dict(instance_dict, "code", "path")), # code_path: Path, - utils.get_from_dict(instance_dict, "code", "injection_skeleton_broken"), # code_injection_skeleton_broken: bool, - utils.get_path_or_none(utils.get_from_dict(instance_dict, "compile", "dependencies")), # compile_dependencies: Path, # added 092022 - utils.get_path_or_none(utils.get_from_dict(instance_dict, "compile", "binary")), # compile_binary: Path, - utils.get_from_dict(instance_dict, "compile", "instruction"), # compile_instruction: str, # added 092022 - utils.get_from_dict(instance_dict, "remediation", "transformation"), # remediation_transformation: str, # added 092022 - utils.get_path_or_none(utils.get_from_dict(instance_dict, "remediation", "modeling_rule")), # remediation_modeling_rule: Path, # added 092022 - utils.get_from_dict(instance_dict, "remediation", "notes"), # remediation_notes: str, # added 092022 - get_pattern_category_or_none(utils.get_from_dict(instance_dict, "properties", "category")), - utils.get_from_dict(instance_dict, "properties", "negative_test_case"), - utils.get_from_dict(instance_dict, "properties", "source_and_sink"), - utils.get_from_dict(instance_dict, "properties", "input_sanitizer"), - get_feature_vs_internal_api_or_none(utils.get_from_dict(instance_dict, "properties", "feature_vs_internal_api")), - utils.get_path_or_none(utils.get_from_dict(instance_dict, "discovery", "rule")), - utils.get_from_dict(instance_dict, "discovery", "method"), - utils.get_from_dict(instance_dict, "discovery", "rule_accuracy"), - utils.get_from_dict(instance_dict, "discovery", "notes"), - utils.get_from_dict(instance_dict, "expectation", "expectation"), - utils.get_from_dict(instance_dict, "expectation", "type"), - utils.get_path_or_none(utils.get_from_dict(instance_dict, "expectation", "sink_file")), - 
utils.get_from_dict(instance_dict, "expectation", "sink_line"), - utils.get_path_or_none(utils.get_from_dict(instance_dict, "expectation", "source_file")), - utils.get_from_dict(instance_dict, "expectation", "source_line"), - pattern.name, - pattern.description, - pattern.family, - pattern.tags, - pattern.instances, - language, - pattern_id=pattern.pattern_id, - instance_id=instance_id - ) - - -# TODO: refactoring needed here together with the instance and pattern concepts... -# at the moment you need to pass either the absolute path to the tpi json or a -# relative one starting from `tp_lib` (why is language ignored in this second option??)... -# `tp_lib` and `language` seems to be more necessary to get out the corresponding pattern -# information...maybe the instance should comprise the pattern info? otherwise while from -# patterns we have the instances metadata, from the instance we can only reconstruct the -# pattern info from the strong assumptions in the file system structuring... - -def load_instance_from_json(metadata: str, tp_lib: Path, language: str) -> Instance: - metadata_path: Path = tp_lib / metadata # TODO: language??? 
- if not metadata_path.exists(): - raise InstanceDoesNotExists(ref_metadata=metadata_path.name) - - with open(metadata_path) as file: - instance: Dict = json.load(file) - - pattern_id = utils.get_id_from_name(metadata_path.parent.parent.name) - pattern, p_dir = get_pattern_by_pattern_id(language, pattern_id, tp_lib) - instance_id = utils.get_id_from_name(metadata_path.parent.name) - return instance_from_dict(instance, pattern, language, instance_id) - - -def get_pattern_category_or_none(el) -> PatternCategory | None: - try: - return PatternCategory(el) - except ValueError: - return None - - -def get_feature_vs_internal_api_or_none(el) -> FeatureVsInternalApi | None: - try: - return FeatureVsInternalApi(el) - except ValueError: - return None \ No newline at end of file +import shutil +from pathlib import Path +from typing import Tuple + +from core import utils +from core.exceptions import InstanceInvalid +from core.repair.instance_repair import InstanceRepair + + +# class PatternCategory(str, Enum): +# S0 = "S0" +# D1 = "D1" +# D2 = "D2" +# D3 = "D3" +# D4 = "D4" + + +# class FeatureVsInternalApi(str, Enum): +# FEATURE = "FEATURE" +# INTERNAL_API = "INTERNAL_API" + +class Instance: + @classmethod + def init_from_json_path(cls, path_to_instance_json: Path, + pattern_id: int, language: str, tp_lib_path: Path): + if not path_to_instance_json.is_file(): + raise InstanceInvalid(f"The provided instance path '{path_to_instance_json}' does not exist.") + return cls._init_from_json(cls(), path_to_instance_json, pattern_id, language, tp_lib_path) + + def __init__(self) -> None: + self.path = None + self.json_path = None + self.instance_id = None + self.pattern_id = None + self.language = None + self.name = None + self.tp_lib_path = None + + # JSON fields + self.description = None + self.code_path = None + self.code_injection_skeleton_broken = None + self.expectation_type = None + self.expectation_sink_file = None + self.expectation_sink_line = None + 
self.expectation_source_file = None + self.expectation_source_line = None + self.expectation_expectation = None + self.compile_binary = None + self.compile_instruction = None + self.compile_dependencies = None + self.discovery_rule = None + self.discovery_method = None + self.discovery_rule_accuracy = None + self.discovery_notes = None + self.properties_category = None + self.properties_feature_vs_internal_api = None + self.properties_input_sanitizer = None + self.properties_source_and_sink = None + self.properties_negative_test_case = None + self.remediation_notes = None + self.remediation_transformation = None + self.remediation_modeling_rule = None + + def _assert_instance(self): + try: + int(self.instance_id) + assert self.language.isupper() + assert self.path.is_dir() + assert self.json_path.is_file() + except Exception as e: + raise InstanceInvalid(f"{self._log_prefix()}Instance Variables are not properly set. '{e}'") + + def _init_from_json(self, path_to_instance_json: Path, pattern_id: int, language: str, tp_lib_path: Path): + self.path = path_to_instance_json.parent + self.name = self.path.name + self.json_path = Path(path_to_instance_json.name) + self.language = language.upper() + self.tp_lib_path = tp_lib_path + try: + self.instance_id = utils.get_id_from_name(self.path.name) + except Exception as e: + raise InstanceInvalid(f"Could not get id from '{self.path.name}'.") + self.pattern_id = pattern_id + instance_properties = utils.read_json(self.json_path) + if not instance_properties: + raise InstanceInvalid(f"{self._log_prefix()}Please check {self.json_path}.") + + self.description = instance_properties.get("description", None) + self.code_path = utils.get_path_or_none(utils.get_from_dict(instance_properties, "code", "path")) + self.code_injection_skeleton_broken = utils.get_from_dict(instance_properties, "code", "injection_skeleton_broken") + self.expectation_type = utils.get_from_dict(instance_properties, "expectation", "type") + 
self.expectation_sink_file = utils.get_path_or_none(utils.get_from_dict(instance_properties, "expectation", "sink_file")) + self.expectation_sink_line = utils.get_from_dict(instance_properties, "expectation", "sink_line") + self.expectation_source_file = utils.get_path_or_none(utils.get_from_dict(instance_properties, "expectation", "source_file")) + self.expectation_source_line = utils.get_from_dict(instance_properties, "expectation", "source_line") + self.expectation_expectation = utils.get_from_dict(instance_properties, "expectation", "expectation") + self.compile_binary = utils.get_path_or_none(utils.get_from_dict(instance_properties, "compile", "binary")) + self.compile_instruction = utils.get_from_dict(instance_properties, "compile", "instruction") + self.compile_dependencies = utils.get_from_dict(instance_properties, "compile", "dependencies") + self.discovery_rule = utils.get_path_or_none(utils.get_from_dict(instance_properties, "discovery", "rule")) + self.discovery_method = utils.get_from_dict(instance_properties, "discovery", "method") + self.discovery_rule_accuracy = utils.get_from_dict(instance_properties, "discovery", "rule_accuracy") + self.discovery_notes = utils.get_from_dict(instance_properties, "discovery", "notes") + self.properties_category = utils.get_from_dict(instance_properties, "properties", "category") + self.properties_feature_vs_internal_api = utils.get_from_dict(instance_properties, "properties", "feature_vs_internal_api") + self.properties_input_sanitizer = utils.get_from_dict(instance_properties, "properties", "input_sanitizer") + self.properties_source_and_sink = utils.get_from_dict(instance_properties, "properties", "source_and_sink") + self.properties_negative_test_case = utils.get_from_dict(instance_properties, "properties", "negative_test_case") + self.remediation_notes = utils.get_from_dict(instance_properties, "remediation", "notes") + self.remediation_transformation = utils.get_from_dict(instance_properties, "remediation", 
"transformation") + self.remediation_modeling_rule = utils.get_from_dict(instance_properties, "remediation", "modeling_rule") + self._assert_instance() + return self + + def __getattribute__(self, name): + base_path = super().__getattribute__("path") + attr = super().__getattribute__(name) + if isinstance(attr, Path) and attr != base_path: + attr = Path(base_path / attr).resolve() + return attr + + def __str__(self) -> str: + return f"{self.language} - p{self.pattern_id}:{self.instance_id}" + + def _log_prefix(self): + return f"Pattern {self.pattern_id} - Instance {self.instance_id} - " + + def copy_to_tplib(self, pattern_path: Path): + new_instance_path = pattern_path / self.path.name + new_instance_path.mkdir(parents=True, exist_ok=True) + utils.copy_dir_content(self.path, new_instance_path) + self.path = new_instance_path + self.name = self.path.name + + # same function as in Pattern, could use some interface for that, or move to utils? + def get_description(self) -> Tuple[bool, str]: + if self.description and " " not in self.description and Path(self.path / self.description).resolve().is_file(): + with open(Path(self.path / self.description).resolve(), "r") as desc_file: + return True, "".join(desc_file.readlines()).strip() + else: + return False, self.description.strip() if self.description else "" + + def set_new_instance_path(self, new_path): + old_path = self.path + self.path = new_path + shutil.move(old_path, self.path) + + def repair(self, pattern): + InstanceRepair(self, pattern).repair() + + def to_dict(self): + return { + "description": self.description, + "code": { + "path": utils.get_relative_paths(self.code_path, self.path), + "injection_skeleton_broken": self.code_injection_skeleton_broken + }, + "discovery": { + "rule": utils.get_relative_paths(self.discovery_rule, self.path), + "method": self.discovery_method, + "rule_accuracy": self.discovery_rule_accuracy, + "notes": self.discovery_notes + }, + "compile": { + "binary": 
utils.get_relative_paths(self.compile_binary, self.path), + "instruction": self.compile_instruction, + "dependencies": self.compile_dependencies + }, + "expectation": { + "type": self.expectation_type, + "sink_file": utils.get_relative_paths(self.expectation_sink_file, self.path), + "sink_line": self.expectation_sink_line, + "source_file": utils.get_relative_paths(self.expectation_source_file, self.path), + "source_line": self.expectation_source_line, + "expectation": self.expectation_expectation + }, + "properties": { + "category": self.properties_category, + "feature_vs_internal_api": self.properties_feature_vs_internal_api, + "input_sanitizer": self.properties_input_sanitizer, + "source_and_sink": self.properties_source_and_sink, + "negative_test_case": self.properties_negative_test_case + }, + "remediation": { + "notes": self.remediation_notes, + "transformation": self.remediation_transformation, + "modeling_rule": self.remediation_modeling_rule + } + } diff --git a/tp_framework/core/measure.py b/tp_framework/core/measure.py index 2f7f9fb..b7ebf65 100644 --- a/tp_framework/core/measure.py +++ b/tp_framework/core/measure.py @@ -120,5 +120,3 @@ async def measure_list_patterns(l_tp_id: list[int], language: str, } logger.info(f"SAST measurement - done") return d_results - - diff --git a/tp_framework/core/measurement.py b/tp_framework/core/measurement.py index 3667ccc..4c142fa 100644 --- a/tp_framework/core/measurement.py +++ b/tp_framework/core/measurement.py @@ -11,8 +11,8 @@ import config from core import utils -from core.exceptions import InstanceDoesNotExists, MeasurementNotFound -from core.instance import Instance, load_instance_from_json +from core.exceptions import MeasurementNotFound, MeasurementInvalid +from core.instance import Instance class Measurement: @@ -30,7 +30,21 @@ def __init__(self, self.tool = tool self.version = version self.instance = instance - + + #TODO: TESTING + @classmethod + def init_from_measurement_dict(cls, meas_dict): + return 
cls()._init_from_dict(meas_dict) + + def _init_from_dict(self, dict_to_init_from: dict): + try: + self.date = dict_to_init_from["date"] + self.result = dict_to_init_from["result"] + self.tool = dict_to_init_from["tool"] + self.version = dict_to_init_from["version"] + except KeyError as e: + raise MeasurementInvalid(e) + return self def define_verdict(self, date: datetime, instance: Instance, findings: list[Dict], tool: str, version: str, sink_line_strict : bool = False, @@ -61,8 +75,8 @@ def define_verdict(self, date: datetime, instance: Instance, findings: list[Dict # found = instance.expectation_sink_file.name == finding["file"] if found: break # we found a matching finding - self.result = (found == instance.expectation) - self.expected_result = instance.expectation + self.result = (found == instance.expectation_expectation) + self.expected_result = instance.expectation_expectation self.tool = tool self.version = version self.instance = instance @@ -90,9 +104,10 @@ def load_measurements(meas_file: Path, tp_lib: Path, language: str) -> list[Meas return [] parsed_meas: list[Measurement] = [] for m in meas: - instance = load_instance_from_json(m["instance"], tp_lib, language) + instance_json_path = tp_lib / Path(m["instance"]) + instance = Instance.init_from_json_path(instance_json_path, None, language, tp_lib) # NOTE 06/2023: if not expectation in measurement, then we take it from instance (backword compatibility though it could introduce mistakes if the instance expectation was changed after the measurement) - expected_result = m["expected_result"] if "expected_result" in m.keys() else instance.expectation + expected_result = m["expected_result"] if "expected_result" in m.keys() else instance.expectation_expectation parsed_meas.append(Measurement( m["date"], m["result"], @@ -104,23 +119,17 @@ def load_measurements(meas_file: Path, tp_lib: Path, language: str) -> list[Meas return parsed_meas -def load_last_measurement_for_tool(tool: Dict, language: str, tp_lib: 
Path, p_id: int, - pi_id: int) -> Measurement: +def load_last_measurement_for_tool(tool: Dict, language: str, tp_lib: Path, pattern, + instance: Instance) -> Measurement: # TODO - load last measurement: the code hereafter strongly depends on the folder notation in place for # patterns and pattern instances. Make sure to factorize in function what needs to # and to generalize the approach as much as we can to rely the least possible on # the strict notation - pattern_dir: Path = utils.get_pattern_dir_from_id(p_id, language, tp_lib) - pattern_dir_name: str = pattern_dir.name - instance_dir_name: str = f"{pi_id}_instance_{pattern_dir_name}" - instance_dir: Path = pattern_dir / instance_dir_name - if not instance_dir.is_dir(): - ee = InstanceDoesNotExists(instance_id=pi_id) - logger.exception(ee) - raise ee + pattern_dir_name: str = pattern.path.name + instance_dir_name: str = instance.path.name measurement_dir_for_pattern_instance: Path = utils.get_measurement_dir_for_language(tp_lib, language) / pattern_dir_name / instance_dir_name if not measurement_dir_for_pattern_instance.is_dir(): - ee = MeasurementNotFound(p_id) + ee = MeasurementNotFound(pattern.pattern_id) logger.exception(ee) raise ee meas_file_list = list( @@ -137,7 +146,7 @@ def load_last_measurement_for_tool(tool: Dict, language: str, tp_lib: Path, p_id measurements) ) if not measurements_for_tool: - logger.warning(f'No measurement has been found for tool {tool["name"]}:{tool["version"]} on pattern {p_id} instance {pi_id}') + logger.warning(f'No measurement has been found for tool {tool["name"]}:{tool["version"]} on pattern {pattern.pattern_id} instance {instance.instance_id}') return None return sorted(measurements_for_tool, reverse=True)[0] @@ -158,4 +167,4 @@ def any_tool_matching(meas, tools, version=config.discovery_under_measurement["e if not version: return any(meas.tool == tool["name"] for tool in tools) else: - return any(meas.tool == tool["name"] and meas.version == tool["version"] for tool in 
tools) \ No newline at end of file + return any(meas.tool == tool["name"] and meas.version == tool["version"] for tool in tools) diff --git a/tp_framework/core/modelling_rules.py b/tp_framework/core/modelling_rules.py index ca3c5a7..c8afbe4 100644 --- a/tp_framework/core/modelling_rules.py +++ b/tp_framework/core/modelling_rules.py @@ -24,4 +24,4 @@ async def scan(src_dir: Path, tools: list[Dict], language: str, modelling_rules: apply_remediation=True, modelling_rules=modelling_rules) results.append({f"{tool['name']}:{tool['version']}": sast.inspector(res, language)}) - return results, tools \ No newline at end of file + return results, tools diff --git a/tp_framework/core/pattern.py b/tp_framework/core/pattern.py index d9d0280..508b0fe 100644 --- a/tp_framework/core/pattern.py +++ b/tp_framework/core/pattern.py @@ -1,82 +1,170 @@ -import json -from pathlib import Path -from core import utils -from core.exceptions import LanguageTPLibDoesNotExist, PatternDoesNotExists, PatternValueError -from typing import Dict, Tuple - - -class Pattern: - def __init__(self, name: str, language: str, instances: list[Path], family: str = None, description: str = "", - tags: list[str] = [], pattern_id: int = None, pattern_dir: Path = None) -> None: - self.name = name - self.description = description - self.family = family - self.tags = tags - self.instances = instances - self.language = language - self.pattern_id = pattern_id or self.define_pattern_id(pattern_dir) - - def define_pattern_id(self, pattern_dir) -> int: - try: - dir_list: list[Path] = utils.list_pattern_paths_for_language(self.language, pattern_dir) - except LanguageTPLibDoesNotExist: - return 1 - id_list: list[int] = sorted(list(map(lambda x: int(str(x.name).split("_")[0]), dir_list))) - return id_list[-1] + 1 if len(id_list) > 0 else 1 - - def add_pattern_to_tp_library(self, language: str, pattern_src_dir: Path, pattern_dir: Path) -> None: - pattern_dir_name: str = 
utils.get_pattern_dir_name_from_name(pattern_src_dir.name, self.pattern_id) - new_tp_dir: Path = pattern_dir / language / pattern_dir_name - new_tp_dir.mkdir(exist_ok=True, parents=True) - pattern_json_file: Path = new_tp_dir / f"{pattern_dir_name}.json" - - with open(pattern_json_file, "w") as json_file: - pattern_dict: Dict = { - "name": self.name, - "description": self.description, - "family": self.family, - "tags": self.tags, - "instances": self.instances, - } - json.dump(pattern_dict, json_file, indent=4) - - def add_new_instance_reference(self, language: str, pattern_dir: Path, new_instance_ref: str) -> None: - tp_dir: Path = get_pattern_path_by_pattern_id(language, self.pattern_id, pattern_dir) - with open(tp_dir / f"{tp_dir.name}.json") as json_file: - pattern_dict: Dict = json.load(json_file) - - pattern_dict["instances"].append(new_instance_ref) - - with open(tp_dir / f"{tp_dir.name}.json", "w") as json_file: - json.dump(pattern_dict, json_file, indent=4) - - -# TODO (old): Test this -def get_pattern_by_pattern_id(language: str, pattern_id: int, tp_lib_dir: Path) -> Tuple[Pattern, Path]: - tp_dir: Path = get_pattern_path_by_pattern_id(language, pattern_id, tp_lib_dir) - tp_json: Path = tp_dir / f"{tp_dir.name}.json" - with open(tp_json) as json_file: - pattern_from_json: Dict = json.load(json_file) - return pattern_from_dict(pattern_from_json, language, pattern_id), tp_dir - - -def get_pattern_path_by_pattern_id(language: str, pattern_id: int, tp_lib_dir: Path) -> Path: - tp_dir_for_language: Path = tp_lib_dir / language - filtered_res: list[str] = list(filter( - lambda x: x.split("_")[0] == str(pattern_id), - map(lambda y: y.name, utils.list_dirs_only(tp_dir_for_language)) - )) - if not filtered_res: - raise PatternDoesNotExists(pattern_id) - return tp_dir_for_language / filtered_res[0] - - -def pattern_from_dict(pattern_dict: Dict, language: str, pattern_id: int) -> Pattern: - try: - return Pattern(pattern_dict["name"], language, 
pattern_dict["instances"], - family=pattern_dict.get("family", None), - description=pattern_dict.get("description", ""), - tags=pattern_dict.get("tags", []), - pattern_id=pattern_id) - except KeyError as e: - raise PatternValueError(message=f"Key {e} was not found in pattern metadata") +from pathlib import Path +from typing import Tuple + +from core.exceptions import PatternInvalid, AddPatternError, InstanceDoesNotExists +from core.instance import Instance + +from core import utils +from core.repair.pattern_repair import PatternRepair +import logging +from core import loggermgr +logger = logging.getLogger(loggermgr.logger_name(__name__)) + + +class Pattern: + @classmethod + def init_from_id_and_language(cls, id: int, language: str, tp_lib_path: Path): + return cls._init_from_id_and_language(cls(), id, language.upper(), tp_lib_path) + + @classmethod + def init_from_json_file_without_pattern_id(cls, json_file_path: Path, language: str, pattern_path: Path, tp_lib_path: Path): + return cls._init_from_json_without_id(cls(), json_file_path, language, pattern_path, tp_lib_path) + + def __init__(self) -> None: + # metadata + self.pattern_id = None + self.language = None + self.tp_lib_path = None + self.language = None + self.path = None + self.json_path = None + + # json fields + self.name = None + self.description = None + self.family = None + self.tags = None + self.version = None + self.instances = [] + + def __str__(self) -> str: + return f"{self.language} - p{self.pattern_id}" + + def _assert_pattern(self): + try: + assert int(self.pattern_id) + assert self.language + assert self.tp_lib_path.is_dir() + assert self.path.is_dir() + assert self.json_path.is_file() + assert self.instances and all([isinstance(instance, Instance) for instance in self.instances]) + except Exception as e: + raise PatternInvalid(f"{self._log_prefix()}Instance Variables are not properly set. 
'{e}'") + + def _init_from_id_and_language(self, id: int, language: str, tp_lib_path: Path): + self.pattern_id = id + self.language = language.upper() + self.tp_lib_path = tp_lib_path + self.path = utils.get_pattern_dir_from_id(id, language, tp_lib_path) + self._init_from_json_file(utils.get_json_file(self.path)) + self._assert_pattern() + return self + + def _init_instances(self, instance_paths_from_json: list): + instances = [] + for instance_json in instance_paths_from_json: + abs_path = Path(self.path / Path(instance_json)) + if not abs_path.is_file(): + raise PatternInvalid(f"{self._log_prefix()}The instance path '{instance_json}' is not valid.") + try: + instances += [Instance.init_from_json_path(abs_path, self.pattern_id, self.language, self.tp_lib_path)] + except Exception as e: + raise PatternInvalid(f"{self._log_prefix()}Could not instantiate instance, due to '{e}'") + return instances + + def _init_from_json_file(self, json_file_path: Path): + if not json_file_path: + raise PatternInvalid(f"The provided JSON Path is not valid '{json_file_path}'") + self.json_path = json_file_path + pattern_properties = utils.read_json(self.json_path) + if not pattern_properties: + raise PatternInvalid("The pattern needs a valid JSON file.") + self.name = pattern_properties["name"] if "name" in pattern_properties.keys() else None + self.description = pattern_properties["description"] if "description" in pattern_properties.keys() else None + self.family = pattern_properties["family"] if "family" in pattern_properties.keys() else None + self.tags = pattern_properties["tags"] if "tags" in pattern_properties.keys() else None + self.version = pattern_properties["version"] if "version" in pattern_properties.keys() else None + if "instances" in pattern_properties.keys() and pattern_properties["instances"]: + self.instances = self._init_instances(pattern_properties["instances"]) + self._sort_instances() + else: + # Raise exception + raise 
PatternInvalid(f"{self._log_prefix()}Pattern JSON file needs an 'instances' key with valid relative links.") + return self + + def _init_from_json_without_id(self, json_file_path: Path, language: str, pattern_path: Path, tp_lib_path: Path): + self.language = language.upper() + self.path = pattern_path + self.tp_lib_path = tp_lib_path + self._init_from_json_file(json_file_path) + try: + given_id = utils.get_id_from_name(self.path.name) + except Exception: + given_id = None + free_id = utils.get_next_free_pattern_id_for_language(self.language, self.tp_lib_path, given_id) + self.pattern_id = free_id + self._assert_pattern() + return self + + def _log_prefix(self): + return f"Pattern {self.pattern_id} ({self.language}) - " + + def _sort_instances(self): + self.instances = sorted(self.instances, key=lambda instance: instance.instance_id) + + def copy_to_tplib(self): + # copies the pattern and all its instances into the tp_lib + # try to get the id from the name: + given_id = None + try: + given_id = utils.get_id_from_name(self.path.name) + except (KeyError, ValueError): + # if we can't get an id from the name, we don't care, we just set a new id + pass + # if the given id is not the id, the algorithm identified, give it a new id + pattern_name = f'{self.pattern_id}_{self.path.name}' if given_id != self.pattern_id else self.path.name + new_pattern_path = self.tp_lib_path / self.language / pattern_name + for instance in self.instances: + instance.copy_to_tplib(new_pattern_path) + try: + utils.copy_dir_content(self.path, new_pattern_path) + except Exception as e: + raise AddPatternError(e) + self.path = new_pattern_path + + def get_instance_by_id(self, tpi_id: int) -> Instance: + try: + return list(filter(lambda tpi: tpi.instance_id == tpi_id, self.instances))[0] + except IndexError: + raise InstanceDoesNotExists(tpi_id, "") + + def get_description(self) -> Tuple[bool, str]: + if self.description and " " not in self.description and Path(self.path / 
self.description).resolve().is_file(): + with open(Path(self.path / self.description).resolve(), "r") as desc_file: + return True, "".join(desc_file.readlines()).strip() + else: + return False, self.description.strip() if self.description else "" + + def repair(self, should_include_readme: bool, + discovery_rule_results: Path = None, + measurement_results: Path = None, + masking_file: Path = None,): + PatternRepair(self).repair(self) + if should_include_readme: + from core.repair.readme_generator import READMEGenerator + readme = READMEGenerator(pattern=self, discovery_rule_results=discovery_rule_results, + measurement_results=measurement_results, + masking_file=masking_file).generate_README() + path_to_readme = self.path / "README.md" + with open(path_to_readme, "w") as readme_file: + readme_file.write(readme) + + def to_dict(self): + return { + "name": self.name, + "description": self.description, + "family": self.family, + "tags": self.tags, + "instances": [utils.get_relative_paths(i.json_path, self.path) for i in self.instances], + "version": self.version + } diff --git a/tp_framework/core/pattern_operations.py b/tp_framework/core/pattern_operations.py index 4cef8cf..43512e7 100644 --- a/tp_framework/core/pattern_operations.py +++ b/tp_framework/core/pattern_operations.py @@ -1,152 +1,48 @@ import json -import shutil -import uuid from datetime import datetime -from json import JSONDecodeError from pathlib import Path -from typing import Dict, Tuple +from typing import Dict import logging from core import loggermgr logger = logging.getLogger(loggermgr.logger_name(__name__)) -import core.instance -from core import errors from core import utils, analysis -from core.exceptions import PatternValueError -from core.instance import Instance, PatternCategory, FeatureVsInternalApi, instance_from_dict -from core.pattern import Pattern, get_pattern_by_pattern_id +from core.pattern import Pattern from core.sast_job_runner import SASTjob, job_list_to_dict from 
core.measurement import meas_list_to_tp_dict -def add_testability_pattern_to_lib(language: str, pattern_dict: Dict, pattern_src_dir: Path | None, - pattern_lib_dest: Path) -> Path: - try: - pattern: Pattern = Pattern(pattern_dict["name"], language, - [pattern_src_dir / instance_relative_path for instance_relative_path in - pattern_dict["instances"] if - pattern_src_dir], pattern_dict["family"], pattern_dict["description"], - pattern_dict["tags"], pattern_dir=pattern_lib_dest) - except KeyError as e: - raise PatternValueError(message=errors.patternKeyError(e)) - - pattern_instances_json_refs = pattern.instances - pattern.instances = [] - pattern.add_pattern_to_tp_library(language, pattern_src_dir, pattern_lib_dest) - - if pattern_src_dir: - for instance_json in pattern_instances_json_refs: - add_tp_instance_to_lib_from_json( - language, pattern.pattern_id, (pattern_src_dir / instance_json), pattern_src_dir, pattern_lib_dest - ) - return pattern_lib_dest / language / utils.get_pattern_dir_name_from_name(pattern_src_dir.name, pattern.pattern_id) - - -def add_tp_instance_to_lib(language: str, pattern: Pattern, instance_dict: Dict, inst_old_name: str, - pattern_src_dir: Path, pattern_lib_dst: Path) -> Path: - instance: Instance = Instance( - utils.get_path_or_none(utils.get_from_dict(instance_dict, "code", "path")), # code_path: Path, - utils.get_from_dict(instance_dict, "code", "injection_skeleton_broken"), # code_injection_skeleton_broken: bool, - utils.get_path_or_none(utils.get_from_dict(instance_dict, "compile", "dependencies")), # compile_dependencies: Path, # added 092022 - utils.get_path_or_none(utils.get_from_dict(instance_dict, "compile", "binary")), # compile_binary: Path, - utils.get_from_dict(instance_dict, "compile", "instruction"), # compile_instruction: str, # added 092022 - utils.get_from_dict(instance_dict, "remediation", "transformation"), # remediation_transformation: str, # added 092022 - utils.get_path_or_none(utils.get_from_dict(instance_dict, 
"remediation", "modeling_rule")), # remediation_modeling_rule: Path, # added 092022 - utils.get_from_dict(instance_dict, "remediation", "notes"), # remediation_notes: str, # added 092022 - core.instance.get_pattern_category_or_none(utils.get_from_dict(instance_dict, "properties", "category")), - utils.get_from_dict(instance_dict, "properties", "negative_test_case"), - utils.get_from_dict(instance_dict, "properties", "source_and_sink"), - utils.get_from_dict(instance_dict, "properties", "input_sanitizer"), - core.instance.get_feature_vs_internal_api_or_none(utils.get_from_dict(instance_dict, "properties", "feature_vs_internal_api")), - utils.get_path_or_none(utils.get_from_dict(instance_dict, "discovery", "rule")), - utils.get_from_dict(instance_dict, "discovery", "method"), - utils.get_from_dict(instance_dict, "discovery", "rule_accuracy"), - utils.get_from_dict(instance_dict, "discovery", "notes"), - utils.get_from_dict(instance_dict, "expectation", "expectation"), - utils.get_from_dict(instance_dict, "expectation", "type"), - utils.get_path_or_none(utils.get_from_dict(instance_dict, "expectation", "sink_file")), - utils.get_from_dict(instance_dict, "expectation", "sink_line"), - utils.get_path_or_none(utils.get_from_dict(instance_dict, "expectation", "source_file")), - utils.get_from_dict(instance_dict, "expectation", "source_line"), - pattern.name, - pattern.description, - pattern.family, - pattern.tags, - pattern.instances, - language, - pattern.pattern_id, - pattern_dir=pattern_lib_dst - ) - - inst_name = utils.get_instance_dir_name_from_pattern(pattern_src_dir.name, pattern.pattern_id, instance.instance_id) - pattern_name = utils.get_pattern_dir_name_from_name(pattern_src_dir.name, pattern.pattern_id) - - instance_src_dir: Path = pattern_src_dir / inst_old_name - instance_dst_dir: Path = pattern_lib_dst / language / pattern_name / inst_name - - instance.add_instance_to_pattern_id(language, pattern_src_dir, pattern_lib_dst) - 
pattern.add_new_instance_reference(language, pattern_lib_dst, f"./{inst_name}/{inst_name}.json") - - for path in list(instance_src_dir.iterdir()): - if not path.suffix.endswith("json"): - dst_path = instance_dst_dir / path.name - if path.is_dir(): - shutil.copytree(path, dst_path) - else: - shutil.copy(path, dst_path) - def add_testability_pattern_to_lib_from_json(language: str, pattern_json: Path, pattern_src_dir: Path, pattern_lib_dest: Path) -> Path: - with open(pattern_json) as json_file: - try: - pattern: Dict = json.load(json_file) - except JSONDecodeError as e: - raise e - try: - return add_testability_pattern_to_lib(language, pattern, pattern_src_dir, pattern_lib_dest) - except PatternValueError as e: - raise e - - -def add_tp_instance_to_lib_from_json(language: str, pattern_id: int, instance_json: Path, - pattern_src_dir: Path, pattern_dest_dir: Path): - pattern, p_dir = get_pattern_by_pattern_id(language, pattern_id, pattern_dest_dir) - - with open(instance_json) as json_file: - try: - instance: Dict = json.load(json_file) - except JSONDecodeError as e: - raise e - return add_tp_instance_to_lib( - language, pattern, instance, instance_json.parent.name, pattern_src_dir, pattern_dest_dir - ) + # The pattern objects automatically initializes the instances as well + pattern = Pattern.init_from_json_file_without_pattern_id(pattern_json, language, pattern_src_dir, pattern_lib_dest) + # dump the pattern to the tplib + pattern.copy_to_tplib() + logger.info(f"The pattern has been copied to {pattern.path}, You might need to adjust relative path links.") + return pattern async def start_add_measurement_for_pattern(language: str, sast_tools: list[Dict], tp_id: int, now, tp_lib_dir: Path, output_dir: Path) -> Dict: + d_status_tp = {} try: - l_tpi_path: list[Path] = utils.list_tpi_paths_by_tp_id(language, tp_id, tp_lib_dir) - target_pattern, p_dir = get_pattern_by_pattern_id(language, tp_id, tp_lib_dir) + target_pattern = Pattern.init_from_id_and_language(tp_id, 
language, tp_lib_dir) except Exception as e: logger.warning( f"SAST measurement - failed in fetching instances for pattern {tp_id}. Pattern will be ignored. Exception raised: {utils.get_exception_message(e)}") return d_status_tp - - for path in l_tpi_path: + + for instance in target_pattern.instances: try: - tpi_id = utils.get_id_from_name(path.name) - with open(path) as instance_json_file: - instance_json: Dict = json.load(instance_json_file) - target_instance: Instance = instance_from_dict(instance_json, target_pattern, language, tpi_id) - d_status_tp[tpi_id]: list[SASTjob] = await analysis.analyze_pattern_instance( - target_instance, path.parent, sast_tools, language, now, output_dir + d_status_tp[instance.instance_id]: list[SASTjob] = await analysis.analyze_pattern_instance( + instance, sast_tools, language, now, output_dir ) except Exception as e: - d_status_tp[tpi_id] = [] + d_status_tp[target_pattern.pattern_id] = [] logger.warning( - f"SAST measurement - failed in preparing SAST jobs for instance at {path} of the pattern {tp_id}. Instance will be ignored. Exception raised: {utils.get_exception_message(e)}") + f"SAST measurement - failed in preparing SAST jobs for instance at {instance.path} of the pattern {tp_id}. Instance will be ignored. 
Exception raised: {utils.get_exception_message(e)}") continue return d_status_tp @@ -154,21 +50,20 @@ async def start_add_measurement_for_pattern(language: str, sast_tools: list[Dict async def save_measurement_for_patterns(language: str, now: datetime, l_job: list[SASTjob], tp_lib_dir: Path): - d_job = job_list_to_dict(l_job) l_meas = await analysis.inspect_analysis_results(d_job, language) d_tp_meas = meas_list_to_tp_dict(l_meas) for tp_id in d_tp_meas: + target_pattern = Pattern.init_from_id_and_language(tp_id, language, tp_lib_dir) for tpi_id in d_tp_meas[tp_id]: l_tpi_meas = [] for meas in d_tp_meas[tp_id][tpi_id]: - # meas.instance - tp_rel_dir = utils.get_pattern_dir_name_from_name(meas.instance.name, meas.instance.pattern_id) - tpi_rel_dir = utils.get_instance_dir_name_from_pattern(meas.instance.name, meas.instance.pattern_id, meas.instance.instance_id) + tp_rel_dir = target_pattern.path.name + tpi_rel_dir = meas.instance.path.name meas_dir = utils.get_measurement_dir_for_language(tp_lib_dir, language) / tp_rel_dir / tpi_rel_dir meas_dir.mkdir(parents=True, exist_ok=True) - d_tpi_meas_ext: Dict = meas.__dict__ + d_tpi_meas_ext: Dict = meas.__dict__ # Could use vars(meas) here? 
# TODO: rather than extending here we should extend the Measurement class d_tpi_meas_ext["pattern_id"] = meas.instance.pattern_id d_tpi_meas_ext["instance_id"] = meas.instance.instance_id @@ -177,4 +72,4 @@ async def save_measurement_for_patterns(language: str, now: datetime, l_tpi_meas.append(d_tpi_meas_ext) with open(meas_dir / utils.get_measurement_file(now), "w") as f_meas: - json.dump(l_tpi_meas, f_meas, indent=4) \ No newline at end of file + json.dump(l_tpi_meas, f_meas, indent=4) diff --git a/tp_framework/core/repair/__init__.py b/tp_framework/core/repair/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tp_framework/core/repair/instance_repair.py b/tp_framework/core/repair/instance_repair.py new file mode 100644 index 0000000..95c4c42 --- /dev/null +++ b/tp_framework/core/repair/instance_repair.py @@ -0,0 +1,197 @@ +import time +import os +import re +from pathlib import Path + +from core import utils +from core.exceptions import PatternRepairError + +import logging +from core import loggermgr +logger = logging.getLogger(loggermgr.logger_name(__name__)) + +from core.repair.repair_tool import RepairTool + +class InstanceRepair(RepairTool): + def __init__(self, instance, pattern: Path) -> None: + self.pattern = pattern + template = instance.tp_lib_path / "pattern_template" / "ID_pattern_name" / "IID_instance_ID_pattern_name" / "IID_instance_ID_pattern_name.json" + schema = instance.tp_lib_path / "pattern_template" / "schema" / "instance.schema.json" + super().__init__(instance, template, schema) + try: + self.instance_repair_class = globals()[f"InstanceRepair{self.to_repair.language}"] + except KeyError: + logger.error( + f"InstanceRepair{self.to_repair.language} could not be found, maybe it is not imported?" 
+            )
+            raise PatternRepairError("Could not instantiate language specific instance repair")
+
+    def _adjust_variable_number_in_discovery_rule(self) -> None:
+        dr_path = self.to_repair.discovery_rule
+        with open(dr_path, "r") as fp:
+            result = fp.readlines()
+
+        # assume that scala rule files end with
+        # println()
+        # delete;
+        try:
+            println_line = result[result.index(list(filter(lambda line: "delete;" in line, result))[0]) - 1]
+        except IndexError:
+            logger.warning(f'Could not find "delete;" in {dr_path}')
+            return
+        try:
+            real_number = re.search(r"println\(x(\d+)\)", println_line).group(1)
+        except AttributeError:
+            logger.warning(f"Could not find the pattern id in {dr_path}")
+            return
+        # determine the name for the rule in scala file
+        # if there is more than one instance, it should be _i<instance_id>
+        # if this rule is shared by all instances, it should be _iall
+        rule_name = (
+            f'{self.pattern.path.name}_i{self.to_repair.instance_id}'
+            if len(self.pattern.instances) > 1 and dr_path.parent != self.pattern.path
+            else f"{self.pattern.path.name}_iall"
+        )
+        # make sure the variable number and the rule name match this pattern
+        new_rule = []
+        for line in result:
+            new_line = line.replace(f"x{real_number}", f"x{self.pattern.pattern_id}")
+            new_rule += [
+                re.sub(
+                    f"({self.pattern.path.name}_i(\d+|all)|ID_pattern_name_i1)",
+                    rule_name,
+                    new_line,
+                )
+            ]
+
+        diff = [line for line in new_rule if line not in result]
+        # assert False, f"{new_rule}\n\n{result}\n\n{diff}"
+        if diff:
+            logger.info(
+                f"Changed lines in Scala rule for instance {self.to_repair}:\n{[line.strip() for line in diff]}"
+            )
+        with open(dr_path, "w") as fp:
+            fp.writelines(new_rule)
+
+    def _check_rule_accuracy(self):
+        if not self.to_repair.discovery_rule_accuracy:
+            logger.warning(f"{self._log_prefix()}Discovery rule given, but no rule accuracy.")
+
+    def _repair_scala_rule(self):
+        if not self.to_repair.discovery_rule or not self.to_repair.discovery_rule.is_file():
+            logger.warning(f"{self._log_prefix()}Could not find rule for 
class InstanceRepairPHP:
    """Language-specific repair steps for a PHP testability-pattern instance.

    Regenerates the opcode dumps (.bash files) from the instance's .php files
    and refreshes the expected source/sink line numbers from the `// source` /
    `// sink` markers in the code.
    """

    def __init__(self, instance_to_repair) -> None:
        self.instance = instance_to_repair

    def _log_prefix(self):
        return f"PatternRepair - PHPInstanceRepair {self.instance} "

    def _get_source_and_sink_for_file(self, path_to_file: Path) -> tuple:
        """Return (source_line, sink_line) as 1-based line numbers of the last
        `// source` / `// sink` markers in the file; None for an absent marker,
        and (None, None) when no path is given."""
        if not path_to_file:
            return (None, None)
        with open(path_to_file, "r") as fp:
            file_lines = fp.readlines()
        sink = None
        source = None
        for idx, line in enumerate(file_lines):
            if "// sink" in line:
                sink = idx + 1
            if "// source" in line:
                source = idx + 1
        return (source, sink)

    def _remove_bash_files(self):
        # Remove all previously generated opcode files; they are regenerated from scratch.
        all_bash_files = utils.list_files(self.instance.path, ".bash")
        for file in all_bash_files:
            file.unlink()

    def _mask_line(self, input_line: str, php_file: str) -> str:
        """Mask the absolute path to `php_file` inside an opcode line, keeping only
        the part relative to the pattern library root (prefixed with `/.../`)."""
        if php_file not in input_line:
            return input_line
        line_prefix = input_line.split(os.sep)[0]
        line_suffix = input_line[input_line.rfind(".php") + 4 :]
        actual_filepath = Path(input_line.replace(line_prefix, "").replace(line_suffix, ""))
        new_path = f"{os.sep}...{os.sep}{actual_filepath.relative_to(self.instance.path.parent.parent.parent)}"
        return line_prefix + new_path + line_suffix

    def _make_opcode_from_php_file(self, php_file_path: Path) -> Path:
        """Generate the opcode dump `<name>.bash` for a PHP file, sanitize it and
        mask absolute paths; returns the path of the generated file."""
        # define necessary paths
        bash_file_path = php_file_path.parent / f"{php_file_path.stem}.bash"

        # opcache will only compile and cache files older than the script execution start
        # (https://www.php.net/manual/en/function.opcache-compile-file.php)
        # therefore we have to modify the time the php file was created
        one_minute_ago = time.time() - 60
        os.utime(php_file_path, (one_minute_ago, one_minute_ago))

        # Generate the bash file
        # NOTE(review): shell command assembled via f-string; paths come from the
        # local pattern library, but subprocess.run with an argument list would be
        # safer -- confirm before hardening.
        os.system(
            f"php -d zend_extension=opcache -d opcache.enable_cli=1 -d opcache.opt_debug_level=0x10000 --syntax-check {php_file_path} 2> {bash_file_path} 1>/dev/null"
        )

        # Sanitize the opcode: on some systems, there is an error included in the bash file
        with open(bash_file_path, "r") as file:
            result = file.readlines()
        for idx, line in enumerate(result):
            if line.startswith("$_main"):
                result = result[max(idx - 1, 0) :]
                break
        # mask the path to file
        final_lines = [self._mask_line(line, str(php_file_path)) for line in result]
        with open(bash_file_path, "w") as file:
            file.writelines(final_lines)
        return Path(bash_file_path)

    def _repair_opcode(self):
        """Regenerate every opcode file and keep `compile_binary` pointing at an
        existing one."""
        # we are radical, remove all '.bash' files and generate new ones for the '.php' files
        self._remove_bash_files()
        all_php_files = utils.list_files(self.instance.path, ".php", True)
        for file in all_php_files:
            bash_file_path = self._make_opcode_from_php_file(file)
            if not self.instance.compile_binary or not self.instance.compile_binary.is_file():
                self.instance.compile_binary = bash_file_path.relative_to(self.instance.path)

        all_bash_files = utils.list_files(self.instance.path, ".bash", recursive=True)
        if len(all_bash_files) != len(all_php_files):
            logger.warning(f"{self._log_prefix()}The number of php files and bash files mismatches.")

    def _repair_source_line_sink_line(self):
        """Refresh expectation_source_line / expectation_sink_line from the code
        markers; old values are kept when a marker is missing."""
        _, sink_line = self._get_source_and_sink_for_file(self.instance.expectation_sink_file)
        source_line, _ = self._get_source_and_sink_for_file(self.instance.expectation_source_file)
        if not sink_line:
            logger.warning(f"{self._log_prefix()}Could not find '// sink' in sink file '{self.instance.expectation_sink_file}'")
        if not source_line:
            logger.warning(f"{self._log_prefix()}Could not find '// source' in source file '{self.instance.expectation_source_file}'")
        self.instance.expectation_sink_line = sink_line if sink_line else self.instance.expectation_sink_line
        self.instance.expectation_source_line = source_line if source_line else self.instance.expectation_source_line

    def repair(self):
        """Entry point: run all PHP-specific repair steps."""
        self._repair_opcode()
        self._repair_source_line_sink_line()
class PatternRepair(RepairTool):
    """Repairs a pattern: completes its instance list, normalizes its name,
    description and tags, and writes a schema-valid JSON file back."""

    def __init__(self, pattern) -> None:
        json_template = pattern.tp_lib_path / "pattern_template" / "ID_pattern_name" / "ID_pattern_name.json"
        schema = pattern.tp_lib_path / "pattern_template" / "schema" / "pattern.schema.json"
        super().__init__(pattern, json_template, schema)

    def _complete_instances(self):
        """Discover instance directories not yet part of the pattern and enforce
        the `{instance_id}_instance_{pattern_name}` directory naming scheme."""
        # list pattern directory and try to find all instances
        potential_instances = utils.list_directories(self.to_repair.path)
        actual_instances = [i.path for i in self.to_repair.instances]

        # potentially all dirs, that are in the symmetric difference of
        # potential_instances and actual_instances could be missing instances
        missing_instances = set(potential_instances) ^ set(actual_instances)
        for m_instance in missing_instances:
            instance_json = utils.get_json_file(m_instance)
            if instance_json:
                # if there is a JSON file, try to instantiate an Instance from it
                try:
                    new_instance = Instance.init_from_json_path(instance_json, self.to_repair.pattern_id, self.to_repair.language, self.to_repair.tp_lib_path)
                except Exception:
                    # fix: logger.warn is a deprecated alias -> logger.warning
                    logger.warning(f"Found potential instance JSON at {instance_json}, but cannot initialize instance.")
                    continue
                self.to_repair.instances += [new_instance]
        self.to_repair._sort_instances()
        # check if instances are named after naming scheme {instance_id}_instance_{pattern_name}
        for instance in self.to_repair.instances:
            expected_name = f"{instance.instance_id}_instance_{self.to_repair.path.name}"
            actual_name = instance.name
            if expected_name != actual_name:
                new_path = instance.path.parent / expected_name
                instance.set_new_instance_path(new_path)

    def _repair_name(self):
        # Derive the human readable name from the directory name,
        # e.g. "42_my_pattern" -> "My Pattern".
        self.to_repair.name = " ".join([w.title() for w in self.to_repair.path.name.split("_")[1:]])
        if not self.to_repair.name:
            logger.warning(f"{self._log_prefix()}The name of this pattern is weird.")

    def _repair_description(self):
        """Move long inline descriptions into ./docs/description.md and drop
        instance descriptions that merely duplicate the pattern description."""
        is_file, description = self.to_repair.get_description()
        if not description:
            logger.warning(f"{self._log_prefix()}Could not find description.")
            return

        # check if description is in JSON and is longer than 140 symbols
        if not is_file and len(description) > 140:
            # description is a bit too long, put it into a file
            path_to_new_description_file = self.to_repair.path / "docs" / "description.md"
            path_to_new_description_file.parent.mkdir(parents=True, exist_ok=True)
            with open(path_to_new_description_file, "w") as desc_file:
                desc_file.write(description)
            logger.info(f"{self._log_prefix()}Moving description into ./docs/description.md")
            self.to_repair.description = utils.get_relative_paths(path_to_new_description_file, self.to_repair.path)

        # check if instances have the same description
        for instance in self.to_repair.instances:
            # NOTE(review): assumes get_description() returns a non-None text for
            # every instance here -- confirm, otherwise .strip() raises.
            if description == instance.get_description()[1].strip():
                logger.info(f"{self._log_prefix()}Instance description is the same as pattern description, removing instance description.")
                instance.description = ""

    def _repair_tags(self):
        # Reset default/missing tags, upper-case the language tag and sort
        # the tags case-insensitively.
        if not self.to_repair.tags or set(self.to_repair.tags) == set(self.template_dict["tags"]):
            # default tags have not been changed, or there are no tags, set default tags
            self.to_repair.tags = ["sast", self.to_repair.language]
        self.to_repair.tags = [t.upper() if t.upper() == self.to_repair.language else t for t in self.to_repair.tags]
        self.to_repair.tags = sorted(self.to_repair.tags, key=lambda x: x.lower())

    def repair(self, pattern):
        """Run all repair steps for the pattern and its instances, validate the
        result against the schema and write the JSON file back."""
        # make sure, that the JSON file exists
        self._ensure_json_file_exists()
        self._check_paths_exists()
        # get all instances
        self._complete_instances()
        # repair instances
        for instance in self.to_repair.instances:
            instance.repair(pattern)
        # fix name
        self._repair_name()
        self._repair_description()
        self._repair_tags()

        self._validate_against_schema()
        # write to json
        self.to_json()
class READMEGenerator:
    """Generates the README markdown for one pattern from its JSON metadata,
    optional discovery-rule results (csv) and optional measurement results.

    Args:
        pattern: the pattern object to document.
        discovery_rule_results: path to the `checkdiscoveryrules` csv, or None.
        measurement_results: directory with measurement results, or None.
        masking_file: JSON file mapping tool names to masked names, or None.
    """

    def __init__(
        self,
        pattern,
        discovery_rule_results: Path | None,
        measurement_results: Path | None,
        masking_file: Path | None,
    ) -> None:
        self.pattern = pattern
        self.discovery_rule_results = discovery_rule_results
        self.measurement_results = measurement_results
        self.mask = {}

        self.log_prefix = f"Generating README for {self.pattern}: "

        # Any failure (missing file, missing language key, wrong shape) simply
        # disables the discovery-rule column.
        try:
            self.discovery_rule_results = utils.read_csv_to_dict(discovery_rule_results)
            self.discovery_rule_results = self.discovery_rule_results[self.pattern.language]
            # NOTE(review): assert is stripped under `python -O`; it only guards
            # the csv shape inside this try block.
            assert isinstance(self.discovery_rule_results, dict)
        except Exception:
            logger.warning(f"{self.log_prefix}Cannot find discovery rule results for language {self.pattern.language}")
            self.discovery_rule_results = None

        if not measurement_results or not measurement_results.is_dir():
            logger.warning(f"{self.log_prefix}Cannot locate `measurement_results` in '{self.measurement_results}'")
            self.measurement_results = None

        try:
            self.mask = utils.read_json(masking_file)
        except Exception:
            logger.info(f"{self.log_prefix}Continue without masking.")
            self.mask = {}

        # README sections, in render order
        self.readme_structure = [
            self._comment,
            self._heading,
            self._tags,
            self._pattern_description,
            self._pattern_metadata,
            self._instances,
        ]

    def _comment(self) -> list:
        # Generates a comment for the top of the README file.
        return [
            MarkdownComment(
                "This file is automatically generated. If you wish to make any changes, please use the JSON files and regenerate this file using the tpframework."
            )
        ]

    def _heading(self) -> list:
        # Generates the heading for the README file.
        return [MarkdownHeading(self.pattern.name, 1)]

    def _pattern_description(self) -> list:
        # Generates the description for the pattern.
        _, desc = self.pattern.get_description()
        return [MarkdownHeading("Description", 2), MarkdownString(desc)]

    def _tags(self) -> list:
        # Generates the pattern tags and version.
        return [
            MarkdownString(f'Tags: {", ".join(self.pattern.tags)}'),
            MarkdownString(f'Version: {self.pattern.version}'),
        ]

    def _pattern_metadata(self) -> list:
        """Generates an overview table: one row per instance with its discovery
        method, whether a discovery rule exists and -- when results are
        available -- whether the rule was successful."""
        discovery_rule_exists = []
        instance_names = []
        discovery_rule_successful = []
        discovery_method = []
        for instance in self.pattern.instances:
            instance_name = f"{instance.instance_id} Instance"
            instance_names += [MarkdownLink(instance_name, MarkdownHeading(instance_name, 2))]

            discovery_rule_exists += [utils.translate_bool(instance.discovery_rule.is_file())]

            if self.discovery_rule_results:
                try:
                    current_result = self.discovery_rule_results[str(self.pattern.pattern_id)][str(instance.instance_id)]
                except KeyError:
                    logger.warning(f'{self.log_prefix}Could not find discovery rule result for {instance}. Assuming "error"')
                    current_result = "error"
                discovery_rule_successful += [current_result]
            discovery_method += [instance.discovery_method]

        # fix: visible column-header typo "successfull" -> "successful"
        metadata_dict = {
            "0::Instances": instance_names,
            "1::has discovery rule": discovery_rule_exists,
            "2::discovery method": discovery_method,
            "3::rule successful": discovery_rule_successful,
        }
        if not self.discovery_rule_results:
            metadata_dict.pop("3::rule successful")

        return [MarkdownHeading("Overview", 2), MarkdownTable(metadata_dict)]

    def _instances(self) -> list:
        # Generates the README elements for all instances.
        return InstanceREADMEGenerator(
            self.pattern,
            self.measurement_results,
            mask=self.mask
        ).generate_md()

    def generate_README(self) -> str:
        """Render all sections and return the final markdown document."""
        md_elements = []
        for section in self.readme_structure:
            md_elements += section()
        return MarkdownDocument(md_elements).to_markdown()
+ return InstanceREADMEGenerator( + self.pattern, + self.measurement_results, + mask=self.mask + ).generate_md() + + def generate_README(self) -> str: + md_elements = [] + for f in self.readme_structure: + md_elements += f() + return MarkdownDocument(md_elements).to_markdown() + + +class InstanceREADMEGenerator: + def __init__( + self, + pattern, + path_to_measurements: Path | None, + level: int = 2, + mask: dict = {}, + ) -> None: + self.pattern = pattern + self.log_prefix = f"Generating README for {self.pattern}: " + self.level = level + self.measurements = Path(path_to_measurements) if path_to_measurements else None + self.has_multiple_instances = len(self.pattern.instances) > 1 + self.mask_dict = mask + + self.current_instance = None + + self.instance_structure = [ + self._instance_name, + self._instance_description, + self._instance_code, + self._instance_properties, + self._instance_more, + ] + self.instance_more_structure = [ + self._compile, + self._discovery, + self._measurement, + self._remediation, + ] + + def _instance_name(self) -> list: + # Generates the Markdown heading for the current instance. + return [MarkdownHeading(f"{self.current_instance.instance_id} Instance", self.level)] + + def _instance_description(self) -> list: + # Generates the description for the current instance. + _, desc = self.current_instance.get_description() + return [MarkdownString(desc)] if desc else [] + + def _instance_code(self) -> list: + # Generates the Instance code for the current instance. 
+ heading = MarkdownHeading("Code", self.level + 1) + code = self.current_instance.code_path + source = self.current_instance.expectation_source_file + sink = self.current_instance.expectation_sink_file + if source == sink: + content = self._get_file_content_if_exists(code) + return [heading, MarkdownCode(content, self.pattern.language)] if content else [] + source_content = self._get_file_content_if_exists(source) + sink_content = self._get_file_content_if_exists(sink) + return [ + heading, + MarkdownHeading("Source File", self.level + 2), + MarkdownCode(source_content, self.pattern.language), + MarkdownHeading("Sink File", self.level + 2), + MarkdownCode(sink_content, self.pattern.language), + ] if source_content and sink_content else [] + + def _instance_properties(self) -> list: + # Generates the table of instance properties. + properties_dict = { + "category": [self.current_instance.properties_category], + "feature_vs_internal_api": [self.current_instance.properties_feature_vs_internal_api], + "input_sanitizer": [utils.translate_bool(self.current_instance.properties_input_sanitizer)], + "source_and_sink": [utils.translate_bool(self.current_instance.properties_source_and_sink)], + "negative_test_case": [utils.translate_bool(self.current_instance.properties_negative_test_case)] + } + return [ + MarkdownHeading("Instance Properties", self.level + 1), + MarkdownTable(properties_dict) + ] + + def _instance_more(self) -> list: + # generates the instance more section + ret = [] + for f in self.instance_more_structure: + ret += f() + return [MarkdownCollapsible(ret, MarkdownString("More"))] + + def _compile(self) -> list: + # Generates the compile section for an instance. 
+ compile = self.current_instance.compile_binary + content = self._get_file_content_if_exists(compile) + binary = MarkdownCode(content, utils.get_language_by_file_ending(compile)) + return [MarkdownCollapsible([binary], MarkdownHeading("Compile", self.level + 1))] if content else [] + + def _discovery(self) -> list: + # Generates the 'discovery' section for an instance. + desc = self.current_instance.discovery_notes + desc = MarkdownString(self._get_file_content_if_exists(desc)) + rule_path = self.current_instance.discovery_rule + rule = self._get_file_content_if_exists(rule_path) + # get only necessary content + rule = re.sub(r"@main def main\(name .*{.*$", "", rule, flags=re.M) + rule = re.sub(r"importCpg.*$", "", rule, flags=re.M) + rule = re.sub(r"println\(.*\).*$", "", rule, flags=re.M) + rule = re.sub(r"delete;.*$", "", rule, flags=re.M) + rule = "".join(rule.rsplit("}", 1)) # remove the last } + rule = "\n".join([l.strip() for l in rule.split("\n")]).strip() + rule = ( + MarkdownCode(rule, utils.get_language_by_file_ending(rule_path)) + if rule_path + else MarkdownString("No discovery rule yet.") + ) + discovery_table = { + "discovery method": [self.current_instance.discovery_method], + "expected accuracy": [ + self.current_instance.discovery_rule_accuracy + ], + } + discovery_table = MarkdownTable(discovery_table) + return [ + MarkdownCollapsible( + [desc, rule, discovery_table], + MarkdownHeading("Discovery", self.level + 1), + ) + ] + + def _measurement(self) -> list: + # Generates the 'measurement' section for an instance. 
+ if not self.measurements: + return [] + instance_measurements = self.measurements / self.pattern.path.name / self.current_instance.name + if not instance_measurements.exists(): + logger.error(f"{self.log_prefix}Could not find `measurement` for {self.current_instance}") + return [] + measurement_table = {} + has_measurement = False + dates = [] + ground_truth = self.current_instance.expectation_expectation + for json_file in utils.list_files(instance_measurements, ".json"): + current_json = utils.read_json(json_file) + for c_dict in current_json: + try: + measurement = Measurement.init_from_measurement_dict(c_dict) + except MeasurementInvalid: + logger.warning(f"{self.log_prefix}Could not fetch measurement in {current_json}") + continue + has_measurement = True + tool = f"1::{self._mask(measurement.tool.lower())}" + date = datetime.strptime(measurement.date, "%Y-%m-%d %H:%M:%S").strftime("%d %b %Y") + dates += [date] + sast_tool_result = utils.translate_bool(not (measurement.result ^ ground_truth)) + try: + measurement_table[tool] += [(sast_tool_result, date)] + measurement_table[tool] = sorted( + measurement_table[tool], + key=lambda tup: datetime.strptime(tup[1], "%d %b %Y"), + ) + except KeyError: + measurement_table[tool] = [(sast_tool_result, date)] + if not has_measurement: + return [] + measurement_table, sorted_dates = self._format_measurements(measurement_table, dates) + measurement_table["0::Tool"] = sorted_dates + measurement_table["2::Ground Truth"] = [utils.translate_bool(ground_truth)] * len(sorted_dates) + return [ + MarkdownCollapsible( + [MarkdownTable(measurement_table)], + MarkdownHeading("Measurement", self.level + 1), + is_open=True, + ) + ] + + def _remediation(self) -> list: + # Generates the 'remediation' section for an instance. 
+ note = self.current_instance.remediation_notes + note = MarkdownString(self._get_file_content_if_exists(note)) + transformation = self.current_instance.remediation_transformation + transformation = MarkdownString(self._get_file_content_if_exists(transformation)) + modeling_rule = self.current_instance.remediation_modeling_rule + modeling_rule = MarkdownString(self._get_file_content_if_exists(modeling_rule)) + if any([note, transformation, modeling_rule]): + note = [ + note + if note + else MarkdownString( + "Can you think of a transformation, that makes this tarpit less challenging for SAST tools?" + ) + ] + transformation = ( + [MarkdownHeading("Transformation", self.level + 2), transformation] + if transformation + else [] + ) + modeling_rule = ( + [MarkdownHeading("Modeling Rule", self.level + 2), modeling_rule] + if modeling_rule + else [] + ) + return [ + MarkdownCollapsible( + note + transformation + modeling_rule, + MarkdownHeading("Remediation", self.level + 1), + ) + ] + return [] + + def _get_file_content_if_exists(self, path_to_file: Path) -> str: + if not path_to_file: + return "" + potential_file_path = Path(self.current_instance.path / path_to_file).resolve() + if " " not in str(path_to_file) and potential_file_path.is_file(): + with open(potential_file_path, "r") as in_file: + return "".join(in_file.readlines()).strip() + return path_to_file if path_to_file else "" + + def _mask(self, value_to_mask: str): + if value_to_mask in self.mask_dict.keys(): + return self.mask_dict[value_to_mask] + return value_to_mask + + def _format_measurements(self, tool_measurement_dict: dict, dates: list) -> tuple: + """Formats the measurements in the wanted table format: + | | Tool1 | Tool2 | + |--------+--------+--------| + | Date1 | yes | no | + Args: + tool_measurement_dict (dict): dict containing measurement results and date as a list of tuple for each tool. + dates (list): a list of measurement dates. 
+ Returns: + tuple(dict, list): dict of all tools and their measurement results (one column) and a list of sorted measurement dates (first column) + """ + dates_sorted = sorted(list(set(dates))) + formatted_measurement_table = {} + for tool, measurements in tool_measurement_dict.items(): + formatted_measurements = [] + current_measurement = measurements.pop(0) + for date in dates_sorted: + if current_measurement[1] == date: + formatted_measurements += [current_measurement[0]] + if len(measurements): + current_measurement = measurements.pop(0) + else: + break + else: + formatted_measurements += [""] + formatted_measurement_table[tool] = formatted_measurements + return formatted_measurement_table, dates_sorted + + def generate_md(self) -> list: + ret = [] + for idx, self.current_instance in enumerate(self.pattern.instances): + instance_md_elements = [] + for f in self.instance_structure: + instance_md_elements += f() + if self.has_multiple_instances: + ret += [ + MarkdownCollapsible( + instance_md_elements[1:], instance_md_elements[0], idx == 0 + ) + ] + else: + ret = instance_md_elements + return ret \ No newline at end of file diff --git a/tp_framework/core/repair/readme_markdown_elements.py b/tp_framework/core/repair/readme_markdown_elements.py new file mode 100644 index 0000000..4514860 --- /dev/null +++ b/tp_framework/core/repair/readme_markdown_elements.py @@ -0,0 +1,171 @@ +from tabulate import tabulate + + +class MarkdownElement: + """Super class for all MarkdownElements used within generating README files for a testability pattern.""" + + def __init__(self, content: str): + self.content = content.strip() if content else "" + + def linkable(self) -> str: + """Makes it possible for a markdown Element to be used within a link. + Returns: + str: a string representation, that can be used in a markdown link. 
+ """ + raise NotImplementedError + + def to_markdown(self): + raise NotImplementedError + + def strip(self): + return self.to_markdown().strip() + + def __bool__(self): + return bool(self.content) + + +class MarkdownCode(MarkdownElement): + """A markdown code block. + Syntax: + ``` + self.content + ``` + """ + + def __init__(self, content, code_type): + super().__init__(content) + self.code_type = code_type + + def to_markdown(self) -> str: + return f"\n```{self.code_type.lower()}\n{self.content}\n```\n" + + +class MarkdownComment(MarkdownElement): + """A markdown comment + Syntax: + [//]: # () + """ + + def to_markdown(self): + self.content = self.content.replace("\r", " ").replace("\n", " ") + return f"\n[//]: # ({self.content})\n" + + +class MarkdownHeading(MarkdownElement): + """A markdown heading, `self.level` indicates the number of '#' + Syntax example: + # + """ + + def __init__(self, content, level: int): + super().__init__(content) + self.level = int(level) + assert self.level >= 1 + + def to_markdown(self) -> str: + return f'\n{"#" * self.level} {self.content}\n\n' + + def linkable(self) -> str: + return f'#{self.content.replace(" " , "-").lower()}' + + +class MarkdownCollapsible(MarkdownElement): + """A markdown collapsible element. + Syntax example: +
+ + + +
+ """ + + def __init__(self, content: list, heading: MarkdownElement, is_open: bool = False): + self.content = content + self.is_open = is_open + self.heading = heading + + def to_markdown(self) -> str: + final = f'\n
' + heading = ( + self.heading.to_markdown().strip() + if not isinstance(self.heading, MarkdownHeading) + else self.heading.to_markdown() + ) + final += f"\n\n{heading}\n\n" + for element in self.content: + final += element.to_markdown() + final += f"\n
\n" + return final + + +class MarkdownString(MarkdownElement): + """Representation of a String, it is surrounded by newlines.""" + def to_markdown(self) -> str: + return f"\n{self.content}\n" + + +class MarkdownLink(MarkdownElement): + """A markdown link. + Syntax: + [self.content](self.link) + """ + + def __init__(self, content: str | MarkdownElement, link: MarkdownElement): + super().__init__(content) + assert isinstance( + link, MarkdownElement + ), "The link of a MarkdownLink must be a MarkdownElement." + self.link = link.linkable() + + def to_markdown(self): + return f"[{self.content.strip()}]({self.link.strip()})" + + +class MarkdownTable(MarkdownElement): + """A markdown table + Syntax: + | | | + |---|---| + | | | + The content must be provided as a dict, where the value for each key is a list. + The key will be the header and the list contains values for that column. + Columns will be sorted alphabetically, if you wish to sort columns yourself you can prefix them using ::. + """ + + def __init__(self, content: dict): + assert isinstance( + content, dict + ), "content for Markdown table must be provided as dict" + assert all( + [isinstance(v, list) for v in content.values()] + ), "content for Markdowntable must have lists as values" + self.headings = sorted(content.keys(), key=lambda x: x.lower()) + num_rows = max([len(v) for v in content.values()]) + self.lines = [ + [None for _ in range(len(self.headings))] for _ in range(num_rows) + ] + for column_idx, key in enumerate(self.headings): + for row_index, v in enumerate(content[key]): + self.lines[row_index][column_idx] = v.strip() if v else "" + + def to_markdown(self): + return f'\n{tabulate(self.lines, [h.split("::")[-1] if "::" in h else h for h in self.headings], "github")}\n' + + +class MarkdownDocument(MarkdownElement): + """A central point, where all markdown elements are collected into one single markdown document.""" + + def __init__(self, content: list) -> None: + self.content = content + + def 
to_markdown(self) -> str: + final = "" + for element in self.content: + assert isinstance(element, MarkdownElement) + final += element.to_markdown() + import re + + final = re.sub("\n\n\n*", "\n\n", final) + return ( + f"{final.strip()}\n" # GitHub markdown likes a newline at the end of files + ) diff --git a/tp_framework/core/repair/repair_tool.py b/tp_framework/core/repair/repair_tool.py new file mode 100644 index 0000000..94aba5f --- /dev/null +++ b/tp_framework/core/repair/repair_tool.py @@ -0,0 +1,102 @@ +import shutil +from pathlib import Path +from copy import deepcopy +from jsonschema import validate + +from core.exceptions import PatternRepairError +from core import utils + + +import logging +from core import loggermgr +logger = logging.getLogger(loggermgr.logger_name(__name__)) + +class RepairTool: + def __init__(self, to_repair, template_json_file: Path, schema_file: Path) -> None: + self.to_repair = to_repair + self.json_template = template_json_file + self.schema_dict = schema_file + if not self.json_template.is_file(): + raise PatternRepairError(f"{self._log_prefix()}No template JSON found in {self.json_template}") + if not schema_file.is_file(): + raise PatternRepairError(f"{self._log_prefix()}No schema JSON found in {schema_file}") + try: + self.template_dict = utils.read_json(self.json_template) + except Exception: + raise PatternRepairError(f"{self._log_prefix()}The template JSON file {self.json_template} is corrupt, please check") + if not self.template_dict: + raise PatternRepairError(f"{self._log_prefix()}The template JSON {self.json_template} is empty") + try: + self.schema_dict = utils.read_json(schema_file) + except Exception: + raise PatternRepairError(f"{self._log_prefix()}The schema JSON file {schema_file} is corrupt, please check") + + def _log_prefix(self): + return f"PatternRepair ({self.to_repair}) " + + def _copy_template(self): + logger.info(f"{self._log_prefix()}Copying template JSON.") + expected_json_path = self.to_repair.path / 
class RepairTool:
    """Base class for pattern/instance repair: loads the JSON template and the
    schema, makes sure a complete JSON file exists for the object to repair,
    and writes the repaired JSON back.

    Raises:
        PatternRepairError: when the template or schema JSON is missing,
            corrupt or empty.
    """

    def __init__(self, to_repair, template_json_file: Path, schema_file: Path) -> None:
        self.to_repair = to_repair
        self.json_template = template_json_file
        if not self.json_template.is_file():
            raise PatternRepairError(f"{self._log_prefix()}No template JSON found in {self.json_template}")
        if not schema_file.is_file():
            raise PatternRepairError(f"{self._log_prefix()}No schema JSON found in {schema_file}")
        try:
            self.template_dict = utils.read_json(self.json_template)
        except Exception as e:
            # chained so the original parse error stays visible
            raise PatternRepairError(f"{self._log_prefix()}The template JSON file {self.json_template} is corrupt, please check") from e
        if not self.template_dict:
            raise PatternRepairError(f"{self._log_prefix()}The template JSON {self.json_template} is empty")
        try:
            self.schema_dict = utils.read_json(schema_file)
        except Exception as e:
            raise PatternRepairError(f"{self._log_prefix()}The schema JSON file {schema_file} is corrupt, please check") from e

    def _log_prefix(self):
        return f"PatternRepair ({self.to_repair}) "

    def _copy_template(self):
        # Copy the template JSON next to the object to repair and point
        # json_path at the fresh copy.
        logger.info(f"{self._log_prefix()}Copying template JSON.")
        expected_json_path = self.to_repair.path / f"{self.to_repair.path.name}.json"
        shutil.copy(self.json_template, expected_json_path)
        self.to_repair.json_path = expected_json_path
        return expected_json_path

    def _ensure_json_file_exists(self):
        """Make sure a readable JSON file containing all template keys exists and
        follows the `<dir_name>.json` naming scheme."""
        to_repair_json_path = self.to_repair.json_path
        # check if json path is a file
        if not to_repair_json_path.is_file():
            # try to find an existing JSON in the directory, otherwise copy the template
            # fix: the original called utils.get_json_file() without the
            # directory argument (it is called with one everywhere else)
            to_repair_json_path = utils.get_json_file(self.to_repair.path)
            if not to_repair_json_path:
                to_repair_json_path = self._copy_template()
            else:
                # fix: keep json_path consistent with the file we read/rename below
                self.to_repair.json_path = to_repair_json_path
        # read the given file to check if there are errors or keys missing
        try:
            org_pattern_dict = utils.read_json(to_repair_json_path)
        except Exception:
            # corrupt JSON: fall back to a fresh template copy
            self._copy_template()
            org_pattern_dict = utils.read_json(self.to_repair.json_path)

        pattern_dict = deepcopy(org_pattern_dict)
        # check for missing keys and fill them from the template
        missing_keys_in_pattern_dict = set(self.template_dict.keys()) - set(pattern_dict.keys())
        for key in missing_keys_in_pattern_dict:
            pattern_dict[key] = self.template_dict[key]

        if pattern_dict != org_pattern_dict:
            utils.write_json(self.to_repair.json_path, pattern_dict)

        # rename the JSON file to the expected format
        expected_json_name = f"{self.to_repair.path.name}.json"
        actual_name = self.to_repair.json_path.name

        if expected_json_name != actual_name:
            new_path = self.to_repair.path / expected_json_name
            shutil.move(self.to_repair.json_path, new_path)
            self.to_repair.json_path = new_path

    def _check_paths_exists(self):
        # Null out every Path attribute that no longer points to an existing
        # file or directory, warning for each.
        for k, v in vars(self.to_repair).items():
            if isinstance(v, Path) and not v.exists():
                logger.warning(f"{self._log_prefix()}Could not find path {v}")
                setattr(self.to_repair, k, None)

    def _validate_against_schema(self):
        # Validation problems are logged, not raised.
        repaired_dict = self.to_repair.to_dict()
        try:
            validate(instance=repaired_dict, schema=self.schema_dict)
        except Exception as e:
            msg = utils.get_exception_message(e)
            logger.error(f"{self._log_prefix()}Validating against schema failed: {msg}")

    def repair(self):
        # Subclasses implement the actual repair workflow.
        raise NotImplementedError()

    def to_json(self):
        # Write back only when the repaired dict actually differs from the file.
        repaired_dict = self.to_repair.to_dict()

        original_dict = utils.read_json(self.to_repair.json_path)
        if repaired_dict != original_dict:
            utils.write_json(self.to_repair.json_path, repaired_dict)
instance.load_instance_from_json(tpi_path, tp_lib_path, language) - row["pattern_name"] = tpi.name + row["pattern_name"] = instance.name row["results"] = "NOT_FOUND" - row["negative_test_case"] = "YES" if tpi.properties_negative_test_case else "NO" - row["expectation"] = tpi.expectation - pass + row["negative_test_case"] = "YES" if instance.properties_negative_test_case else "NO" + row["expectation"] = instance.expectation_expectation if meas: row["pattern_name"] = meas.instance.name row["tool"] = f"{meas.tool}:{meas.version}" # rewrite `saas` occurrences with precise versions diff --git a/tp_framework/core/utils.py b/tp_framework/core/utils.py index d828246..dc004bf 100644 --- a/tp_framework/core/utils.py +++ b/tp_framework/core/utils.py @@ -1,116 +1,82 @@ import csv +import hashlib +import json +import shutil import os -from datetime import datetime -from platform import system +import yaml +from datetime import datetime from importlib import import_module from pathlib import Path from typing import Tuple, Dict -import yaml -import hashlib import logging from core import loggermgr logger = logging.getLogger(loggermgr.logger_name(__name__)) import config -from core import pattern, instance from core.exceptions import PatternDoesNotExists, LanguageTPLibDoesNotExist, TPLibDoesNotExist, InvalidSastTools, \ - DiscoveryMethodNotSupported, TargetDirDoesNotExist, InvalidSastTool, PatternFolderNotFound, InstanceDoesNotExists + DiscoveryMethodNotSupported, TargetDirDoesNotExist, InvalidSastTool, InstanceDoesNotExists, \ + MeasurementResultsDoNotExist from core import errors -def is_windows(): - return system() == "Windows" - - -def list_pattern_paths_for_language(language: str, tp_lib_dir: Path) -> list[Path]: - all_pattern_dirs_by_lang: Path = tp_lib_dir / language - if not all_pattern_dirs_by_lang.is_dir(): - raise LanguageTPLibDoesNotExist - return list_dirs_only(all_pattern_dirs_by_lang) - - -def list_tpi_paths_by_tp_id(language: str, pattern_id: int, tp_lib_dir: Path) 
-> list[Path]: - try: - p, p_dir = pattern.get_pattern_by_pattern_id(language, pattern_id, tp_lib_dir) - return list(map(lambda i: (tp_lib_dir / language / p_dir / i).resolve(), p.instances)) - except: - ee = PatternDoesNotExists(pattern_id) - logger.exception(ee) - raise ee - +################################################################################ +# PATTERNS +# -def get_tpi_id_from_jsonpath(jp: Path) -> int: - return get_id_from_name(jp.parent.name) -def get_pattern_dir_from_id(pattern_id: int, language: str, tp_lib_dir: Path) -> Path: +def get_pattern_dir_from_id(pattern_id: int, language: str, tp_lib_dir: Path) -> Path: # needed tp_lib_dir_lang_dir: Path = tp_lib_dir / language if tp_lib_dir_lang_dir.is_dir(): pattern_with_id = list(filter(lambda p: get_id_from_name(p.name) == pattern_id, list_dirs_only(tp_lib_dir_lang_dir))) if pattern_with_id: - return pattern_with_id[0] + return Path(pattern_with_id[0]) raise PatternDoesNotExists(pattern_id) else: raise PatternDoesNotExists(pattern_id) -def get_instance_dir_from_id(instance_id: int, pattern_dir: Path) -> Path: - if pattern_dir.is_dir(): - return get_instance_dir_from_list(instance_id, list_dirs_only(pattern_dir)) - else: - raise PatternFolderNotFound() +def get_next_free_pattern_id_for_language(language: str, tp_lib_dir: Path, proposed_id = None): + lang_tp_lib_path = tp_lib_dir / language + check_lang_tp_lib_path(lang_tp_lib_path) + all_patterns = list_dirs_only(lang_tp_lib_path) + taken_ids = [] + for pattern in all_patterns: + taken_ids += [get_id_from_name(pattern.name)] + id_range = list(range(1, max(taken_ids)+1)) + free_ids = sorted(list(set(id_range) - set(taken_ids))) + if proposed_id in free_ids: + return proposed_id + return free_ids[0] if free_ids else max(taken_ids) + 1 +################################################################################ +# INSTANCES +# + + +# TODO: TESTING def get_instance_dir_from_list(instance_id: int, l_pattern_dir: list[Path]): instance_with_id = 
list(filter(lambda tpi_dir: get_id_from_name(tpi_dir.name) == instance_id, l_pattern_dir)) if not instance_with_id: raise InstanceDoesNotExists(instance_id=instance_id) return instance_with_id[0] -# def get_or_create_language_dir(language: str, tp_lib_dir: Path) -> Path: -# tp_lib_for_lang: Path = tp_lib_dir / language -# tp_lib_for_lang.mkdir(parents=True, exist_ok=True) -# return tp_lib_for_lang - - -def get_or_create_pattern_dir(language: str, pattern_id: int, pattern_name: str, tp_lib_dir: Path) -> Path: - pattern_dir = tp_lib_dir / language / get_pattern_dir_name_from_name(pattern_name, pattern_id) - pattern_dir.mkdir(parents=True, exist_ok=True) - return pattern_dir - - -def get_pattern_dir_name_from_name(name: str, pattern_id: int) -> str: - return f"{pattern_id}_{name.lower().replace(' ', '_')}" -def get_instance_dir_name_from_pattern(name: str, pattern_id: int, instance_id: int) -> str: - return f"{instance_id}_instance_{get_pattern_dir_name_from_name(name, pattern_id)}" - - -def get_id_from_name(name: str) -> int: - return int(name.split("_")[0]) - - -def get_class_from_str(class_str: str) -> object: - try: - module_path, class_name = class_str.rsplit('.', 1) - module = import_module(module_path) - return getattr(module, class_name) - except (ImportError, AttributeError) as e: - raise ImportError(class_str) - - -def get_tp_dir_for_language(tp_lib_dir: Path, language: str): - return Path(tp_lib_dir / language) +################################################################################ +# MEASUREMENT +# def get_measurement_dir_for_language(tp_lib_dir: Path, language: str): return Path(tp_lib_dir / config.MEASUREMENT_REL_DIR / language) +# TODO: TESTING def get_measurement_file(date: datetime): date_time_str = date.strftime("%Y-%m-%d_%H-%M-%S") return f"measurement-{date_time_str}.json" @@ -128,43 +94,15 @@ def get_last_measurement_for_pattern_instance(meas_inst_dir: Path) -> Path: return sorted_meas[-1][1] -# Useful for some SAST tools that accepts a 
zip file of the source code to scan -def zipdir(path, ziph): - for root, dirs, files in os.walk(path): - for file in files: - ziph.write(os.path.join(root, file), - os.path.relpath(os.path.join(root, file), - os.path.join(path, '..'))) - - -################################################################################ -# TODO (LC): are these related to pattern instance ? -# -def get_path_or_none(p: str) -> Path | None: - if p: - return Path(p) - return p - - -def get_enum_value_or_none(enum) -> str | None: - try: - return enum.value - except AttributeError: - return None - - -def get_relative_path_str_or_none(path) -> str | None: - if path: - return f"./{path}" - return None - - -def get_from_dict(d, k1, k2): - return d.get(k1, {}).get(k2, None) +def check_measurement_results_exist(measurement_dir: Path): + if not measurement_dir.is_dir(): + e = MeasurementResultsDoNotExist() + logger.error(get_exception_message(e)) + raise e ################################################################################ -# Discovery +# DISCOVERY # def get_discovery_rule_ext(discovery_method: str): @@ -195,11 +133,145 @@ def get_discovery_rules(discovery_rule_list: list[str], discovery_rule_ext: str) logger.warning(errors.wrongDiscoveryRule(discovery_rule)+ " The script will try to continue ignoring this discovery rule.") return list(discovery_rules_to_run) +################################################################################ +# SAST +# + +def sast_tool_version_match(v1, v2, nv_max=3, ignore_saas=True): + if ignore_saas and (v1 == "saas" or v2 == "saas"): + return True + sv1 = v1.split(".") + sv2 = v2.split(".") + nv = max(len(sv1), len(sv2)) + for i in range(0, min(nv, nv_max)): + try: + if sv1[i] != sv2[i]: + return False + except IndexError: + return False + return True + + +def load_sast_specific_config(tool_name: str, tool_version: str) -> Dict: + try: + tool_config_path: Path = config.ROOT_SAST_DIR / 
load_yaml(config.SAST_CONFIG_FILE)["tools"][tool_name]["version"][tool_version]["config"] + except KeyError: + e = InvalidSastTool(f"{tool_name}:{tool_version}") + raise e + return load_yaml(tool_config_path) ################################################################################ -# Others +# PATTERN REPAIR # + +def check_file_exist(file_path: Path, file_suffix = ".csv"): + if not file_path.is_file() or not file_path.suffix == file_suffix: + e = FileNotFoundError(file_path) + logger.error(get_exception_message(e)) + raise e + + +def get_relative_paths(file_path: Path, base_path: Path): + if not file_path: + return None + try: + return f"./{file_path.relative_to(base_path)}" + except ValueError: + try: + return f"../{file_path.relative_to(base_path.parent)}" + except ValueError as e: + logger.warning(f"Could not parse filepath {file_path} to a relative path.") + return file_path + + +def read_csv_to_dict(path_to_file: str) -> dict: + # Reads a csv file into a dictionary, the csv file must contain the columns 'pattern_id', 'instance_id', 'language', 'successful' + # The dict will have the form: {: {: {: }}} + res = [] + with open(path_to_file, "r") as csvfile: + r = csv.reader(csvfile, delimiter=",") + headings = next(r) + wanted_columns = ["pattern_id", "instance_id", "language", "successful"] + wanted_idx = [headings.index(w) for w in wanted_columns] + assert len(wanted_idx) == len(wanted_columns), f"Could not find wanted column names in csv {path_to_file}" + sanitized_lines =filter(lambda x: bool(x[0].strip()), r) + res = [[line[i].strip() for i in wanted_idx] for line in sanitized_lines] + + ret = {} + for line in res: + if line[2] not in ret.keys(): + ret[line[2]] = {} + if line[0] not in ret[line[2]].keys(): + ret[line[2]][line[0]] = {} + if line[1] not in ret[line[2]][line[0]].keys(): + ret[line[2]][line[0]][line[1]] = {} + ret[line[2]][line[0]][line[1]] = line[3] + return ret + + +def translate_bool(bool_to_translate: bool): + return "yes" if 
bool_to_translate else "no" + +# TODO TESTING +def get_language_by_file_ending(filename: str) -> str: + if not filename: + return "" + if Path(filename).suffix == ".py": + return "python" + if Path(filename).suffix == ".php": + return "php" + if Path(filename).suffix == ".js": + return "javascript" + if Path(filename).suffix == ".java": + return "java" + if Path(filename).suffix == ".sc": + return "scala" + if Path(filename).suffix == ".bash": + return "bash" + return "" + +################################################################################ +# OTHER +# TODO: Could be sorted alphabetically? + + +# Useful for some SAST tools that accepts a zip file of the source code to scan +def zipdir(path, ziph): + for root, dirs, files in os.walk(path): + for file in files: + ziph.write(os.path.join(root, file), + os.path.relpath(os.path.join(root, file), + os.path.join(path, '..'))) + + +def get_id_from_name(name: str) -> int: + return int(name.split("_")[0]) + + +def get_class_from_str(class_str: str) -> object: + try: + module_path, class_name = class_str.rsplit('.', 1) + module = import_module(module_path) + return getattr(module, class_name) + except (ImportError, AttributeError) as e: + raise ImportError(class_str) + + +# TODO (LC): are these related to pattern instance ? 
+def get_path_or_none(p: str) -> Path | None: + if p: + return Path(p) + return None + + +def get_from_dict(d: dict, k1: str, k2: str): + try: + return d.get(k1, {}).get(k2, None) + except AttributeError: + return None + + def build_timestamp_language_name(name: Path | None, language: str, now: datetime, extra: str = None) -> str: res = language if name: @@ -232,7 +304,7 @@ def check_target_dir(target_dir: Path): logger.error(get_exception_message(e)) raise e - +# TODO: TESTING def filter_sast_tools(itools: list[Dict], language: str, exception_raised=True): for t in itools: t["supported_languages"] = load_sast_specific_config(t["name"], t["version"])["supported_languages"] @@ -244,36 +316,12 @@ def filter_sast_tools(itools: list[Dict], language: str, exception_raised=True): return tools -def sast_tool_version_match(v1, v2, nv_max=3, ignore_saas=True): - if ignore_saas and (v1 == "saas" or v2 == "saas"): - return True - sv1 = v1.split(".") - sv2 = v2.split(".") - nv = max(len(sv1), len(sv2)) - for i in range(0, min(nv, nv_max)): - try: - if sv1[i] != sv2[i]: - return False - except IndexError: - return False - return True - - def load_yaml(fpath): with open(fpath) as f: fdict: Dict = yaml.load(f, Loader=yaml.Loader) return fdict -def load_sast_specific_config(tool_name: str, tool_version: str) -> Dict: - try: - tool_config_path: Path = config.ROOT_SAST_DIR / load_yaml(config.SAST_CONFIG_FILE)["tools"][tool_name]["version"][tool_version]["config"] - except KeyError: - e = InvalidSastTool(f"{tool_name}:{tool_version}") - raise e - return load_yaml(tool_config_path) - - def write_csv_file(ofile: Path, header: list[str], data: list[dict]): with open(ofile, "w", newline='') as report: writer = csv.DictWriter(report, fieldnames=header) @@ -293,7 +341,6 @@ def add_loggers(output_dir_path: Path, filename: str=None, console=True): loggermgr.add_console_logger() - def get_operation_build_name_and_dir(op: str, src_dir: Path | None, language: str, output_dir: Path): now = 
datetime.now() if not src_dir: @@ -353,4 +400,71 @@ def get_file_hash(fpath, bigfile=False): else: while chunk := f.read(8192): hash.update(chunk) - return hash.hexdigest() \ No newline at end of file + return hash.hexdigest() + + +def list_files(path_to_parent_dir: Path, suffix: str, recursive: bool = False): + assert suffix[0] == ".", "Suffix has to start with '.'" + if recursive: + matches = [] + for root, _, filenames in os.walk(path_to_parent_dir): + for filename in filter(lambda f: Path(f).suffix == suffix, filenames): + matches += [Path(root) / filename] + return matches + else: + return list(filter(lambda file_name: file_name.suffix == suffix, [path_to_parent_dir / f for f in path_to_parent_dir.iterdir()])) + + +def list_directories(parent_dir: Path): + return list(filter(lambda name: name.is_dir(), [parent_dir / d for d in parent_dir.iterdir()])) + +# TODO: TESTING +def get_json_file(path_to_pattern_or_instance: Path) -> Path: + if path_to_pattern_or_instance.name == 'docs': + return None + json_files_in_dir = list_files(path_to_pattern_or_instance, ".json") + if len(json_files_in_dir) == 1: + return json_files_in_dir[0] + elif not json_files_in_dir: + logger.warning(f"Could not find a JSON file in {path_to_pattern_or_instance.name}") + return None + else: + logger.warning(f"Found multiple '.json' files for {path_to_pattern_or_instance.name}") + if path_to_pattern_or_instance / f"{path_to_pattern_or_instance.name}.json" in json_files_in_dir: + return path_to_pattern_or_instance / f"{path_to_pattern_or_instance.name}.json" + logger.warning("Could not determine the right pattern JSON file. 
Please name it _.json") + return None + + +def read_json(path_to_json_file: Path): + if not path_to_json_file.is_file(): + return {} + result = {} + + try: + with open(path_to_json_file, "r") as json_file: + result = json.load(json_file) + except json.JSONDecodeError as err: + raise Exception(f"JSON is corrupt, please check {path_to_json_file}") from err + + if not result: + logger.error(f"JSON file is empty") + return result + + +def write_json(path_to_json_file: Path, result_dict: dict): + path_to_json_file.parent.mkdir(exist_ok=True, parents=True) + with open(path_to_json_file, "w") as json_file: + json.dump(result_dict, json_file, indent=4) + + +def copy_dir_content(path_to_src_dir: Path, path_to_dst_dir: Path): + for element in os.listdir(path_to_src_dir): + src_path = path_to_src_dir / element + dest_path = path_to_dst_dir / element + if dest_path.exists(): + continue + if src_path.is_file(): + shutil.copy2(src_path, dest_path) + else: + shutil.copytree(src_path, dest_path)