From af9ff5d9c6622d2f8714327321abeb191c7655ff Mon Sep 17 00:00:00 2001 From: Alex Bourret Date: Wed, 11 Mar 2026 17:00:13 +0100 Subject: [PATCH 1/4] add option overide for csv decoding --- custom-recipes/api-connect/recipe.json | 75 +++++++++++++++++++ .../api-connect_dataset/connector.json | 75 +++++++++++++++++++ .../api-connect_dataset/connector.py | 3 +- python-lib/dku_utils.py | 69 ++++++++++++++--- python-lib/rest_api_recipe_session.py | 5 +- 5 files changed, 214 insertions(+), 13 deletions(-) diff --git a/custom-recipes/api-connect/recipe.json b/custom-recipes/api-connect/recipe.json index ee84f57..d590d92 100644 --- a/custom-recipes/api-connect/recipe.json +++ b/custom-recipes/api-connect/recipe.json @@ -312,6 +312,81 @@ "type": "STRING", "visibilityCondition": "model.use_mtls==true" }, + { + "name": "force_csv_parameters", + "label": "Force CSV parameters", + "description": "", + "type": "BOOLEAN", + "defaultValue": false + }, + { + "name": "csv_delimiter", + "label": "Delimiter", + "description": "", + "type": "STRING", + "visibilityCondition": "model.force_csv_parameters==true" + }, + { + "name": "csv_doublequote", + "label": "Double quote", + "description": "", + "type": "SELECT", + "selectChoices":[ + {"value": null, "label": "Auto detect"}, + {"value": "double_quote", "label": "Double quote"}, + {"value": "not_double_quote", "label": "No double quote"} + ], + "visibilityCondition": "model.force_csv_parameters==true" + }, + { + "name": "csv_escapechar", + "label": "Escape char", + "description": "", + "type": "STRING", + "visibilityCondition": "model.force_csv_parameters==true" + }, + { + "name": "csv_lineterminator", + "label": "Line terminator", + "description": "", + "type": "STRING", + "visibilityCondition": "model.force_csv_parameters==true" + }, + { + "name": "csv_quotechar", + "label": "Quote char", + "description": "", + "type": "STRING", + "visibilityCondition": "model.force_csv_parameters==true" + }, + { + "name": "csv_quoting", + "label": "Quote", + "description": "", + "type": "SELECT", + "selectChoices":[ + {"value": null, "label": "Auto detect"}, + {"value": 0, "label": "Minimal"}, + {"value": 1, "label": "All"}, + {"value": 2, "label": "Non numeric"}, + {"value": 3, "label": "None"}, + {"value": 4, "label": "Strings"}, + {"value": 5, "label": "Not null"} + ], + "visibilityCondition": "model.force_csv_parameters==true" + }, + { + "name": "csv_skipinitialspace", + "label": "Skip initial space", + "description": "", + "type": "SELECT", + "selectChoices":[ + {"value": null, "label": "Auto detect"}, + {"value": "skip", "label": "Skip"}, + {"value": "not_skip", "label": "Do not skip"} + ], + "visibilityCondition": "model.force_csv_parameters==true" + }, { "name": "redirect_auth_header", "label": "Redirect authorization header", diff --git a/python-connectors/api-connect_dataset/connector.json b/python-connectors/api-connect_dataset/connector.json index ee987bc..0dac288 100644 --- a/python-connectors/api-connect_dataset/connector.json +++ b/python-connectors/api-connect_dataset/connector.json @@ -259,6 +259,81 @@ "type": "STRING", "visibilityCondition": "model.use_mtls==true" }, + { + "name": "force_csv_parameters", + "label": " ", + "description": "Force CSV parameters", + "type": "BOOLEAN", + "defaultValue": false + }, + { + "name": "csv_delimiter", + "label": " ", + "description": "Delimiter", + "type": "STRING", + "visibilityCondition": "model.force_csv_parameters==true" + }, + { + "name": "csv_doublequote", + "label": " ", + "description": "Double quote", + "type": "SELECT", + "selectChoices":[ + {"value": null, "label": "Auto detect"}, + {"value": "double_quote", "label": "Double quote"}, + {"value": "not_double_quote", "label": "No double quote"} + ], + "visibilityCondition": "model.force_csv_parameters==true" + }, + { + "name": "csv_escapechar", + "label": " ", + "description": "Escape char", + "type": "STRING", + "visibilityCondition": "model.force_csv_parameters==true" + }, + { + "name": "csv_lineterminator", + "label": " ", + "description": "Line terminator", + "type": "STRING", + "visibilityCondition": "model.force_csv_parameters==true" + }, + { + "name": "csv_quotechar", + "label": " ", + "description": "Quote char", + "type": "STRING", + "visibilityCondition": "model.force_csv_parameters==true" + }, + { + "name": "csv_quoting", + "label": " ", + "description": "Quote", + "type": "SELECT", + "selectChoices":[ + {"value": null, "label": "Auto detect"}, + {"value": 0, "label": "Minimal"}, + {"value": 1, "label": "All"}, + {"value": 2, "label": "Non numeric"}, + {"value": 3, "label": "None"}, + {"value": 4, "label": "Strings"}, + {"value": 5, "label": "Not null"} + ], + "visibilityCondition": "model.force_csv_parameters==true" + }, + { + "name": "csv_skipinitialspace", + "label": " ", + "description": "Skip initial space", + "type": "SELECT", + "selectChoices":[ + {"value": null, "label": "Auto detect"}, + {"value": "skip", "label": "Skip"}, + {"value": "not_skip", "label": "Do not skip"} + ], + "visibilityCondition": "model.force_csv_parameters==true" + }, { "name": "redirect_auth_header", "label": " ", diff --git a/python-connectors/api-connect_dataset/connector.py b/python-connectors/api-connect_dataset/connector.py index fc155de..1873a6a 100644 --- a/python-connectors/api-connect_dataset/connector.py +++ b/python-connectors/api-connect_dataset/connector.py @@ -33,6 +33,7 @@ def __init__(self, config, plugin_config): self.raw_output = endpoint_parameters.get("raw_output", None) self.maximum_number_rows = config.get("maximum_number_rows", -1) self.display_metadata = config.get("display_metadata", False) + self.csv_configuration = config def get_read_schema(self): # In this example, we don't specify a schema here, so DSS will infer the schema @@ -60,7 +61,7 @@ def generate_rows(self, dataset_schema=None, dataset_partitioning=None, record_count += 1 yield self.format_output(data, metadata) else: - csv_data = decode_csv_data(data) + csv_data = decode_csv_data(data, self.csv_configuration) if csv_data: record_count += len(csv_data) for row in csv_data: diff --git a/python-lib/dku_utils.py b/python-lib/dku_utils.py index b16a8f8..22ebc9d 100644 --- a/python-lib/dku_utils.py +++ b/python-lib/dku_utils.py @@ -41,6 +41,7 @@ def get_endpoint_parameters(configuration): "next_page_url_key", "is_next_page_url_relative", "next_page_url_base", "top_key", "skip_key", "maximum_number_rows", "use_mtls", "mtls_certificate_path", "mtls_key_path", + "force_csv_parameters", "csv_delimiter" ] parameters = { endpoint_parameter: configuration.get(endpoint_parameter) for endpoint_parameter in endpoint_parameters if configuration.get(endpoint_parameter) is not None @@ -167,7 +168,7 @@ def xml_to_json(content): return json_response -def decode_csv_data(data): +def decode_csv_data(data, csv_configuation): import csv import io json_data = None @@ -189,19 +190,67 @@ def decode_csv_data(data): ) except Exception as error: logger.error("Could not sniff csv dialect. Error={}".format(error)) - dialect = "excel" - try: - reader = csv.DictReader( - io.StringIO(data), - dialect=dialect - ) - json_data = list(reader) - except Exception as error: - logger.error("Could not extract csv data. Error={}. Trying method 2.".format(error)) + # dialect = "excel" + dialect = csv.Dialect() + dialect.delimiter = ',' + dialect.quotechar = '"' + dialect.doublequote = True + dialect.skipinitialspace = False + dialect.lineterminator = '\r\n' + dialect.quoting = 0 + dialect = update_csv_dialect(csv_configuation, dialect) + if not csv_configuation.get("force_csv_parameters", False): + # For back compatibility reason, if csv params are not forced, + # we try the old method first. + try: + reader = csv.DictReader( + io.StringIO(data), + dialect=dialect + ) + json_data = list(reader) + except Exception as error: + logger.error("Could not extract csv data. Error={}. Trying method 2.".format(error)) + json_data = decode_csv_data_m2(data, dialect) + else: + logger.error("CSV parameters are forced, trying method 2") json_data = decode_csv_data_m2(data, dialect) return json_data +def update_csv_dialect(config, input_dialect): + if config.get("force_csv_parameters", False): + logger.info("Updating csv parameters with ") + csv_delimiter = config.get("csv_delimiter") + if csv_delimiter: + input_dialect.delimiter = csv_delimiter + logger.info("delimiter={}".format(csv_delimiter)) + csv_doublequote = config.get("csv_doublequote", None) + if csv_doublequote: + input_dialect.doublequote = csv_doublequote == "double_quote" + logger.info("doublequote={}".format(input_dialect.doublequote)) + csv_escapechar = config.get("csv_escapechar", "") + if csv_escapechar: + input_dialect.escapechar = csv_escapechar + logger.info("escapechar={}".format(csv_escapechar)) + csv_lineterminator = config.get("csv_lineterminator", "") + if csv_lineterminator: + input_dialect.lineterminator = csv_lineterminator + logger.info("lineterminator={}".format(csv_lineterminator)) + csv_quotechar = config.get("csv_quotechar", "") + if csv_quotechar: + input_dialect.quotechar = csv_quotechar + logger.info("quotechar={}".format(csv_quotechar)) + csv_quoting = config.get("csv_quoting", None) + if csv_quoting is not None: + input_dialect.quoting = csv_quoting + logger.info("quoting={}".format(csv_quoting)) + csv_skipinitialspace = config.get("csv_skipinitialspace", None) + if csv_skipinitialspace: + input_dialect.skipinitialspace = csv_skipinitialspace == "skip" + logger.info("skipinitialspace={}".format(input_dialect.skipinitialspace)) + return input_dialect + + def decode_csv_data_m2(data, dialect): import csv json_data = None diff --git a/python-lib/rest_api_recipe_session.py b/python-lib/rest_api_recipe_session.py index fada80a..5df182d 100644 --- a/python-lib/rest_api_recipe_session.py +++ b/python-lib/rest_api_recipe_session.py @@ -29,6 +29,7 @@ def __init__(self, custom_key_values, credential_parameters, secure_credentials, self.is_row_limit = (self.maximum_number_rows > 0) self.behaviour_when_error = behaviour_when_error or "add-error-column" self.can_raise = self.behaviour_when_error == "raise" + self.csv_configuration = endpoint_parameters @staticmethod def get_column_to_parameter_dict(parameter_columns, parameter_renamings): @@ -126,7 +127,7 @@ def retrieve_next_page(self, is_raw_output): base_row.update(self.initial_parameter_columns) page_rows.append(base_row) else: - decoded_csv_data = decode_csv_data(json_response) + decoded_csv_data = decode_csv_data(json_response, self.csv_configuration) is_api_returning_dict = False if not decoded_csv_data and json_response: logger.warning("Data is not in CSV format. Dumping it in text mode.") @@ -151,7 +152,7 @@ def format_page_rows(self, data_rows, is_raw_output, metadata=None): page_rows = [] metadata = metadata or {} if type(data_rows) in [str, bytes]: - data_rows = decode_csv_data(data_rows) + data_rows = decode_csv_data(data_rows, self.csv_configuration) if type(data_rows) in [list]: for data_row in data_rows: base_row = copy.deepcopy(self.initial_parameter_columns) From 34f3125e344658531fa64ba64fb417bd76335b5d Mon Sep 17 00:00:00 2001 From: Alex Bourret Date: Wed, 11 Mar 2026 17:00:20 +0100 Subject: [PATCH 2/4] beta.3 --- python-lib/dku_constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-lib/dku_constants.py b/python-lib/dku_constants.py index fab7777..8bf962b 100644 --- a/python-lib/dku_constants.py +++ b/python-lib/dku_constants.py @@ -2,6 +2,6 @@ class DKUConstants(object): API_RESPONSE_KEY = "api_response" FORBIDDEN_KEYS = ["token", "password", "api_key_value", "secure_token"] FORM_DATA_BODY_FORMAT = "FORM_DATA" - PLUGIN_VERSION = "1.2.7-beta.2" + PLUGIN_VERSION = "1.2.7-beta.3" RAW_BODY_FORMAT = "RAW" REPONSE_ERROR_KEY = "dku_error" From 185915b1c04171be1ce89103c60ba5512db8403c Mon Sep 17 00:00:00 2001 From: Alex Bourret Date: Wed, 13 May 2026 13:34:59 +0200 Subject: [PATCH 3/4] fix get_endpoint_parameters --- python-lib/dku_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python-lib/dku_utils.py b/python-lib/dku_utils.py index 22ebc9d..e9b7087 100644 --- a/python-lib/dku_utils.py +++ b/python-lib/dku_utils.py @@ -41,7 +41,9 @@ def get_endpoint_parameters(configuration): "next_page_url_key", "is_next_page_url_relative", "next_page_url_base", "top_key", "skip_key", "maximum_number_rows", "use_mtls", "mtls_certificate_path", "mtls_key_path", - "force_csv_parameters", "csv_delimiter" + "force_csv_parameters", "csv_delimiter", + "csv_doublequote", "csv_escapechar", "csv_lineterminator", + "csv_quotechar", "csv_quoting", "csv_skipinitialspace" ] parameters = { endpoint_parameter: configuration.get(endpoint_parameter) for endpoint_parameter in endpoint_parameters if configuration.get(endpoint_parameter) is not None From bba57519ba028c4378e95f9c8c95acd87fd39314 Mon Sep 17 00:00:00 2001 From: Alex Bourret Date: Wed, 13 May 2026 14:09:12 +0200 Subject: [PATCH 4/4] UI: moving above the csv parameters names --- .../api-connect_dataset/connector.json | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/python-connectors/api-connect_dataset/connector.json b/python-connectors/api-connect_dataset/connector.json index 0dac288..4921bc8 100644 --- a/python-connectors/api-connect_dataset/connector.json +++ b/python-connectors/api-connect_dataset/connector.json @@ -268,15 +268,15 @@ }, { "name": "csv_delimiter", - "label": " ", - "description": "Delimiter", + "label": "Delimiter", + "description": "", "type": "STRING", "visibilityCondition": "model.force_csv_parameters==true" }, { "name": "csv_doublequote", - "label": " ", - "description": "Double quote", + "label": "Double quote", + "description": "", "type": "SELECT", "selectChoices":[ {"value": null, "label": "Auto detect"}, @@ -287,29 +287,29 @@ }, { "name": "csv_escapechar", - "label": " ", - "description": "Escape char", + "label": "Escape char", + "description": "", "type": "STRING", "visibilityCondition": "model.force_csv_parameters==true" }, { "name": "csv_lineterminator", - "label": " ", - "description": "Line terminator", + "label": "Line terminator", + "description": "", "type": "STRING", "visibilityCondition": "model.force_csv_parameters==true" }, { "name": "csv_quotechar", - "label": " ", - "description": "Quote char", + "label": "Quote char", + "description": "", "type": "STRING", "visibilityCondition": "model.force_csv_parameters==true" }, { "name": "csv_quoting", - "label": " ", - "description": "Quote", + "label": "Quote", + "description": "", "type": "SELECT", "selectChoices":[ {"value": null, "label": "Auto detect"}, @@ -324,8 +324,8 @@ }, { "name": "csv_skipinitialspace", - "label": " ", - "description": "Skip initial space", + "label": "Skip initial space", + "description": "", "type": "SELECT", "selectChoices":[ {"value": null, "label": "Auto detect"},