From dcf7aa82677d94c42114541397277cf59fb09655 Mon Sep 17 00:00:00 2001 From: David Declerck Date: Fri, 5 Dec 2025 11:36:47 +0100 Subject: [PATCH 01/12] Add support for XML PARSE with XMLSS This is work by Chuck Haatvedt edited by David Declerck. * mlio.c: modified to support xml parse with xmlss. eliminated the xml_event_data structure and moved that data into the xml_event structure. Created a new enum cob_xml_registers and added it to the add_xml_event_data function. This function was modified to update the xml_event structure. All of the context parser callback functions were modified to use the add_xml_event_data function. the cob_xml_parse and xml_parse functions were modified to support the new end_of_input event required by xmlss. a new eof variable was added to the xml_state structure so that the endDocument callback function could be triggered by the parser in the xml_parse funtction. TODO ==> logic needs to be added to support returning NATIONAL data this needs to support the RETURNING NATIONAL phrase. --- libcob/ChangeLog | 22 +- libcob/mlio.c | 718 +++++++++++++++++++++++----------- tests/testsuite.src/run_ml.at | 320 ++++++++++++++- 3 files changed, 832 insertions(+), 228 deletions(-) diff --git a/libcob/ChangeLog b/libcob/ChangeLog index 91ff9b50e..e0ec8b57b 100644 --- a/libcob/ChangeLog +++ b/libcob/ChangeLog @@ -1,4 +1,3 @@ - 2025-12-04 Simon Sobisch * fileio.c (indexed_open) [WITH_DB]: if open was successful but checking @@ -8,6 +7,11 @@ * fileio.c (cob_file_close): close file depending on internal state, not depending on file organization +2025-11-19 Chuck Haatvedt + + * mlio.c (xml_startDocument, myStructuredErrorHandler, xml_parse): + compat for LIBXML_VERSION < 21400 + 2025-11-19 Oğuzcan Kırmemiş * common.c (cob_set_signal): add the enum COB_SIGNAL_REGIME to toggle @@ -82,6 +86,22 @@ * intrinsic.c (cob_intr_char): raise COB_EC_ARGUMENT_FUNCTION when calling CHAR with an argument outside the collation range +2025-08-15 Chuck Haatvedt + + * mlio.c: modified to support xml parse with xmlss. + eliminated the xml_event_data structure and moved that data + into the xml_event structure. Created a new enum cob_xml_registers + and added it to the add_xml_event_data function. This function was + modified to update the xml_event structure. All of the context parser + callback functions were modified to use the add_xml_event_data function. + the cob_xml_parse and xml_parse functions were modified to support + the new end_of_input event required by xmlss. a new eof variable + was added to the xml_state structure so that the endDocument callback + function could be triggered by the parser in the xml_parse funtction. + + TODO ==> logic needs to be added to support returning NATIONAL data + this needs to support the RETURNING NATIONAL phrase. + 2025-07-28 Simon Sobisch * common.h, fileio.c: new externalized typedef EXTFH_FUNC used in diff --git a/libcob/mlio.c b/libcob/mlio.c index db2b13a73..f47e7cd43 100644 --- a/libcob/mlio.c +++ b/libcob/mlio.c @@ -26,6 +26,10 @@ #include #include +#ifdef _WIN32 +#include "localcharset.h" +#endif + /* include internal and external libcob definitions, forcing exports */ #define COB_LIB_EXPIMP #include "coblocal.h" @@ -39,6 +43,7 @@ #include #include #include +#include #ifndef LIBXML_CONST_ERROR_PTR #if LIBXML_VERSION >= 21200 @@ -139,14 +144,20 @@ enum xml_parser_state { XML_PARSER_FINE, XML_PARSER_HAD_NONFATAL_ERROR, XML_PARSER_HAD_FATAL_ERROR, + XML_PARSER_STARTING_NEXT_CHUNK, XML_PARSER_FINISHED, XML_PARSER_IGNORE_ERROR /* special value for suppressing errors */ }; -struct xml_event_data { - const char *data_ptr; /* data pointer in buff */ - size_t data_len; /* length of this data */ - struct xml_event_data *next; /* pointer to next element */ +enum cob_xml_registers { + SREG_XML_EVENT, + SREG_XML_INFORMATION, + SREG_XML_TEXT, + SREG_XML_NTEXT, + SREG_XML_NAMESPACE, + SREG_XML_NNAMESPACE, + SREG_XML_NS_PREFIX, + SREG_XML_NNS_PREFIX }; #define COB_XML_EVENT(name,str) name, @@ -178,10 +189,14 @@ static void init_xml_event_list (void); #endif struct xml_event { - enum cob_xml_event event; - struct xml_event_data *first; /* first data element */ - struct xml_event_data *last; /* last data element */ - struct xml_event *next; /* pointer to next element */ + enum cob_xml_event event; + struct xml_event *next; /* pointer to next element */ + const char *text_ptr; /* text pointer in buff */ + size_t text_len; /* length of this text */ + const char *namespace_ptr; /* namespace pointer in buff */ + size_t namespace_len; /* length of this namespace */ + const char *prefix_ptr; /* prefix pointer in buff */ + size_t prefix_len; /* length of this prefix */ }; struct xml_state { @@ -204,6 +219,7 @@ struct xml_state { size_t buff_len; /* size of current buffer for "text" (increasing until end of XML processing) */ size_t buff_off; /* offset in buffer, reset before each iteration */ + int eof; }; enum json_code_status { @@ -215,7 +231,13 @@ enum json_code_status { static cob_global *cobglobptr; -/* Local functions */ +/* Local functions prototypes */ + +static void xml_endDocument (void *ctx); +void * buffer_xml_event_data (struct xml_state *state, + const void *data, + size_t size); + /* set special register XML-CODE */ static COB_INLINE COB_A_INLINE void @@ -267,11 +289,12 @@ set_xml_event (enum cob_xml_event event) note: re-uses events if possible, allocates a new event if needed */ static struct xml_event * xml_event_initialized (struct xml_event *event) { - struct xml_event_data *data; - for (data = event->first; data; data = data->next) { - data->data_ptr = NULL; - } - event->last = event->first; + event->text_ptr = NULL; + event->text_len = 0; + event->namespace_ptr = NULL; + event->namespace_len = 0; + event->prefix_ptr = NULL; + event->prefix_len = 0; return event; } @@ -299,6 +322,8 @@ new_xml_event (struct xml_state *state, enum cob_xml_event xml_event) { /* no empty events from previous parsing, create a new one */ event = cob_malloc (sizeof (struct xml_event)); + /* add logic to check for malloc failure */ + memset (event, '\0', sizeof (struct xml_event)); event->event = xml_event; if (state->event) { state->event->next = event; @@ -317,26 +342,24 @@ new_xml_event (struct xml_state *state, enum cob_xml_event xml_event) { /* add data to event buffer with given size; returns -1 if buffer allocation is not possible */ -static int -buffer_xml_event_data (struct xml_state *state, struct xml_event_data *event_data, - const void *data, size_t size) +void * +buffer_xml_event_data (struct xml_state *state, + const void *data, + size_t size) { size_t buff_free_size = state->buff_len - state->buff_off; void *next_buffer_pos = ((unsigned char *)state->buff) + state->buff_off; - event_data->data_ptr = next_buffer_pos; - /* most common: enough size in the buffer, so copy and finish */ if (size <= buff_free_size) { memcpy (next_buffer_pos, data, size); state->buff_off += size; - return 0; + return next_buffer_pos; } /* otherwise: allocate new buffer with additional space, preserving existing data */ { - const size_t malloc_size = state->buff_off - + size > COB_MINI_BUFF ? size : COB_MINI_BUFF; + const size_t malloc_size = state->buff_len + COB_LARGE_BUFF; void *mptr = cob_fast_malloc (malloc_size); /* CHECKME: we possibly want to handle out of memory to pass it to COBOL as XML error - but cob_fast_malloc / cob_malloc already abort the runtime @@ -348,102 +371,53 @@ buffer_xml_event_data (struct xml_state *state, struct xml_event_data *event_dat cob_free (state->buff); state->buff = mptr; state->buff_len = malloc_size; - memcpy (next_buffer_pos, data, size); - state->buff_off += size; - return 0; - } - } + buff_free_size = state->buff_len - state->buff_off; + next_buffer_pos = ((unsigned char *)state->buff) + state->buff_off; - /* if that did not work out, set whatever our buffer provides */ - event_data->data_len = size = buff_free_size; - if (size) { - memcpy (next_buffer_pos, data, size); - state->buff_off += size; - } - return 1; -} - -/* add data to event buffer with given size (will be calculated if -1 is specified); - returns event_data to use */ -static struct xml_event_data * -new_xml_event_data (struct xml_event *event) -{ - struct xml_event_data *event_data = event->last; - - /* re-use event structure from previous run */ - if (event_data) { - if (event_data->data_ptr == NULL) { - /* very first element, and unsused: */ - return event_data; - } - if (event_data->next) { - /* another unused element */ - return event_data->next; + /* most common: enough size in the buffer, so copy and finish */ + if (size <= buff_free_size) { + memcpy (next_buffer_pos, data, size); + state->buff_off += size; + return next_buffer_pos; + } } } - /* no empty event data from previous parsing, create a new one */ - - /* add to the current event's data*/ - event_data = cob_malloc (sizeof (struct xml_event_data)); - if (event->last) { - event->last->next = event_data; - } else { - event->first = event_data; - } - event->last = event_data; - return event_data; + return NULL; } /* add data to event buffer with given size, ignores size = zero; returns -1 if buffer allocation is not possible */ -static int -add_xml_event_data (struct xml_state *state, const void *data, size_t size, const int c_string) +static void +add_xml_event_data (struct xml_state *state, + enum cob_xml_registers sreg, + const void *data, + size_t size) { - /* add to the current event's data*/ - struct xml_event_data *new_event_data; + void *buff_data; + buff_data = buffer_xml_event_data (state, data, size); + /* add to the current event's data*/ if (size == 0) { - /* comments, CDATA, ... may be empty */ - return 0; + return; } - - new_event_data = new_xml_event_data (state->event); - new_event_data->data_len = size; - - /* TODO: handle out-of-memory per IBM in the caller */ - return buffer_xml_event_data (state, new_event_data, data, size + c_string); -} - -/* add data to event buffer with given size; - returns -1 if buffer allocation is not possible */ -static int -add_xml_event_data_tag (struct xml_state *state, const xmlChar *name, size_t size) -{ - /* add to the current event's data*/ - struct xml_event_data *new_event_data = new_xml_event_data (state->event); - new_event_data->data_len = size; - - /* check if already existing in previous cached events, - which is likely for namespaces and tags */ - { - struct xml_event *event = state->first_event; - struct xml_event_data *event_data; - - while (event != state->event) { - for (event_data = event->first; event_data; event_data = event_data->next) { - if (event_data->data_len == size - && memcmp (event_data->data_ptr, name, size) == 0) { - new_event_data->data_ptr = event_data->data_ptr; - return 0; - } - } - event = event->next; - } + switch (sreg) { + case SREG_XML_TEXT : + case SREG_XML_NTEXT : + state->event->text_ptr = (char *) buff_data; + state->event->text_len = size; + break; + case SREG_XML_NAMESPACE : + case SREG_XML_NNAMESPACE : + state->event->namespace_ptr = (char *) buff_data; + state->event->namespace_len = size; + break; + case SREG_XML_NS_PREFIX : + case SREG_XML_NNS_PREFIX : + state->event->prefix_ptr = (char *) buff_data; + state->event->prefix_len = size; + break; } - - /* TODO: handle out-of-memory per IBM in the caller */ - return buffer_xml_event_data (state, new_event_data, name, size); } #endif /* defined (WITH_XML2) */ @@ -1424,6 +1398,7 @@ int cob_xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, } /* LCOV_EXCL_STOP */ *saved_state = cob_malloc (sizeof (struct xml_state)); + memset (*saved_state, '\0', sizeof(struct xml_state)); ((struct xml_state *)*saved_state)->flags = flags; xml_code = 0; } @@ -1436,8 +1411,10 @@ int cob_xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, /* initial setup of registers, ensuring they are available in the processing procedure */ +#if 0 set_xml_text (0, "", 0); set_xml_namespace (0, "", 0, NULL, 0); +#endif /* LINKAGE or BASED item without data */ if (!in->data) { @@ -1491,10 +1468,21 @@ int cob_xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, switch (xml_code) { case 0: xml_endDocument (state); + if (state->eof == 0) { + state->eof = 1; + } else { +// xml_code = 1; + state->state == XML_PARSER_FINISHED; + } break; case 1: - /* goes on with parsing */ + /* goes on with parsing + note that since we are processing a new chunk + of the xml data, we need to set both data pointers */ xml_code = 0; + state->input_data_ptr = (const char*)in->data; + state->input_data_end = state->input_data_ptr + in->size; + state->state = XML_PARSER_STARTING_NEXT_CHUNK; break; default: /* fatal runtime error, @@ -1672,13 +1660,19 @@ xml_generate (cob_field *out, cob_ml_tree *tree, cob_field *count, static void xml_error_handling (struct xml_state *state, const xmlError *err) { + char err_code[5]; new_xml_event (state, EVENT_EXCEPTION); - add_xml_event_data (state, err->message, strlen (err->message), 1); - { - char err_code[5]; - sprintf (err_code, "%4d", err->code); - add_xml_event_data (state, err_code, 4, 1); - } + add_xml_event_data (state, + SREG_XML_TEXT, + err->message, + strlen (err->message) + 1); + new_xml_event (state, EVENT_EXCEPTION); + sprintf (err_code, "%4d", err->code); + add_xml_event_data (state, + SREG_XML_TEXT, + err_code, + 5); + /* CHECKME: Which other elements of the xmlError do we want to pass? */ #if 0 /* CHECKME: Do we want that? */ state->state = XML_PARSER_HAD_NONFATAL_ERROR; @@ -1760,26 +1754,81 @@ xml_endDocument (void *ctx) { static void xml_startDocument (void *ctx) { struct xml_state *state = ctx; + xmlParserCtxtPtr ctxt = state->ctx; + +#if LIBXML_VERSION >= 21200 + const xmlChar *encoding = xmlCtxtGetDeclaredEncoding(ctxt); + int standalone = xmlCtxtGetStandalone(ctxt); + const xmlChar *version = xmlCtxtGetVersion(ctxt); + +#else + const xmlChar *encoding = ctxt->encoding; + int standalone = ctxt->standalone; + const xmlChar *version = ctxt->version; +#endif + new_xml_event (state, EVENT_START_OF_DOCUMENT); + new_xml_event (state, EVENT_VERSION_INFORMATION); + add_xml_event_data (state, + SREG_XML_TEXT, + version, + xmlStrlen (version)); + new_xml_event (state, EVENT_ENCODING_DECLARATION); + add_xml_event_data (state, + SREG_XML_TEXT, + encoding, + xmlStrlen (encoding)); state->state = XML_PARSER_DOCUMENT_START; + + switch (ctxt->standalone) { + case 1 : + new_xml_event (state, EVENT_STANDALONE_DECLARATION); + add_xml_event_data (state, + SREG_XML_TEXT, + "YES", + 3); + break; + case 0 : + new_xml_event (state, EVENT_STANDALONE_DECLARATION); + add_xml_event_data (state, + SREG_XML_TEXT, + "no", + 2); + break; + } + +} + +static void +xml_endofInput (struct xml_state *state) { + new_xml_event (state, EVENT_END_OF_INPUT); } static void xml_comment (void *ctx, const xmlChar *content) { struct xml_state *state = ctx; new_xml_event (state, EVENT_COMMENT); - add_xml_event_data (state, content, xmlStrlen (content), 0); + add_xml_event_data (state, + SREG_XML_TEXT, + content, + xmlStrlen (content)); } static void -xml_element_ns_handling (struct xml_state *state, - const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, - int nb_namespaces, const xmlChar **namespaces, - int nb_attributes, int nb_defaulted, const xmlChar **attributes) { - add_xml_event_data_tag (state, localname, xmlStrlen (localname)); - /* TODO: cleanup and code namespace stuff and check what to do on endElement */ - add_xml_event_data_tag (state, prefix, xmlStrlen (prefix)); - add_xml_event_data_tag (state, URI, xmlStrlen (URI)); +xml_processingInstruction (void *ctx, + const xmlChar *target, + const xmlChar *data) { + struct xml_state *state = ctx; + new_xml_event (state, EVENT_PROCESSING_INSTRUCTION_TARGET); + add_xml_event_data (state, + SREG_XML_TEXT, + target, + xmlStrlen (target)); + new_xml_event (state, EVENT_PROCESSING_INSTRUCTION_DATA); + add_xml_event_data (state, + SREG_XML_TEXT, + data, + xmlStrlen (data)); } static void @@ -1787,10 +1836,72 @@ xml_startElementNs (void *ctx, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes) { + int cntr, attr_value_len; struct xml_state *state = ctx; + new_xml_event (state, EVENT_START_OF_ELEMENT); - xml_element_ns_handling (state, localname, prefix, URI, nb_namespaces, namespaces, - nb_attributes, nb_defaulted, attributes); + add_xml_event_data (state, + SREG_XML_TEXT, + localname, + xmlStrlen (localname)); + /* TODO: cleanup and code namespace stuff and check what to do on endElement */ + if (prefix) { + add_xml_event_data (state, + SREG_XML_NS_PREFIX, + prefix, + xmlStrlen (prefix)); + } + if (URI) { + add_xml_event_data (state, + SREG_XML_NAMESPACE, + URI, + xmlStrlen (URI)); + } + + /* Now we start to process the NAMESPACE-DECLARATION's */ + if (namespaces != NULL) { + for (cntr = 0; cntr < nb_namespaces * 2; cntr++) { + new_xml_event (state, EVENT_NAMESPACE_DECLARATION); + const xmlChar *nprefix = namespaces[cntr++]; // Get nprefix (even index) + const xmlChar *nuri = namespaces[cntr]; // Get URI (odd index) + /* Handle default namespace (nprefix is NULL) */ + if (nuri) { + add_xml_event_data (state, + SREG_XML_NAMESPACE, + nuri, + xmlStrlen (nuri)); + } + if (nprefix) { + add_xml_event_data (state, + SREG_XML_NS_PREFIX, + nprefix, + xmlStrlen (nprefix)); + } + } + } + + /* Process each attribute */ + for (cntr = 0; cntr < nb_attributes * 5; cntr += 5) { + const xmlChar *attr_name = attributes[cntr]; // ATTRIBUTE-NAME + const xmlChar *attr_prefix = attributes[cntr + 1]; // ATTRIBUTE-NAMESPACE + const xmlChar *attr_value_start = attributes[cntr + 3]; // ATTRIBUTE-CHARACTERS start + const xmlChar *attr_value_end = attributes[cntr + 4]; // ATTRIBUTE-CHARACTERS end + + // Calculate attribute value length + attr_value_len = attr_value_end - attr_value_start; + + // Use the extracted information + new_xml_event (state, EVENT_ATTRIBUTE_NAME); + add_xml_event_data (state, + SREG_XML_TEXT, + attr_name, + xmlStrlen (attr_name)); + new_xml_event (state, EVENT_ATTRIBUTE_CHARACTERS); + add_xml_event_data (state, + SREG_XML_TEXT, + attr_value_start, + attr_value_len); + } } static void @@ -1798,38 +1909,187 @@ xml_endElementNs (void *ctx, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI) { struct xml_state *state = ctx; new_xml_event (state, EVENT_END_OF_ELEMENT); - xml_element_ns_handling (state, localname, prefix, URI, - 0, NULL, 0, 0, NULL); + add_xml_event_data (state, + SREG_XML_TEXT, + localname, + xmlStrlen (localname)); + /* TODO: cleanup and code namespace stuff and check what to do on endElement */ + if (prefix) { + add_xml_event_data (state, + SREG_XML_NS_PREFIX, + prefix, + xmlStrlen (prefix)); + } + if (URI) { + add_xml_event_data (state, + SREG_XML_NAMESPACE, + URI, + xmlStrlen (URI)); + } } static void xml_startElement (void *ctx, const xmlChar *name, const xmlChar **atts) { struct xml_state *state = ctx; new_xml_event (state, EVENT_START_OF_ELEMENT); - add_xml_event_data_tag (state, name, xmlStrlen (name)); + add_xml_event_data (state, + SREG_XML_TEXT, + name, + xmlStrlen (name)); } static void xml_endElement (void *ctx, const xmlChar *name) { struct xml_state *state = ctx; new_xml_event (state, EVENT_END_OF_ELEMENT); - add_xml_event_data_tag (state, name, xmlStrlen (name)); + add_xml_event_data (state, + SREG_XML_TEXT, + name, + xmlStrlen (name)); } static void xml_characters (void *ctx, const xmlChar *content, int len) { struct xml_state *state = ctx; new_xml_event (state, EVENT_CONTENT_CHARACTERS); - add_xml_event_data (state, content, len, 0); + add_xml_event_data (state, + SREG_XML_TEXT, + content, + len); +} + +static void +myStructuredErrorHandler(void *ctx, const xmlError *error) { + + struct xml_state *state = ctx; + static int errorCount = 0; + int i, len; + if (error->level == XML_ERR_ERROR || error->level == XML_ERR_FATAL) { + errorCount++; + } + +#if LIBXML_VERSION >= 21400 // 2.14.0 + if (error->code == XML_WAR_ENCODING_MISMATCH) { +#else + if (error->code == 113) { +#endif + fprintf(stderr, "WARNING: Encoding mismatch detected!\n"); + fprintf(stderr, "Message: %s\n", error->message); + if (error->str1) { + fprintf(stderr, "Declared encoding: %s\n", error->str1); + } + if (error->str2) { + fprintf(stderr, "Auto-detected encoding: %s\n", error->str2); + } + return; + } + + if ((errorCount == 1) && + (error->code == XML_ERR_XMLDECL_NOT_FINISHED || + error->code == XML_ERR_SPACE_REQUIRED)) { + len = state->input_data_end - state->input_data_ptr; +#ifdef _WIN32 + fprintf(stderr, + "Encoding declaration '%s' appears incompatible with input data\n" + "The current runtime character encoding is %s \n" + "This caused XML declaration parsing to fail with: %s", + state->ctx->encoding, locale_charset(), error->message); +#else + fprintf(stderr, + "Encoding declaration '%s' appears incompatible with input data\n" + "The current runtime character encoding does not match \n" + "This caused XML declaration parsing to fail with: %s", + state->ctx->encoding, error->message); +#endif + fprintf(stderr, + "The first 5 characters in HEX are X'"); + if (len > 10) { + len = 10; + } + for (i = 0; i < 5; i++) { + printf("%02x", (unsigned char)state->input_data_ptr[i]); + } + printf("'\n"); + } + + if (error->code == XML_ERR_INVALID_ENCODING) { + fprintf(stderr, "Encoding Error: %s\n", error->message); + if (error->file) { + fprintf(stderr, "File: %s\n", error->file); + } + if (error->line > 0) { + fprintf(stderr, "Line: %d", error->line); + if (error->int2 > 0) { + fprintf(stderr, ", Column: %d", error->int2); + } + fprintf(stderr, "\n"); + } + + // Display problematic bytes if available + if (error->str1) { + fprintf(stderr, "Context: %s\n", error->str1); + } + } else { + fprintf(stderr, + "XML Error ==> %d %s \n", + error->code, + error->message); + } + + if (error->level == XML_ERR_FATAL) { + state->last_xml_code = error->code; + new_xml_event (state, EVENT_EXCEPTION); + len = state->input_data_end - state->input_data_ptr; + if (len > 100) { + len = 100; + } + add_xml_event_data (state, + SREG_XML_TEXT, + state->input_data_ptr, + len); + } +} + +static void +xml_internalSubset(void *ctx, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID) { + struct xml_state *state = ctx; + + if (state == NULL || name == NULL) { + return; + } + new_xml_event (state, EVENT_DOCUMENT_TYPE_DECLARATION); + add_xml_event_data (state, + SREG_XML_TEXT, + name, + xmlStrlen (name)); } static void xml_cdata (void *ctx, const xmlChar *content, int len) { struct xml_state *state = ctx; new_xml_event (state, EVENT_START_OF_CDATA_SECTION); + if (COB_MODULE_PTR->xml_mode == COB_XML_COMPAT) { + add_xml_event_data (state, + SREG_XML_TEXT, + "xml_mode == COB_XML_COMPAT) { + add_xml_event_data (state, + SREG_XML_TEXT, + "]]>", + 3); + } } #endif /* defined (WITH_XML2) */ @@ -1839,6 +2099,7 @@ void xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, const int flags, struct xml_state *state) { static int first_xml = 1; + const xmlError *error; if (state->ctx == NULL) { char *enc = NULL; @@ -1848,6 +2109,12 @@ void xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, } /* setup sax-parser callbacks */ + memset(&state->sax, 0, sizeof(xmlSAXHandler)); + + /* do NOT use xmlSAXVersion(&state->sax, 2); + only set the callbacks that we need to use + All other callbacks remain NULL from memset */ + state->sax.startDocument = xml_startDocument; state->sax.endDocument = xml_endDocument; state->sax.comment = xml_comment; @@ -1860,10 +2127,13 @@ void xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, state->sax.startElement = xml_startElement; state->sax.endElement = xml_endElement; } + state->sax.internalSubset = xml_internalSubset; state->sax.cdataBlock = xml_cdata; state->sax.endElement = xml_endElement; + state->sax.processingInstruction = xml_processingInstruction; state->sax.characters = xml_characters; + state->sax.serror = myStructuredErrorHandler; /* * The document being in memory, it have no base per RFC 2396, @@ -1871,6 +2141,23 @@ void xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, */ state->ctx = xmlCreatePushParserCtxt (&state->sax, state, NULL, 0, "noname.xml"); + + // Add this immediately after creating the context: + if (state->ctx != NULL) { +#if LIBXML_VERSION >= 21200 + int options = xmlCtxtGetOptions(state->ctx); +#else + int options = state->ctx->options; +#endif + options &= ~XML_PARSE_NOWARNING; /* Clear the NOWARNING flag */ + options &= ~XML_PARSE_NOERROR; /* Also clear NOERROR flag */ +#if LIBXML_VERSION >= 21200 + xmlCtxtSetOptions(state->ctx, options); +#else + state->ctx->options = options; +#endif + } + state->input_data_ptr = (const char*)in->data; state->input_data_end = state->input_data_ptr + in->size; @@ -1964,17 +2251,19 @@ void xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, } } - state->buff = cob_malloc (COB_MINI_BUFF); - state->buff_len = COB_MINI_BUFF; + state->buff = cob_malloc (COB_LARGE_BUFF); + state->buff_len = COB_LARGE_BUFF; state->state = XML_PARSER_JUST_STARTED; } +#if 0 if (first_xml) { first_xml = 0; cob_runtime_warning (_("%s is unfinished"), "XML PARSE"); } +#endif /* unset existing events, allowing re-use*/ { @@ -1993,12 +2282,43 @@ void xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, if (size > 100) { size = 100; } - state->err = xmlParseChunk (state->ctx, state->input_data_ptr, size, end_of_parsing); - if (end_of_parsing) { + if (state->eof) { + state->err = xmlParseChunk (state->ctx, state->input_data_ptr, 0, 1); break; + } else if (!end_of_parsing){ + state->err = xmlParseChunk (state->ctx, state->input_data_ptr, size, end_of_parsing); +#if 0 + if (state->err) { + error = xmlGetLastError (); + fprintf(stderr, "xmlParseChunk returned error %d ==> %s \n", + state->err, + error->message); + } +#endif + state->input_data_ptr += size; + } else { + break; + } + } + + if (state->input_data_ptr >= state->input_data_end) { + if (state->eof) { + state->state = XML_PARSER_FINISHED; + } else { + xml_endofInput (state); + } + } + +#if 0 + { + struct xml_event *event = state->first_event; + for (;event && event->event != EVENT_UNKNOWN; event = event->next) { + printf("Event ==> %30.*s \n", + xml_event_name_len[event->event], + (unsigned char *)xml_event_name[event->event]); } - state->input_data_ptr += size; } +#endif state->event = state->first_event; xml_process_next_event (state); @@ -2011,101 +2331,53 @@ void xml_process_next_event (struct xml_state *state) { struct xml_event *event = state->event; - struct xml_event_data *data = event->first; const int ntext = state->flags & COB_XML_PARSE_NATIONAL; - const char *text_data = data ? data->data_ptr : NULL; - size_t text_len = data ? data->data_len : 0; - - state->event = event->next; - - set_xml_event (event->event); - set_xml_code (0); - - switch (event->event) { - - case EVENT_ATTRIBUTE_CHARACTERS: - if (text_len <= 1 - && COB_MODULE_PTR->xml_mode == COB_XML_COMPAT) { - event->event = EVENT_ATTRIBUTE_CHARACTER; - } - /* XML-TEXT already setup */ - break; - - case EVENT_CONTENT_CHARACTERS: - if (text_len <= 1 - && COB_MODULE_PTR->xml_mode == COB_XML_COMPAT) { - event->event = EVENT_CONTENT_CHARACTER; - } - /* XML-TEXT already setup */ - break; - - case EVENT_START_OF_DOCUMENT: - if (COB_MODULE_PTR->xml_mode == COB_XML_COMPAT) { - text_len = state->input_data_end - state->input_data_ptr; - text_data = state->input_data_ptr; - } - state->state = XML_PARSER_FINE; - break; - case EVENT_END_OF_DOCUMENT: - state->state = XML_PARSER_FINISHED; - /* empty register */ - break; + /* First set all XML registers to zero length */ - case EVENT_START_OF_CDATA_SECTION: - if (COB_MODULE_PTR->xml_mode == COB_XML_COMPAT) { - text_len = 9; - text_data = "xml_mode == COB_XML_COMPAT) { - text_len = 3; - text_data = "]]>"; - } - break; + cob_set_int (COB_MODULE_PTR->xml_information, (int) 1); + COB_MODULE_PTR->xml_namespace->size = 0; + COB_MODULE_PTR->xml_namespace_prefix->size = 0; + COB_MODULE_PTR->xml_nnamespace->size = 0; + COB_MODULE_PTR->xml_nnamespace_prefix->size = 0; + COB_MODULE_PTR->xml_text->size = 0; + COB_MODULE_PTR->xml_ntext->size = 0; - case EVENT_START_OF_ELEMENT: - case EVENT_END_OF_ELEMENT: - case EVENT_COMMENT: - /* XML-TEXT already setup */ - /* TODO: iterate over the next data pointers and set namespace */ - break; + COB_MODULE_PTR->xml_namespace->data = NULL; + COB_MODULE_PTR->xml_namespace_prefix->data = NULL; + COB_MODULE_PTR->xml_nnamespace->data = NULL; + COB_MODULE_PTR->xml_nnamespace_prefix->data = NULL; + COB_MODULE_PTR->xml_text->data = NULL; + COB_MODULE_PTR->xml_ntext->data = NULL; - case EVENT_END_OF_INPUT: - /* empty register */ + if (event->event == EVENT_END_OF_INPUT && !state->eof) { state->state = XML_PARSER_HAD_END_OF_INPUT; - break; + } - case EVENT_EXCEPTION: - /* first data is message -> already passed as is, - second data is the libxml2 error code */ - data = data->next; - if (data && data->data_len == 4) { - set_xml_code_parsing_error (atoi (data->data_ptr)); - } - break; - /* TODO */ - case EVENT_CONTENT_NATIONAL_CHARACTER: - case EVENT_DOCUMENT_TYPE_DECLARATION: - case EVENT_ENCODING_DECLARATION: - case EVENT_NAMESPACE_DECLARATION: - case EVENT_PROCESSING_INSTRUCTION_DATA: - case EVENT_PROCESSING_INSTRUCTION_TARGET: - case EVENT_STANDALONE_DECLARATION: - case EVENT_UNKNOWN_REFERENCE_IN_ATTRIBUTE: - case EVENT_UNKNOWN_REFERENCE_IN_CONTENT: - case EVENT_UNRESOLVED_REFERENCE: - case EVENT_VERSION_INFORMATION: - default: - state->last_xml_code = XML_INTERNAL_ERROR; - set_xml_exception (XML_INTERNAL_ERROR); - set_xml_event (EVENT_EXCEPTION); - state->state = XML_PARSER_HAD_NONFATAL_ERROR; - return; + set_xml_event (event->event); + if (state->last_xml_code) { + set_xml_code (state->last_xml_code); + } else { + set_xml_code (0); } - set_xml_text (ntext, text_data , text_len); + if (ntext) { + COB_MODULE_PTR->xml_ntext->size = event->text_len; + COB_MODULE_PTR->xml_ntext->data = event->text_ptr; + COB_MODULE_PTR->xml_nnamespace->size = event->namespace_len; + COB_MODULE_PTR->xml_nnamespace->data = event->namespace_ptr; + COB_MODULE_PTR->xml_nnamespace_prefix->size = event->prefix_len; + COB_MODULE_PTR->xml_nnamespace_prefix->data = event->prefix_ptr; + } else { + COB_MODULE_PTR->xml_text->size = event->text_len; + COB_MODULE_PTR->xml_text->data = event->text_ptr; + COB_MODULE_PTR->xml_namespace->size = event->namespace_len; + COB_MODULE_PTR->xml_namespace->data = event->namespace_ptr; + COB_MODULE_PTR->xml_namespace_prefix->size = event->prefix_len; + COB_MODULE_PTR->xml_namespace_prefix->data = event->prefix_ptr; + } + + state->event = event->next; } #if defined (WITH_XML2) @@ -2130,12 +2402,6 @@ void xml_free_parse_memory (struct xml_state* state) struct xml_event *event = state->first_event; while (event) { struct xml_event *next = event->next; - struct xml_event_data *data = event->first; - while (data) { - struct xml_event_data *dnext = data->next; - cob_free (data); - data = dnext; - } cob_free (event); event = next; } diff --git a/tests/testsuite.src/run_ml.at b/tests/testsuite.src/run_ml.at index 1da8962c6..ddb140c34 100644 --- a/tests/testsuite.src/run_ml.at +++ b/tests/testsuite.src/run_ml.at @@ -660,6 +660,324 @@ end : +000000005 - "Test " AT_CLEANUP +AT_SETUP([XML PARSE]) +AT_KEYWORDS([extensions PARSE]) + +AT_DATA([prog.cob], [ + PROCESS XMLPARSE XMLSS + Identification division. + PROGRAM-ID. IBMXML. + Data division. + Working-storage section. + ****************************************************************** + * Sample data definitions for processing numeric XML content. * + ****************************************************************** + 77 element-depth comp pic s9(4). + 77 countd comp pic s9(4). + 77 discount computational pic 9v99 value 0. + 77 display-price pic $$9.99. + 77 filling pic x(4095). + 77 list-price computational pic 9v99 value 0. + 77 ofr-ed pic x(9) justified. + 77 ofr-ed-1 redefines ofr-ed pic 999999.99. + ****************************************************************** + * XML document data, encoded as initial values of data items. * + ****************************************************************** + 1 xml-document-data. + 2 pic x(39) value ''. + 2 pic x(39) value ''. + 2 pic x(10) value ''. + 2 pic x(33) value ''. + 2 pic x(36) value ''. + 2 pic x(29) value 'Ham + turkey'. + 2 pic x(34) value 'Cheese, lettuce, tomato, '. + 2 pic x(32) value 'and that''s all, Folks!'. + 2 pic x(25) value ' element!]]>'. + 2 pic x(28) value '$4.99'. + 2 pic x(25) value '0.10'. + 2 pic x(31) value ''. + ****************************************************************** + * XML document, represented as fixed-length records. * + ****************************************************************** + 01 xml-document redefines xml-document-data. + 05 xml-segment pic x(40) occurs 10 times. + 01 xml-segment-no comp pic s9(4). + 01 content-buffer pic x(100). + 01 current-element-stack. + 05 current-element pic x(40) occurs 10 times. + Procedure division. + Mainline section. + display 'len=:' length of xml-document-data. + Move 1 to xml-segment-no + Display 'Initial segment {' xml-segment(xml-segment-no) '}' + Display ' ' + XML parse xml-segment (xml-segment-no) + processing procedure XML-handler + ON EXCEPTION + Display 'XML processing error, XML-Code=' XML-Code '.' + Move 16 to return-code + Goback + End-XML + Display ' ' + Display 'XML document successfully parsed.' + Display ' ' + Display '-----+++++***** Using information from XML ' + '*****+++++-----' + Display ' ' + Move list-price to Display-price + Display ' Sandwich list price: ' Display-price + Compute Display-price = list-price * (1 - discount) + Display ' Promotional price: ' Display-price + Display ' Get one today!' + Move 0 to return-code + Goback + ****************************************************************** + * Process the transformed content and calculate promo price. * + ****************************************************************** + Display ' ' + Display '-----+++++***** Using information from XML ' + '*****+++++-----' + Display ' ' + Move list-price to Display-price + Display ' Sandwich list price: ' Display-price + Compute Display-price = list-price * (1 - discount) + Display ' Promotional price: ' Display-price + Display ' Get one today!' + Goback. + XML-handler section. + * if xml-segment-no = 10 + * move 'END-OF-DOCUMENT' to xml-event + * end-if + display 'parsed event=:' xml-event + Evaluate XML-Event + * ==> Order XML events most frequent first + When 'START-OF-ELEMENT' + *** Display 'Start element tag: {' XML-Text '}' + Add 1 to element-depth + *** if address of xml-text not = null + Move XML-Text to current-element(element-depth) + *** end-if + When 'CONTENT-CHARACTERS' + *** Display 'Content characters: {' XML-Text '}' + * ==> In general, a split can occur for any element or attribute + * ==> data, but in this sample, it only occurs for "filling"... + DISPLAY 'ALL INFO =:' XML-INFORMATION + If xml-information = 2 + DISPLAY 'INFO = 2' + END-IF + If xml-information = 2 and + current-element(element-depth) not = 'filling' + DISPLAY 'INFO = 2 AND FILLING' + Display 'Unexpected split in content for element ' + current-element(element-depth) + Move -1 to xml-code + End-if + * ==> Transform XML content to operational COBOL data item... + Evaluate current-element(element-depth) + When 'filling' + * ==> After reassembling separate pieces of character content... + String xml-text delimited by size into + content-buffer with pointer countd + On overflow + Display 'content buffer (' + length of content-buffer + ' bytes) is too small' + Move -1 to xml-code + End-string + Evaluate xml-information + When 2 + Display ' Character data for element "filling" ' + 'is incomplete.' + Display ' The partial data was buffered for ' + 'content assembly.' + When 1 + subtract 1 from countd + move content-buffer(1:countd) to filling + Display ' Element "filling" data (' countd + ' bytes) is now complete:' + Display ' {' filling(1:countd) '}' + End-evaluate + When 'listprice' + * ==> Using function NUMVAL-C... + Move XML-Text to content-buffer + Compute list-price = + function numval-c(content-buffer) + When 'discount' + * ==> Using de-editing of a numeric edited item... + Move XML-Text to ofr-ed + Move ofr-ed-1 to discount + End-evaluate + When 'END-OF-ELEMENT' + *** Display 'End element tag: {' XML-Text '}' + Subtract 1 from element-depth + When 'START-OF-DOCUMENT' + Display 'Start of document' + Move 0 to element-depth + Move 1 to countd + When 'END-OF-DOCUMENT' + Display 'End of document.' + When 'VERSION-INFORMATION' + continue + *** Display 'Version: {' XML-Text '}' + When 'ENCODING-DECLARATION' + continue + *** Display 'Encoding: {' XML-Text '}' + When 'STANDALONE-DECLARATION' + *** Display 'Standalone: {' XML-Text '}' + When 'ATTRIBUTE-NAME' + continue + *** Display 'Attribute name: {' XML-Text '}' + When 'ATTRIBUTE-CHARACTERS' + continue + *** Display 'Attribute value characters: {' XML-Text '}' + When 'ATTRIBUTE-CHARACTER' + continue + *** Display 'Attribute value character: {' XML-Text '}' + When 'START-OF-CDATA-SECTION' + Display 'Start of CData section' + When 'END-OF-CDATA-SECTION' + Display 'End of CData section' + When 'CONTENT-CHARACTER' + *** Display 'Content character: {' XML-Text '}' + When 'PROCESSING-INSTRUCTION-TARGET' + *** Display 'PI target: {' XML-Text '}' + When 'PROCESSING-INSTRUCTION-DATA' + *** Display 'PI data: {' XML-Text '}' + continue + When 'COMMENT' + * if address of xml-text not = null + continue + *** Display 'Comment: {' XML-Text '}' + * end-if + When 'EXCEPTION' + Compute countd = function length (XML-Text) + Display 'Exception ' XML-Code ' at offset ' countd '.' + When 'END-OF-INPUT' + Display 'End of input' + if xml-segment-no < 10 + Add 1 to xml-segment-no + display 'segment-num=:' xml-segment-no + Display + ' Next segment: {' xml-segment(xml-segment-no) + '}' + Display ' ' + Move 1 to xml-code + end-if + When other + Display 'Unexpected XML event: ' XML-Event '.' + End-evaluate. +]) + +AT_CHECK([$COMPILE -w prog.cob], [0], [], []) +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], +[len=:400 +Initial segment { Ham + turkey Cheese, lettuce, tomato, a} + @&t@ +parsed event=:END-OF-ELEMENT @&t@ +parsed event=:CONTENT-CHARACTERS @&t@ +ALL INFO =:+000000001 +parsed event=:START-OF-ELEMENT @&t@ +parsed event=:END-OF-INPUT @&t@ +End of input +segment-num=:+0007 + Next segment: {nd that's all, Folks! element!]]>$4.990.10 } + @&t@ +parsed event=:END-OF-ELEMENT @&t@ +parsed event=:END-OF-ELEMENT @&t@ +parsed event=:END-OF-INPUT @&t@ +End of input +parsed event=:END-OF-DOCUMENT @&t@ +End of document. + @&t@ +XML document successfully parsed. + @&t@ +-----+++++***** Using information from XML *****+++++----- + @&t@ + Sandwich list price: $4.99 + Promotional price: $4.49 + Get one today! +], []) +AT_CLEANUP + + # ## JSON # @@ -1104,4 +1422,4 @@ AT_DATA([prog.cob], [ AT_CHECK([$COMPILE prog.cob], [0], [], []) AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [], []) -AT_CLEANUP \ No newline at end of file +AT_CLEANUP From 79eb432888086beed5bdcc518fe9b420f2fe453b Mon Sep 17 00:00:00 2001 From: Guillaume Bertholon Date: Tue, 14 Apr 2026 15:34:35 +0200 Subject: [PATCH 02/12] Fixes for XML PARSE support * common.h: rename COB_XML_PARSE_XMLNSS into COB_XML_PARSE_XMLSS to match the IBM option name * mlio.c [WITH_XML2]: Fix issues in XML PARSE handling most notably a use after free error if the internal buffer needs to grow during the parsing. Respect the high order half-word for exception XML-CODE. Reduce the number of parsing states by removing useless ones, and encode eof in these states. Handle XML chunks with more than one recoverable error. Trigger ON EXCEPTION code after EXCEPTION XML events. * parser.y: remove the CB_PENDING warning on XML PARSE but still warn for untested XML PARSE RETURNING NATIONAL and XML PARSE VALIDATING. * typeck.c: remove invalid call to cob_check_based for XML-* builtin variable length registers (like XML-TEXT) * codegen.c: remove the uninitialized and unused b_* field for XML-* builtin variable length registers --- cobc/ChangeLog | 8 + cobc/codegen.c | 6 +- cobc/parser.y | 25 +- cobc/typeck.c | 2 +- libcob/ChangeLog | 13 + libcob/common.h | 6 +- libcob/mlio.c | 970 ++++++++++++---------------------- tests/testsuite.src/run_ml.at | 643 +++++++++++++++------- 8 files changed, 839 insertions(+), 834 deletions(-) diff --git a/cobc/ChangeLog b/cobc/ChangeLog index cce36250c..102f2621f 100644 --- a/cobc/ChangeLog +++ b/cobc/ChangeLog @@ -1,3 +1,11 @@ +2026-04-14 Guillaume Bertholon + + * parser.y: remove the CB_PENDING warning on XML PARSE but still warn for + untested XML PARSE RETURNING NATIONAL and XML PARSE VALIDATING. + * typeck.c: remove invalid call to cob_check_based for XML-* builtin variable + length registers (like XML-TEXT) + * codegen.c: remove the uninitialized and unused b_* field for XML-* builtin + variable length registers 2025-12-29 Roger Bowler diff --git a/cobc/codegen.c b/cobc/codegen.c index 96224c017..b5adb5797 100644 --- a/cobc/codegen.c +++ b/cobc/codegen.c @@ -7274,7 +7274,7 @@ output_xml_parse (struct cb_xml_parse *p) { int flags = 0; if (cb_xml_parse_xmlss) { - flags |= COB_XML_PARSE_XMLNSS; + flags |= COB_XML_PARSE_XMLSS; } if (p->returning_national && current_prog->xml_ntext) { flags |= COB_XML_PARSE_NATIONAL; @@ -11905,7 +11905,9 @@ output_internal_function (struct cb_program *prog, cb_tree parameter_list) /* Dangling linkage section items */ seen = 0; for (f = prog->linkage_storage; f; f = f->sister) { - if (f->redefines) { + if (f->redefines || f->flag_internal_register) { + /* XML-TEXT and other XML-* registers are in linkage_storage but do not use the + corresponding b_* field. */ continue; } for (l = parameter_list; l; l = CB_CHAIN (l)) { diff --git a/cobc/parser.y b/cobc/parser.y index 0083d70ee..d635ec621 100644 --- a/cobc/parser.y +++ b/cobc/parser.y @@ -17935,7 +17935,6 @@ xml_parse_statement: xml PARSE { begin_statement (STMT_XML_PARSE, TERM_XML); - CB_PENDING ("XML PARSE"); cobc_cs_check = CB_CS_XML_PARSE; cb_set_register_receiving (current_program->xml_code, 1); cb_set_register_receiving (current_program->xml_event, 1); @@ -17946,8 +17945,6 @@ xml_parse_statement: cb_set_register_receiving (current_program->xml_namespace_prefix, 1); cb_set_register_receiving (current_program->xml_nnamespace, 1); cb_set_register_receiving (current_program->xml_nnamespace_prefix, 1); - } - if (cb_xml_parse_xmlss) { cb_set_register_receiving (current_program->xml_information, 0); } } @@ -17987,14 +17984,28 @@ _with_encoding: ; _returning_national: -/* empty */ { $$ = NULL; } -| RETURNING NATIONAL { $$ = cb_true; } +/* empty */ + { + $$ = NULL; + } +| RETURNING NATIONAL + { + CB_PENDING ("XML PARSE RETURNING NATIONAL"); + $$ = cb_true; + } ; _validating_with: -/* empty */ { $$ = NULL; } +/* empty */ + { + $$ = NULL; + } | VALIDATING _with - schema_file_or_record_name { $$ = $3; } + schema_file_or_record_name + { + CB_PENDING ("XML PARSE VALIDATING"); + $$ = $3; + } ; schema_file_or_record_name: diff --git a/cobc/typeck.c b/cobc/typeck.c index f29df4c8d..7c0db4c10 100644 --- a/cobc/typeck.c +++ b/cobc/typeck.c @@ -2544,7 +2544,7 @@ cb_build_identifier (cb_tree x, const int subchk) && !current_statement->flag_no_based) { if (p->flag_item_based || (p->storage == CB_STORAGE_LINKAGE && - !(p->flag_is_pdiv_parm || p->flag_is_returning))) { + !(p->flag_is_pdiv_parm || p->flag_is_returning || p->flag_internal_register))) { current_statement->null_check = CB_BUILD_FUNCALL_2 ( "cob_check_based", cb_build_address (cb_build_field_reference (p, NULL)), diff --git a/libcob/ChangeLog b/libcob/ChangeLog index e0ec8b57b..9f152e534 100644 --- a/libcob/ChangeLog +++ b/libcob/ChangeLog @@ -1,3 +1,16 @@ +2026-04-14 Guillaume Bertholon + + * common.h: rename COB_XML_PARSE_XMLNSS into COB_XML_PARSE_XMLSS to match + the IBM option name + * mlio.c [WITH_XML2]: Fix issues in XML PARSE handling most notably a use + after free error if the internal buffer needs to grow during the parsing. + Respect the high order half-word for exception XML-CODE. + Reduce the number of parsing states by removing useless ones, + and encode eof in these states. + Handle XML chunks with more than one recoverable error. + Trigger ON EXCEPTION code after EXCEPTION XML events. + + 2025-12-04 Simon Sobisch * fileio.c (indexed_open) [WITH_DB]: if open was successful but checking diff --git a/libcob/common.h b/libcob/common.h index 7657d624a..078d83b24 100644 --- a/libcob/common.h +++ b/libcob/common.h @@ -1116,7 +1116,7 @@ enum cob_statement { #define COB_JSON_CJSON 1 #define COB_JSON_JSON_C 2 -#define COB_XML_PARSE_XMLNSS (1U << 0) +#define COB_XML_PARSE_XMLSS (1U << 0) #define COB_XML_PARSE_NATIONAL (1U << 1) #define COB_XML_PARSE_VALIDATE_FILE (1U << 2) @@ -1327,11 +1327,11 @@ typedef struct __cob_module { const char *gc_version; /* module version, until 3.1.2: set by cob_check_version */ unsigned char xml_mode; /* Mode to handle XML PARSE (may be extended) */ - /* similar to XMLPARSE(XMLNSS) Micro Focus, + /* similar to XMLPARSE(XMLSS) Micro Focus, IBM may be different (_very_ likely for error codes); but the main difference is to "COMPAT" */ #define COB_XML_COMPAT 0 - #define COB_XML_XMLNSS 1 + #define COB_XML_XMLSS 1 struct cob_frame_ext *frame_ptr; /* current frame ptr, note: if set then cob_frame in this module is of type "struct cob_frame_ext", otherwise "struct cob_frame" */ diff --git a/libcob/mlio.c b/libcob/mlio.c index f47e7cd43..afca682bb 100644 --- a/libcob/mlio.c +++ b/libcob/mlio.c @@ -26,10 +26,6 @@ #include #include -#ifdef _WIN32 -#include "localcharset.h" -#endif - /* include internal and external libcob definitions, forcing exports */ #define COB_LIB_EXPIMP #include "coblocal.h" @@ -45,12 +41,20 @@ #include #include -#ifndef LIBXML_CONST_ERROR_PTR #if LIBXML_VERSION >= 21200 #define LIBXML_CONST_ERROR_PTR const xmlError * +#define LIBXML_CTXT_GET_ENCODING(ctxt) xmlCtxtGetDeclaredEncoding(ctxt) +#define LIBXML_CTXT_GET_STANDALONE(ctxt) xmlCtxtGetStandalone(ctxt) +#define LIBXML_CTXT_GET_VERSION(ctxt) xmlCtxtGetVersion(ctxt) +#define LIBXML_CTXT_GET_OPTIONS(ctxt) xmlCtxtGetOptions(ctxt) +#define LIBXML_CTXT_SET_OPTIONS(ctxt, options) xmlCtxtSetOptions(ctxt, options) #else #define LIBXML_CONST_ERROR_PTR xmlErrorPtr /* use old ABI */ -#endif +#define LIBXML_CTXT_GET_ENCODING(ctxt) ctxt->encoding +#define LIBXML_CTXT_GET_STANDALONE(ctxt) ctxt->standalone +#define LIBXML_CTXT_GET_VERSION(ctxt) ctxt->version +#define LIBXML_CTXT_GET_OPTIONS(ctxt) ctxt->options +#define LIBXML_CTXT_SET_OPTIONS(ctxt, options) ctxt->options = options #endif #else @@ -109,6 +113,7 @@ json_object_to_json_string_length (struct json_object *obj, /* XMLSS return-code halfword */ #define XRC_SUCCESS 0x0000 /* XMLPARSE processing successfull */ +#define XRC_WARNING 0x0004 /* XMLPARSE warnings = recoverable XML errors */ #define XRC_NOT_WELL_FORMED 0x000C /* not well-formed doc */ #define XRC_FATAL 0x0010 /* fatal error with potential bad / invalid output */ #define XRC_NOT_VALID 0x0018 /* non-fatal: doc doesn't match specified schema */ @@ -122,7 +127,7 @@ json_object_to_json_string_length (struct json_object *obj, enum xml_code_status { XML_STMT_EXIT = -1, XML_STMT_SUCCESSFULL = 0, - XML_PARSE_ERROR_FATAL = XRC_FATAL & (XRSN_UNKNOWN_ERROR << 1), + XML_PARSE_WARNING_MISC_COMPAT = 99, /* various warnings, only in XMLPARSE COMPAT */ XML_PARSE_ERROR_MISC_COMPAT = 201, /* various errors, only in XMLPARSE COMPAT */ XML_OUT_FIELD_TOO_SMALL = 400, XML_INVALID_NAMESPACE = 416, @@ -131,33 +136,16 @@ enum xml_code_status { XML_INTERNAL_ERROR = 600 }; -/* TODO: check for necessary cleanup */ - enum xml_parser_state { XML_PARSER_NOT_STARTED = 0, XML_PARSER_VALIDATION_SETUP, XML_PARSER_VALIDATION_SETUP_MEM, - XML_PARSER_JUST_STARTED, - XML_PARSER_DOCUMENT_START, - XML_PARSER_HAD_END_OF_DOCUMENT, + XML_PARSER_READ_CHUNK, + XML_PARSER_NO_NEW_CHUNKS, XML_PARSER_HAD_END_OF_INPUT, - XML_PARSER_FINE, XML_PARSER_HAD_NONFATAL_ERROR, XML_PARSER_HAD_FATAL_ERROR, - XML_PARSER_STARTING_NEXT_CHUNK, - XML_PARSER_FINISHED, - XML_PARSER_IGNORE_ERROR /* special value for suppressing errors */ -}; - -enum cob_xml_registers { - SREG_XML_EVENT, - SREG_XML_INFORMATION, - SREG_XML_TEXT, - SREG_XML_NTEXT, - SREG_XML_NAMESPACE, - SREG_XML_NNAMESPACE, - SREG_XML_NS_PREFIX, - SREG_XML_NNS_PREFIX + XML_PARSER_FINISHED }; #define COB_XML_EVENT(name,str) name, @@ -191,17 +179,18 @@ static void init_xml_event_list (void); struct xml_event { enum cob_xml_event event; struct xml_event *next; /* pointer to next element */ - const char *text_ptr; /* text pointer in buff */ + int xml_code; /* the XML-CODE of the event (0 unless event is EXCEPTION) */ + size_t text_off; /* text offset in buff */ size_t text_len; /* length of this text */ - const char *namespace_ptr; /* namespace pointer in buff */ + size_t namespace_off; /* namespace offset in buff */ size_t namespace_len; /* length of this namespace */ - const char *prefix_ptr; /* prefix pointer in buff */ + size_t prefix_off; /* prefix offset in buff */ size_t prefix_len; /* length of this prefix */ }; struct xml_state { enum xml_parser_state state; - enum xml_code_status last_xml_code; + int last_xml_code; int flags; #if WITH_XML2 xmlSAXHandler sax; @@ -214,12 +203,11 @@ struct xml_state { struct xml_event *first_event; /* pointer to first processed event */ struct xml_event *event; /* pointer to last processed event */ const char *input_data_ptr; - const char *input_data_end; - void *buff; /* buffer for event data */ + size_t input_data_len; + xmlChar *buff; /* buffer for event data */ size_t buff_len; /* size of current buffer for "text" (increasing until end of XML processing) */ size_t buff_off; /* offset in buffer, reset before each iteration */ - int eof; }; enum json_code_status { @@ -231,14 +219,6 @@ enum json_code_status { static cob_global *cobglobptr; -/* Local functions prototypes */ - -static void xml_endDocument (void *ctx); -void * buffer_xml_event_data (struct xml_state *state, - const void *data, - size_t size); - - /* set special register XML-CODE */ static COB_INLINE COB_A_INLINE void set_xml_code (const enum xml_code_status code) @@ -285,53 +265,52 @@ set_xml_event (enum cob_xml_event event) memset (data2 + size1, ' ', size2 - size1); } -/* provide event structure and does the setup in the state, - note: re-uses events if possible, allocates a new event if needed */ -static struct xml_event * -xml_event_initialized (struct xml_event *event) { - event->text_ptr = NULL; - event->text_len = 0; - event->namespace_ptr = NULL; - event->namespace_len = 0; - event->prefix_ptr = NULL; - event->prefix_len = 0; - return event; +static void +xml_event_reset_registers (struct xml_event *event) +{ + event->xml_code = 0; + event->text_len = 0; + event->prefix_len = 0; + event->namespace_len = 0; } /* provide event structure and does the setup in the state, note: re-uses events if possible, allocates a new event if needed */ -static struct xml_event * -new_xml_event (struct xml_state *state, enum cob_xml_event xml_event) { +static void +new_xml_event (struct xml_state *state, enum cob_xml_event xml_event) +{ struct xml_event *event = state->event; - /* re-use event structure from previous run */ + /* try to re-use event structure from previous run */ if (event) { if (event->event == EVENT_UNKNOWN) { /* very first element, and unsused: */ event->event = xml_event; - return xml_event_initialized (event); + xml_event_reset_registers (event); + return; } if (event->next) { /* another unused element */ event = event->next; event->event = xml_event; state->event = event; - return xml_event_initialized (event); + xml_event_reset_registers (event); + return; } } /* no empty events from previous parsing, create a new one */ event = cob_malloc (sizeof (struct xml_event)); /* add logic to check for malloc failure */ - memset (event, '\0', sizeof (struct xml_event)); event->event = xml_event; + event->next = NULL; + xml_event_reset_registers (event); if (state->event) { state->event->next = event; } else { state->first_event = event; } state->event = event; - return event; } /* the following functions may be used with partially manual parsing @@ -339,153 +318,136 @@ new_xml_event (struct xml_state *state, enum cob_xml_event xml_event) { only used for complete parsing via libxml2 */ #if defined (WITH_XML2) - /* add data to event buffer with given size; + returns the offset at which data is stored inside the buffer returns -1 if buffer allocation is not possible */ -void * -buffer_xml_event_data (struct xml_state *state, - const void *data, - size_t size) +static size_t +buffer_xml_event_data (struct xml_state *state, const void *data, size_t size) { - size_t buff_free_size = state->buff_len - state->buff_off; - void *next_buffer_pos = ((unsigned char *)state->buff) + state->buff_off; + size_t buff_off = state->buff_off; + size_t buff_free_size = state->buff_len - buff_off; + + if (size == 0) { + return -1; + } - /* most common: enough size in the buffer, so copy and finish */ if (size <= buff_free_size) { - memcpy (next_buffer_pos, data, size); + /* most common: enough size in the buffer, so copy and finish */ + memcpy (state->buff + buff_off, data, size); state->buff_off += size; - return next_buffer_pos; - } + return buff_off; + + } else { + /* otherwise: allocate new buffer with additional space, preserving existing data */ + const size_t malloc_size = buff_off + size > state->buff_len * 2 ? + buff_off + size : state->buff_len * 2; + xmlChar *mptr = cob_fast_malloc (malloc_size); - /* otherwise: allocate new buffer with additional space, preserving existing data */ - { - const size_t malloc_size = state->buff_len + COB_LARGE_BUFF; - void *mptr = cob_fast_malloc (malloc_size); /* CHECKME: we possibly want to handle out of memory to pass it to COBOL as XML error - but cob_fast_malloc / cob_malloc already abort the runtime in case of missing memory */ if (mptr) { - if (state->buff_off) { - memcpy (mptr, state->buff, state->buff_off); - } + memcpy (mptr, state->buff, buff_off); cob_free (state->buff); state->buff = mptr; state->buff_len = malloc_size; - buff_free_size = state->buff_len - state->buff_off; - next_buffer_pos = ((unsigned char *)state->buff) + state->buff_off; - - /* most common: enough size in the buffer, so copy and finish */ - if (size <= buff_free_size) { - memcpy (next_buffer_pos, data, size); - state->buff_off += size; - return next_buffer_pos; - } + + memcpy (mptr + buff_off, data, size); + state->buff_off += size; + return buff_off; + } else { + return -1; } } +} - return NULL; +/* set the exception code of the current xml event */ +static void +set_xml_event_exception_code (struct xml_state *state, int xml_code) +{ + state->event->xml_code = xml_code; } -/* add data to event buffer with given size, ignores size = zero; - returns -1 if buffer allocation is not possible */ +/* set text of current event by placing it into the event data buffer */ static void -add_xml_event_data (struct xml_state *state, - enum cob_xml_registers sreg, - const void *data, - size_t size) +set_xml_event_text (struct xml_state *state, const void *data, size_t size) { - void *buff_data; - buff_data = buffer_xml_event_data (state, data, size); + state->event->text_off = buffer_xml_event_data (state, data, size); + state->event->text_len = size; +} - /* add to the current event's data*/ - if (size == 0) { - return; - } - switch (sreg) { - case SREG_XML_TEXT : - case SREG_XML_NTEXT : - state->event->text_ptr = (char *) buff_data; - state->event->text_len = size; - break; - case SREG_XML_NAMESPACE : - case SREG_XML_NNAMESPACE : - state->event->namespace_ptr = (char *) buff_data; - state->event->namespace_len = size; - break; - case SREG_XML_NS_PREFIX : - case SREG_XML_NNS_PREFIX : - state->event->prefix_ptr = (char *) buff_data; - state->event->prefix_len = size; - break; - } +/* set namespace of current event by placing it into the event data buffer */ +static void +set_xml_event_namespace (struct xml_state *state, const void *data, size_t size) +{ + state->event->namespace_off = buffer_xml_event_data (state, data, size); + state->event->namespace_len = size; } -#endif /* defined (WITH_XML2) */ -/* set special registers XML-TEXT / XML-NTEXT - the size is calculated if not explicit specified (size -> -1) - if the state is given then the text is copied to its buffer */ +/* set ns-prefix of current event by placing it into the event data buffer */ static void -set_xml_text (const int ntext, const void *data, const size_t size) +set_xml_event_prefix (struct xml_state *state, const void *data, size_t size) { - if (ntext) { - /* TODO (later): convert input data (libxml2 uses UTF8) to UTF-16 - (or the specified national character set) */ - COB_MODULE_PTR->xml_ntext->data = (unsigned char *) data; - COB_MODULE_PTR->xml_ntext->size = size; - COB_MODULE_PTR->xml_text->data = (unsigned char *) ""; - COB_MODULE_PTR->xml_text->size = 0; - } else { - /* XML-NTEXT and other XML-N... special registers are not available with ACUCOBOL */ - if (COB_MODULE_PTR->xml_ntext) { - COB_MODULE_PTR->xml_ntext->data = (unsigned char *) ""; - COB_MODULE_PTR->xml_ntext->size = 0; - } - COB_MODULE_PTR->xml_text->data = (unsigned char *) data; - COB_MODULE_PTR->xml_text->size = size; - } + state->event->prefix_off = buffer_xml_event_data (state, data, size); + state->event->prefix_len = size; } +#endif /* defined (WITH_XML2) */ -/* set special registers XML-NAMESPACE / XML-NNAMESPACE as well - as optional XML-NAMESPACE-PREFIX / XML-NNAMESPACE-PREFIX - the size is auto-calculated */ +/* set special registers XML-TEXT / XML-NTEXT, XML-NAMESPACE / XML-NNAMESPACE as well + as XML-NAMESPACE-PREFIX / XML-NNAMESPACE-PREFIX from the event */ static void -set_xml_namespace (const int ntext, const void *nsdata, const size_t ns_size, - const void *prefix, const size_t prefix_size) +set_xml_registers (const int ntext, unsigned char *buff, const struct xml_event *event) { - if (ntext) { - /* TODO (later): convert input data (libxml2 uses UTF8) to UTF-16 - (or the specified national character set) */ - COB_MODULE_PTR->xml_nnamespace->data = (unsigned char *) nsdata; - COB_MODULE_PTR->xml_nnamespace->size = ns_size; - if (prefix) { - COB_MODULE_PTR->xml_nnamespace_prefix->data = (unsigned char *) prefix; - COB_MODULE_PTR->xml_nnamespace_prefix->size = prefix_size; - } else { - COB_MODULE_PTR->xml_nnamespace_prefix->data = (unsigned char *) ""; - COB_MODULE_PTR->xml_nnamespace_prefix->size = 0; - } - COB_MODULE_PTR->xml_namespace->data = (unsigned char *) ""; - COB_MODULE_PTR->xml_namespace->size = 0; - COB_MODULE_PTR->xml_namespace_prefix->data = (unsigned char *) ""; - COB_MODULE_PTR->xml_namespace_prefix->size = 0; - } else { - /* XML-NTEXT and other XML-N... special registers are not available with ACUCOBOL */ - if (COB_MODULE_PTR->xml_namespace) { - COB_MODULE_PTR->xml_nnamespace->data = (unsigned char *) ""; - COB_MODULE_PTR->xml_nnamespace->size = 0; - } - if (COB_MODULE_PTR->xml_nnamespace_prefix) { - COB_MODULE_PTR->xml_nnamespace_prefix->data = (unsigned char *) ""; - COB_MODULE_PTR->xml_nnamespace_prefix->size = 0; - } - COB_MODULE_PTR->xml_namespace->data = (unsigned char *) nsdata; - COB_MODULE_PTR->xml_namespace->size = ns_size; - if (prefix) { - COB_MODULE_PTR->xml_namespace_prefix->data = (unsigned char *) prefix; - COB_MODULE_PTR->xml_namespace_prefix->size = prefix_size; + COB_MODULE_PTR->xml_text->data = NULL; + COB_MODULE_PTR->xml_text->size = 0; + COB_MODULE_PTR->xml_namespace->data = NULL; + COB_MODULE_PTR->xml_namespace->size = 0; + COB_MODULE_PTR->xml_namespace_prefix->data = NULL; + COB_MODULE_PTR->xml_namespace_prefix->size = 0; + + /* XML-NTEXT and other XML-N... special registers are not available with ACUCOBOL */ + if (COB_MODULE_PTR->xml_ntext) { + COB_MODULE_PTR->xml_ntext->data = NULL; + COB_MODULE_PTR->xml_ntext->size = 0; + } + if (COB_MODULE_PTR->xml_nnamespace) { + COB_MODULE_PTR->xml_nnamespace->data = NULL; + COB_MODULE_PTR->xml_nnamespace->size = 0; + } + if (COB_MODULE_PTR->xml_nnamespace_prefix) { + COB_MODULE_PTR->xml_nnamespace_prefix->data = NULL; + COB_MODULE_PTR->xml_nnamespace_prefix->size = 0; + } + + if (buff && event) { + if (ntext) { + /* TODO (later): convert input data (libxml2 uses UTF8) to UTF-16 + (or the specified national character set) */ + if (event->text_len) { + COB_MODULE_PTR->xml_ntext->data = buff + event->text_off; + COB_MODULE_PTR->xml_ntext->size = event->text_len; + } + if (event->namespace_len) { + COB_MODULE_PTR->xml_nnamespace->data = buff + event->namespace_off; + COB_MODULE_PTR->xml_nnamespace->size = event->namespace_len; + } + if (event->prefix_len) { + COB_MODULE_PTR->xml_nnamespace_prefix->data = buff + event->prefix_off; + COB_MODULE_PTR->xml_nnamespace_prefix->size = event->prefix_len; + } } else { - COB_MODULE_PTR->xml_namespace_prefix->data = (unsigned char *) ""; - COB_MODULE_PTR->xml_namespace_prefix->size = 0; + if (event->text_len) { + COB_MODULE_PTR->xml_text->data = buff + event->text_off; + COB_MODULE_PTR->xml_text->size = event->text_len; + } + if (event->namespace_len) { + COB_MODULE_PTR->xml_namespace->data = buff + event->namespace_off; + COB_MODULE_PTR->xml_namespace->size = event->namespace_len; + } + if (event->prefix_len) { + COB_MODULE_PTR->xml_namespace_prefix->data = buff + event->prefix_off; + COB_MODULE_PTR->xml_namespace_prefix->size = event->prefix_len; + } } } } @@ -1357,7 +1319,7 @@ cob_xml_generate_new (cob_field *out, cob_ml_tree *tree, cob_field *count, } } -static void xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, +static void xml_parse (cob_field *encoding, cob_field *validation, const int flags, struct xml_state *state); static void xml_endDocument (void *); static void xml_free_parse_memory (struct xml_state *state); @@ -1397,83 +1359,76 @@ int cob_xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, #endif } /* LCOV_EXCL_STOP */ - *saved_state = cob_malloc (sizeof (struct xml_state)); - memset (*saved_state, '\0', sizeof(struct xml_state)); - ((struct xml_state *)*saved_state)->flags = flags; xml_code = 0; + state = cob_malloc (sizeof (struct xml_state)); + memset (state, '\0', sizeof(struct xml_state)); + state->flags = flags; + state->input_data_ptr = (const char*)in->data; + state->input_data_len = in->size; + *saved_state = state; + } else { + state = *saved_state; } - state = (struct xml_state *)*saved_state; - /* postponed loading of XML code to have codegen check (register setup) up front */ xml_code = get_xml_code (); /* initial setup of registers, ensuring they are available in the processing procedure */ -#if 0 - set_xml_text (0, "", 0); - set_xml_namespace (0, "", 0, NULL, 0); -#endif + set_xml_registers (0, NULL, NULL); /* LINKAGE or BASED item without data */ if (!in->data) { - state->last_xml_code = XML_INTERNAL_ERROR; set_xml_exception (XML_INTERNAL_ERROR); - set_xml_event (EVENT_EXCEPTION); - return 0; + xml_free_parse_memory (state); + *saved_state = NULL; + return 1; } if (encoding && is_empty (encoding)) { encoding = NULL; } + /* parser had non-fatal error: check if the user reset it */ + if (state->state == XML_PARSER_HAD_NONFATAL_ERROR) { + /* TODO: handle errors 100,001 to 165,535 with XMLPARSE(COMPAT) + positive non-zero values should set the encoding + and reset the error */ + if (xml_code != 0) { + /* not reset: turn the error into a fatal error */ + state->state = XML_PARSER_HAD_FATAL_ERROR; + } else { + /* reset: ignore the error and continue processing events */ + state->state = XML_PARSER_READ_CHUNK; + } + } + /* parser function had fatal error */ if (state->state == XML_PARSER_HAD_FATAL_ERROR) { set_xml_code (state->last_xml_code); + cob_set_exception (COB_EC_XML); xml_free_parse_memory (state); *saved_state = NULL; return 1; } - /* parser had non-fatal error but the user did not reset it */ - if (state->state == XML_PARSER_HAD_NONFATAL_ERROR) { - if (xml_code != 0) { - /* TODO: recheck !COB_XML_XMLNSS has one - "Parses using the difference as the encoding value" */ - set_xml_code (state->last_xml_code); - xml_free_parse_memory (state); - *saved_state = NULL; - return 1; - } else { - if (COB_MODULE_PTR->xml_mode == COB_XML_XMLNSS) { - /* note: Next event is ATTRIBUTE-NAME or START-OF-ELEMENT */ - /* TODO: likely set appropriate instead of parsing more data */ - } else { - /* TODO: runs with adjusted encoding */ - } - } - } - - /* user user-initiated exception condition (-1) */ + /* user-initiated exception condition (-1) */ if (xml_code == -1) { - /* xml code stays with one */ + /* xml code stays -1 */ + cob_set_exception (COB_EC_XML); xml_free_parse_memory (state); *saved_state = NULL; return 1; } - /* we reached "end of input" (xmlss only?) and were not told to go on */ if (state->state == XML_PARSER_HAD_END_OF_INPUT) { switch (xml_code) { case 0: - xml_endDocument (state); - if (state->eof == 0) { - state->eof = 1; - } else { -// xml_code = 1; - state->state == XML_PARSER_FINISHED; - } + /* no new data chunk to give to the XML parser */ + state->input_data_ptr = NULL; + state->input_data_len = 0; + state->state = XML_PARSER_NO_NEW_CHUNKS; break; case 1: /* goes on with parsing @@ -1481,35 +1436,19 @@ int cob_xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, of the xml data, we need to set both data pointers */ xml_code = 0; state->input_data_ptr = (const char*)in->data; - state->input_data_end = state->input_data_ptr + in->size; - state->state = XML_PARSER_STARTING_NEXT_CHUNK; + state->input_data_len = in->size; + state->state = XML_PARSER_READ_CHUNK; break; - default: - /* fatal runtime error, - TODO: at least a runtime warning, likely runtime exit */ - cob_set_exception (COB_EC_XML); - xml_free_parse_memory (state); - *saved_state = NULL; - return 1; } - } - - /* empty item = no error, just "no data any more" */ - if (state->state != XML_PARSER_HAD_END_OF_DOCUMENT - && state->state != XML_PARSER_FINISHED - && is_empty (in)) { - set_xml_event (EVENT_END_OF_INPUT); - set_xml_code (XML_STMT_SUCCESSFULL); - state->state = XML_PARSER_HAD_END_OF_INPUT; - return 0; + /* other cases are handled below */ } if (xml_code != 0) { /* note: -1 is handled above, also 1 where possible */ - if (COB_MODULE_PTR->xml_mode == COB_XML_XMLNSS) { - /* fatal runtime error, - TODO: at least a runtime warning, likely runtime exit */ - cob_set_exception (COB_EC_XML); + cob_set_exception (COB_EC_XML); + if (COB_MODULE_PTR->xml_mode == COB_XML_XMLSS) { + /* fatal runtime error for IBM, just warn here for now */ + cob_runtime_warning(_("Unexpected XML-CODE value: %d"), xml_code); } else { set_xml_code (-1); } @@ -1531,24 +1470,12 @@ int cob_xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, xml_process_next_event (state); } else { /* do actual parsing */ - xml_parse (in, encoding, validation, flags, state); + xml_parse (encoding, validation, flags, state); } return 0; } - -static void -set_xml_code_parsing_error (const int libxml2_err) { - int xml_err = 0x00000018 /* 24 COMP in split field per IBM doc */ - + (libxml2_err << 8); /* second part with error number */ -#ifdef WORDS_BIGENDIAN /* CHECKME: is that correct? */ - xml_err = COB_BSWAP_32 (xml_err); -#endif - memcpy (COB_MODULE_PTR->xml_code->data, &xml_err, sizeof (int)); -} - - #if defined (WITH_XML2) /* actual handling of XML GENERATE */ @@ -1658,32 +1585,13 @@ xml_generate (cob_field *out, cob_ml_tree *tree, cob_field *count, } } -static void -xml_error_handling (struct xml_state *state, const xmlError *err) { - char err_code[5]; - new_xml_event (state, EVENT_EXCEPTION); - add_xml_event_data (state, - SREG_XML_TEXT, - err->message, - strlen (err->message) + 1); - new_xml_event (state, EVENT_EXCEPTION); - sprintf (err_code, "%4d", err->code); - add_xml_event_data (state, - SREG_XML_TEXT, - err_code, - 5); - - /* CHECKME: Which other elements of the xmlError do we want to pass? */ -#if 0 /* CHECKME: Do we want that? */ - state->state = XML_PARSER_HAD_NONFATAL_ERROR; -#endif -} - static void xml_error_handler (void *ctx, LIBXML_CONST_ERROR_PTR err) { - struct xml_state *parse_state = ctx; - enum xml_parser_state state = parse_state->state; + struct xml_state *state = ctx; static int last_error_code = 0; + int severity; + const char *severity_str; + size_t message_len; /* suppress duplicate message */ if (err->code == XML_SCHEMAP_FAILED_LOAD @@ -1692,46 +1600,70 @@ xml_error_handler (void *ctx, LIBXML_CONST_ERROR_PTR err) { return; } - if (state == XML_PARSER_VALIDATION_SETUP - || state == XML_PARSER_VALIDATION_SETUP_MEM) { + if (state->state == XML_PARSER_VALIDATION_SETUP + || state->state == XML_PARSER_VALIDATION_SETUP_MEM) { /* skip schema detail issues we are not interested in */ if (err->code < XML_IO_UNKNOWN) { return; } - } + } + + if (err->domain == XML_FROM_VALID) { + severity = XRC_NOT_VALID; + severity_str = _("validation error"); + } else if (err->level == XML_ERR_FATAL) { + severity = XRC_FATAL; + severity_str = _("non-recoverable error"); + } else if (err->level == XML_ERR_ERROR) { + severity = XRC_WARNING; + severity_str = _("recoverable error"); + } else { + severity = XRC_WARNING; + severity_str = _("warning"); + } - switch (state) { + /* remove final \n of libxml2 error message */ + message_len = strlen(err->message); + if (message_len && err->message[message_len - 1] == '\n') { + err->message[message_len - 1] = '\0'; + } + + switch (state->state) { case XML_PARSER_VALIDATION_SETUP: if (err->file) { - cob_runtime_warning (_("XML PARSE setup for VALIDATE FILE %s:%d (%d): %s"), - err->file, err->line, err->code, err->message); + cob_runtime_warning (_("XML PARSE %s for VALIDATE FILE %s:%d (%d): %s"), + severity_str, err->file, err->line, err->code, err->message); } else { - cob_runtime_warning (_("XML PARSE setup for VALIDATE FILE (%d): %s"), - err->code, err->message); + cob_runtime_warning (_("XML PARSE %s for VALIDATE FILE (%d): %s"), + severity_str, err->code, err->message); } - set_xml_event (EVENT_EXCEPTION); - parse_state->last_xml_code = XML_PARSE_ERROR_FATAL; - parse_state->state = XML_PARSER_HAD_FATAL_ERROR; - set_xml_code_parsing_error (err->code); break; case XML_PARSER_VALIDATION_SETUP_MEM: - cob_runtime_warning (_("XML PARSE setup for VALIDATE (%d): %s"), - err->code, err->message); - set_xml_event (EVENT_EXCEPTION); - parse_state->last_xml_code = XML_PARSE_ERROR_FATAL; - parse_state->state = XML_PARSER_HAD_FATAL_ERROR; - set_xml_code_parsing_error (err->code); - break; - case XML_PARSER_JUST_STARTED: - case XML_PARSER_DOCUMENT_START: - case XML_PARSER_FINE: - case XML_PARSER_HAD_NONFATAL_ERROR: - xml_error_handling (parse_state, err); + cob_runtime_warning (_("XML PARSE %s for VALIDATE (%d): %s"), + severity_str, err->code, err->message); break; default: - /* not translated as unplanned */ - cob_runtime_warning ("XML PARSE state %d on %s:%d (%d): %s", - state, err->file, err->line, err->code, err->message); + cob_runtime_warning (_("XML PARSE %s (%d): %s"), + severity_str, err->code, err->message); + break; + } + + new_xml_event (state, EVENT_EXCEPTION); + + /* TODO: According to IBM we should put the the prefix of the last chunk + that occurs before the error in XML-TEXT. */ + + /* Set the XML exception code: + For now, we do not try to follow the error codes of IBM. + But we still try to categorize the error between non-recoverable and recoverable. */ + if (COB_MODULE_PTR->xml_mode == COB_XML_XMLSS) { + set_xml_event_exception_code (state, (severity << 16) | err->code); + } else { + if (err->level == XML_ERR_FATAL) { + set_xml_event_exception_code (state, XML_PARSE_ERROR_MISC_COMPAT); + } else { + set_xml_event_exception_code (state, XML_PARSE_WARNING_MISC_COMPAT); + } } last_error_code = err->code; @@ -1743,7 +1675,6 @@ static void xml_endDocument (void *ctx) { struct xml_state *state = ctx; new_xml_event (state, EVENT_END_OF_DOCUMENT); - state->state = XML_PARSER_HAD_END_OF_DOCUMENT; } /* the following functions may be partially used with @@ -1756,62 +1687,31 @@ xml_startDocument (void *ctx) { struct xml_state *state = ctx; xmlParserCtxtPtr ctxt = state->ctx; -#if LIBXML_VERSION >= 21200 - const xmlChar *encoding = xmlCtxtGetDeclaredEncoding(ctxt); - int standalone = xmlCtxtGetStandalone(ctxt); - const xmlChar *version = xmlCtxtGetVersion(ctxt); - -#else - const xmlChar *encoding = ctxt->encoding; - int standalone = ctxt->standalone; - const xmlChar *version = ctxt->version; -#endif + const xmlChar *encoding = LIBXML_CTXT_GET_ENCODING(ctxt); + int standalone = LIBXML_CTXT_GET_STANDALONE(ctxt); + const xmlChar *version = LIBXML_CTXT_GET_VERSION(ctxt); new_xml_event (state, EVENT_START_OF_DOCUMENT); new_xml_event (state, EVENT_VERSION_INFORMATION); - add_xml_event_data (state, - SREG_XML_TEXT, - version, - xmlStrlen (version)); + set_xml_event_text (state, version, xmlStrlen (version)); new_xml_event (state, EVENT_ENCODING_DECLARATION); - add_xml_event_data (state, - SREG_XML_TEXT, - encoding, - xmlStrlen (encoding)); - state->state = XML_PARSER_DOCUMENT_START; - - switch (ctxt->standalone) { - case 1 : - new_xml_event (state, EVENT_STANDALONE_DECLARATION); - add_xml_event_data (state, - SREG_XML_TEXT, - "YES", - 3); - break; - case 0 : - new_xml_event (state, EVENT_STANDALONE_DECLARATION); - add_xml_event_data (state, - SREG_XML_TEXT, - "no", - 2); - break; + set_xml_event_text (state, encoding, xmlStrlen (encoding)); + + if (standalone) { + new_xml_event (state, EVENT_STANDALONE_DECLARATION); + set_xml_event_text (state, "yes", 3); + } else { + new_xml_event (state, EVENT_STANDALONE_DECLARATION); + set_xml_event_text (state, "no", 2); } } -static void -xml_endofInput (struct xml_state *state) { - new_xml_event (state, EVENT_END_OF_INPUT); -} - static void xml_comment (void *ctx, const xmlChar *content) { struct xml_state *state = ctx; new_xml_event (state, EVENT_COMMENT); - add_xml_event_data (state, - SREG_XML_TEXT, - content, - xmlStrlen (content)); + set_xml_event_text (state, content, xmlStrlen (content)); } static void @@ -1820,15 +1720,9 @@ xml_processingInstruction (void *ctx, const xmlChar *data) { struct xml_state *state = ctx; new_xml_event (state, EVENT_PROCESSING_INSTRUCTION_TARGET); - add_xml_event_data (state, - SREG_XML_TEXT, - target, - xmlStrlen (target)); + set_xml_event_text (state, target, xmlStrlen (target)); new_xml_event (state, EVENT_PROCESSING_INSTRUCTION_DATA); - add_xml_event_data (state, - SREG_XML_TEXT, - data, - xmlStrlen (data)); + set_xml_event_text (state, data, xmlStrlen (data)); } static void @@ -1840,67 +1734,47 @@ xml_startElementNs (void *ctx, struct xml_state *state = ctx; new_xml_event (state, EVENT_START_OF_ELEMENT); - add_xml_event_data (state, - SREG_XML_TEXT, - localname, - xmlStrlen (localname)); - /* TODO: cleanup and code namespace stuff and check what to do on endElement */ + set_xml_event_text (state, localname, xmlStrlen (localname)); if (prefix) { - add_xml_event_data (state, - SREG_XML_NS_PREFIX, - prefix, - xmlStrlen (prefix)); + set_xml_event_prefix (state, prefix, xmlStrlen (prefix)); } if (URI) { - add_xml_event_data (state, - SREG_XML_NAMESPACE, - URI, - xmlStrlen (URI)); + set_xml_event_namespace (state, URI, xmlStrlen (URI)); } /* Now we start to process the NAMESPACE-DECLARATION's */ if (namespaces != NULL) { for (cntr = 0; cntr < nb_namespaces * 2; cntr++) { + const xmlChar *nprefix = namespaces[cntr++]; /* Get nprefix (even index) */ + const xmlChar *nuri = namespaces[cntr]; /* Get URI (odd index) */ new_xml_event (state, EVENT_NAMESPACE_DECLARATION); - const xmlChar *nprefix = namespaces[cntr++]; // Get nprefix (even index) - const xmlChar *nuri = namespaces[cntr]; // Get URI (odd index) - /* Handle default namespace (nprefix is NULL) */ if (nuri) { - add_xml_event_data (state, - SREG_XML_NAMESPACE, - nuri, - xmlStrlen (nuri)); + set_xml_event_namespace (state, nuri, xmlStrlen (nuri)); } if (nprefix) { - add_xml_event_data (state, - SREG_XML_NS_PREFIX, - nprefix, - xmlStrlen (nprefix)); + set_xml_event_prefix (state, nprefix, xmlStrlen (nprefix)); } } } /* Process each attribute */ for (cntr = 0; cntr < nb_attributes * 5; cntr += 5) { - const xmlChar *attr_name = attributes[cntr]; // ATTRIBUTE-NAME - const xmlChar *attr_prefix = attributes[cntr + 1]; // ATTRIBUTE-NAMESPACE - const xmlChar *attr_value_start = attributes[cntr + 3]; // ATTRIBUTE-CHARACTERS start - const xmlChar *attr_value_end = attributes[cntr + 4]; // ATTRIBUTE-CHARACTERS end + const xmlChar *attr_name = attributes[cntr]; /* ATTRIBUTE-NAME */ + const xmlChar *attr_prefix = attributes[cntr + 1]; /* ATTRIBUTE-NAMESPACE-PREFIX */ + const xmlChar *attr_namespace = attributes[cntr + 2]; /* ATTRIBUTE-NAMESPACE */ + const xmlChar *attr_value_start = attributes[cntr + 3]; /* ATTRIBUTE-CHARACTERS start */ + const xmlChar *attr_value_end = attributes[cntr + 4]; /* ATTRIBUTE-CHARACTERS end */ - // Calculate attribute value length + /* Calculate attribute value length */ attr_value_len = attr_value_end - attr_value_start; - // Use the extracted information + /* Use the extracted information */ new_xml_event (state, EVENT_ATTRIBUTE_NAME); - add_xml_event_data (state, - SREG_XML_TEXT, - attr_name, - xmlStrlen (attr_name)); + set_xml_event_text (state, attr_name, xmlStrlen (attr_name)); + set_xml_event_prefix (state, attr_prefix, xmlStrlen(attr_prefix)); + set_xml_event_namespace (state, attr_namespace, xmlStrlen(attr_namespace)); new_xml_event (state, EVENT_ATTRIBUTE_CHARACTERS); - add_xml_event_data (state, - SREG_XML_TEXT, - attr_value_start, - attr_value_len); + set_xml_event_text (state, attr_value_start, attr_value_len); } } @@ -1909,22 +1783,12 @@ xml_endElementNs (void *ctx, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI) { struct xml_state *state = ctx; new_xml_event (state, EVENT_END_OF_ELEMENT); - add_xml_event_data (state, - SREG_XML_TEXT, - localname, - xmlStrlen (localname)); - /* TODO: cleanup and code namespace stuff and check what to do on endElement */ + set_xml_event_text (state, localname, xmlStrlen (localname)); if (prefix) { - add_xml_event_data (state, - SREG_XML_NS_PREFIX, - prefix, - xmlStrlen (prefix)); + set_xml_event_prefix (state, prefix, xmlStrlen (prefix)); } if (URI) { - add_xml_event_data (state, - SREG_XML_NAMESPACE, - URI, - xmlStrlen (URI)); + set_xml_event_namespace (state, URI, xmlStrlen (URI)); } } @@ -1932,122 +1796,21 @@ static void xml_startElement (void *ctx, const xmlChar *name, const xmlChar **atts) { struct xml_state *state = ctx; new_xml_event (state, EVENT_START_OF_ELEMENT); - add_xml_event_data (state, - SREG_XML_TEXT, - name, - xmlStrlen (name)); + set_xml_event_text (state, name, xmlStrlen (name)); } static void xml_endElement (void *ctx, const xmlChar *name) { struct xml_state *state = ctx; new_xml_event (state, EVENT_END_OF_ELEMENT); - add_xml_event_data (state, - SREG_XML_TEXT, - name, - xmlStrlen (name)); + set_xml_event_text (state, name, xmlStrlen (name)); } static void xml_characters (void *ctx, const xmlChar *content, int len) { struct xml_state *state = ctx; new_xml_event (state, EVENT_CONTENT_CHARACTERS); - add_xml_event_data (state, - SREG_XML_TEXT, - content, - len); -} - -static void -myStructuredErrorHandler(void *ctx, const xmlError *error) { - - struct xml_state *state = ctx; - static int errorCount = 0; - int i, len; - if (error->level == XML_ERR_ERROR || error->level == XML_ERR_FATAL) { - errorCount++; - } - -#if LIBXML_VERSION >= 21400 // 2.14.0 - if (error->code == XML_WAR_ENCODING_MISMATCH) { -#else - if (error->code == 113) { -#endif - fprintf(stderr, "WARNING: Encoding mismatch detected!\n"); - fprintf(stderr, "Message: %s\n", error->message); - if (error->str1) { - fprintf(stderr, "Declared encoding: %s\n", error->str1); - } - if (error->str2) { - fprintf(stderr, "Auto-detected encoding: %s\n", error->str2); - } - return; - } - - if ((errorCount == 1) && - (error->code == XML_ERR_XMLDECL_NOT_FINISHED || - error->code == XML_ERR_SPACE_REQUIRED)) { - len = state->input_data_end - state->input_data_ptr; -#ifdef _WIN32 - fprintf(stderr, - "Encoding declaration '%s' appears incompatible with input data\n" - "The current runtime character encoding is %s \n" - "This caused XML declaration parsing to fail with: %s", - state->ctx->encoding, locale_charset(), error->message); -#else - fprintf(stderr, - "Encoding declaration '%s' appears incompatible with input data\n" - "The current runtime character encoding does not match \n" - "This caused XML declaration parsing to fail with: %s", - state->ctx->encoding, error->message); -#endif - fprintf(stderr, - "The first 5 characters in HEX are X'"); - if (len > 10) { - len = 10; - } - for (i = 0; i < 5; i++) { - printf("%02x", (unsigned char)state->input_data_ptr[i]); - } - printf("'\n"); - } - - if (error->code == XML_ERR_INVALID_ENCODING) { - fprintf(stderr, "Encoding Error: %s\n", error->message); - if (error->file) { - fprintf(stderr, "File: %s\n", error->file); - } - if (error->line > 0) { - fprintf(stderr, "Line: %d", error->line); - if (error->int2 > 0) { - fprintf(stderr, ", Column: %d", error->int2); - } - fprintf(stderr, "\n"); - } - - // Display problematic bytes if available - if (error->str1) { - fprintf(stderr, "Context: %s\n", error->str1); - } - } else { - fprintf(stderr, - "XML Error ==> %d %s \n", - error->code, - error->message); - } - - if (error->level == XML_ERR_FATAL) { - state->last_xml_code = error->code; - new_xml_event (state, EVENT_EXCEPTION); - len = state->input_data_end - state->input_data_ptr; - if (len > 100) { - len = 100; - } - add_xml_event_data (state, - SREG_XML_TEXT, - state->input_data_ptr, - len); - } + set_xml_event_text (state, content, len); } static void @@ -2061,10 +1824,7 @@ xml_internalSubset(void *ctx, return; } new_xml_event (state, EVENT_DOCUMENT_TYPE_DECLARATION); - add_xml_event_data (state, - SREG_XML_TEXT, - name, - xmlStrlen (name)); + set_xml_event_text (state, name, xmlStrlen (name)); } static void @@ -2072,34 +1832,25 @@ xml_cdata (void *ctx, const xmlChar *content, int len) { struct xml_state *state = ctx; new_xml_event (state, EVENT_START_OF_CDATA_SECTION); if (COB_MODULE_PTR->xml_mode == COB_XML_COMPAT) { - add_xml_event_data (state, - SREG_XML_TEXT, - "xml_mode == COB_XML_COMPAT) { - add_xml_event_data (state, - SREG_XML_TEXT, - "]]>", - 3); + set_xml_event_text (state, "]]>", 3); } } #endif /* defined (WITH_XML2) */ #if defined (WITH_XML2) -/* actual handling of XML PARSE (not implemented yet) */ -void xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, +/* actual handling of XML PARSE */ +void xml_parse (cob_field *encoding, cob_field *validation, const int flags, struct xml_state *state) { - static int first_xml = 1; - const xmlError *error; + const int end_of_parsing = state->state == XML_PARSER_NO_NEW_CHUNKS; if (state->ctx == NULL) { char *enc = NULL; @@ -2119,7 +1870,7 @@ void xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, state->sax.endDocument = xml_endDocument; state->sax.comment = xml_comment; - if (COB_MODULE_PTR->xml_mode == COB_XML_XMLNSS) { + if (COB_MODULE_PTR->xml_mode == COB_XML_XMLSS) { state->sax.initialized = XML_SAX2_MAGIC; state->sax.startElementNs = xml_startElementNs; state->sax.endElementNs = xml_endElementNs; @@ -2133,47 +1884,34 @@ void xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, state->sax.processingInstruction = xml_processingInstruction; state->sax.characters = xml_characters; - state->sax.serror = myStructuredErrorHandler; + state->sax.serror = xml_error_handler; /* - * The document being in memory, it have no base per RFC 2396, - * and the "noname.xml" argument will serve as its base. + * The document being in memory, it have no base per RFC 2396. */ state->ctx = xmlCreatePushParserCtxt (&state->sax, state, - NULL, 0, "noname.xml"); + NULL, 0, NULL); - // Add this immediately after creating the context: + /* Add this immediately after creating the context: */ if (state->ctx != NULL) { -#if LIBXML_VERSION >= 21200 - int options = xmlCtxtGetOptions(state->ctx); -#else - int options = state->ctx->options; -#endif + int options = LIBXML_CTXT_GET_OPTIONS(state->ctx); options &= ~XML_PARSE_NOWARNING; /* Clear the NOWARNING flag */ options &= ~XML_PARSE_NOERROR; /* Also clear NOERROR flag */ -#if LIBXML_VERSION >= 21200 - xmlCtxtSetOptions(state->ctx, options); -#else - state->ctx->options = options; -#endif + LIBXML_CTXT_SET_OPTIONS(state->ctx, options); } - state->input_data_ptr = (const char*)in->data; - state->input_data_end = state->input_data_ptr + in->size; - if (enc) { /* TODO (later): handle encoding */ cob_free (enc); } if (state->ctx == NULL) { - state->last_xml_code = XML_PARSE_ERROR_FATAL; - state->state = XML_PARSER_HAD_FATAL_ERROR; - if (COB_MODULE_PTR->xml_mode == COB_XML_XMLNSS) { - set_xml_exception (XML_PARSE_ERROR_FATAL); + new_xml_event (state, EVENT_EXCEPTION); + if (COB_MODULE_PTR->xml_mode == COB_XML_XMLSS) { + set_xml_event_exception_code (state, XRC_FATAL << 16); } else { - set_xml_exception (XML_PARSE_ERROR_MISC_COMPAT); + set_xml_event_exception_code (state, XML_PARSE_ERROR_MISC_COMPAT); } - set_xml_event (EVENT_EXCEPTION); + xml_process_next_event(state); return; } @@ -2251,21 +1989,13 @@ void xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, } } - state->buff = cob_malloc (COB_LARGE_BUFF); - state->buff_len = COB_LARGE_BUFF; + state->buff = cob_malloc (COB_MINI_BUFF); + state->buff_len = COB_MINI_BUFF; - state->state = XML_PARSER_JUST_STARTED; + state->state = XML_PARSER_READ_CHUNK; } -#if 0 - if (first_xml) { - first_xml = 0; - cob_runtime_warning (_("%s is unfinished"), - "XML PARSE"); - } -#endif - - /* unset existing events, allowing re-use*/ + /* unset existing events, allowing re-use */ { struct xml_event *event; for (event = state->first_event; event; event = event->next) { @@ -2275,38 +2005,12 @@ void xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, state->event = state->first_event; state->buff_off = 0; - while (state->event == NULL - || state->event->event == EVENT_UNKNOWN) { - const int end_of_parsing = state->input_data_ptr >= state->input_data_end; - int size = state->input_data_end - state->input_data_ptr; - if (size > 100) { - size = 100; - } - if (state->eof) { - state->err = xmlParseChunk (state->ctx, state->input_data_ptr, 0, 1); - break; - } else if (!end_of_parsing){ - state->err = xmlParseChunk (state->ctx, state->input_data_ptr, size, end_of_parsing); -#if 0 - if (state->err) { - error = xmlGetLastError (); - fprintf(stderr, "xmlParseChunk returned error %d ==> %s \n", - state->err, - error->message); - } -#endif - state->input_data_ptr += size; - } else { - break; - } - } + state->err = xmlParseChunk (state->ctx, state->input_data_ptr, state->input_data_len, end_of_parsing); - if (state->input_data_ptr >= state->input_data_end) { - if (state->eof) { - state->state = XML_PARSER_FINISHED; - } else { - xml_endofInput (state); - } + if (end_of_parsing) { + state->state = XML_PARSER_FINISHED; + } else { + new_xml_event (state, EVENT_END_OF_INPUT); } #if 0 @@ -2333,48 +2037,37 @@ xml_process_next_event (struct xml_state *state) struct xml_event *event = state->event; const int ntext = state->flags & COB_XML_PARSE_NATIONAL; - /* First set all XML registers to zero length */ - - cob_set_int (COB_MODULE_PTR->xml_information, (int) 1); - COB_MODULE_PTR->xml_namespace->size = 0; - COB_MODULE_PTR->xml_namespace_prefix->size = 0; - COB_MODULE_PTR->xml_nnamespace->size = 0; - COB_MODULE_PTR->xml_nnamespace_prefix->size = 0; - COB_MODULE_PTR->xml_text->size = 0; - COB_MODULE_PTR->xml_ntext->size = 0; - - COB_MODULE_PTR->xml_namespace->data = NULL; - COB_MODULE_PTR->xml_namespace_prefix->data = NULL; - COB_MODULE_PTR->xml_nnamespace->data = NULL; - COB_MODULE_PTR->xml_nnamespace_prefix->data = NULL; - COB_MODULE_PTR->xml_text->data = NULL; - COB_MODULE_PTR->xml_ntext->data = NULL; - - if (event->event == EVENT_END_OF_INPUT && !state->eof) { + if (event->event == EVENT_END_OF_INPUT) { state->state = XML_PARSER_HAD_END_OF_INPUT; + } else if (event->event == EVENT_EXCEPTION) { + state->last_xml_code = event->xml_code; + if (COB_MODULE_PTR->xml_mode == COB_XML_XMLSS) { + if (event->xml_code >> 16 == XRC_WARNING) { + state->state = XML_PARSER_HAD_NONFATAL_ERROR; + } else { + state->state = XML_PARSER_HAD_FATAL_ERROR; + } + } else { + /* TODO: Add an encoding error state for compat mode */ + if (event->xml_code < 100) { + state->state = XML_PARSER_HAD_NONFATAL_ERROR; + } else { + state->state = XML_PARSER_HAD_FATAL_ERROR; + } + } } set_xml_event (event->event); - if (state->last_xml_code) { - set_xml_code (state->last_xml_code); - } else { - set_xml_code (0); - } + set_xml_code (event->xml_code); + set_xml_registers (ntext, state->buff, event); - if (ntext) { - COB_MODULE_PTR->xml_ntext->size = event->text_len; - COB_MODULE_PTR->xml_ntext->data = event->text_ptr; - COB_MODULE_PTR->xml_nnamespace->size = event->namespace_len; - COB_MODULE_PTR->xml_nnamespace->data = event->namespace_ptr; - COB_MODULE_PTR->xml_nnamespace_prefix->size = event->prefix_len; - COB_MODULE_PTR->xml_nnamespace_prefix->data = event->prefix_ptr; - } else { - COB_MODULE_PTR->xml_text->size = event->text_len; - COB_MODULE_PTR->xml_text->data = event->text_ptr; - COB_MODULE_PTR->xml_namespace->size = event->namespace_len; - COB_MODULE_PTR->xml_namespace->data = event->namespace_ptr; - COB_MODULE_PTR->xml_namespace_prefix->size = event->prefix_len; - COB_MODULE_PTR->xml_namespace_prefix->data = event->prefix_ptr; + if (COB_MODULE_PTR->xml_information) { + /* IBM doc states that we should store 1 in XML-INFORMATION on events + ATTRIBUTE-CHARACTERS and CONTENT-CHARACTERS if the value in XML-TEXT + is complete. It seems to be always the case with libxml2. */ + const int info = event->event == EVENT_ATTRIBUTE_CHARACTERS + || event->event == EVENT_CONTENT_CHARACTERS ? 1 : 0; + cob_set_int (COB_MODULE_PTR->xml_information, info); } state->event = event->next; @@ -2436,12 +2129,11 @@ xml_generate (cob_field *out, cob_ml_tree *tree, cob_field *count, } /* actual (non) handling of XML PARSE */ -void xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, +void xml_parse (cob_field *encoding, cob_field *validation, const int flags, struct xml_state *state) { static int first_xml = 1; - COB_UNUSED (in); COB_UNUSED (encoding); COB_UNUSED (validation); COB_UNUSED (flags); diff --git a/tests/testsuite.src/run_ml.at b/tests/testsuite.src/run_ml.at index ddb140c34..35391af96 100644 --- a/tests/testsuite.src/run_ml.at +++ b/tests/testsuite.src/run_ml.at @@ -628,18 +628,16 @@ AT_CLEANUP AT_SETUP([XML register data after use]) AT_KEYWORDS([extensions XML-CODE XML-EVENT PARSE]) -# ensure that we have the data as other compilers -# and can handle the minimal (not useful) parsing with identical -# results with / without libxml2 +AT_SKIP_IF([test "$COB_HAS_XML2" = "no"]) -AT_DATA([prog.cob], [ +AT_DATA([prog.cob],[ PROGRAM-ID. prog. DATA DIVISION. WORKING-STORAGE SECTION. - 77 empty PIC X VALUE SPACE. + 77 empty-xml PIC X(8) VALUE ''. PROCEDURE DIVISION. MAIN. - XML PARSE empty + XML PARSE empty-xml PROCESSING PROCEDURE PROC. DISPLAY 'after : ' XML-CODE ' - "' XML-EVENT '"'. MOVE 5 TO XML-CODE @@ -651,8 +649,13 @@ AT_DATA([prog.cob], [ ]) AT_CHECK([$COMPILE -w prog.cob], [0], [], []) -AT_CHECK([$COBCRUN_DIRECT ./prog], [0], -[proc : +000000000 - "END-OF-INPUT " +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [proc : +000000000 - "START-OF-DOCUMENT " +proc : +000000000 - "VERSION-INFORMATION " +proc : +000000000 - "ENCODING-DECLARATION " +proc : +000000000 - "STANDALONE-DECLARATION " +proc : +000000000 - "START-OF-ELEMENT " +proc : +000000000 - "END-OF-ELEMENT " +proc : +000000000 - "END-OF-INPUT " proc : +000000000 - "END-OF-DOCUMENT " after : +000000000 - "END-OF-DOCUMENT " end : +000000005 - "Test " @@ -660,11 +663,150 @@ end : +000000005 - "Test " AT_CLEANUP +AT_SETUP([XML PARSE abort]) +AT_KEYWORDS([extensions XML-CODE XML-EVENT PARSE]) + +AT_SKIP_IF([[test "$COB_HAS_XML2" = "no"]]) + +AT_DATA([prog.cob],[ + PROGRAM-ID. prog. + DATA DIVISION. + WORKING-STORAGE SECTION. + 77 empty-xml PIC X(8) VALUE ''. + PROCEDURE DIVISION. + MAIN. + XML PARSE empty-xml + PROCESSING PROCEDURE PROC + ON EXCEPTION + DISPLAY 'XML processing aborted, XML-CODE = ' XML-CODE + GOBACK. + DISPLAY 'Unreachable'. + GOBACK. + PROC. + DISPLAY 'proc : ' XML-CODE ' - "' XML-EVENT '"'. + SET XML-CODE TO -1. +]) + +AT_CHECK([$COMPILE -w prog.cob], [0], [], []) +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [proc : +000000000 - "START-OF-DOCUMENT " +XML processing aborted, XML-CODE = -000000001 +], []) +AT_CLEANUP + + +AT_SETUP([XML register data after failure]) +AT_KEYWORDS([extensions XML-CODE XML-EVENT PARSE]) + +AT_SKIP_IF([[test "$COB_HAS_XML2" = "no"]]) + +AT_DATA([prog.cob],[ + PROGRAM-ID. prog. + DATA DIVISION. + WORKING-STORAGE SECTION. + 77 invalid-xml PIC X(4) VALUE ''. + PROCEDURE DIVISION. + MAIN. + XML PARSE invalid-xml + PROCESSING PROCEDURE PROC + ON EXCEPTION + DISPLAY 'XML PARSE aborted' + . + DISPLAY 'after : ' XML-CODE ' - "' XML-EVENT '"'. + GOBACK. + PROC. + DISPLAY 'proc : ' XML-CODE ' - "' XML-EVENT '"'. +]) + +AT_CHECK([$COMPILE -w prog.cob], [0], [], []) +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [proc : +000000000 - "START-OF-DOCUMENT " +proc : +000000000 - "VERSION-INFORMATION " +proc : +000000000 - "ENCODING-DECLARATION " +proc : +000000000 - "STANDALONE-DECLARATION " +proc : +001048644 - "EXCEPTION " +XML PARSE aborted +after : +001048644 - "EXCEPTION " +], [libcob: prog.cob:8: warning: XML PARSE non-recoverable error (68): StartTag: invalid element name +]) +AT_CLEANUP + + AT_SETUP([XML PARSE]) AT_KEYWORDS([extensions PARSE]) -AT_DATA([prog.cob], [ - PROCESS XMLPARSE XMLSS +AT_SKIP_IF([test "$COB_HAS_XML2" = "no"]) + +AT_DATA([prog.cob],[ + Identification division. + PROGRAM-ID. IBMXML. + Data division. + Working-storage section. + ****************************************************************** + * XML document data, encoded as initial values of data items. * + ****************************************************************** + 01 xml-document-data. + 02 pic x(10) value ''. + 02 pic x(33) value ''. + 02 pic x(29) value 'Ham + turkey'. + 02 pic x(34) value 'Cheese, lettuce, tomato, '. + 02 pic x(32) value 'and that''s all, Folks!'. + 02 pic x(28) value '$4.99'. + 02 pic x(25) value '0.10'. + 02 pic x(31) value ''. + 01 xml-document redefines xml-document-data pic x(222). + Procedure division. + Mainline section. + XML parse xml-document + processing procedure XML-handler + ON EXCEPTION + Display 'XML processing error, XML-Code=' XML-Code '.' + Move 16 to return-code + Goback + End-XML + Display 'XML document successfully parsed.' + Move 0 to return-code + Goback. + XML-handler section. + display 'event: ' xml-event ', text: {' xml-text '}'. +]) + +AT_CHECK([$COMPILE prog.cob], [0], [], []) +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [event: START-OF-DOCUMENT , text: {} +event: VERSION-INFORMATION , text: {1.0} +event: ENCODING-DECLARATION , text: {} +event: STANDALONE-DECLARATION , text: {yes} +event: START-OF-ELEMENT , text: {sandwich} +event: START-OF-ELEMENT , text: {bread} +event: ATTRIBUTE-NAME , text: {type} +event: ATTRIBUTE-CHARACTERS , text: {baker's best} +event: END-OF-ELEMENT , text: {bread} +event: CONTENT-CHARACTERS , text: { } +event: START-OF-ELEMENT , text: {meat} +event: CONTENT-CHARACTERS , text: {Ham + turkey} +event: END-OF-ELEMENT , text: {meat} +event: CONTENT-CHARACTERS , text: { } +event: START-OF-ELEMENT , text: {filling} +event: CONTENT-CHARACTERS , text: {Cheese, lettuce, tomato, and that's all, Folks!} +event: END-OF-ELEMENT , text: {filling} +event: START-OF-ELEMENT , text: {listprice} +event: CONTENT-CHARACTERS , text: {$4.99} +event: END-OF-ELEMENT , text: {listprice} +event: START-OF-ELEMENT , text: {discount} +event: CONTENT-CHARACTERS , text: {0.10} +event: END-OF-ELEMENT , text: {discount} +event: END-OF-ELEMENT , text: {sandwich} +event: END-OF-INPUT , text: {} +event: END-OF-DOCUMENT , text: {} +XML document successfully parsed. +], []) +AT_CLEANUP + + +AT_SETUP([XML PARSE complex XML]) +AT_KEYWORDS([extensions PARSE]) + +AT_SKIP_IF([test "$COB_HAS_XML2" = "no"]) + +AT_DATA([prog.cob],[ Identification division. PROGRAM-ID. IBMXML. Data division. @@ -693,8 +835,8 @@ AT_DATA([prog.cob], [ 2 pic x(29) value 'Ham + turkey'. 2 pic x(34) value 'Cheese, lettuce, tomato, '. 2 pic x(32) value 'and that''s all, Folks!'. - 2 pic x(25) value ' element!]]>'. + 2 pic x(25) value ' element!@:>@@:>@>'. 2 pic x(28) value '$4.99'. 2 pic x(25) value '0.10'. 2 pic x(31) value ''. @@ -709,10 +851,9 @@ AT_DATA([prog.cob], [ 05 current-element pic x(40) occurs 10 times. Procedure division. Mainline section. - display 'len=:' length of xml-document-data. + display 'len: ' length of xml-document-data. Move 1 to xml-segment-no Display 'Initial segment {' xml-segment(xml-segment-no) '}' - Display ' ' XML parse xml-segment (xml-segment-no) processing procedure XML-handler ON EXCEPTION @@ -720,56 +861,33 @@ AT_DATA([prog.cob], [ Move 16 to return-code Goback End-XML - Display ' ' - Display 'XML document successfully parsed.' - Display ' ' - Display '-----+++++***** Using information from XML ' - '*****+++++-----' - Display ' ' - Move list-price to Display-price - Display ' Sandwich list price: ' Display-price - Compute Display-price = list-price * (1 - discount) - Display ' Promotional price: ' Display-price - Display ' Get one today!' - Move 0 to return-code - Goback ****************************************************************** * Process the transformed content and calculate promo price. * ****************************************************************** - Display ' ' + Display 'XML document successfully parsed.' Display '-----+++++***** Using information from XML ' '*****+++++-----' - Display ' ' Move list-price to Display-price Display ' Sandwich list price: ' Display-price Compute Display-price = list-price * (1 - discount) Display ' Promotional price: ' Display-price Display ' Get one today!' + Move 0 to return-code Goback. XML-handler section. - * if xml-segment-no = 10 - * move 'END-OF-DOCUMENT' to xml-event - * end-if - display 'parsed event=:' xml-event + display 'event: ' xml-event ', text: {' xml-text '}'. Evaluate XML-Event * ==> Order XML events most frequent first When 'START-OF-ELEMENT' - *** Display 'Start element tag: {' XML-Text '}' Add 1 to element-depth - *** if address of xml-text not = null - Move XML-Text to current-element(element-depth) - *** end-if + Move XML-Text to current-element(element-depth) When 'CONTENT-CHARACTERS' - *** Display 'Content characters: {' XML-Text '}' * ==> In general, a split can occur for any element or attribute * ==> data, but in this sample, it only occurs for "filling"... - DISPLAY 'ALL INFO =:' XML-INFORMATION - If xml-information = 2 - DISPLAY 'INFO = 2' - END-IF + * In practice, with GnuCOBOL, there is no split in XML events + DISPLAY 'XML-INFORMATION: ' XML-INFORMATION If xml-information = 2 and current-element(element-depth) not = 'filling' - DISPLAY 'INFO = 2 AND FILLING' Display 'Unexpected split in content for element ' current-element(element-depth) Move -1 to xml-code @@ -810,167 +928,99 @@ AT_DATA([prog.cob], [ Move ofr-ed-1 to discount End-evaluate When 'END-OF-ELEMENT' - *** Display 'End element tag: {' XML-Text '}' Subtract 1 from element-depth When 'START-OF-DOCUMENT' - Display 'Start of document' Move 0 to element-depth Move 1 to countd - When 'END-OF-DOCUMENT' - Display 'End of document.' - When 'VERSION-INFORMATION' - continue - *** Display 'Version: {' XML-Text '}' - When 'ENCODING-DECLARATION' - continue - *** Display 'Encoding: {' XML-Text '}' - When 'STANDALONE-DECLARATION' - *** Display 'Standalone: {' XML-Text '}' - When 'ATTRIBUTE-NAME' - continue - *** Display 'Attribute name: {' XML-Text '}' - When 'ATTRIBUTE-CHARACTERS' - continue - *** Display 'Attribute value characters: {' XML-Text '}' - When 'ATTRIBUTE-CHARACTER' - continue - *** Display 'Attribute value character: {' XML-Text '}' - When 'START-OF-CDATA-SECTION' - Display 'Start of CData section' - When 'END-OF-CDATA-SECTION' - Display 'End of CData section' - When 'CONTENT-CHARACTER' - *** Display 'Content character: {' XML-Text '}' - When 'PROCESSING-INSTRUCTION-TARGET' - *** Display 'PI target: {' XML-Text '}' - When 'PROCESSING-INSTRUCTION-DATA' - *** Display 'PI data: {' XML-Text '}' - continue - When 'COMMENT' - * if address of xml-text not = null - continue - *** Display 'Comment: {' XML-Text '}' - * end-if When 'EXCEPTION' Compute countd = function length (XML-Text) Display 'Exception ' XML-Code ' at offset ' countd '.' When 'END-OF-INPUT' - Display 'End of input' if xml-segment-no < 10 Add 1 to xml-segment-no - display 'segment-num=:' xml-segment-no + Display 'segment-num: ' xml-segment-no Display - ' Next segment: {' xml-segment(xml-segment-no) - '}' - Display ' ' + 'Next segment: {' xml-segment(xml-segment-no) '}' Move 1 to xml-code end-if When other - Display 'Unexpected XML event: ' XML-Event '.' + continue End-evaluate. ]) -AT_CHECK([$COMPILE -w prog.cob], [0], [], []) -AT_CHECK([$COBCRUN_DIRECT ./prog], [0], -[len=:400 +AT_CHECK([$COMPILE prog.cob], [0], [], []) +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [len: 400 Initial segment { Ham + turkey Cheese, lettuce, tomato, a} - @&t@ -parsed event=:END-OF-ELEMENT @&t@ -parsed event=:CONTENT-CHARACTERS @&t@ -ALL INFO =:+000000001 -parsed event=:START-OF-ELEMENT @&t@ -parsed event=:END-OF-INPUT @&t@ -End of input -segment-num=:+0007 - Next segment: {nd that's all, Folks! Ham + turkey Cheese, lettuce, tomato, a} +event: END-OF-ELEMENT , text: {meat} +event: CONTENT-CHARACTERS , text: { } +XML-INFORMATION: +000000001 +event: START-OF-ELEMENT , text: {filling} +event: END-OF-INPUT , text: {} +segment-num: +0007 +Next segment: {nd that's all, Folks! element!]]>$4.990.10 } - @&t@ -parsed event=:END-OF-ELEMENT @&t@ -parsed event=:END-OF-ELEMENT @&t@ -parsed event=:END-OF-INPUT @&t@ -End of input -parsed event=:END-OF-DOCUMENT @&t@ -End of document. - @&t@ +event: END-OF-ELEMENT , text: {filling} +event: END-OF-INPUT , text: {} +segment-num: +0008 +Next segment: {We should add a element!@:>@@:>@> element!} +XML-INFORMATION: +000000001 +event: END-OF-CDATA-SECTION , text: {} +event: END-OF-INPUT , text: {} +segment-num: +0009 +Next segment: {tprice>$4.990.10 } +event: END-OF-ELEMENT , text: {discount} +event: END-OF-ELEMENT , text: {sandwich} +event: END-OF-INPUT , text: {} +event: END-OF-DOCUMENT , text: {} XML document successfully parsed. - @&t@ -----+++++***** Using information from XML *****+++++----- - @&t@ Sandwich list price: $4.99 Promotional price: $4.49 Get one today! @@ -978,6 +1028,235 @@ XML document successfully parsed. AT_CLEANUP +AT_SETUP([XML PARSE long text]) +AT_KEYWORDS([extensions PARSE]) + +AT_SKIP_IF([test "$COB_HAS_XML2" = "no"]) + +AT_DATA([prog.cob],[ + Identification division. + PROGRAM-ID. IBMXML. + Data division. + Working-storage section. + ****************************************************************** + * XML document data, encoded as initial values of data items. * + ****************************************************************** + 01 xml-document-data. + 02 pic x(10) value ''. + 02 pic x(40) value 'Lorem ipsum dolor sit amet,'. + 02 pic x(40) value 'consectetur adipiscing elit.'. + 02 pic x(40) value 'Sed vitae scelerisque mauris.'. + 02 pic x(40) value 'Ut suscipit faucibus ultrices.'. + 02 pic x(40) value 'Proin lacinia luctus imperdiet.'. + 02 pic x(40) value 'Fusce sit amet augue nisi.'. + 02 pic x(40) value 'Fusce a justo nec nisl luctus laoreet.'. + 02 pic x(40) value 'Nulla rutrum fermentum dui quis tincidun'. + 02 pic x(40) value 'Donec neque dui, maximus cursus felis eu'. + 02 pic x(40) value 'commodo vulputate ipsum.'. + 02 pic x(40) value 'Morbi laoreet risus congue,'. + 02 pic x(40) value 'dictum dolor tincidunt,'. + 02 pic x(40) value 'fermentum diam.'. + 02 pic x(40) value 'Mauris placerat volutpat nisl,'. + 02 pic x(40) value 'nec malesuada enim ut.'. + 02 pic x(10) value ''. + 01 xml-document redefines xml-document-data pic x(620). + Procedure division. + Mainline section. + XML parse xml-document + processing procedure XML-handler + ON EXCEPTION + Display 'XML processing error, XML-Code=' XML-Code '.' + Move 16 to return-code + Goback + End-XML + Display 'XML document successfully parsed.' + Move 0 to return-code + Goback. + XML-handler section. + display 'event: ' xml-event ', text: {' xml-text '}'. +]) + +AT_CHECK([$COMPILE prog.cob], [0], [], []) +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [event: START-OF-DOCUMENT , text: {} +event: VERSION-INFORMATION , text: {1.0} +event: ENCODING-DECLARATION , text: {} +event: STANDALONE-DECLARATION , text: {yes} +event: START-OF-ELEMENT , text: {lorem} +event: CONTENT-CHARACTERS , text: { Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed vitae scelerisque mauris. Ut suscipit faucibus ultrices. Proin lacinia luctus imperdiet. Fusce sit amet augue nisi. Fusce a justo nec nisl luctus laoreet. Nulla rutrum fermentum dui quis tincidunDonec neque dui, maximus cursus felis eucommodo vulputate ipsum. Morbi laoreet risus congue, dictum dolor tincidunt, fermentum diam. Mauris placerat volutpat nisl, nec malesuada enim ut. } +event: END-OF-ELEMENT , text: {lorem} +event: END-OF-INPUT , text: {} +event: END-OF-DOCUMENT , text: {} +XML document successfully parsed. +], []) +AT_CLEANUP + + +AT_SETUP([XML PARSE namespaces]) +AT_KEYWORDS([extensions PARSE]) + +AT_SKIP_IF([[test "$COB_HAS_XML2" = "no"]]) + +AT_DATA([prog.cob],[ + Identification division. + PROGRAM-ID. IBMXML. + Data division. + Working-storage section. + ****************************************************************** + * XML document data, encoded as initial values of data items. * + ****************************************************************** + 01 xml-document-data. + 02 pic x(35) value '
'. + 02 pic x(33) value 'Book-Signing Event'. + 02 pic x(9) value ''. + 02 pic x(24) value ''. + 02 pic x(34) value ''. + 02 pic x(18) value ''. + 02 pic x(19) value 'What a great issue!'. + 02 pic x(10) value ''. + 02 pic x(10) value ''. + 02 pic x(10) value '
'. + 01 xml-document redefines xml-document-data pic x(351). + Procedure division. + Mainline section. + XML parse xml-document + processing procedure XML-handler + ON EXCEPTION + Display 'XML processing error, XML-Code=' XML-Code '.' + Move 16 to return-code + Goback + End-XML + Display 'XML document successfully parsed.' + Move 0 to return-code + Goback. + XML-handler section. + display 'event: ' xml-event ', text: {' xml-text '}' + ', prefix: {' xml-namespace-prefix '}, namespace: {' + xml-namespace '}'. +]) + +AT_CHECK([$COMPILE prog.cob], [0], [], []) +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [event: START-OF-DOCUMENT , text: {}, prefix: {}, namespace: {} +event: VERSION-INFORMATION , text: {1.0}, prefix: {}, namespace: {} +event: ENCODING-DECLARATION , text: {}, prefix: {}, namespace: {} +event: STANDALONE-DECLARATION , text: {yes}, prefix: {}, namespace: {} +event: START-OF-ELEMENT , text: {section}, prefix: {}, namespace: {http://example.com} +event: NAMESPACE-DECLARATION , text: {}, prefix: {}, namespace: {http://example.com} +event: NAMESPACE-DECLARATION , text: {}, prefix: {bk}, namespace: {urn:loc.gov:books} +event: NAMESPACE-DECLARATION , text: {}, prefix: {pi}, namespace: {urn:personalInformation} +event: NAMESPACE-DECLARATION , text: {}, prefix: {isbn}, namespace: {urn:ISBN:0-395-36341-6} +event: START-OF-ELEMENT , text: {title}, prefix: {}, namespace: {http://example.com} +event: CONTENT-CHARACTERS , text: {Book-Signing Event}, prefix: {}, namespace: {} +event: END-OF-ELEMENT , text: {title}, prefix: {}, namespace: {http://example.com} +event: START-OF-ELEMENT , text: {signing}, prefix: {}, namespace: {http://example.com} +event: START-OF-ELEMENT , text: {author}, prefix: {bk}, namespace: {urn:loc.gov:books} +event: ATTRIBUTE-NAME , text: {title}, prefix: {pi}, namespace: {urn:personalInformation} +event: ATTRIBUTE-CHARACTERS , text: {Mr}, prefix: {}, namespace: {} +event: ATTRIBUTE-NAME , text: {name}, prefix: {pi}, namespace: {urn:personalInformation} +event: ATTRIBUTE-CHARACTERS , text: {Jim Ross}, prefix: {}, namespace: {} +event: END-OF-ELEMENT , text: {author}, prefix: {bk}, namespace: {urn:loc.gov:books} +event: START-OF-ELEMENT , text: {book}, prefix: {}, namespace: {http://example.com} +event: ATTRIBUTE-NAME , text: {title}, prefix: {bk}, namespace: {urn:loc.gov:books} +event: ATTRIBUTE-CHARACTERS , text: {COBOL for dummies}, prefix: {}, namespace: {} +event: ATTRIBUTE-NAME , text: {number}, prefix: {isbn}, namespace: {urn:ISBN:0-395-36341-6} +event: ATTRIBUTE-CHARACTERS , text: {0426070806}, prefix: {}, namespace: {} +event: END-OF-ELEMENT , text: {book}, prefix: {}, namespace: {http://example.com} +event: START-OF-ELEMENT , text: {comment}, prefix: {}, namespace: {} +event: NAMESPACE-DECLARATION , text: {}, prefix: {}, namespace: {} +event: CONTENT-CHARACTERS , text: {What a great issue!}, prefix: {}, namespace: {} +event: END-OF-ELEMENT , text: {comment}, prefix: {}, namespace: {} +event: END-OF-ELEMENT , text: {signing}, prefix: {}, namespace: {http://example.com} +event: END-OF-ELEMENT , text: {section}, prefix: {}, namespace: {http://example.com} +event: END-OF-INPUT , text: {}, prefix: {}, namespace: {} +event: END-OF-DOCUMENT , text: {}, prefix: {}, namespace: {} +XML document successfully parsed. +], []) +AT_CLEANUP + + +AT_SETUP([XML PARSE undeclared namespaces]) +AT_KEYWORDS([extensions PARSE]) + +AT_SKIP_IF([[test "$COB_HAS_XML2" = "no"]]) + +AT_DATA([prog.cob],[ + Identification division. + Program-id. XMLup. + Data division. + Working-storage section. + 1 d. + 2 pic x(40) value ''. + 2 pic x(19) value ''. + 2 pic x(20) value ''. + 2 pic x(40) value ''. + 2 pic x(02) value 'c1'. + 2 pic x(41) value ''. + 2 pic x(24) value 'c2c3'. + 2 pic x(32) value ''. + Procedure division. + main. + xml parse d processing procedure h + goback. + h. + display xml-event xml-code '|' xml-text '|' + xml-namespace-prefix '|' + xml-namespace '|' + * In the original IBM example they check specifically the two exceptions + * codes for undeclared namespaces: 264192 and 264193 + * We do not yet support these IBM code + * -> ignore all recoverable errors for now + * if xml-event = 'EXCEPTION' and xml-code = 264192 or 264193 + move 0 to xml-code + * end-if + . + End program XMLup. +]) + +AT_CHECK([$COMPILE prog.cob], [0], [], []) +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [START-OF-DOCUMENT +000000000|||| +VERSION-INFORMATION +000000000|1.0||| +ENCODING-DECLARATION +000000000|||| +STANDALONE-DECLARATION +000000000|yes||| +EXCEPTION +000262345|||| +START-OF-ELEMENT +000000000|root|pfx0|| +NAMESPACE-DECLARATION +000000000||pfx1|http://whatever| +START-OF-ELEMENT +000000000|localElName1|pfx1|http://whatever| +EXCEPTION +000262345|||| +START-OF-ELEMENT +000000000|localElName2|pfx2|| +END-OF-ELEMENT +000000000|localElName2|pfx2|| +EXCEPTION +000262345|||| +EXCEPTION +000262345|||| +START-OF-ELEMENT +000000000|localElName3|pfx3|| +ATTRIBUTE-NAME +000000000|localAtName4|pfx4|| +ATTRIBUTE-CHARACTERS +000000000|||| +CONTENT-CHARACTERS +000000000|c1||| +EXCEPTION +000262345|||| +EXCEPTION +000262345|||| +START-OF-ELEMENT +000000000|localElName5|pfx5|| +ATTRIBUTE-NAME +000000000|localAtName6|pfx6|| +ATTRIBUTE-CHARACTERS +000000000|||| +END-OF-ELEMENT +000000000|localElName5|pfx5|| +CONTENT-CHARACTERS +000000000|c2||| +END-OF-ELEMENT +000000000|localElName3|pfx3|| +CONTENT-CHARACTERS +000000000|c3||| +END-OF-ELEMENT +000000000|localElName1|pfx1|http://whatever| +END-OF-ELEMENT +000000000|root|pfx0|| +END-OF-INPUT +000000000|||| +END-OF-DOCUMENT +000000000|||| +], [libcob: prog.cob:17: warning: XML PARSE recoverable error (201): Namespace prefix pfx0 on root is not defined +libcob: prog.cob:17: warning: XML PARSE recoverable error (201): Namespace prefix pfx2 on localElName2 is not defined +libcob: prog.cob:17: warning: XML PARSE recoverable error (201): Namespace prefix pfx4 for localAtName4 on localElName3 is not defined +libcob: prog.cob:17: warning: XML PARSE recoverable error (201): Namespace prefix pfx3 on localElName3 is not defined +libcob: prog.cob:17: warning: XML PARSE recoverable error (201): Namespace prefix pfx6 for localAtName6 on localElName5 is not defined +libcob: prog.cob:17: warning: XML PARSE recoverable error (201): Namespace prefix pfx5 on localElName5 is not defined +]) +AT_CLEANUP + + # ## JSON # From c39341d4263c3c53f38bb953e3cf8e965978a4a5 Mon Sep 17 00:00:00 2001 From: Guillaume Bertholon Date: Mon, 20 Apr 2026 17:52:06 +0200 Subject: [PATCH 03/12] Fix reference modifiers on XML-TEXT & co. builtin registers --- cobc/codegen.c | 2 ++ tests/testsuite.src/run_ml.at | 36 +++++++++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/cobc/codegen.c b/cobc/codegen.c index b5adb5797..9c350cfad 100644 --- a/cobc/codegen.c +++ b/cobc/codegen.c @@ -1091,6 +1091,8 @@ output_base (struct cb_field *f, const cob_u32_t no_output) } else { output ("cob_local_ptr"); } + } else if (f01->storage == CB_STORAGE_LINKAGE && f01->flag_internal_register) { + output ("%s%d.data", CB_PREFIX_FIELD, f01->id); } else { output ("%s%d", CB_PREFIX_BASE, f01->id); } diff --git a/tests/testsuite.src/run_ml.at b/tests/testsuite.src/run_ml.at index 35391af96..8d3b0c017 100644 --- a/tests/testsuite.src/run_ml.at +++ b/tests/testsuite.src/run_ml.at @@ -743,7 +743,7 @@ AT_DATA([prog.cob],[ ****************************************************************** * XML document data, encoded as initial values of data items. * ****************************************************************** - 01 xml-document-data. + 01 xml-document. 02 pic x(10) value ''. 02 pic x(33) value ''. 02 pic x(29) value 'Ham + turkey'. @@ -752,7 +752,6 @@ AT_DATA([prog.cob],[ 02 pic x(28) value '$4.99'. 02 pic x(25) value '0.10'. 02 pic x(31) value ''. - 01 xml-document redefines xml-document-data pic x(222). Procedure division. Mainline section. XML parse xml-document @@ -1041,7 +1040,7 @@ AT_DATA([prog.cob],[ ****************************************************************** * XML document data, encoded as initial values of data items. * ****************************************************************** - 01 xml-document-data. + 01 xml-document. 02 pic x(10) value ''. 02 pic x(40) value 'Lorem ipsum dolor sit amet,'. 02 pic x(40) value 'consectetur adipiscing elit.'. @@ -1059,7 +1058,6 @@ AT_DATA([prog.cob],[ 02 pic x(40) value 'Mauris placerat volutpat nisl,'. 02 pic x(40) value 'nec malesuada enim ut.'. 02 pic x(10) value ''. - 01 xml-document redefines xml-document-data pic x(620). Procedure division. Mainline section. XML parse xml-document @@ -1104,7 +1102,7 @@ AT_DATA([prog.cob],[ ****************************************************************** * XML document data, encoded as initial values of data items. * ****************************************************************** - 01 xml-document-data. + 01 xml-document. 02 pic x(35) value '
'. 02 pic x(10) value ''. 02 pic x(10) value '
'. - 01 xml-document redefines xml-document-data pic x(351). Procedure division. Mainline section. XML parse xml-document @@ -1257,6 +1254,33 @@ libcob: prog.cob:17: warning: XML PARSE recoverable error (201): Namespace prefi AT_CLEANUP +AT_SETUP([XML-TEXT reference modifier]) +# promoted on 2026-04-20T15:51 +AT_KEYWORDS([extensions PARSE XML-TEXT]) +AT_SKIP_IF([[test "$COB_HAS_XML2" = "no"]]) +AT_DATA([prog.cob],[ + Identification division. + PROGRAM-ID. IBMXML. + Data division. + Working-storage section. + 01 xml-document. + 02 pic x(14) value ''. + Procedure division. + Mainline section. + XML parse xml-document + processing procedure XML-handler. + Goback. + XML-handler section. + if xml-event = 'START-OF-ELEMENT' + display xml-text (7:5) + end-if. +]) +AT_CHECK([$COMPILE prog.cob], [0], [], []) +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [ipsum +], []) +AT_CLEANUP + + # ## JSON # From abd437962aab06a18530638c3519723d3a03608c Mon Sep 17 00:00:00 2001 From: Guillaume Bertholon Date: Fri, 24 Apr 2026 15:54:18 +0200 Subject: [PATCH 04/12] Remove spurious events when there is no tag and stop exposing libxml2 error codes in COBOL --- libcob/ChangeLog | 4 +- libcob/mlio.c | 70 +++++++++++------- tests/testsuite.src/run_ml.at | 132 ++++++++++++++++++++++++---------- 3 files changed, 141 insertions(+), 65 deletions(-) diff --git a/libcob/ChangeLog b/libcob/ChangeLog index 9f152e534..2b300d21f 100644 --- a/libcob/ChangeLog +++ b/libcob/ChangeLog @@ -4,11 +4,13 @@ the IBM option name * mlio.c [WITH_XML2]: Fix issues in XML PARSE handling most notably a use after free error if the internal buffer needs to grow during the parsing. - Respect the high order half-word for exception XML-CODE. + Respect the high order half-word for exception XML-CODE, but do not + expose internal libxml2 error codes. Reduce the number of parsing states by removing useless ones, and encode eof in these states. Handle XML chunks with more than one recoverable error. Trigger ON EXCEPTION code after EXCEPTION XML events. + Remove spurious events when there is no declaration in the file. 2025-12-04 Simon Sobisch diff --git a/libcob/mlio.c b/libcob/mlio.c index afca682bb..e2cc497cc 100644 --- a/libcob/mlio.c +++ b/libcob/mlio.c @@ -1615,10 +1615,11 @@ xml_error_handler (void *ctx, LIBXML_CONST_ERROR_PTR err) { severity = XRC_FATAL; severity_str = _("non-recoverable error"); } else if (err->level == XML_ERR_ERROR) { + /* IBM reports recoverable errors with the XRC_WARNING severity */ severity = XRC_WARNING; severity_str = _("recoverable error"); } else { - severity = XRC_WARNING; + severity = 0; severity_str = _("warning"); } @@ -1648,21 +1649,26 @@ xml_error_handler (void *ctx, LIBXML_CONST_ERROR_PTR err) { break; } - new_xml_event (state, EVENT_EXCEPTION); - - /* TODO: According to IBM we should put the the prefix of the last chunk - that occurs before the error in XML-TEXT. */ - - /* Set the XML exception code: - For now, we do not try to follow the error codes of IBM. - But we still try to categorize the error between non-recoverable and recoverable. */ - if (COB_MODULE_PTR->xml_mode == COB_XML_XMLSS) { - set_xml_event_exception_code (state, (severity << 16) | err->code); - } else { - if (err->level == XML_ERR_FATAL) { - set_xml_event_exception_code (state, XML_PARSE_ERROR_MISC_COMPAT); + if (severity) { + /* Give an EXCEPTION event to the processing procedure on recoverable + and non-recoverable errors, but skip libxml2 warnings. */ + new_xml_event (state, EVENT_EXCEPTION); + + /* TODO: According to IBM we should put the the prefix of the last chunk + that occurs before the error in XML-TEXT. + In practice, it seems to be different for namespace recoverable errors */ + + /* Set the XML exception code: + For now, we do not try to follow the error codes of IBM. + But we still try to categorize the error between non-recoverable and recoverable. */ + if (COB_MODULE_PTR->xml_mode == COB_XML_XMLSS) { + set_xml_event_exception_code (state, severity << 16); } else { - set_xml_event_exception_code (state, XML_PARSE_WARNING_MISC_COMPAT); + if (err->level == XML_ERR_FATAL) { + set_xml_event_exception_code (state, XML_PARSE_ERROR_MISC_COMPAT); + } else { + set_xml_event_exception_code (state, XML_PARSE_WARNING_MISC_COMPAT); + } } } @@ -1692,19 +1698,29 @@ xml_startDocument (void *ctx) { const xmlChar *version = LIBXML_CTXT_GET_VERSION(ctxt); new_xml_event (state, EVENT_START_OF_DOCUMENT); - new_xml_event (state, EVENT_VERSION_INFORMATION); - set_xml_event_text (state, version, xmlStrlen (version)); - new_xml_event (state, EVENT_ENCODING_DECLARATION); - set_xml_event_text (state, encoding, xmlStrlen (encoding)); - - if (standalone) { - new_xml_event (state, EVENT_STANDALONE_DECLARATION); - set_xml_event_text (state, "yes", 3); - } else { - new_xml_event (state, EVENT_STANDALONE_DECLARATION); - set_xml_event_text (state, "no", 2); - } + + /* standalone is -1 when tag is absent */ + if (standalone != -1) { + /* version attribute is mandatory in tag */ + new_xml_event (state, EVENT_VERSION_INFORMATION); + set_xml_event_text (state, version, xmlStrlen (version)); + + if (encoding) { + new_xml_event (state, EVENT_ENCODING_DECLARATION); + set_xml_event_text (state, encoding, xmlStrlen (encoding)); + } + /* standalone is -2 when tag is present without an encoding attribute */ + if (standalone != -2) { + if (standalone) { + new_xml_event (state, EVENT_STANDALONE_DECLARATION); + set_xml_event_text (state, "yes", 3); + } else { + new_xml_event (state, EVENT_STANDALONE_DECLARATION); + set_xml_event_text (state, "no", 2); + } + } + } } static void diff --git a/tests/testsuite.src/run_ml.at b/tests/testsuite.src/run_ml.at index 8d3b0c017..b9315af0c 100644 --- a/tests/testsuite.src/run_ml.at +++ b/tests/testsuite.src/run_ml.at @@ -625,6 +625,40 @@ end : +000000005 - "Test " AT_CLEANUP +AT_SETUP([XML PARSE unsupported]) +AT_KEYWORDS([extensions XML-CODE XML-EVENT PARSE]) + +AT_SKIP_IF([[test "$COB_HAS_XML2" = "yes"]]) + +AT_DATA([prog.cob],[ + PROGRAM-ID. prog. + DATA DIVISION. + WORKING-STORAGE SECTION. + 77 empty-xml PIC X(8) VALUE ''. + PROCEDURE DIVISION. + MAIN. + XML PARSE empty-xml + PROCESSING PROCEDURE PROC. + DISPLAY 'after : ' XML-CODE ' - "' XML-EVENT '"'. + MOVE 5 TO XML-CODE + MOVE 'Test' TO XML-EVENT + DISPLAY 'end : ' XML-CODE ' - "' XML-EVENT '"'. + GOBACK. + PROC. + DISPLAY 'proc : ' XML-CODE ' - "' XML-EVENT '"'. +]) + +AT_CHECK([$COMPILE -Wunsupported prog.cob], [0], [], [prog.cob: in paragraph 'MAIN': +prog.cob:9: warning: runtime is not configured to support XML +]) +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [proc : +000000600 - "EXCEPTION " +after : +000000600 - "EXCEPTION " +end : +000000005 - "Test " +], [libcob: prog.cob:8: warning: runtime is not configured to support XML +]) +AT_CLEANUP + + AT_SETUP([XML register data after use]) AT_KEYWORDS([extensions XML-CODE XML-EVENT PARSE]) @@ -648,11 +682,8 @@ AT_DATA([prog.cob],[ DISPLAY 'proc : ' XML-CODE ' - "' XML-EVENT '"'. ]) -AT_CHECK([$COMPILE -w prog.cob], [0], [], []) +AT_CHECK([$COMPILE prog.cob], [0], [], []) AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [proc : +000000000 - "START-OF-DOCUMENT " -proc : +000000000 - "VERSION-INFORMATION " -proc : +000000000 - "ENCODING-DECLARATION " -proc : +000000000 - "STANDALONE-DECLARATION " proc : +000000000 - "START-OF-ELEMENT " proc : +000000000 - "END-OF-ELEMENT " proc : +000000000 - "END-OF-INPUT " @@ -684,10 +715,10 @@ AT_DATA([prog.cob],[ GOBACK. PROC. DISPLAY 'proc : ' XML-CODE ' - "' XML-EVENT '"'. - SET XML-CODE TO -1. + MOVE -1 TO XML-CODE. ]) -AT_CHECK([$COMPILE -w prog.cob], [0], [], []) +AT_CHECK([$COMPILE prog.cob], [0], [], []) AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [proc : +000000000 - "START-OF-DOCUMENT " XML processing aborted, XML-CODE = -000000001 ], []) @@ -699,7 +730,7 @@ AT_KEYWORDS([extensions XML-CODE XML-EVENT PARSE]) AT_SKIP_IF([[test "$COB_HAS_XML2" = "no"]]) -AT_DATA([prog.cob],[ +AT_DATA([prog.cob], [ PROGRAM-ID. prog. DATA DIVISION. WORKING-STORAGE SECTION. @@ -717,15 +748,51 @@ AT_DATA([prog.cob],[ DISPLAY 'proc : ' XML-CODE ' - "' XML-EVENT '"'. ]) -AT_CHECK([$COMPILE -w prog.cob], [0], [], []) -AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [proc : +000000000 - "START-OF-DOCUMENT " -proc : +000000000 - "VERSION-INFORMATION " -proc : +000000000 - "ENCODING-DECLARATION " -proc : +000000000 - "STANDALONE-DECLARATION " -proc : +001048644 - "EXCEPTION " +AT_CHECK([$COMPILE prog.cob]) +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], +[proc : +000000000 - "START-OF-DOCUMENT " +proc : +001048576 - "EXCEPTION " XML PARSE aborted -after : +001048644 - "EXCEPTION " -], [libcob: prog.cob:8: warning: XML PARSE non-recoverable error (68): StartTag: invalid element name +after : +001048576 - "EXCEPTION " +], +[libcob: prog.cob:8: warning: XML PARSE non-recoverable error (68): StartTag: invalid element name +]) + +AT_CLEANUP + + +AT_SETUP([XML PARSE declaration]) +AT_KEYWORDS([extensions XML-CODE XML-EVENT PARSE]) + +AT_SKIP_IF([[test "$COB_HAS_XML2" = "no"]]) + +AT_DATA([prog.cob], [ + PROGRAM-ID. prog. + DATA DIVISION. + WORKING-STORAGE SECTION. + 77 empty-xml PIC X(29) VALUE ''. + PROCEDURE DIVISION. + MAIN. + XML PARSE empty-xml + PROCESSING PROCEDURE PROC + ON EXCEPTION + DISPLAY 'XML processing aborted, XML-CODE = ' XML-CODE + GOBACK. + Display 'XML document successfully parsed.' + GOBACK. + PROC. + DISPLAY 'proc : ' XML-CODE ' - "' XML-EVENT '"'. +]) + +AT_CHECK([$COMPILE prog.cob]) +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], +[proc : +000000000 - "START-OF-DOCUMENT " +proc : +000000000 - "VERSION-INFORMATION " +proc : +000000000 - "START-OF-ELEMENT " +proc : +000000000 - "END-OF-ELEMENT " +proc : +000000000 - "END-OF-INPUT " +proc : +000000000 - "END-OF-DOCUMENT " +XML document successfully parsed. ]) AT_CLEANUP @@ -770,9 +837,6 @@ AT_DATA([prog.cob],[ AT_CHECK([$COMPILE prog.cob], [0], [], []) AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [event: START-OF-DOCUMENT , text: {} -event: VERSION-INFORMATION , text: {1.0} -event: ENCODING-DECLARATION , text: {} -event: STANDALONE-DECLARATION , text: {yes} event: START-OF-ELEMENT , text: {sandwich} event: START-OF-ELEMENT , text: {bread} event: ATTRIBUTE-NAME , text: {type} @@ -1076,9 +1140,6 @@ AT_DATA([prog.cob],[ AT_CHECK([$COMPILE prog.cob], [0], [], []) AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [event: START-OF-DOCUMENT , text: {} -event: VERSION-INFORMATION , text: {1.0} -event: ENCODING-DECLARATION , text: {} -event: STANDALONE-DECLARATION , text: {yes} event: START-OF-ELEMENT , text: {lorem} event: CONTENT-CHARACTERS , text: { Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed vitae scelerisque mauris. Ut suscipit faucibus ultrices. Proin lacinia luctus imperdiet. Fusce sit amet augue nisi. Fusce a justo nec nisl luctus laoreet. Nulla rutrum fermentum dui quis tincidunDonec neque dui, maximus cursus felis eucommodo vulputate ipsum. Morbi laoreet risus congue, dictum dolor tincidunt, fermentum diam. Mauris placerat volutpat nisl, nec malesuada enim ut. } event: END-OF-ELEMENT , text: {lorem} @@ -1138,9 +1199,6 @@ AT_DATA([prog.cob],[ AT_CHECK([$COMPILE prog.cob], [0], [], []) AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [event: START-OF-DOCUMENT , text: {}, prefix: {}, namespace: {} -event: VERSION-INFORMATION , text: {1.0}, prefix: {}, namespace: {} -event: ENCODING-DECLARATION , text: {}, prefix: {}, namespace: {} -event: STANDALONE-DECLARATION , text: {yes}, prefix: {}, namespace: {} event: START-OF-ELEMENT , text: {section}, prefix: {}, namespace: {http://example.com} event: NAMESPACE-DECLARATION , text: {}, prefix: {}, namespace: {http://example.com} event: NAMESPACE-DECLARATION , text: {}, prefix: {bk}, namespace: {urn:loc.gov:books} @@ -1202,7 +1260,7 @@ AT_DATA([prog.cob],[ display xml-event xml-code '|' xml-text '|' xml-namespace-prefix '|' xml-namespace '|' - * In the original IBM example they check specifically the two exceptions + * In the original IBM example they check specifically the two exceptions * codes for undeclared namespaces: 264192 and 264193 * We do not yet support these IBM code * -> ignore all recoverable errors for now @@ -1213,26 +1271,24 @@ AT_DATA([prog.cob],[ End program XMLup. ]) -AT_CHECK([$COMPILE prog.cob], [0], [], []) -AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [START-OF-DOCUMENT +000000000|||| -VERSION-INFORMATION +000000000|1.0||| -ENCODING-DECLARATION +000000000|||| -STANDALONE-DECLARATION +000000000|yes||| -EXCEPTION +000262345|||| +AT_CHECK([$COMPILE prog.cob]) +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], +[START-OF-DOCUMENT +000000000|||| +EXCEPTION +000262144|||| START-OF-ELEMENT +000000000|root|pfx0|| NAMESPACE-DECLARATION +000000000||pfx1|http://whatever| START-OF-ELEMENT +000000000|localElName1|pfx1|http://whatever| -EXCEPTION +000262345|||| +EXCEPTION +000262144|||| START-OF-ELEMENT +000000000|localElName2|pfx2|| END-OF-ELEMENT +000000000|localElName2|pfx2|| -EXCEPTION +000262345|||| -EXCEPTION +000262345|||| +EXCEPTION +000262144|||| +EXCEPTION +000262144|||| START-OF-ELEMENT +000000000|localElName3|pfx3|| ATTRIBUTE-NAME +000000000|localAtName4|pfx4|| ATTRIBUTE-CHARACTERS +000000000|||| CONTENT-CHARACTERS +000000000|c1||| -EXCEPTION +000262345|||| -EXCEPTION +000262345|||| +EXCEPTION +000262144|||| +EXCEPTION +000262144|||| START-OF-ELEMENT +000000000|localElName5|pfx5|| ATTRIBUTE-NAME +000000000|localAtName6|pfx6|| ATTRIBUTE-CHARACTERS +000000000|||| @@ -1244,13 +1300,15 @@ END-OF-ELEMENT +000000000|localElName1|pfx1|http://whatever| END-OF-ELEMENT +000000000|root|pfx0|| END-OF-INPUT +000000000|||| END-OF-DOCUMENT +000000000|||| -], [libcob: prog.cob:17: warning: XML PARSE recoverable error (201): Namespace prefix pfx0 on root is not defined +], +[libcob: prog.cob:17: warning: XML PARSE recoverable error (201): Namespace prefix pfx0 on root is not defined libcob: prog.cob:17: warning: XML PARSE recoverable error (201): Namespace prefix pfx2 on localElName2 is not defined libcob: prog.cob:17: warning: XML PARSE recoverable error (201): Namespace prefix pfx4 for localAtName4 on localElName3 is not defined libcob: prog.cob:17: warning: XML PARSE recoverable error (201): Namespace prefix pfx3 on localElName3 is not defined libcob: prog.cob:17: warning: XML PARSE recoverable error (201): Namespace prefix pfx6 for localAtName6 on localElName5 is not defined libcob: prog.cob:17: warning: XML PARSE recoverable error (201): Namespace prefix pfx5 on localElName5 is not defined ]) + AT_CLEANUP From cdae64009526266a902d1eb845d969118ec08c6c Mon Sep 17 00:00:00 2001 From: Guillaume Bertholon Date: Fri, 24 Apr 2026 17:29:15 +0200 Subject: [PATCH 05/12] ChangeLog & copyright adjustments --- cobc/ChangeLog | 1 + libcob/ChangeLog | 19 +++++++++++-------- tests/testsuite.src/run_ml.at | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/cobc/ChangeLog b/cobc/ChangeLog index 102f2621f..182c521e4 100644 --- a/cobc/ChangeLog +++ b/cobc/ChangeLog @@ -1,3 +1,4 @@ + 2026-04-14 Guillaume Bertholon * parser.y: remove the CB_PENDING warning on XML PARSE but still warn for diff --git a/libcob/ChangeLog b/libcob/ChangeLog index 2b300d21f..057381e1b 100644 --- a/libcob/ChangeLog +++ b/libcob/ChangeLog @@ -1,16 +1,19 @@ + 2026-04-14 Guillaume Bertholon * common.h: rename COB_XML_PARSE_XMLNSS into COB_XML_PARSE_XMLSS to match the IBM option name - * mlio.c [WITH_XML2]: Fix issues in XML PARSE handling most notably a use + * mlio.c: Fix issues in XML PARSE handling most notably a use after free error if the internal buffer needs to grow during the parsing. - Respect the high order half-word for exception XML-CODE, but do not - expose internal libxml2 error codes. - Reduce the number of parsing states by removing useless ones, - and encode eof in these states. - Handle XML chunks with more than one recoverable error. - Trigger ON EXCEPTION code after EXCEPTION XML events. - Remove spurious events when there is no declaration in the file. + This fix changes the definition of xml_event to store offsets in the + buffer instead of pointers + * mlio.c: Respect the high order half-word for exception XML-CODE, + but do not expose internal libxml2 error codes + * mlio.c: Reduce the number of parsing states by removing useless ones, + and encode eof in these states + * mlio.c: Handle XML chunks with more than one recoverable error + * mlio.c: Trigger ON EXCEPTION code after EXCEPTION XML events + * mlio.c: Remove spurious events when there is no declaration in the file 2025-12-04 Simon Sobisch diff --git a/tests/testsuite.src/run_ml.at b/tests/testsuite.src/run_ml.at index b9315af0c..ee488566c 100644 --- a/tests/testsuite.src/run_ml.at +++ b/tests/testsuite.src/run_ml.at @@ -1,4 +1,4 @@ -## Copyright (C) 2018-2020, 2022, 2024-2025 Free Software Foundation, Inc. +## Copyright (C) 2018-2020, 2022, 2024-2026 Free Software Foundation, Inc. ## Written by Edward Hart, Simon Sobisch ## ## This file is part of GnuCOBOL. From 8fed31a5f6be6e138470c88e31bba2c92fb36bed Mon Sep 17 00:00:00 2001 From: Guillaume Bertholon Date: Fri, 24 Apr 2026 18:14:57 +0200 Subject: [PATCH 06/12] Merge together tests with and without libxml2 --- tests/testsuite.src/run_ml.at | 52 ++++++++++------------------------- 1 file changed, 15 insertions(+), 37 deletions(-) diff --git a/tests/testsuite.src/run_ml.at b/tests/testsuite.src/run_ml.at index ee488566c..e42e6b3f7 100644 --- a/tests/testsuite.src/run_ml.at +++ b/tests/testsuite.src/run_ml.at @@ -625,11 +625,9 @@ end : +000000005 - "Test " AT_CLEANUP -AT_SETUP([XML PARSE unsupported]) +AT_SETUP([XML register data after use]) AT_KEYWORDS([extensions XML-CODE XML-EVENT PARSE]) -AT_SKIP_IF([[test "$COB_HAS_XML2" = "yes"]]) - AT_DATA([prog.cob],[ PROGRAM-ID. prog. DATA DIVISION. @@ -648,42 +646,20 @@ AT_DATA([prog.cob],[ DISPLAY 'proc : ' XML-CODE ' - "' XML-EVENT '"'. ]) -AT_CHECK([$COMPILE -Wunsupported prog.cob], [0], [], [prog.cob: in paragraph 'MAIN': -prog.cob:9: warning: runtime is not configured to support XML -]) -AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [proc : +000000600 - "EXCEPTION " +AT_CHECK([$COMPILE -w prog.cob], [0], [], []) + +AT_CHECK([test "$COB_HAS_XML2" = "yes"], [0], [], [], +# Previous test "failed" -> no XML runtime +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], +[proc : +000000600 - "EXCEPTION " after : +000000600 - "EXCEPTION " end : +000000005 - "Test " -], [libcob: prog.cob:8: warning: runtime is not configured to support XML -]) -AT_CLEANUP - - -AT_SETUP([XML register data after use]) -AT_KEYWORDS([extensions XML-CODE XML-EVENT PARSE]) - -AT_SKIP_IF([test "$COB_HAS_XML2" = "no"]) - -AT_DATA([prog.cob],[ - PROGRAM-ID. prog. - DATA DIVISION. - WORKING-STORAGE SECTION. - 77 empty-xml PIC X(8) VALUE ''. - PROCEDURE DIVISION. - MAIN. - XML PARSE empty-xml - PROCESSING PROCEDURE PROC. - DISPLAY 'after : ' XML-CODE ' - "' XML-EVENT '"'. - MOVE 5 TO XML-CODE - MOVE 'Test' TO XML-EVENT - DISPLAY 'end : ' XML-CODE ' - "' XML-EVENT '"'. - GOBACK. - PROC. - DISPLAY 'proc : ' XML-CODE ' - "' XML-EVENT '"'. -]) - -AT_CHECK([$COMPILE prog.cob], [0], [], []) -AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [proc : +000000000 - "START-OF-DOCUMENT " +], +[libcob: prog.cob:8: warning: runtime is not configured to support XML +]), +# Previous test "passed" -> libxml2 available +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], +[proc : +000000000 - "START-OF-DOCUMENT " proc : +000000000 - "START-OF-ELEMENT " proc : +000000000 - "END-OF-ELEMENT " proc : +000000000 - "END-OF-INPUT " @@ -691,6 +667,8 @@ proc : +000000000 - "END-OF-DOCUMENT " after : +000000000 - "END-OF-DOCUMENT " end : +000000005 - "Test " ], []) +) + AT_CLEANUP From 2216928a21f9364ee77ab3b61c8229d98a8a382b Mon Sep 17 00:00:00 2001 From: Guillaume Bertholon Date: Mon, 27 Apr 2026 16:06:27 +0200 Subject: [PATCH 07/12] Small code improvement suggested during the code review for XML PARSE --- DEPENDENCIES | 2 +- configure.ac | 2 +- libcob/mlio.c | 162 ++++++++++++++++++---------------- tests/testsuite.src/run_ml.at | 2 +- 4 files changed, 87 insertions(+), 81 deletions(-) diff --git a/DEPENDENCIES b/DEPENDENCIES index f3043beda..e2dc79d19 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -116,7 +116,7 @@ The following libraries ARE required WHEN : BOTH runtime AND development components required. - libxml2 >= 2.5.10 - https://gitlab.gnome.org/GNOME/libxml2/-/wikis/home + libxml2 >= 2.6.1 - https://gitlab.gnome.org/GNOME/libxml2/-/wikis/home libxml2 is distributed under MIT License (Expat variant). diff --git a/configure.ac b/configure.ac index 948b8c93f..63ad78614 100644 --- a/configure.ac +++ b/configure.ac @@ -1132,7 +1132,7 @@ AS_IF([test "$with_xml2" = yes -o "$with_xml2" = check], [ LIBS="$LIBS $LIBCOB_LIBS_extern $XML2_LIBS" # note: PKG_CONFIG and xml2-config set -I/path/to/libxml2 which contains a "libxml" folder where # all the files we look for are included - for header in xmlwriter xmlversion uri parser tree; do + for header in xmlwriter xmlversion uri parser tree SAX2; do AC_CHECK_HEADER([libxml/$header.h], [], [if test "$with_xml2" = yes; then AC_MSG_ERROR([headers for libxml2 are required for --with-xml2, you may adjust XML2_CFLAGS]) diff --git a/libcob/mlio.c b/libcob/mlio.c index e2cc497cc..47072c1fb 100644 --- a/libcob/mlio.c +++ b/libcob/mlio.c @@ -42,19 +42,19 @@ #include #if LIBXML_VERSION >= 21200 -#define LIBXML_CONST_ERROR_PTR const xmlError * -#define LIBXML_CTXT_GET_ENCODING(ctxt) xmlCtxtGetDeclaredEncoding(ctxt) -#define LIBXML_CTXT_GET_STANDALONE(ctxt) xmlCtxtGetStandalone(ctxt) -#define LIBXML_CTXT_GET_VERSION(ctxt) xmlCtxtGetVersion(ctxt) -#define LIBXML_CTXT_GET_OPTIONS(ctxt) xmlCtxtGetOptions(ctxt) -#define LIBXML_CTXT_SET_OPTIONS(ctxt, options) xmlCtxtSetOptions(ctxt, options) -#else -#define LIBXML_CONST_ERROR_PTR xmlErrorPtr /* use old ABI */ -#define LIBXML_CTXT_GET_ENCODING(ctxt) ctxt->encoding -#define LIBXML_CTXT_GET_STANDALONE(ctxt) ctxt->standalone -#define LIBXML_CTXT_GET_VERSION(ctxt) ctxt->version -#define LIBXML_CTXT_GET_OPTIONS(ctxt) ctxt->options -#define LIBXML_CTXT_SET_OPTIONS(ctxt, options) ctxt->options = options +#define LIBXML_CONST_ERROR_PTR const xmlError * +#define LIBXML_CTXT_GET_ENCODING(ctxt) xmlCtxtGetDeclaredEncoding(ctxt) +#define LIBXML_CTXT_GET_STANDALONE(ctxt) xmlCtxtGetStandalone(ctxt) +#define LIBXML_CTXT_GET_VERSION(ctxt) xmlCtxtGetVersion(ctxt) +#define LIBXML_CTXT_GET_OPTIONS(ctxt) xmlCtxtGetOptions(ctxt) +#define LIBXML_CTXT_SET_OPTIONS(ctxt, options) xmlCtxtSetOptions(ctxt, options) +#else /* use old ABI */ +#define LIBXML_CONST_ERROR_PTR xmlErrorPtr +#define LIBXML_CTXT_GET_ENCODING(ctxt) ctxt->encoding +#define LIBXML_CTXT_GET_STANDALONE(ctxt) ctxt->standalone +#define LIBXML_CTXT_GET_VERSION(ctxt) ctxt->version +#define LIBXML_CTXT_GET_OPTIONS(ctxt) ctxt->options +#define LIBXML_CTXT_SET_OPTIONS(ctxt, options) ctxt->options = options #endif #else @@ -87,7 +87,7 @@ typedef unsigned char xmlChar; #define json_object_object_add_ex(obj, key, val, opts) \ json_object_object_add (obj, key, val) -static inline const char * +static COB_INLINE COB_A_INLINE const char * json_object_to_json_string_length (struct json_object *obj, int flags, size_t *length) { @@ -125,15 +125,19 @@ json_object_to_json_string_length (struct json_object *obj, /* standard error codes */ enum xml_code_status { - XML_STMT_EXIT = -1, - XML_STMT_SUCCESSFULL = 0, + XML_EXIT = -1, + XML_OK = 0, + XML_CONTINUE = 1, XML_PARSE_WARNING_MISC_COMPAT = 99, /* various warnings, only in XMLPARSE COMPAT */ XML_PARSE_ERROR_MISC_COMPAT = 201, /* various errors, only in XMLPARSE COMPAT */ XML_OUT_FIELD_TOO_SMALL = 400, XML_INVALID_NAMESPACE = 416, XML_INVALID_CHAR_REPLACED = 417, XML_INVALID_NAMESPACE_PREFIX = 419, - XML_INTERNAL_ERROR = 600 + XML_INTERNAL_ERROR = 600, + XML_PARSE_WARNING_MISC_XMLSS = XRC_WARNING << 16, + XML_PARSE_ERROR_MISC_XMLSS = XRC_FATAL << 16, + XML_PARSE_NOT_VALID_MISC_XMLSS = XRC_NOT_VALID << 16, }; enum xml_parser_state { @@ -179,7 +183,7 @@ static void init_xml_event_list (void); struct xml_event { enum cob_xml_event event; struct xml_event *next; /* pointer to next element */ - int xml_code; /* the XML-CODE of the event (0 unless event is EXCEPTION) */ + enum xml_code_status xml_code; /* the XML-CODE of the event (0 unless event is EXCEPTION) */ size_t text_off; /* text offset in buff */ size_t text_len; /* length of this text */ size_t namespace_off; /* namespace offset in buff */ @@ -190,7 +194,7 @@ struct xml_event { struct xml_state { enum xml_parser_state state; - int last_xml_code; + enum xml_code_status last_xml_code; int flags; #if WITH_XML2 xmlSAXHandler sax; @@ -241,7 +245,7 @@ set_xml_exception (const enum xml_code_status code) } /* get special register XML-CODE */ -static COB_INLINE COB_A_INLINE int +static COB_INLINE COB_A_INLINE enum xml_code_status get_xml_code (void) { return cob_get_int (COB_MODULE_PTR->xml_code); @@ -265,8 +269,8 @@ set_xml_event (enum cob_xml_event event) memset (data2 + size1, ' ', size2 - size1); } -static void -xml_event_reset_registers (struct xml_event *event) +static COB_INLINE COB_A_INLINE void +xml_event_init (struct xml_event *event) { event->xml_code = 0; event->text_len = 0; @@ -286,7 +290,7 @@ new_xml_event (struct xml_state *state, enum cob_xml_event xml_event) if (event->event == EVENT_UNKNOWN) { /* very first element, and unsused: */ event->event = xml_event; - xml_event_reset_registers (event); + xml_event_init (event); return; } if (event->next) { @@ -294,17 +298,18 @@ new_xml_event (struct xml_state *state, enum cob_xml_event xml_event) event = event->next; event->event = xml_event; state->event = event; - xml_event_reset_registers (event); + xml_event_init (event); return; } } /* no empty events from previous parsing, create a new one */ event = cob_malloc (sizeof (struct xml_event)); - /* add logic to check for malloc failure */ + /* TODO: add logic to check for malloc failure */ event->event = xml_event; + /* Implicit by zero-initialization in cob_malloc: event->next = NULL; - xml_event_reset_registers (event); + xml_event_init (event); */ if (state->event) { state->event->next = event; } else { @@ -328,7 +333,7 @@ buffer_xml_event_data (struct xml_state *state, const void *data, size_t size) size_t buff_free_size = state->buff_len - buff_off; if (size == 0) { - return -1; + return (size_t)-1; } if (size <= buff_free_size) { @@ -336,8 +341,9 @@ buffer_xml_event_data (struct xml_state *state, const void *data, size_t size) memcpy (state->buff + buff_off, data, size); state->buff_off += size; return buff_off; - - } else { + } + + { /* otherwise: allocate new buffer with additional space, preserving existing data */ const size_t malloc_size = buff_off + size > state->buff_len * 2 ? buff_off + size : state->buff_len * 2; @@ -346,18 +352,18 @@ buffer_xml_event_data (struct xml_state *state, const void *data, size_t size) /* CHECKME: we possibly want to handle out of memory to pass it to COBOL as XML error - but cob_fast_malloc / cob_malloc already abort the runtime in case of missing memory */ - if (mptr) { - memcpy (mptr, state->buff, buff_off); - cob_free (state->buff); - state->buff = mptr; - state->buff_len = malloc_size; - - memcpy (mptr + buff_off, data, size); - state->buff_off += size; - return buff_off; - } else { - return -1; + if (!mptr) { + return (size_t)-1; } + + memcpy (mptr, state->buff, buff_off); + cob_free (state->buff); + state->buff = mptr; + state->buff_len = malloc_size; + + memcpy (mptr + buff_off, data, size); + state->buff_off += size; + return buff_off; } } @@ -1331,7 +1337,7 @@ int cob_xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, const int flags, void **saved_state) { struct xml_state *state; - int xml_code; + enum xml_code_status xml_code; /* no state yet ? first call */ if (*saved_state == NULL) { @@ -1361,7 +1367,7 @@ int cob_xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, /* LCOV_EXCL_STOP */ xml_code = 0; state = cob_malloc (sizeof (struct xml_state)); - memset (state, '\0', sizeof(struct xml_state)); + /* state is zero-initialized */ state->flags = flags; state->input_data_ptr = (const char*)in->data; state->input_data_len = in->size; @@ -1395,7 +1401,7 @@ int cob_xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, /* TODO: handle errors 100,001 to 165,535 with XMLPARSE(COMPAT) positive non-zero values should set the encoding and reset the error */ - if (xml_code != 0) { + if (xml_code != XML_OK) { /* not reset: turn the error into a fatal error */ state->state = XML_PARSER_HAD_FATAL_ERROR; } else { @@ -1414,7 +1420,7 @@ int cob_xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, } /* user-initiated exception condition (-1) */ - if (xml_code == -1) { + if (xml_code == XML_EXIT) { /* xml code stays -1 */ cob_set_exception (COB_EC_XML); xml_free_parse_memory (state); @@ -1424,33 +1430,35 @@ int cob_xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, if (state->state == XML_PARSER_HAD_END_OF_INPUT) { switch (xml_code) { - case 0: + case XML_OK: /* no new data chunk to give to the XML parser */ state->input_data_ptr = NULL; state->input_data_len = 0; state->state = XML_PARSER_NO_NEW_CHUNKS; break; - case 1: + case XML_CONTINUE: /* goes on with parsing note that since we are processing a new chunk of the xml data, we need to set both data pointers */ - xml_code = 0; + xml_code = XML_OK; state->input_data_ptr = (const char*)in->data; state->input_data_len = in->size; state->state = XML_PARSER_READ_CHUNK; break; + default: + /* other cases are handled below */ + break; } - /* other cases are handled below */ } - if (xml_code != 0) { + if (xml_code != XML_OK) { /* note: -1 is handled above, also 1 where possible */ cob_set_exception (COB_EC_XML); if (COB_MODULE_PTR->xml_mode == COB_XML_XMLSS) { /* fatal runtime error for IBM, just warn here for now */ - cob_runtime_warning(_("Unexpected XML-CODE value: %d"), xml_code); + cob_runtime_warning(_("unexpected XML-CODE value: %d"), xml_code); } else { - set_xml_code (-1); + set_xml_code (XML_EXIT); } xml_free_parse_memory (state); *saved_state = NULL; @@ -1494,7 +1502,7 @@ xml_generate (cob_field *out, cob_ml_tree *tree, cob_field *count, int copy_len; int num_newlines = 0; - set_xml_code (XML_STMT_SUCCESSFULL); + set_xml_code (XML_OK); buff = xmlBufferCreate (); if (buff == NULL) { @@ -1632,20 +1640,19 @@ xml_error_handler (void *ctx, LIBXML_CONST_ERROR_PTR err) { switch (state->state) { case XML_PARSER_VALIDATION_SETUP: if (err->file) { - cob_runtime_warning (_("XML PARSE %s for VALIDATE FILE %s:%d (%d): %s"), - severity_str, err->file, err->line, err->code, err->message); + cob_runtime_warning ("XML PARSE VALIDATING FILE %s (%d): %s:%d: %s", + severity_str, err->code, err->file, err->line, err->message); } else { - cob_runtime_warning (_("XML PARSE %s for VALIDATE FILE (%d): %s"), + cob_runtime_warning ("XML PARSE VALIDATING FILE %s (%d): %s", severity_str, err->code, err->message); } break; case XML_PARSER_VALIDATION_SETUP_MEM: - cob_runtime_warning (_("XML PARSE %s for VALIDATE (%d): %s"), + cob_runtime_warning ("XML PARSE VALIDATING %s (%d): %s", severity_str, err->code, err->message); break; default: - cob_runtime_warning (_("XML PARSE %s (%d): %s"), - severity_str, err->code, err->message); + cob_runtime_warning ("XML PARSE %s (%d): %s", severity_str, err->code, err->message); break; } @@ -1712,11 +1719,10 @@ xml_startDocument (void *ctx) { /* standalone is -2 when tag is present without an encoding attribute */ if (standalone != -2) { + new_xml_event (state, EVENT_STANDALONE_DECLARATION); if (standalone) { - new_xml_event (state, EVENT_STANDALONE_DECLARATION); set_xml_event_text (state, "yes", 3); } else { - new_xml_event (state, EVENT_STANDALONE_DECLARATION); set_xml_event_text (state, "no", 2); } } @@ -1903,13 +1909,23 @@ void xml_parse (cob_field *encoding, cob_field *validation, state->sax.serror = xml_error_handler; /* - * The document being in memory, it have no base per RFC 2396. + * The document being in memory, it has no base per RFC 2396. */ state->ctx = xmlCreatePushParserCtxt (&state->sax, state, NULL, 0, NULL); - /* Add this immediately after creating the context: */ - if (state->ctx != NULL) { + if (state->ctx == NULL) { + new_xml_event (state, EVENT_EXCEPTION); + if (COB_MODULE_PTR->xml_mode == COB_XML_XMLSS) { + set_xml_event_exception_code (state, XRC_FATAL << 16); + } else { + set_xml_event_exception_code (state, XML_PARSE_ERROR_MISC_COMPAT); + } + xml_process_next_event (state); + return; + } + + { int options = LIBXML_CTXT_GET_OPTIONS(state->ctx); options &= ~XML_PARSE_NOWARNING; /* Clear the NOWARNING flag */ options &= ~XML_PARSE_NOERROR; /* Also clear NOERROR flag */ @@ -1920,16 +1936,6 @@ void xml_parse (cob_field *encoding, cob_field *validation, /* TODO (later): handle encoding */ cob_free (enc); } - if (state->ctx == NULL) { - new_xml_event (state, EVENT_EXCEPTION); - if (COB_MODULE_PTR->xml_mode == COB_XML_XMLSS) { - set_xml_event_exception_code (state, XRC_FATAL << 16); - } else { - set_xml_event_exception_code (state, XML_PARSE_ERROR_MISC_COMPAT); - } - xml_process_next_event(state); - return; - } /* setup global error handler for every domain that hasn't its own */ xmlSetStructuredErrorFunc (state, xml_error_handler); @@ -2029,13 +2035,13 @@ void xml_parse (cob_field *encoding, cob_field *validation, new_xml_event (state, EVENT_END_OF_INPUT); } -#if 0 - { +#if COB_DEBUG_LOG + if (DEBUG_ISON("xml")) { struct xml_event *event = state->first_event; - for (;event && event->event != EVENT_UNKNOWN; event = event->next) { - printf("Event ==> %30.*s \n", + for (; event && event->event != EVENT_UNKNOWN; event = event->next) { + DEBUG_LOG("xml",("Event ==> %30.*s \n", xml_event_name_len[event->event], - (unsigned char *)xml_event_name[event->event]); + (unsigned char *)xml_event_name[event->event])); } } #endif diff --git a/tests/testsuite.src/run_ml.at b/tests/testsuite.src/run_ml.at index e42e6b3f7..97c0c7a27 100644 --- a/tests/testsuite.src/run_ml.at +++ b/tests/testsuite.src/run_ml.at @@ -842,7 +842,7 @@ XML document successfully parsed. AT_CLEANUP -AT_SETUP([XML PARSE complex XML]) +AT_SETUP([XML PARSE push parser]) AT_KEYWORDS([extensions PARSE]) AT_SKIP_IF([test "$COB_HAS_XML2" = "no"]) From 00d92958d6ccf78da84636a81862fb23e38af83c Mon Sep 17 00:00:00 2001 From: Guillaume Bertholon Date: Tue, 28 Apr 2026 12:41:50 +0200 Subject: [PATCH 08/12] Use empty XML-* registers, instead of NULL, and move LINKAGE without data check later in the parsing. --- libcob/mlio.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/libcob/mlio.c b/libcob/mlio.c index 47072c1fb..666d870bc 100644 --- a/libcob/mlio.c +++ b/libcob/mlio.c @@ -404,30 +404,30 @@ set_xml_event_prefix (struct xml_state *state, const void *data, size_t size) static void set_xml_registers (const int ntext, unsigned char *buff, const struct xml_event *event) { - COB_MODULE_PTR->xml_text->data = NULL; + COB_MODULE_PTR->xml_text->data = (unsigned char *) ""; COB_MODULE_PTR->xml_text->size = 0; - COB_MODULE_PTR->xml_namespace->data = NULL; + COB_MODULE_PTR->xml_namespace->data = (unsigned char *) ""; COB_MODULE_PTR->xml_namespace->size = 0; - COB_MODULE_PTR->xml_namespace_prefix->data = NULL; + COB_MODULE_PTR->xml_namespace_prefix->data = (unsigned char *) ""; COB_MODULE_PTR->xml_namespace_prefix->size = 0; /* XML-NTEXT and other XML-N... special registers are not available with ACUCOBOL */ if (COB_MODULE_PTR->xml_ntext) { - COB_MODULE_PTR->xml_ntext->data = NULL; + COB_MODULE_PTR->xml_ntext->data = (unsigned char *) ""; COB_MODULE_PTR->xml_ntext->size = 0; } if (COB_MODULE_PTR->xml_nnamespace) { - COB_MODULE_PTR->xml_nnamespace->data = NULL; + COB_MODULE_PTR->xml_nnamespace->data = (unsigned char *) ""; COB_MODULE_PTR->xml_nnamespace->size = 0; } if (COB_MODULE_PTR->xml_nnamespace_prefix) { - COB_MODULE_PTR->xml_nnamespace_prefix->data = NULL; + COB_MODULE_PTR->xml_nnamespace_prefix->data = (unsigned char *) ""; COB_MODULE_PTR->xml_nnamespace_prefix->size = 0; } if (buff && event) { if (ntext) { - /* TODO (later): convert input data (libxml2 uses UTF8) to UTF-16 + /* TODO (later): convert input data (libxml2 uses UTF-8) to UTF-16 (or the specified national character set) */ if (event->text_len) { COB_MODULE_PTR->xml_ntext->data = buff + event->text_off; @@ -1365,7 +1365,7 @@ int cob_xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, #endif } /* LCOV_EXCL_STOP */ - xml_code = 0; + xml_code = XML_OK; state = cob_malloc (sizeof (struct xml_state)); /* state is zero-initialized */ state->flags = flags; @@ -1384,14 +1384,6 @@ int cob_xml_parse (cob_field *in, cob_field *encoding, cob_field *validation, in the processing procedure */ set_xml_registers (0, NULL, NULL); - /* LINKAGE or BASED item without data */ - if (!in->data) { - set_xml_exception (XML_INTERNAL_ERROR); - xml_free_parse_memory (state); - *saved_state = NULL; - return 1; - } - if (encoding && is_empty (encoding)) { encoding = NULL; } @@ -1917,7 +1909,7 @@ void xml_parse (cob_field *encoding, cob_field *validation, if (state->ctx == NULL) { new_xml_event (state, EVENT_EXCEPTION); if (COB_MODULE_PTR->xml_mode == COB_XML_XMLSS) { - set_xml_event_exception_code (state, XRC_FATAL << 16); + set_xml_event_exception_code (state, XML_PARSE_ERROR_MISC_XMLSS); } else { set_xml_event_exception_code (state, XML_PARSE_ERROR_MISC_COMPAT); } @@ -2027,6 +2019,14 @@ void xml_parse (cob_field *encoding, cob_field *validation, state->event = state->first_event; state->buff_off = 0; + /* LINKAGE or BASED item without data: also validated by runtime checks by the caller (when enabled) */ + if (state->input_data_len && !state->input_data_ptr) { + state->last_xml_code = XML_INTERNAL_ERROR; + state->state = XML_PARSER_HAD_FATAL_ERROR; + set_xml_exception (XML_INTERNAL_ERROR); + return; + } + state->err = xmlParseChunk (state->ctx, state->input_data_ptr, state->input_data_len, end_of_parsing); if (end_of_parsing) { From c0ac920357213e6c23de1bd252817d8dcc914beb Mon Sep 17 00:00:00 2001 From: Guillaume Bertholon Date: Wed, 29 Apr 2026 14:52:52 +0200 Subject: [PATCH 09/12] Handle incomplete CONTENT-CHARACTERS events (with XML-INFORMATION = 2). Note that, compared to IBM, we may merge short contiguous CONTENT-CHARACTERS events across END-OF-INPUT boundaries. This is due to libxml2 internal details. Also improve some tests to check predefined entities and long content. --- libcob/ChangeLog | 4 + libcob/mlio.c | 100 +++++++++++++++++++--- tests/testsuite.src/run_ml.at | 156 ++++++++++++++++++++-------------- 3 files changed, 184 insertions(+), 76 deletions(-) diff --git a/libcob/ChangeLog b/libcob/ChangeLog index 057381e1b..3b8efd52c 100644 --- a/libcob/ChangeLog +++ b/libcob/ChangeLog @@ -14,6 +14,10 @@ * mlio.c: Handle XML chunks with more than one recoverable error * mlio.c: Trigger ON EXCEPTION code after EXCEPTION XML events * mlio.c: Remove spurious events when there is no declaration in the file + * mlio.c: Handle incomplete CONTENT-CHARACTERS events (with XML-INFORMATION = 2). + Note that, compared to IBM, we may merge short contiguous CONTENT-CHARACTERS + events across END-OF-INPUT boundaries. + This is due to libxml2 internal details. 2025-12-04 Simon Sobisch diff --git a/libcob/mlio.c b/libcob/mlio.c index 666d870bc..734179d0e 100644 --- a/libcob/mlio.c +++ b/libcob/mlio.c @@ -140,6 +140,12 @@ enum xml_code_status { XML_PARSE_NOT_VALID_MISC_XMLSS = XRC_NOT_VALID << 16, }; +enum xml_information { + XML_INFORMATION_NONE = 0, + XML_INFORMATION_COMPLETE = 1, + XML_INFORMATION_INCOMPLETE = 2, +}; + enum xml_parser_state { XML_PARSER_NOT_STARTED = 0, XML_PARSER_VALIDATION_SETUP, @@ -184,6 +190,7 @@ struct xml_event { enum cob_xml_event event; struct xml_event *next; /* pointer to next element */ enum xml_code_status xml_code; /* the XML-CODE of the event (0 unless event is EXCEPTION) */ + enum xml_information xml_information; /* the XML-INFORMATION of the event */ size_t text_off; /* text offset in buff */ size_t text_len; /* length of this text */ size_t namespace_off; /* namespace offset in buff */ @@ -192,6 +199,8 @@ struct xml_event { size_t prefix_len; /* length of this prefix */ }; +#define XML_PREVIOUS_INCOMPLETE_CONTENT_CHARACTERS (1 << 16) /* flag for xml_state */ + struct xml_state { enum xml_parser_state state; enum xml_code_status last_xml_code; @@ -272,7 +281,8 @@ set_xml_event (enum cob_xml_event event) static COB_INLINE COB_A_INLINE void xml_event_init (struct xml_event *event) { - event->xml_code = 0; + event->xml_code = XML_OK; + event->xml_information = XML_INFORMATION_NONE; event->text_len = 0; event->prefix_len = 0; event->namespace_len = 0; @@ -369,22 +379,46 @@ buffer_xml_event_data (struct xml_state *state, const void *data, size_t size) /* set the exception code of the current xml event */ static void -set_xml_event_exception_code (struct xml_state *state, int xml_code) +set_xml_event_exception_code (struct xml_state *state, enum xml_code_status xml_code) { state->event->xml_code = xml_code; } +/* set the XML-INFORMATION of the current xml event */ +static void +set_xml_event_information (struct xml_state *state, enum xml_information xml_information) +{ + state->event->xml_information = xml_information; +} + /* set text of current event by placing it into the event data buffer */ static void -set_xml_event_text (struct xml_state *state, const void *data, size_t size) +set_xml_event_text (struct xml_state *state, const void *data, size_t size) { state->event->text_off = buffer_xml_event_data (state, data, size); state->event->text_len = size; } +/* Extend the text of the current event, by completing the last string of the data buffer. + The current event text MUST be the last string of the data buffer, else this function crashes. */ +static void +extend_xml_event_text (struct xml_state *state, const void *data, size_t size) +{ + /* FIXME: replace this define by a general one (COB_TREE_DEBUG) _was_ for debugging + the parse tree only ... */ +#if defined (COB_TREE_DEBUG) || defined (_DEBUG) + if (state->event->text_off + state->event->text_len != state->buff_off) { + cob_runtime_error ("current event text is not last in the data buffer when calling extern_xml_event_text"); + cob_hard_failure (); + } +#endif + buffer_xml_event_data (state, data, size); + state->event->text_len += size; +} + /* set namespace of current event by placing it into the event data buffer */ static void -set_xml_event_namespace (struct xml_state *state, const void *data, size_t size) +set_xml_event_namespace (struct xml_state *state, const void *data, size_t size) { state->event->namespace_off = buffer_xml_event_data (state, data, size); state->event->namespace_len = size; @@ -397,6 +431,19 @@ set_xml_event_prefix (struct xml_state *state, const void *data, size_t size) state->event->prefix_off = buffer_xml_event_data (state, data, size); state->event->prefix_len = size; } + +static void +finalize_xml_content_characters (struct xml_state *state) { + if (state->event && state->event->event == EVENT_CONTENT_CHARACTERS) { + /* Last event was CONTENT_CHARACTERS: mark it as complete */ + set_xml_event_information (state, XML_INFORMATION_COMPLETE); + } else if (state->flags & XML_PREVIOUS_INCOMPLETE_CONTENT_CHARACTERS) { + /* Notify the end of character content of the previous chunk with an empty character event. */ + new_xml_event (state, EVENT_CONTENT_CHARACTERS); + set_xml_event_information (state, XML_INFORMATION_COMPLETE); + } + state->flags &= ~XML_PREVIOUS_INCOMPLETE_CONTENT_CHARACTERS; +} #endif /* defined (WITH_XML2) */ /* set special registers XML-TEXT / XML-NTEXT, XML-NAMESPACE / XML-NNAMESPACE as well @@ -1724,6 +1771,7 @@ xml_startDocument (void *ctx) { static void xml_comment (void *ctx, const xmlChar *content) { struct xml_state *state = ctx; + finalize_xml_content_characters (state); new_xml_event (state, EVENT_COMMENT); set_xml_event_text (state, content, xmlStrlen (content)); } @@ -1733,6 +1781,7 @@ xml_processingInstruction (void *ctx, const xmlChar *target, const xmlChar *data) { struct xml_state *state = ctx; + finalize_xml_content_characters (state); new_xml_event (state, EVENT_PROCESSING_INSTRUCTION_TARGET); set_xml_event_text (state, target, xmlStrlen (target)); new_xml_event (state, EVENT_PROCESSING_INSTRUCTION_DATA); @@ -1747,6 +1796,7 @@ xml_startElementNs (void *ctx, int cntr, attr_value_len; struct xml_state *state = ctx; + finalize_xml_content_characters (state); new_xml_event (state, EVENT_START_OF_ELEMENT); set_xml_event_text (state, localname, xmlStrlen (localname)); if (prefix) { @@ -1789,6 +1839,8 @@ xml_startElementNs (void *ctx, set_xml_event_namespace (state, attr_namespace, xmlStrlen(attr_namespace)); new_xml_event (state, EVENT_ATTRIBUTE_CHARACTERS); set_xml_event_text (state, attr_value_start, attr_value_len); + /* ATTRIBUTE_CHARACTERS are always complete with this API */ + set_xml_event_information (state, XML_INFORMATION_COMPLETE); } } @@ -1796,6 +1848,7 @@ static void xml_endElementNs (void *ctx, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI) { struct xml_state *state = ctx; + finalize_xml_content_characters (state); new_xml_event (state, EVENT_END_OF_ELEMENT); set_xml_event_text (state, localname, xmlStrlen (localname)); if (prefix) { @@ -1809,6 +1862,7 @@ xml_endElementNs (void *ctx, static void xml_startElement (void *ctx, const xmlChar *name, const xmlChar **atts) { struct xml_state *state = ctx; + finalize_xml_content_characters (state); new_xml_event (state, EVENT_START_OF_ELEMENT); set_xml_event_text (state, name, xmlStrlen (name)); } @@ -1816,6 +1870,7 @@ xml_startElement (void *ctx, const xmlChar *name, const xmlChar **atts) { static void xml_endElement (void *ctx, const xmlChar *name) { struct xml_state *state = ctx; + finalize_xml_content_characters (state); new_xml_event (state, EVENT_END_OF_ELEMENT); set_xml_event_text (state, name, xmlStrlen (name)); } @@ -1823,8 +1878,20 @@ xml_endElement (void *ctx, const xmlChar *name) { static void xml_characters (void *ctx, const xmlChar *content, int len) { struct xml_state *state = ctx; - new_xml_event (state, EVENT_CONTENT_CHARACTERS); - set_xml_event_text (state, content, len); + /* TODO (later): Unlike XMLSS, COMPAT mode is supposed to send a + CONTENT-CHARACTER (without final S) event for predefined entities + (like &) */ + if (state->event && state->event->event == EVENT_CONTENT_CHARACTERS) { + /* Avoid sending more events than expected, + especially on predefined entities (like &) */ + extend_xml_event_text (state, content, len); + } else { + new_xml_event (state, EVENT_CONTENT_CHARACTERS); + set_xml_event_text (state, content, len); + set_xml_event_information (state, XML_INFORMATION_INCOMPLETE); + /* if the next event calls finalize_content_characters, + event information will become XML_INFORMATION_COMPLETE. */ + } } static void @@ -1844,6 +1911,9 @@ xml_internalSubset(void *ctx, static void xml_cdata (void *ctx, const xmlChar *content, int len) { struct xml_state *state = ctx; + /* TODO: recheck how XML-INFORMATION on CONTENT-CHARACTER interacts with CDATA */ + finalize_xml_content_characters (state); + new_xml_event (state, EVENT_START_OF_CDATA_SECTION); if (COB_MODULE_PTR->xml_mode == COB_XML_COMPAT) { set_xml_event_text (state, "xml_mode == COB_XML_COMPAT) { @@ -2032,6 +2103,16 @@ void xml_parse (cob_field *encoding, cob_field *validation, if (end_of_parsing) { state->state = XML_PARSER_FINISHED; } else { + if (state->ctx->input && state->ctx->input->cur < state->ctx->input->end) { + /* Something is queued in the input buffer. If it starts with '<', + the parser saw markup-start and any prior text is complete. */ + if (*state->ctx->input->cur == '<') { + finalize_xml_content_characters (state); + } + } else if (state->event && state->event->event == EVENT_CONTENT_CHARACTERS) { + /* Remember that the last event is an unfinished content characters event */ + state->flags |= XML_PREVIOUS_INCOMPLETE_CONTENT_CHARACTERS; + } new_xml_event (state, EVENT_END_OF_INPUT); } @@ -2084,12 +2165,7 @@ xml_process_next_event (struct xml_state *state) set_xml_registers (ntext, state->buff, event); if (COB_MODULE_PTR->xml_information) { - /* IBM doc states that we should store 1 in XML-INFORMATION on events - ATTRIBUTE-CHARACTERS and CONTENT-CHARACTERS if the value in XML-TEXT - is complete. It seems to be always the case with libxml2. */ - const int info = event->event == EVENT_ATTRIBUTE_CHARACTERS - || event->event == EVENT_CONTENT_CHARACTERS ? 1 : 0; - cob_set_int (COB_MODULE_PTR->xml_information, info); + cob_set_int (COB_MODULE_PTR->xml_information, event->xml_information); } state->event = event->next; diff --git a/tests/testsuite.src/run_ml.at b/tests/testsuite.src/run_ml.at index 97c0c7a27..ec23c24c9 100644 --- a/tests/testsuite.src/run_ml.at +++ b/tests/testsuite.src/run_ml.at @@ -778,9 +778,8 @@ AT_CLEANUP AT_SETUP([XML PARSE]) AT_KEYWORDS([extensions PARSE]) -AT_SKIP_IF([test "$COB_HAS_XML2" = "no"]) - -AT_DATA([prog.cob],[ +AT_SKIP_IF([[test "$COB_HAS_XML2" = "no"]]) +AT_DATA([prog.cob], [ Identification division. PROGRAM-ID. IBMXML. Data division. @@ -790,8 +789,8 @@ AT_DATA([prog.cob],[ ****************************************************************** 01 xml-document. 02 pic x(10) value ''. - 02 pic x(33) value ''. - 02 pic x(29) value 'Ham + turkey'. + 02 pic x(28) value ''. + 02 pic x(25) value 'Ham + turkey'. 02 pic x(34) value 'Cheese, lettuce, tomato, '. 02 pic x(32) value 'and that''s all, Folks!'. 02 pic x(28) value '$4.99'. @@ -813,18 +812,18 @@ AT_DATA([prog.cob],[ display 'event: ' xml-event ', text: {' xml-text '}'. ]) -AT_CHECK([$COMPILE prog.cob], [0], [], []) -AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [event: START-OF-DOCUMENT , text: {} +AT_CHECK([$COMPILE prog.cob]) + +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], +[event: START-OF-DOCUMENT , text: {} event: START-OF-ELEMENT , text: {sandwich} event: START-OF-ELEMENT , text: {bread} event: ATTRIBUTE-NAME , text: {type} event: ATTRIBUTE-CHARACTERS , text: {baker's best} event: END-OF-ELEMENT , text: {bread} -event: CONTENT-CHARACTERS , text: { } event: START-OF-ELEMENT , text: {meat} event: CONTENT-CHARACTERS , text: {Ham + turkey} event: END-OF-ELEMENT , text: {meat} -event: CONTENT-CHARACTERS , text: { } event: START-OF-ELEMENT , text: {filling} event: CONTENT-CHARACTERS , text: {Cheese, lettuce, tomato, and that's all, Folks!} event: END-OF-ELEMENT , text: {filling} @@ -838,16 +837,16 @@ event: END-OF-ELEMENT , text: {sandwich} event: END-OF-INPUT , text: {} event: END-OF-DOCUMENT , text: {} XML document successfully parsed. -], []) +]) + AT_CLEANUP AT_SETUP([XML PARSE push parser]) AT_KEYWORDS([extensions PARSE]) -AT_SKIP_IF([test "$COB_HAS_XML2" = "no"]) - -AT_DATA([prog.cob],[ +AT_SKIP_IF([[test "$COB_HAS_XML2" = "no"]]) +AT_DATA([prog.cob], [[ Identification division. PROGRAM-ID. IBMXML. Data division. @@ -871,13 +870,13 @@ AT_DATA([prog.cob],[ 2 pic x(19) value ' standalone="yes"?>'. 2 pic x(39) value ''. 2 pic x(10) value ''. - 2 pic x(33) value ''. + 2 pic x(33) value ''. 2 pic x(36) value ''. - 2 pic x(29) value 'Ham + turkey'. + 2 pic x(29) value 'Ham & turkey'. 2 pic x(34) value 'Cheese, lettuce, tomato, '. 2 pic x(32) value 'and that''s all, Folks!'. - 2 pic x(25) value ' element!@:>@@:>@>'. + 2 pic x(25) value ' element!]]>'. 2 pic x(28) value '$4.99'. 2 pic x(25) value '0.10'. 2 pic x(31) value ''. @@ -925,7 +924,7 @@ AT_DATA([prog.cob],[ When 'CONTENT-CHARACTERS' * ==> In general, a split can occur for any element or attribute * ==> data, but in this sample, it only occurs for "filling"... - * In practice, with GnuCOBOL, there is no split in XML events + * With GnuCOBOL, there is no split because filling is too short DISPLAY 'XML-INFORMATION: ' XML-INFORMATION If xml-information = 2 and current-element(element-depth) not = 'filling' @@ -987,10 +986,12 @@ AT_DATA([prog.cob],[ When other continue End-evaluate. -]) +]]) -AT_CHECK([$COMPILE prog.cob], [0], [], []) -AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [len: 400 +AT_CHECK([$COMPILE prog.cob]) + +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], +[[len: 400 Initial segment { Ham + turkeyHam & turkey Cheese, lettuce, tomato, a} +Next segment: {meat>Cheese, lettuce, tomato, a} event: END-OF-ELEMENT , text: {meat} -event: CONTENT-CHARACTERS , text: { } -XML-INFORMATION: +000000001 event: START-OF-ELEMENT , text: {filling} event: END-OF-INPUT , text: {} segment-num: +0007 -Next segment: {nd that's all, Folks! element!@:>@@:>@> element!]]> element!} XML-INFORMATION: +000000001 @@ -1065,16 +1062,16 @@ XML document successfully parsed. Sandwich list price: $4.99 Promotional price: $4.49 Get one today! -], []) +]]) + AT_CLEANUP AT_SETUP([XML PARSE long text]) AT_KEYWORDS([extensions PARSE]) -AT_SKIP_IF([test "$COB_HAS_XML2" = "no"]) - -AT_DATA([prog.cob],[ +AT_SKIP_IF([[test "$COB_HAS_XML2" = "no"]]) +AT_DATA([prog.cob], [[ Identification division. PROGRAM-ID. IBMXML. Data division. @@ -1082,27 +1079,20 @@ AT_DATA([prog.cob],[ ****************************************************************** * XML document data, encoded as initial values of data items. * ****************************************************************** - 01 xml-document. - 02 pic x(10) value ''. - 02 pic x(40) value 'Lorem ipsum dolor sit amet,'. - 02 pic x(40) value 'consectetur adipiscing elit.'. - 02 pic x(40) value 'Sed vitae scelerisque mauris.'. - 02 pic x(40) value 'Ut suscipit faucibus ultrices.'. - 02 pic x(40) value 'Proin lacinia luctus imperdiet.'. - 02 pic x(40) value 'Fusce sit amet augue nisi.'. - 02 pic x(40) value 'Fusce a justo nec nisl luctus laoreet.'. - 02 pic x(40) value 'Nulla rutrum fermentum dui quis tincidun'. - 02 pic x(40) value 'Donec neque dui, maximus cursus felis eu'. - 02 pic x(40) value 'commodo vulputate ipsum.'. - 02 pic x(40) value 'Morbi laoreet risus congue,'. - 02 pic x(40) value 'dictum dolor tincidunt,'. - 02 pic x(40) value 'fermentum diam.'. - 02 pic x(40) value 'Mauris placerat volutpat nisl,'. - 02 pic x(40) value 'nec malesuada enim ut.'. - 02 pic x(10) value ''. + 01 xml-start-tag pic x(20) value ''. + 01 lorem-ipsum. + 02 pic x(28) value 'Lorem ipsum dolor sit amet, '. + 02 pic x(28) value 'consectetur adipiscing elit.'. + 01 xml-cdata. + 02 pic x(33) value ''. + 01 xml-end-tag pic x(20) value ''. + 01 xml-buffer pic x(63). + 01 nb-buffer pic 99 value 0. Procedure division. Mainline section. - XML parse xml-document + Move xml-start-tag to xml-buffer. + XML parse xml-buffer processing procedure XML-handler ON EXCEPTION Display 'XML processing error, XML-Code=' XML-Code '.' @@ -1113,18 +1103,56 @@ AT_DATA([prog.cob],[ Move 0 to return-code Goback. XML-handler section. - display 'event: ' xml-event ', text: {' xml-text '}'. -]) + Display 'event: ' xml-event ', info: ' xml-information + ', text: {' xml-text '}'. + Evaluate XML-Event + When 'END-OF-INPUT' + Add 1 to nb-buffer + if nb-buffer = 6 + Move 1 to xml-code + Move xml-cdata to xml-buffer + else if nb-buffer = 12 + Move 1 to xml-code + Move xml-end-tag to xml-buffer + else if nb-buffer < 12 + Move 1 to xml-code + Move lorem-ipsum to xml-buffer + end-if + When other + continue + End-evaluate. +]]) -AT_CHECK([$COMPILE prog.cob], [0], [], []) -AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [event: START-OF-DOCUMENT , text: {} -event: START-OF-ELEMENT , text: {lorem} -event: CONTENT-CHARACTERS , text: { Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed vitae scelerisque mauris. Ut suscipit faucibus ultrices. Proin lacinia luctus imperdiet. Fusce sit amet augue nisi. Fusce a justo nec nisl luctus laoreet. Nulla rutrum fermentum dui quis tincidunDonec neque dui, maximus cursus felis eucommodo vulputate ipsum. Morbi laoreet risus congue, dictum dolor tincidunt, fermentum diam. Mauris placerat volutpat nisl, nec malesuada enim ut. } -event: END-OF-ELEMENT , text: {lorem} -event: END-OF-INPUT , text: {} -event: END-OF-DOCUMENT , text: {} +AT_CHECK([$COMPILE prog.cob]) + +AT_CHECK([$COBCRUN_DIRECT ./prog], [0], +[event: START-OF-DOCUMENT , info: +000000000, text: {} +event: START-OF-ELEMENT , info: +000000000, text: {lorem} +event: END-OF-INPUT , info: +000000000, text: {} +event: END-OF-INPUT , info: +000000000, text: {} +event: END-OF-INPUT , info: +000000000, text: {} +event: END-OF-INPUT , info: +000000000, text: {} +event: CONTENT-CHARACTERS , info: +000000002, text: { Lorem ipsum dolor sit amet, consectetur adipiscing elit. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Lorem ipsum dolor sit amet, consectetur adipiscing elit. } +event: END-OF-INPUT , info: +000000000, text: {} +event: END-OF-INPUT , info: +000000000, text: {} +event: CONTENT-CHARACTERS , info: +000000001, text: {Lorem ipsum dolor sit amet, consectetur adipiscing elit. } +event: START-OF-CDATA-SECTION , info: +000000000, text: {} +event: CONTENT-CHARACTERS , info: +000000001, text: {Quousque tandem abutere Catilina patientia nostra ?} +event: END-OF-CDATA-SECTION , info: +000000000, text: {} +event: END-OF-INPUT , info: +000000000, text: {} +event: END-OF-INPUT , info: +000000000, text: {} +event: END-OF-INPUT , info: +000000000, text: {} +event: END-OF-INPUT , info: +000000000, text: {} +event: END-OF-INPUT , info: +000000000, text: {} +event: CONTENT-CHARACTERS , info: +000000002, text: {Lorem ipsum dolor sit amet, consectetur adipiscing elit. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Lorem ipsum dolor sit amet, consectetur adipiscing elit. } +event: END-OF-INPUT , info: +000000000, text: {} +event: CONTENT-CHARACTERS , info: +000000001, text: {} +event: END-OF-ELEMENT , info: +000000000, text: {lorem} +event: END-OF-INPUT , info: +000000000, text: {} +event: END-OF-DOCUMENT , info: +000000000, text: {} XML document successfully parsed. -], []) +]) + AT_CLEANUP From fd1ce2ccb4977aac9d00085b451d1a2f3e464a25 Mon Sep 17 00:00:00 2001 From: Guillaume Bertholon Date: Mon, 4 May 2026 14:52:33 +0200 Subject: [PATCH 10/12] Implement IBM XMLSS EXCEPTION code for undeclared prefix Also revert the undeclared namespace test to the IBM original one since this is fully implemented now. --- libcob/mlio.c | 30 ++++++++++++++++++++++++++---- tests/testsuite.src/run_ml.at | 24 ++++++++++-------------- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/libcob/mlio.c b/libcob/mlio.c index 734179d0e..212dccbc4 100644 --- a/libcob/mlio.c +++ b/libcob/mlio.c @@ -136,6 +136,8 @@ enum xml_code_status { XML_INVALID_NAMESPACE_PREFIX = 419, XML_INTERNAL_ERROR = 600, XML_PARSE_WARNING_MISC_XMLSS = XRC_WARNING << 16, + XML_PARSE_WARNING_NS_ATTR_PREFIX_NOT_DECL = (XRC_WARNING << 16) | 0x800, + XML_PARSE_WARNING_NS_ELEM_PREFIX_NOT_DECL = (XRC_WARNING << 16) | 0x801, XML_PARSE_ERROR_MISC_XMLSS = XRC_FATAL << 16, XML_PARSE_NOT_VALID_MISC_XMLSS = XRC_NOT_VALID << 16, }; @@ -1702,14 +1704,34 @@ xml_error_handler (void *ctx, LIBXML_CONST_ERROR_PTR err) { /* TODO: According to IBM we should put the the prefix of the last chunk that occurs before the error in XML-TEXT. - In practice, it seems to be different for namespace recoverable errors */ + In practice, it is different at least for namespace-related recoverable errors. */ /* Set the XML exception code: - For now, we do not try to follow the error codes of IBM. - But we still try to categorize the error between non-recoverable and recoverable. */ + At minimum, we need to categorize the error between non-recoverable and recoverable. + For some errors, we follow IBM's error code and in that case we also include the correct XML-TEXT. + Even when we send the correct error code, we do not guarantee that the EXCEPTION events + arrives at the same time as with IBM's parser. */ if (COB_MODULE_PTR->xml_mode == COB_XML_XMLSS) { - set_xml_event_exception_code (state, severity << 16); + switch (err->code) { + case XML_NS_ERR_UNDEFINED_NAMESPACE: + set_xml_event_text (state, err->str1, strlen (err->str1)); + extend_xml_event_text (state, ":", 1); + extend_xml_event_text (state, err->str2, strlen (err->str2)); + /* libxml2 fills str3 with the name of the element surrounding the attribute. + We can therefore check if it is present to distinguish between the two cases. */ + if (err->str3) { + set_xml_event_exception_code (state, XML_PARSE_WARNING_NS_ATTR_PREFIX_NOT_DECL); + } else { + set_xml_event_exception_code (state, XML_PARSE_WARNING_NS_ELEM_PREFIX_NOT_DECL); + } + break; + default: + /* Handle errors that are not mapped yet to corresponding IBM errors by simply sending a severity. */ + set_xml_event_exception_code (state, severity << 16); + break; + } } else { + /* In COMPAT mode, we do not yet try to follow IBM's error codes */ if (err->level == XML_ERR_FATAL) { set_xml_event_exception_code (state, XML_PARSE_ERROR_MISC_COMPAT); } else { diff --git a/tests/testsuite.src/run_ml.at b/tests/testsuite.src/run_ml.at index ec23c24c9..c3bee8c6d 100644 --- a/tests/testsuite.src/run_ml.at +++ b/tests/testsuite.src/run_ml.at @@ -1243,8 +1243,7 @@ AT_SETUP([XML PARSE undeclared namespaces]) AT_KEYWORDS([extensions PARSE]) AT_SKIP_IF([[test "$COB_HAS_XML2" = "no"]]) - -AT_DATA([prog.cob],[ +AT_DATA([prog.cob], [ Identification division. Program-id. XMLup. Data division. @@ -1266,35 +1265,32 @@ AT_DATA([prog.cob],[ display xml-event xml-code '|' xml-text '|' xml-namespace-prefix '|' xml-namespace '|' - * In the original IBM example they check specifically the two exceptions - * codes for undeclared namespaces: 264192 and 264193 - * We do not yet support these IBM code - * -> ignore all recoverable errors for now - * if xml-event = 'EXCEPTION' and xml-code = 264192 or 264193 + if xml-event = 'EXCEPTION' and xml-code = 264192 or 264193 move 0 to xml-code - * end-if + end-if . End program XMLup. ]) AT_CHECK([$COMPILE prog.cob]) + AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [START-OF-DOCUMENT +000000000|||| -EXCEPTION +000262144|||| +EXCEPTION +000264193|pfx0:root||| START-OF-ELEMENT +000000000|root|pfx0|| NAMESPACE-DECLARATION +000000000||pfx1|http://whatever| START-OF-ELEMENT +000000000|localElName1|pfx1|http://whatever| -EXCEPTION +000262144|||| +EXCEPTION +000264193|pfx2:localElName2||| START-OF-ELEMENT +000000000|localElName2|pfx2|| END-OF-ELEMENT +000000000|localElName2|pfx2|| -EXCEPTION +000262144|||| -EXCEPTION +000262144|||| +EXCEPTION +000264192|pfx4:localAtName4||| +EXCEPTION +000264193|pfx3:localElName3||| START-OF-ELEMENT +000000000|localElName3|pfx3|| ATTRIBUTE-NAME +000000000|localAtName4|pfx4|| ATTRIBUTE-CHARACTERS +000000000|||| CONTENT-CHARACTERS +000000000|c1||| -EXCEPTION +000262144|||| -EXCEPTION +000262144|||| +EXCEPTION +000264192|pfx6:localAtName6||| +EXCEPTION +000264193|pfx5:localElName5||| START-OF-ELEMENT +000000000|localElName5|pfx5|| ATTRIBUTE-NAME +000000000|localAtName6|pfx6|| ATTRIBUTE-CHARACTERS +000000000|||| From dd3cf10196f140bede9b0021512df731716121bf Mon Sep 17 00:00:00 2001 From: Guillaume Bertholon Date: Tue, 5 May 2026 19:15:05 +0200 Subject: [PATCH 11/12] Send libxml2 error message as XML-TEXT for unhandled errors --- libcob/mlio.c | 16 ++++++++++++---- tests/testsuite.src/run_ml.at | 9 +++++---- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/libcob/mlio.c b/libcob/mlio.c index 212dccbc4..ad2c4f241 100644 --- a/libcob/mlio.c +++ b/libcob/mlio.c @@ -138,7 +138,7 @@ enum xml_code_status { XML_PARSE_WARNING_MISC_XMLSS = XRC_WARNING << 16, XML_PARSE_WARNING_NS_ATTR_PREFIX_NOT_DECL = (XRC_WARNING << 16) | 0x800, XML_PARSE_WARNING_NS_ELEM_PREFIX_NOT_DECL = (XRC_WARNING << 16) | 0x801, - XML_PARSE_ERROR_MISC_XMLSS = XRC_FATAL << 16, + XML_PARSE_ERROR_MISC_XMLSS = XRC_NOT_WELL_FORMED << 16, XML_PARSE_NOT_VALID_MISC_XMLSS = XRC_NOT_VALID << 16, }; @@ -1661,7 +1661,7 @@ xml_error_handler (void *ctx, LIBXML_CONST_ERROR_PTR err) { severity = XRC_NOT_VALID; severity_str = _("validation error"); } else if (err->level == XML_ERR_FATAL) { - severity = XRC_FATAL; + severity = XRC_NOT_WELL_FORMED; severity_str = _("non-recoverable error"); } else if (err->level == XML_ERR_ERROR) { /* IBM reports recoverable errors with the XRC_WARNING severity */ @@ -1726,8 +1726,16 @@ xml_error_handler (void *ctx, LIBXML_CONST_ERROR_PTR err) { } break; default: - /* Handle errors that are not mapped yet to corresponding IBM errors by simply sending a severity. */ - set_xml_event_exception_code (state, severity << 16); + /* Handle errors that are not mapped yet to corresponding IBM errors by sending + a severity and the error message in XML-TEXT. */ + { + char err_text_buf[COB_MINI_BUFF]; + set_xml_event_exception_code (state, severity << 16); + snprintf (err_text_buf, COB_MINI_BUFF, _("unhandled internal XML %s (%d): "), severity_str, err->code); + err_text_buf[COB_MINI_MAX] = 0; /* for MSVC compat */ + set_xml_event_text (state, err_text_buf, strlen (err_text_buf)); + extend_xml_event_text (state, err->message, strlen (err->message)); + } break; } } else { diff --git a/tests/testsuite.src/run_ml.at b/tests/testsuite.src/run_ml.at index c3bee8c6d..050a5671b 100644 --- a/tests/testsuite.src/run_ml.at +++ b/tests/testsuite.src/run_ml.at @@ -707,7 +707,6 @@ AT_SETUP([XML register data after failure]) AT_KEYWORDS([extensions XML-CODE XML-EVENT PARSE]) AT_SKIP_IF([[test "$COB_HAS_XML2" = "no"]]) - AT_DATA([prog.cob], [ PROGRAM-ID. prog. DATA DIVISION. @@ -724,21 +723,23 @@ AT_DATA([prog.cob], [ GOBACK. PROC. DISPLAY 'proc : ' XML-CODE ' - "' XML-EVENT '"'. + IF XML-EVENT = 'EXCEPTION' THEN DISPLAY XML-TEXT. ]) AT_CHECK([$COMPILE prog.cob]) + AT_CHECK([$COBCRUN_DIRECT ./prog], [0], [proc : +000000000 - "START-OF-DOCUMENT " -proc : +001048576 - "EXCEPTION " +proc : +000786432 - "EXCEPTION " +unhandled internal XML non-recoverable error (68): StartTag: invalid element name XML PARSE aborted -after : +001048576 - "EXCEPTION " +after : +000786432 - "EXCEPTION " ], [libcob: prog.cob:8: warning: XML PARSE non-recoverable error (68): StartTag: invalid element name ]) AT_CLEANUP - AT_SETUP([XML PARSE declaration]) AT_KEYWORDS([extensions XML-CODE XML-EVENT PARSE]) From 6bbabfe355f2a0d347426a9d09c868f15e2d576a Mon Sep 17 00:00:00 2001 From: Guillaume Bertholon Date: Wed, 6 May 2026 18:33:26 +0200 Subject: [PATCH 12/12] Adapt MF dialect to include XML-* registers XML-INFORMATION and XML-*NAMESPACE* are not available with MF. --- config/mf.words | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/config/mf.words b/config/mf.words index 36ea8e146..706de4584 100644 --- a/config/mf.words +++ b/config/mf.words @@ -678,15 +678,7 @@ reserved: WRITE reserved: WRITING reserved: XML reserved: XML-DECLARATION* -reserved: XML-EVENT # note: this is a register, move as soon as supported -#reserved: XML-INFORMATION # note: this is a register, move as soon as supported -#reserved: XML-NAMESPACE # note: this is a register, move as soon as supported -#reserved: XML-NAMESPACE-PREFIX # note: this is a register, move as soon as supported -#reserved: XML-NNAMESPACE # note: this is a register, move as soon as supported -#reserved: XML-NNAMESPACE-PREFIX # note: this is a register, move as soon as supported -reserved: XML-NTEXT # note: this is a register, move as soon as supported reserved: XML-SCHEMA -reserved: XML-TEXT # note: this is a register, move as soon as supported reserved: YYYYDDD* reserved: YYYYMMDD* reserved: ZERO @@ -721,14 +713,14 @@ register: SORT-RETURN # register: TIME-OF-DAY # only available in OS/VS mode # register: WHEN-COMPILED # only available in OS/VS or VSC2 modes (note the format of the date differs with the mode!) register: XML-CODE -# register: XML-EVENT +register: XML-EVENT # register: XML-INFORMATION # register: XML-NAMESPACE # register: XML-NAMESPACE-PREFIX # register: XML-NNAMESPACE # register: XML-NNAMESPACE-PREFIX -# register: XML-NTEXT -# register: XML-TEXT +register: XML-NTEXT +register: XML-TEXT # disable all functions not-intrinsic-function: DIALECT-ALL