|
| 1 | +# This code has been ported from Odoo 18.0 web_editor module. |
| 2 | +# License of this part should remain LGPL-3.0 or later following Odoo's licensing. |
| 3 | +# Part of Odoo. See LICENSE file for full copyright and licensing details. |
| 4 | + |
| 5 | +import re |
| 6 | +from difflib import SequenceMatcher |
| 7 | + |
| 8 | +# ------------------------------------------------------------ |
| 9 | +# Patch and comparison functions |
| 10 | +# ------------------------------------------------------------ |
| 11 | + |
| 12 | + |
# Separator placed between the operations of a patch.
OPERATION_SEPARATOR = "\n"
# Contents are cut into "lines" on this character, i.e. right before every
# HTML tag.
LINE_SEPARATOR = "<"

# Markers used inside an operation: "@" precedes the line index and ":"
# precedes the patch content.
PATCH_OPERATION_LINE_AT = "@"
PATCH_OPERATION_CONTENT = ":"

# Operation types, written at the very beginning of an operation.
PATCH_OPERATION_ADD = "+"
PATCH_OPERATION_REMOVE = "-"
PATCH_OPERATION_REPLACE = "R"

# Mapping from difflib opcode tags to patch operation types.
PATCH_OPERATIONS = {
    "insert": PATCH_OPERATION_ADD,
    "delete": PATCH_OPERATION_REMOVE,
    "replace": PATCH_OPERATION_REPLACE,
}

# HTML attributes stripped from the contents before they are compared.
HTML_ATTRIBUTES_TO_REMOVE = [
    "data-last-history-steps",
]


# Splits a line in two groups: everything up to (and including) the first ">",
# then the remaining text.
HTML_TAG_ISOLATION_REGEX = r"^([^>]*>)(.*)$"
# Replacement templates wrapping the text group in <added> / <removed>.
ADDITION_COMPARISON_REGEX = r"\1<added>\2</added>"
ADDITION_1ST_REPLACE_COMPARISON_REGEX = r"added>\2</added>"
DELETION_COMPARISON_REGEX = r"\1<removed>\2</removed>"
# An opening <added>/<removed> tag immediately followed by a closing one.
EMPTY_OPERATION_TAG = r"<(added|removed)><\/(added|removed)>"
# An opening tag sitting between "</added>" and "<removed>".
SAME_TAG_REPLACE_FIXER = r"<\/added><(?:[^\/>]|(?:><))+><removed>"
# An <added> element directly followed by a <removed> element.
# NOTE(review): the "*" quantifies the zero-width lookahead, not "[^<]", so
# each group captures exactly one character; confirm whether multi-character
# contents were meant to match as well.
UNNECESSARY_REPLACE_FIXER = (
    r"<added>([^<](?!<\/added>)*)<\/added>"
    r"<removed>([^<](?!<\/removed>)*)<\/removed>"
)
| 44 | + |
| 45 | + |
def generate_comparison(new_content, old_content):  # noqa: C901
    """Compare a content to an older content
    and generate a comparison html between both content.

    The comparison is the new content in which the lines touched by a
    remove/replace operation are wrapped in ``<removed>`` and the lines
    carried by the patch are inserted wrapped in ``<added>``.

    :param string new_content: the current content
    :param string old_content: the old content

    :return: string: the comparison content
    """
    new_content = _remove_html_attribute(new_content, HTML_ATTRIBUTES_TO_REMOVE)
    old_content = _remove_html_attribute(old_content, HTML_ATTRIBUTES_TO_REMOVE)

    if new_content == old_content:
        return new_content

    patch = generate_patch(new_content, old_content)
    comparison = new_content.split(LINE_SEPARATOR)
    # The patch is empty when the contents only differ by line breaks (they
    # are stripped before being compared): drop empty operations instead of
    # failing to parse their line index.
    patch_operations = [
        operation for operation in patch.split(OPERATION_SEPARATOR) if operation
    ]
    # We need to apply operation from last to the first
    # to preserve the indexes integrity.
    patch_operations.reverse()

    for operation in patch_operations:
        metadata, *patch_content_line = operation.split(LINE_SEPARATOR)

        # metadata looks like "<operation_type>@<start>[,<end>][:]"
        metadata_split = metadata.split(PATCH_OPERATION_LINE_AT)
        operation_type = metadata_split[0]
        lines_index_range = metadata_split[1] if len(metadata_split) > 1 else ""
        lines_index_range = lines_index_range.split(PATCH_OPERATION_CONTENT)[0]
        indexes = lines_index_range.split(",")
        start_index = int(indexes[0])
        end_index = int(indexes[1]) if len(indexes) > 1 else start_index

        # If the operation is a replace, we need to flag the changes that
        # will generate ghost opening tags if we don't ignore
        # them.
        # this can happen when:
        #    * A change concerning only html parameters.
        #       <p class="x">a</p> => <p class="y">a</p>
        #    * An addition in a previously empty element opening tag
        #       <p></p> => <p>a</p>
        if operation_type == PATCH_OPERATION_REPLACE:
            for i, line in enumerate(patch_content_line):
                current_index = start_index + i
                if current_index > end_index:
                    break

                current_line = comparison[current_index]
                current_line_tag = current_line.split(">")[0]
                line_tag = line.split(">")[0]
                # ``endswith`` rather than ``[-1]``: the first line is an
                # empty string when the content starts with a tag.
                if current_line.endswith(">") and (
                    current_line_tag == line_tag
                    or current_line_tag.split(" ")[0] == line_tag.split(" ")[0]
                ):
                    comparison[current_index] = "delete_me>"

        # We need to insert lines from last to the first
        # to preserve the indexes integrity.
        patch_content_line.reverse()

        if operation_type in (PATCH_OPERATION_REMOVE, PATCH_OPERATION_REPLACE):
            for index in range(end_index, start_index - 1, -1):
                deletion_flagged_comparison = re.sub(
                    HTML_TAG_ISOLATION_REGEX,
                    DELETION_COMPARISON_REGEX,
                    comparison[index],
                )
                # Only use this line if it doesn't generate an empty
                # <removed> tag
                if not re.search(EMPTY_OPERATION_TAG, deletion_flagged_comparison):
                    comparison[index] = deletion_flagged_comparison

        if operation_type == PATCH_OPERATION_ADD:
            for line in patch_content_line:
                addition_flagged_line = re.sub(
                    HTML_TAG_ISOLATION_REGEX, ADDITION_COMPARISON_REGEX, line
                )

                # Added lines go right after the line the operation points at;
                # a line which would get an empty <added> tag is kept as is.
                if not re.search(EMPTY_OPERATION_TAG, addition_flagged_line):
                    comparison.insert(start_index + 1, addition_flagged_line)
                else:
                    comparison.insert(start_index + 1, line)

        if operation_type == PATCH_OPERATION_REPLACE:
            for line in patch_content_line:
                addition_flagged_line = re.sub(
                    HTML_TAG_ISOLATION_REGEX, ADDITION_COMPARISON_REGEX, line
                )
                # Replacing lines go right before the replaced ones; a line
                # which would get an empty <added> tag is only kept when its
                # tag differs from the one currently at that position.
                if not re.search(EMPTY_OPERATION_TAG, addition_flagged_line):
                    comparison.insert(start_index, addition_flagged_line)
                elif line.split(">")[0] != comparison[start_index].split(">")[0]:
                    comparison.insert(start_index, line)

    final_comparison = LINE_SEPARATOR.join(comparison)
    # We can remove all the opening tags which are located between the end of an
    # added tag and the start of a removed tag, because this should never happen
    # as the added and removed tags should always be near each other.
    # This can happen when the new container tag had a parameter change.
    final_comparison = re.sub(
        SAME_TAG_REPLACE_FIXER, "</added><removed>", final_comparison
    )

    # Remove all the <delete_me> tags
    final_comparison = final_comparison.replace(r"<delete_me>", "")

    # This fixes the issue of unnecessary replace tags.
    # ex: <added>abc</added><removed>abc</removed> -> abc
    # This can occur when the new content is the same as the old content and
    # their container tags are the same but the tags parameters are different
    for match in re.finditer(UNNECESSARY_REPLACE_FIXER, final_comparison):
        if match.group(1) == match.group(2):
            final_comparison = final_comparison.replace(match.group(0), match.group(1))

    return final_comparison
| 163 | + |
| 164 | + |
def _format_line_index(start, end):
    """Build the ``@<start>[,<end>]`` line index of a patch operation.

    ``end`` is exclusive. An empty range (``start == end``) is anchored on
    the line right before ``start``, and a range of a single line only
    shows its start index.

    :param start: the start index
    :param end: the end index
    :return: string
    """
    span = end - start
    first = start if span else start - 1
    if span > 1:
        return f"{PATCH_OPERATION_LINE_AT}{first},{first + span - 1}"
    return f"{PATCH_OPERATION_LINE_AT}{first}"
| 178 | + |
| 179 | + |
def _patch_generator(new_content, old_content):
    """Yield the operations reverting the new content to the old content.

    Both contents are cut into "lines" on ``LINE_SEPARATOR`` and compared
    with ``difflib.SequenceMatcher``. Each group of opcodes gives one
    operation, a string with the following format:
        <operation_type>@<start_index>[,<end_index>][:<patch_text>*]
    patch format example:
        +@4:<p>ab</p><p>cd</p>
        +@4,15:<p>ef</p><p>gh</p>
        -@32
        -@125,129
        R@523:<b>sdf</b>

    :param string new_content: the new content
    :param string old_content: the old content

    :return: generator of strings, one per patch operation
    """
    # Line breaks separate the operations of a patch, so they must not
    # appear in the compared contents.
    new_lines = new_content.replace("\n", "").split(LINE_SEPARATOR)
    old_lines = old_content.replace("\n", "").split(LINE_SEPARATOR)

    matcher = SequenceMatcher(None, new_lines, old_lines, False)
    for group in matcher.get_grouped_opcodes(0):
        # The line index refers to the new content: from the start of the
        # first opcode to the end of the last one.
        operation = _format_line_index(group[0][1], group[-1][2])
        payload = []

        # Operation types are prepended to the line index: deletions first,
        # then the operations carrying content.
        for opcode in group:
            if opcode[0] == "delete":
                operation = PATCH_OPERATIONS["delete"] + operation

        for tag, _, _, old_start, old_end in group:
            if tag in ("insert", "replace"):
                operation = PATCH_OPERATIONS[tag] + operation
                # The content of an operation always comes from the old lines.
                payload.extend(old_lines[old_start:old_end])

        if payload:
            operation += PATCH_OPERATION_CONTENT + LINE_SEPARATOR
            operation += LINE_SEPARATOR.join(payload)
        yield operation
| 229 | + |
| 230 | + |
def generate_patch(new_content, old_content):
    """Return the patch reverting ``new_content`` to ``old_content``.

    The attributes listed in ``HTML_ATTRIBUTES_TO_REMOVE`` are stripped from
    both contents first, then the generated operations are joined with
    ``OPERATION_SEPARATOR``.

    :param string new_content: the new content
    :param string old_content: the old content
    :return: string: the patch (empty when no operation is generated)
    """
    cleaned_new, cleaned_old = (
        _remove_html_attribute(content, HTML_ATTRIBUTES_TO_REMOVE)
        for content in (new_content, old_content)
    )
    operations = _patch_generator(cleaned_new, cleaned_old)
    return OPERATION_SEPARATOR.join(operations)
| 236 | + |
| 237 | + |
| 238 | +def _remove_html_attribute(html_content, attributes_to_remove): |
| 239 | + for attribute in attributes_to_remove: |
| 240 | + html_content = re.sub(rf' {attribute}="[^"]*"', "", html_content) |
| 241 | + |
| 242 | + return html_content |
0 commit comments