working loop manager for single type

Theomat · Theomat · commit 71357d864845 · 2025-05-17T09:42:31.000+02:00
diff --git a/grape/automaton/loop_manager.py b/grape/automaton/loop_manager.py
@@ -1,103 +1,38 @@
-from collections import defaultdict
 from enum import StrEnum
 import itertools
 
+from grape import types
 from grape.automaton.tree_automaton import DFTA
 from grape.dsl import DSL
+from grape.program import Function, Primitive, Program, Variable
 
 
 class LoopStrategy(StrEnum):
     NO_LOOP = "none"
     STATE = "state"
 
 
-def __find_unbounded_types(
-    dfta: DFTA[str, str], state_to_type: dict[str, str]
-) -> set[str]:
-    unbounded_types = set()
-    added = True
-    while added:
-        added = False
-        for (P, args), dst in dfta.rules.items():
-            prod_type = state_to_type[dst]
-            if prod_type not in unbounded_types and any(
-                state_to_type[arg_state] in unbounded_types
-                or prod_type == state_to_type[arg_state]
-                for arg_state in args
-            ):
-                unbounded_types.add(prod_type)
-                added = True
-    return unbounded_types
-
-
-def __find_unconsumed_states(dfta: DFTA[str, str]) -> set[str]:
-    not_consumed = dfta.all_states
-    for P, args in dfta.rules:
-        for arg_state in args:
-            if arg_state in not_consumed:
-                not_consumed.remove(arg_state)
-    return not_consumed
-
-
-def __prod_types_by_states(
-    dfta: DFTA[str, str], state_to_type: dict[str, str]
-) -> dict[str, set[str]]:
-    # Compute transitive closure
-    reachable_from: dict[str, set[str]] = defaultdict(set)
-    for (P, args), dst in dfta.rules.items():
-        reachable_from[dst].update(args)
-    updated = True
-    while updated:
-        updated = False
-        for dst, reachables in reachable_from.copy().items():
-            before = len(reachables)
-            for S in reachables.copy():
-                reachables.update(reachable_from[S])
-            if len(reachables) != before:
-                updated = True
-    return {
-        s: set(state_to_type[v] for v in reachables)
-        for s, reachables in reachable_from.items()
-    }
-
-
-def __compute_outbound(dfta: DFTA[str, str], unconsumed: set[str]) -> dict[str, int]:
-    outbound: dict[str, int] = {}
-    for x in unconsumed:
-        outbound[x] = 1
-    queue = list(dfta.all_states)
-    while queue:
-        x = queue.pop()
-        if x in outbound:
-            continue
-        total = 0
-        has_missed = False
-        for (P, args), dst in dfta.rules.items():
-            if x in args:
-                if dst not in outbound:
-                    has_missed = True
-                    break
-                else:
-                    total += outbound[dst]
-        if has_missed:
-            queue.insert(0, x)
-        else:
-            outbound[x] = total
-    return outbound
+def __state2letter__(state: str) -> str:
+    if "(" in state:
+        return state[1 : state.find(" ")]
+    else:
+        return state
 
 
 def __can_states_merge(
-    dfta: DFTA[str, str], state_to_letter: dict[str, str], original: str, candidate: str
+    reversed_rules: dict[tuple[str, tuple[str, ...]], str],
+    original: str,
+    candidate: str,
 ) -> bool:
-    if state_to_letter[candidate] != state_to_letter[original] and not str(
-        state_to_letter[candidate]
+    if __state2letter__(candidate) != __state2letter__(original) and not str(
+        __state2letter__(candidate)
     ).startswith("var"):
         return False
-    for P1, args1 in dfta.reversed_rules[original]:
+    for P1, args1 in reversed_rules[original]:
         has_equivalent = False
-        for P2, args2 in dfta.reversed_rules[candidate]:
+        for P2, args2 in reversed_rules[candidate]:
             if all(
-                __can_states_merge(dfta, state_to_letter, arg1, arg2)
+                __can_states_merge(reversed_rules, arg1, arg2)
                 for arg1, arg2 in zip(args1, args2)
             ):
                 has_equivalent = True
@@ -107,98 +42,115 @@ def __can_states_merge(
     return True
 
 
+def __find_merge__(
+    dfta: DFTA[str, str], P: str, args: tuple[str, ...], candidates: set[str]
+) -> str | None:
+    best_candidate = None
+    for candidate in candidates:
+        if __state2letter__(candidate) != P and not str(
+            __state2letter__(candidate)
+        ).startswith("var"):
+            continue
+        has_equivalent = False
+        for P2, args2 in dfta.reversed_rules[candidate]:
+            if all(
+                __can_states_merge(dfta.reversed_rules, arg1, arg2)
+                for arg1, arg2 in zip(args, args2)
+            ):
+                has_equivalent = True
+                break
+        if has_equivalent and (
+            best_candidate is None or best_candidate.count(" ") < candidate.count(" ")
+        ):
+            best_candidate = candidate
+    return best_candidate
+
+
+def __convert_automaton__(dfta: DFTA[str, str]) -> DFTA[str, Program]:
+    return dfta.map_alphabet(
+        lambda x: Variable(int(str(x)[len("var") :]))
+        if str(x).startswith("var")
+        else Primitive(str(x))
+    )
+
+
 def add_loops(
-    dfta: DFTA[str, str],
+    dfta: DFTA[str, Program | str],
     dsl: DSL,
     strategy: LoopStrategy,
-) -> DFTA[str, str]:
+) -> DFTA[str, Program]:
     """
-    Assumes one state is from one letter
+    Assumes one state is from one letter and that variants are mapped.
     """
     if strategy == LoopStrategy.NO_LOOP:
-        return dfta
+        return __convert_automaton__(dfta)
     elif dfta.is_unbounded():
         raise ValueError("automaton is already looping cannot add loops!")
     else:
-        # In order to make the automaton loop
-        # 1) All unconsumed must be consumed
-        # 2) Programs of all produced types must have unbounded size
         state_to_type = dsl.get_state_types(dfta)
-        state_to_letter = {s: dfta.reversed_rules[s][0][0] for s in state_to_type}
-        prod_types_by_state = __prod_types_by_states(dfta, state_to_type)
-        all_types = set(state_to_type.values())
-        unbounded_types = __find_unbounded_types(dfta, state_to_type)
-        unconsumed = __find_unconsumed_states(dfta)
-        unconsumed_by_type = {
-            t: {s for s in unconsumed if state_to_type[s] == t} for t in all_types
+        state_to_size = {s: s.count(" ") for s in dfta.all_states}
+        max_size = max(state_to_size.values())
+        states_by_types = {
+            t: set(s for s, st in state_to_type.items() if st == t)
+            for t in set(state_to_type.values())
         }
-        unbounded_unconsumed = {
-            t for t in unbounded_types if t not in unconsumed_by_type
-        }
-        # For each unbounded unconsumed
-        #   find all states that are not consumed to produce more of that type
-        #       mark them as unconsumed
-        for t in unbounded_unconsumed:
-            unconsumed_by_type[t] = set()
-            for state in dfta.all_states:
-                if state_to_type[state] == t and t not in prod_types_by_state[state]:
-                    unconsumed.add(state)
-                    unconsumed_by_type[t].add(state)
-        # Computes consumed
-        consumed = dfta.all_states.difference(unconsumed)
-        consumed_by_type = {
-            t: {s for s in consumed if state_to_type[s] == t} for t in all_types
-        }
-        outbound = __compute_outbound(dfta, unconsumed)
-        state_merged: dict[str, str] = {}
-        new_rules = dfta.rules.copy()
-        new_finals = dfta.finals.copy()
-        # 1) Merge all unconsumed onto the largest subcontext that is being consumed
-        unmerged_by_type: dict[str, set[str]] = defaultdict(set)
-        for t, states in unconsumed_by_type.items():
-            for state in states:
-                has_merge = False
-                for candidate in consumed_by_type[t]:
-                    if not __can_states_merge(dfta, state_to_letter, state, candidate):
-                        continue
-                    if (
-                        has_merge
-                        and outbound[candidate] < outbound[state_merged[state]]
-                    ) or not has_merge:
-                        state_merged[state] = candidate
-                    has_merge = True
-                if not has_merge:
-                    unmerged_by_type[t].add(state)
-        if strategy == LoopStrategy.STATE:
-            for (P, args), dst in dfta.rules.items():
-                if dst in state_merged:
-                    new_rules[(P, args)] = state_merged[dst]
-        else:
-            assert False, f"unsupported loop strategy:{strategy}"
-        # 2) Some can still be unmerged
-        # this means multiple things:
-        # - there is no variable of that type
-        # - there is not smaller expression using the same letter
-        # print(
-        #     "UNMERGED:\n",
-        #     "\n".join([f"\t{k} ====> {v}" for k, v in unmerged_by_type.items()]),
-        # )
-        for (P, args), dst in dfta.rules.items():
-            possibles = [[arg] for arg in args]
+        added = True
+        new_dfta = DFTA(dfta.rules.copy(), dfta.finals.copy())
+        virtual_vars = set()
+        max_varno = (
+            max(
+                int(s[len("var") :])
+                for s in state_to_type.keys()
+                if s.startswith("var")
+            )
+            + 1
+        )
+        for t, states in states_by_types.items():
+            if all(not s.startswith("var") for s in states):
+                virtual_vars.add(max_varno)
+                dst = str(Variable(max_varno))
+                new_dfta.rules[(Variable(max_varno), tuple())] = dst
+                # Create a variant of every rule so that each argument of type t may also be this virtual variable
+                for (P, args), new_dst in dfta.rules.items():
+                    possibles = [
+                        [arg] + ([dst] if arg in states else []) for arg in args
+                    ]
+                    for new_args in itertools.product(*possibles):
+                        if dst in new_args and (P, new_args) not in new_dfta.rules:
+                            new_dfta.rules[(P, new_args)] = new_dst
+                            print("adding:", (P, new_args), new_dst)
+                max_varno += 1
+        new_dfta.refresh_reversed_rules()
+        i = 0
+        while added and i < 1:
+            i += 1
             added = False
-            for rtype, programs in unmerged_by_type.items():
-                for li in possibles:
-                    if state_to_type[li[0]] != rtype:
-                        continue
-                    else:
-                        added = True
-                        li.extend(programs)
-            if added:
-                for new_args in itertools.product(*possibles):
-                    new_rules[(P, new_args)] = dst
-        new_dfta = DFTA(new_rules, new_finals)
-        new_dfta.reduce()
-        out = new_dfta.minimise(
-            can_be_merged=lambda x, y: state_to_type[x] == state_to_type[y]
-        ).classic_state_renaming()
-        return out
+            for P, (Ptype, _) in dsl.primitives.items():
+                possibles = [states_by_types[arg_t] for arg_t in types.arguments(Ptype)]
+                for combi in itertools.product(*possibles):
+                    key = (P, combi)
+                    if key not in new_dfta.rules:
+                        args_size = list(map(lambda x: state_to_size[x], combi))
+                        dst_size = sum(args_size) + 1
+                        if (
+                            dst_size >= max_size
+                            and max(args_size) >= max_size - len(args_size) + 1
+                        ):
+                            added = True
+                            rtype = types.return_type(dsl.get_type(P))
+                            dst = Function(Primitive(P), list(map(Primitive, combi)))
+                            new_state = __find_merge__(
+                                new_dfta, P, combi, states_by_types[rtype]
+                            ) or str(dst)
+                            new_dfta.rules[key] = new_state
+                            states_by_types[rtype].add(new_state)
+                            if new_state not in state_to_size:
+                                state_to_size[new_state] = dst_size
+            new_dfta.refresh_reversed_rules()
+
+    for no in virtual_vars:
+        dst = Variable(no)
+        del new_dfta.rules[(dst, tuple())]
+    new_dfta.reduce()
+    new_dfta.refresh_reversed_rules()
+    return __convert_automaton__(new_dfta)  # .minimise())#.classic_state_renaming())
diff --git a/grape/dsl.py b/grape/dsl.py
@@ -65,9 +65,8 @@ def get_state_types(self, automaton: DFTA[T, str | Program]) -> dict[T, str]:
         # Assumes types variants are not present.
         specialized = spec_manager.is_specialized(automaton)
         if specialized:
-            arg_types = types.arguments(
-                spec_manager.type_request_from_specialized(automaton, self)
-            )
+            guessed_tr = spec_manager.type_request_from_specialized(automaton, self)
+            arg_types = types.arguments(guessed_tr)
 
         state_to_type: dict[Any, str] = {}
         elements = list(automaton.rules.items())
@@ -99,7 +98,10 @@ def get_state_types(self, automaton: DFTA[T, str | Program]) -> dict[T, str]:
                     )
                     Ptype = all_possibles.pop()
             if dst in state_to_type:
-                assert state_to_type[dst] == types.return_type(Ptype)
+                all_types = set()
+                for variant in types.all_variants(Ptype):
+                    all_types.add(types.return_type(variant))
+                assert state_to_type[dst] in all_types
             else:
                 state_to_type[dst] = types.return_type(Ptype)
         return state_to_type