Skip to content

Commit ca13755

Browse files
committed
added centralized dispatch, cursor metadata and doc_state
1 parent 139bec7 commit ca13755

2 files changed

Lines changed: 164 additions & 104 deletions

File tree

modules/writing_observer/writing_observer/reconstruct_doc.py

Lines changed: 149 additions & 96 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ def parse_tab_from_url(url: str) -> str:
3636
'''
3737
if not url or "tab=" not in url:
3838
return "t.0"
39-
match = re.search(r"tab=([^&]+)", url)
39+
match = re.search(r"tab=([^&#]+)", url)
4040
return match.group(1) if match else "t.0"
4141

4242

@@ -190,8 +190,8 @@ def command_list(doc, commands):
190190
new `save` requests.
191191
'''
192192
for item in commands:
193-
if item['ty'] in dispatch:
194-
doc = dispatch[item['ty']](doc, **item)
193+
if item['ty'] in text_dispatch:
194+
doc = text_dispatch[item['ty']](doc, **item)
195195
else:
196196
print("Unrecogized Google Docs command: " + repr(item['ty']))
197197
# TODO: Log issue and fix it!
@@ -328,7 +328,7 @@ def null(doc, **kwargs):
328328

329329

330330
# This dictionary maps the `ty` parameter to the function which
331-
# handles data of that type.
331+
# handles text edits.
332332

333333
# TODO: `ae`, `ue`, `de`, and `te` need to be
334334
# reverse-engineered. These happen if we, e.g., make a new bullet
@@ -337,7 +337,7 @@ def null(doc, **kwargs):
337337
# TODO: 'iss' and 'dss' are generated when suggested text is inserted or deleted.
338338
# these can't be handled like plain 'is' or 'ds' because they include different fields
339339
# (e.g., 'sugid', presumably, suggestion id.)
340-
dispatch = {
340+
text_dispatch = {
341341
'ae': null,
342342
'ase': null, # suggestion
343343
'ast': null, # suggestion. Image?
@@ -377,6 +377,141 @@ def null(doc, **kwargs):
377377
}
378378

379379

380+
def _touch_tab(tab, event_timestamp):
    '''
    Record on `tab` that an event carrying `event_timestamp` touched it.

    `first_timestamp` is only filled in while it is still `None`;
    `last_timestamp` is overwritten on every call. An event without a
    timestamp (`None`) leaves the tab unchanged.
    '''
    if event_timestamp is None:
        return
    if tab.first_timestamp is None:
        tab.first_timestamp = event_timestamp
    # Unconditional overwrite: this is the timestamp of the most recently
    # *processed* event, not necessarily the largest one seen.
    tab.last_timestamp = event_timestamp
386+
387+
388+
@dataclass
class CommandContext:
    '''
    Per-command state passed as the first argument to every handler in
    `dispatch`.
    '''
    # Document-level state; handlers reach its `tabs` mapping and its
    # helper methods through this reference.
    doc_state: "DocState"
    # Key into `doc_state.tabs` for the tab this command applies to.
    current_tab: str
    # Timestamp of the originating event, or `None` when there is none.
    event_timestamp: Optional[int]

    @property
    def tab(self) -> "TabState":
        '''Return the entry of `doc_state.tabs` that `current_tab` names.'''
        return self.doc_state.tabs[self.current_tab]
397+
398+
399+
def _cmd_text(ctx: CommandContext, **cmd):
    '''
    Forward a text-edit command to its handler in `text_dispatch`.

    The handler is called with the current tab's document followed by
    every field of the command (including `ty`) as keyword arguments.
    Commands whose `ty` has no entry in `text_dispatch` are ignored.
    '''
    handler = text_dispatch.get(cmd.get("ty"))
    if handler is None:
        return
    # The handler's return value is discarded.
    # NOTE(review): this presumes text handlers update the document in
    # place -- confirm against the handlers in `text_dispatch`.
    handler(ctx.tab.doc, **cmd)
403+
404+
405+
def _cmd_mlti(ctx: CommandContext, mts=None, **kwargs):
    '''
    Handle a `mlti` command by dispatching each entry of `mts` in order.

    Every sub-command is dispatched against the same tab and timestamp
    as the enclosing command. A missing or empty `mts` is a no-op.
    '''
    for sub in mts or []:
        ctx.doc_state._dispatch_cmd(sub, ctx.current_tab, ctx.event_timestamp)
408+
409+
410+
def _cmd_nm(ctx: CommandContext, nmr=None, nmc=None, **kwargs):
    '''
    Handle an `nm` command: dispatch its inner command `nmc`, possibly
    against a different tab.

    The target tab is the last string in `nmr` that starts with "t.";
    when `nmr` holds no such entry the current tab is used. A missing
    `nmc` is dispatched as an empty command.
    '''
    # Scan from the end so the last tab-like entry wins.
    target_tab = next(
        (
            item
            for item in reversed(nmr or [])
            if isinstance(item, str) and item.startswith("t.")
        ),
        ctx.current_tab,
    )
    ctx.doc_state._dispatch_cmd(nmc or {}, target_tab, ctx.event_timestamp)
417+
418+
419+
def _cmd_mkch(ctx: CommandContext, d=None, **kwargs):
    '''
    Handle a `mkch` command: rename the current tab.

    The name is obtained by passing `d` to
    `doc_state._extract_name_from_d`; when that yields nothing truthy
    the tab keeps its existing name.
    '''
    name = ctx.doc_state._extract_name_from_d(d)
    if name:
        ctx.tab.name = name
423+
424+
425+
def _cmd_ucp(ctx: CommandContext, d=None, **kwargs):
    '''
    Handle a `ucp` command: rename a tab and mark it as touched.

    `d` must be a list with at least two entries, otherwise the command
    is ignored:

    * `d[0]` -- id of the tab to rename; a falsy value means the
      current tab.
    * `d[1]` -- payload handed to `doc_state._extract_name_from_d`.

    When no name can be extracted nothing happens; in particular the
    target tab is not looked up and its timestamps are not updated.
    '''
    if not isinstance(d, list) or len(d) < 2:
        return
    name = ctx.doc_state._extract_name_from_d(d[1])
    if not name:
        return
    target = ctx.doc_state.tabs[d[0] or ctx.current_tab]
    target.name = name
    _touch_tab(target, ctx.event_timestamp)
435+
436+
437+
def _cmd_ac(ctx: CommandContext, d=None, **kwargs):
    '''
    Handle an `ac` command for the tab named in `d[0]`.

    `d` must be a list with at least two entries and `d[0]` must be a
    string tab id, otherwise the command is ignored. The tab is looked
    up in `doc_state.tabs`, renamed when a name can be extracted from
    `d[1]`, and marked as touched whether or not it was renamed.
    '''
    if not isinstance(d, list) or len(d) < 2:
        return
    tab_id = d[0]
    if not isinstance(tab_id, str):
        return
    # Look the tab up before extracting the name, so the lookup happens
    # even when there is no name to apply.
    target = ctx.doc_state.tabs[tab_id]
    name = ctx.doc_state._extract_name_from_d(d[1])
    if name:
        target.name = name
    _touch_tab(target, ctx.event_timestamp)
448+
449+
450+
def _cmd_ae(ctx: CommandContext, id=None, et=None, **kwargs):
    '''
    Handle an `ae` command: store an element record on the current tab.

    The record is a new dict holding `id`, `et` and every remaining
    field of the command. It is filed under `id` in:

    * `dropdown_defs` when `et` is "dropdown-definition",
    * `dropdown_elems` when `et` is "dropdown",
    * `elements` for any other `et`.

    Commands without a truthy `id` are ignored.
    '''
    # `id` shadows the builtin on purpose: handlers are called with the
    # command's fields as keyword arguments, so the parameter name has
    # to match the command key.
    if not id:
        return
    record = {"id": id, "et": et, **kwargs}
    if et == "dropdown-definition":
        ctx.tab.dropdown_defs[id] = record
    elif et == "dropdown":
        ctx.tab.dropdown_elems[id] = record
    else:
        ctx.tab.elements[id] = record
460+
461+
462+
def _cmd_te(ctx: CommandContext, id=None, spi=None, **kwargs):
    '''
    Handle a `te` command: place element `id` at position `spi`.

    If `id` is a known dropdown element of the current tab, the pair
    `(spi, id)` is appended to `dropdown_instances`. Otherwise the
    placeholder text `[<id>]` is inserted into the tab's document at
    `spi`. Commands without a truthy `id` or with a non-integer `spi`
    are ignored.
    '''
    # `id` mirrors the command key; see how handlers are invoked with
    # the command's fields as keyword arguments.
    if not id or not isinstance(spi, int):
        return
    if id in ctx.tab.dropdown_elems:
        ctx.tab.dropdown_instances.append((spi, id))
        return
    insert(ctx.tab.doc, "is", spi, f"[{id}]")
469+
470+
471+
def _cmd_null(ctx: CommandContext, **kwargs):
    '''
    Accept a command and do nothing with it.

    Used in `dispatch` for command types that are recognized but
    intentionally ignored.
    '''
    return
473+
474+
475+
# Centralized dispatch for all command types.
#
# Maps a command's `ty` to a handler called as `handler(ctx, **cmd)`,
# where `ctx` is a `CommandContext`. Types routed to `_cmd_text` are
# resolved a second time through `text_dispatch`.
dispatch = {
    # Structural commands: nesting, tab targeting, tab names, elements.
    'mlti': _cmd_mlti,
    'nm': _cmd_nm,
    'mkch': _cmd_mkch,
    'ucp': _cmd_ucp,
    'ac': _cmd_ac,
    'ae': _cmd_ae,
    'te': _cmd_te,

    # Text edits, delegated to `text_dispatch`.
    'as': _cmd_text,
    'ds': _cmd_text,
    'is': _cmd_text,
    'iss': _cmd_text,
    'mefd': _cmd_text,
    'msfd': _cmd_text,
    'ord': _cmd_text,
    'ras': _cmd_text,
    'rplc': _cmd_text,
    'rte': _cmd_text,
    'rue': _cmd_text,
    'rvrt': _cmd_text,
    'sas': _cmd_text,
    'sl': _cmd_text,
    'ste': _cmd_text,
    'sue': _cmd_text,
    'uefd': _cmd_text,
    'use': _cmd_text,
    'umv': _cmd_text,
    'usfd': _cmd_text,

    # Recognized but ignored.
    'null': _cmd_null,
    'ase': _cmd_null,
    'ast': _cmd_null,
    'astss': _cmd_null,
    'ue': _cmd_null,
    'de': _cmd_null,
    'dse': _cmd_null,
    'dss': _cmd_null,
}
513+
514+
380515
@dataclass
381516
class TabState:
382517
'''
@@ -402,6 +537,8 @@ def text(self, value: str) -> None:
402537
def to_dict(self) -> dict:
403538
return {
404539
"text": self.doc._text,
540+
"position": self.doc.position,
541+
"edit_metadata": self.doc.edit_metadata,
405542
"elements": self.elements,
406543
"name": self.name,
407544
"first_timestamp": self.first_timestamp,
@@ -482,102 +619,18 @@ def _walk(item):
482619
def apply_bundle(self, bundle: dict, default_tab: str, event_timestamp: Optional[int] = None) -> None:
483620
commands = bundle.get("commands", [])
484621
for cmd in commands:
485-
self._apply_cmd(cmd, default_tab, event_timestamp)
622+
self._dispatch_cmd(cmd, default_tab, event_timestamp)
486623

487-
def _apply_cmd(self, cmd: dict, current_tab: str, event_timestamp: Optional[int] = None) -> None:
624+
def _dispatch_cmd(self, cmd: dict, current_tab: str, event_timestamp: Optional[int] = None) -> None:
488625
ty = cmd.get("ty")
489626
if not ty:
490627
return
491628

492-
tab = self.tabs[current_tab]
493-
if event_timestamp is not None:
494-
if tab.first_timestamp is None:
495-
tab.first_timestamp = event_timestamp
496-
tab.last_timestamp = event_timestamp
497-
498-
if ty == "mlti":
499-
for sub in cmd.get("mts", []):
500-
self._apply_cmd(sub, current_tab, event_timestamp)
501-
return
502-
503-
if ty == "nm":
504-
target_tab = current_tab
505-
nmr = cmd.get("nmr") or []
506-
for item in reversed(nmr):
507-
if isinstance(item, str) and item.startswith("t."):
508-
target_tab = item
509-
break
510-
inner_cmd = cmd.get("nmc", {})
511-
self._apply_cmd(inner_cmd, target_tab, event_timestamp)
512-
return
513-
514-
if ty == "mkch":
515-
name = self._extract_name_from_d(cmd.get("d"))
516-
if name:
517-
tab.name = name
518-
return
519-
520-
if ty == "ucp":
521-
data = cmd.get("d")
522-
if not isinstance(data, list) or len(data) < 2:
523-
return
524-
tab_id = data[0] or current_tab
525-
name = self._extract_name_from_d(data[1])
526-
if name:
527-
target = self.tabs[tab_id]
528-
target.name = name
529-
if event_timestamp is not None:
530-
if target.first_timestamp is None:
531-
target.first_timestamp = event_timestamp
532-
target.last_timestamp = event_timestamp
533-
return
534-
535-
if ty == "ac":
536-
data = cmd.get("d")
537-
if not isinstance(data, list) or len(data) < 2:
538-
return
539-
tab_id = data[0]
540-
if not isinstance(tab_id, str):
541-
return
542-
name = self._extract_name_from_d(data[1])
543-
target = self.tabs[tab_id]
544-
if name:
545-
target.name = name
546-
if event_timestamp is not None:
547-
if target.first_timestamp is None:
548-
target.first_timestamp = event_timestamp
549-
target.last_timestamp = event_timestamp
550-
return
551-
552-
if ty == "ae":
553-
el_id = cmd.get("id")
554-
if not el_id:
555-
return
556-
et = cmd.get("et")
557-
if et == "dropdown-definition":
558-
tab.dropdown_defs[el_id] = cmd
559-
return
560-
if et == "dropdown":
561-
tab.dropdown_elems[el_id] = cmd
562-
return
563-
tab.elements[el_id] = cmd
564-
return
565-
566-
if ty == "te":
567-
el_id = cmd.get("id")
568-
spi = cmd.get("spi")
569-
if not el_id or not isinstance(spi, int):
570-
return
571-
if el_id in tab.dropdown_elems:
572-
tab.dropdown_instances.append((spi, el_id))
573-
return
574-
placeholder = f"[{el_id}]"
575-
insert(tab.doc, "is", spi, placeholder)
576-
return
577-
578-
if ty in dispatch:
579-
dispatch[ty](tab.doc, **cmd)
580-
return
629+
ctx = CommandContext(self, current_tab, event_timestamp)
630+
_touch_tab(ctx.tab, event_timestamp)
631+
handler = dispatch.get(ty)
632+
if handler:
633+
handler(ctx, **cmd)
581634

582635

583636
def _render_tab_text(tab: TabState) -> str:

modules/writing_observer/writing_observer/writing_analysis.py

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -302,17 +302,24 @@ def _extract_tab_title(client):
302302
"text": gdoc_reconstruct_doc.render_tab_text(tab),
303303
})
304304

305-
state = {
305+
active_tab = doc_state.tabs[default_tab]
306+
position = active_tab.doc.position
307+
edit_metadata = active_tab.doc.edit_metadata
308+
309+
internal_state = {
310+
"doc_state": doc_state.to_dict(),
311+
"position": position,
312+
"edit_metadata": edit_metadata,
313+
}
314+
external_state = {
306315
"text": gdoc_reconstruct_doc.render_full_text(doc_state),
307316
"tabs": tabs,
308-
"position": internal_state.get("position", 0) if isinstance(internal_state, dict) else 0,
309-
"edit_metadata": internal_state.get("edit_metadata", {"cursor": [], "length": []})
310-
if isinstance(internal_state, dict) else {"cursor": [], "length": []},
311-
"doc_state": doc_state.to_dict(),
317+
"position": position,
318+
"edit_metadata": edit_metadata,
312319
}
313320
if learning_observer.settings.module_setting('writing_observer', 'verbose'):
314-
print(state)
315-
return state, state
321+
print(external_state)
322+
return internal_state, external_state
316323

317324

318325
gdoc_scope_reconstruct = kvs_pipeline(scope=gdoc_scope)(reconstruct)
@@ -537,4 +544,4 @@ def document_link_to_doc_id(event):
537544
event['doc_id'] = doc_id
538545
return event
539546

540-
learning_observer.adapters.adapter.add_common_migrator(document_link_to_doc_id, __file__)
547+
learning_observer.adapters.adapter.add_common_migrator(document_link_to_doc_id, __file__)

0 commit comments

Comments
 (0)