From 6f9bfeda6625386561eaa0ad51334a9f8d4b1144 Mon Sep 17 00:00:00 2001
From: Francis Bond <bond@ieee.org>
Date: Mon, 8 Jun 2020 01:22:10 +0800
Subject: [PATCH 01/41] parse tdl from grammar file, closes #2; allow both lkb
 and ace grammars; many minor bugfixes

---
 README.rst          |  18 ++--
 ToDo                |   6 ++
 gold2db.py          | 189 +++++++++++++++++++------------------
 html/ltdb.css       |   6 +-
 html/ltdb.py        |  36 ++++++-
 html/ltypes.cgi     |   4 +-
 html/more.cgi       |   2 +-
 html/rules.cgi      |   4 +-
 html/search.cgi     |   2 +-
 html/showtype.cgi   |   6 +-
 make-ltdb.bash      | 132 +++++++++++++++++---------
 makehome.py         |  30 +++++-
 patch-lextypedb.lsp |   2 +-
 tables.sql          |  13 ++-
 tdl2db.py           | 225 ++++++++++++++++++++++++++++++++------------
 xml2db.py           |  13 +--
 16 files changed, 449 insertions(+), 239 deletions(-)

diff --git a/README.rst b/README.rst
index 58231fb..4a36937 100644
--- a/README.rst
+++ b/README.rst
@@ -17,11 +17,17 @@ the DELPH-IN Wiki.
 Usage
 -----
 
-1. Run ``./make-ltdb.bash --grmdir /path/to/grammar``
+1. Run ``./make-ltdb.bash --script /path/to/grammar/lkb/script``
+
+or (somewhat experimental, but it gets more docstrings)
+
+2. Run ``./make-ltdb.bash --grmtdl /path/to/grammar/grammar.tdl``
+
 
 .. code:: bash
 
-   ./make-ltdb.bash --grmdir ~/logon/dfki/jacy
+   ./make-ltdb.bash --script ~/logon/dfki/jacy/lkb/script
+   ./make-ltdb.bash --grmtdl ~/logon/dfki/jacy/japanese.tdl
 
 Everything is installed to ``~/public_html/``
 
@@ -43,7 +49,7 @@ Requirements
 We prefer that sentence IDs be unique; if we see two sentences in the
 gold treebank with the same ID, we only store the first one.
 
-Only the new LKB-FOS (http://moin.delph-in.net/LkbFos) suppoorts the new docstring comments.  We assume it is installed in
+Only the new LKB-FOS (http://moin.delph-in.net/LkbFos) supports the new docstring comments.  We assume it is installed in
 ``LKBFOS=~/delphin/lkb_fos/lkb.linux_x86_64``.
 
 Install dependencies (in Ubuntu):
@@ -114,15 +120,15 @@
 Types and instances are in the same table, distinguished by status.
 +==========+====================================+===================+======+
 |type      |normal type                         |                   |      |
 +----------+------------------------------------+-------------------+------+
-|ltype     |lexical type                        |type + in lexicon  | _lt  |
+|lex-type  |lexical type                        |type + in lexicon  | _lt  |
 +----------+------------------------------------+-------------------+------+
 |lex-entry |lexical entry                       |                   | _le  |
 +----------+------------------------------------+-------------------+------+
 |rule      |syntactic construction/grammar rule | LKB:\*RULES       | _c   |
 +----------+------------------------------------+-------------------+------+
-|lrule     |lexical rule                        | LKB:\*LRULES      | lr   |
+|lex-rule  |lexical rule                        | LKB:\*LRULES      | lr   |
 +----------+------------------------------------+-------------------+------+
-|irule     |inflectional rule                   | LKB:\*LRULES +    | ilr  |
+|inf-rule  |inflectional rule                   | LKB:\*LRULES +    | ilr  |
 +----------+------------------------------------+-------------------+------+
 |          |    (inflectional-rule-pid )        |                   |      |
 +----------+------------------------------------+-------------------+------+
diff --git a/ToDo b/ToDo
index b7348f2..65d6c3f 100644
--- a/ToDo
+++ b/ToDo
@@ -1,3 +1,9 @@
+ * look at lisp with John
+ * prettier lisp
+   * hyperlinked types
+   * types without glb
+
+
 * Better linking to surface form
diff --git a/gold2db.py b/gold2db.py
index 50cd3d9..b9d66a6 100644
--- a/gold2db.py
+++ b/gold2db.py
@@ -1,7 +1,7 @@
 #export PYTHONPATH=~/svn/pydelphin
 # python3 gold2db.py
 ##
-## takes two paramaters -- directory with the xml and database
+## takes two parameters -- the directory with the grammar, and the database
 ##
 ## Actually does the lexicon too :-)
 ##
@@ -10,11 +10,8 @@
 ##
 import sqlite3, sys, re, os
 from collections import defaultdict as dd
-from delphin import itsdb
-import delphin.mrs
-import delphin.derivation
-import delphin.mrs.xmrs
-import delphin.mrs.simplemrs
+from delphin import itsdb, derivation, dmrs
+from delphin.codecs import simplemrs, dmrsjson, mrsjson
 import json
 
 if (len(sys.argv) < 3):
@@ -41,114 +38,124 @@
 mroot=re.compile(r'^\(([-a-zA-z0-9_+]+?)\s+\(')
 mrule=re.compile(r'\([0-9]+ ([^ ]+) [-0-9.]+ ([0-9]+) ([0-9]+) ')
-mlex=re.compile(r'\([0-9]+ ([^ ]+) [-0-9.]+ [0-9]+ [0-9]+ \("(.*?)" ')
+#mlex=re.compile(r'\([0-9]+ ([^ ]+) [-0-9.]+ [0-9]+ [0-9]+ \("(.*?)" ')
 
 ### make a log in the same directory as the database
 log = open(os.path.join(os.path.dirname(dbfile),"gold.log"), 'w')
 
-
-
 golddir = '%s/tsdb/gold' % grmdir
 
 typefreq=dd(int)             # typefreq[type] = freq
 lexfreq=dd(lambda: dd(int))  # lexfreq[lexid][surf] = freq
 lxidfreq=dd(lambda: dd(int)) # lxidfreq[typ][lexid] = freq
-typind=dd(lambda: dd(set))   # typind[type][sid]((frm, to), ...)
-sent=dd(list)                # sent[sid][(surf, lexid)]
-pname=dict()                 # pname[sid]=profile
+typind=dd(lambda: dd(set))   # typind[type][(profile, sid)]((frm, to), ...)
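+## Illustration only (hypothetical profile name, item id, and lexical ids,
+## not taken from any real grammar): after reading item 42 "dogs bark" from
+## a profile called "mrs", we would expect, e.g.:
+##     lexfreq['dog_n_le']['dogs'] == 1
+##     sent[('mrs', 42)] == [('dogs', 'dog_n_le'), ('bark', 'bark_v_le')]
+##     typind['n_pl_olr'][('mrs', 42)] == {(0, 1)}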
+sent=dd(list)                # sent[(profile, sid)][(surf, lexid)]
 roots=dd(lambda: 'rootless')
 allroots=set()
 
 for root, dirs, files in os.walk(golddir):
+    #if not root.endswith('e'):    for debugging, don't load everything
+    #    continue
     ### find valid profiles
     if 'result' in files or 'result.gz' in files:
         # if 'mrs' not in root:   ## debug
         #     continue
         print("Processing %s" % root, file=sys.stderr)
-        profile = itsdb.ItsdbProfile(root)
-        head, profname = os.path.split(root)
-        items = {}
-        for row in profile.read_table('item'):
-            items[row['i-id']] = (row['i-input'], row['i-comment'])
-        for row in profile.read_table('result'):
-            pid = row['parse-id']
-            pname[pid] = profname
-            deriv = row['derivation'] # DERIVATION TREE
-            deriv_json = delphin.derivation.Derivation.from_string(deriv).to_dict(fields=['id','entity','score','form','tokens'])
-            mrs_string = row['mrs']
-            try:
-                mrs_obj = delphin.mrs.simplemrs.loads(mrs_string, single=True, version=1.1, errors='strict')
-                # mrs_obj = delphin.mrs.simplemrs.loads(row['mrs'], single=True, version=1.1, strict=False, errors='warn')
-                # mrs_string = row['mrs'] # CHANGING
-                mrs_json = delphin.mrs.xmrs.Mrs.to_dict(mrs_obj)
-                dmrs_json = delphin.mrs.xmrs.Dmrs.to_dict(mrs_obj)
-            except Exception as e:
-                log.write("\n\nMRS failed to convert in pydelphin:\n")
-                log.write("{}: {}\n".format(root, pid))
-                log.write(items[pid][0])
-                log.write("\n\n")
-                log.write(str(mrs_string))
-                log.write("\n\n")
-                if hasattr(e, 'message'):
-                    log.write(e.message)
-                else:
-                    log.write(str(e))
-                log.write("\n\n")
-                mrs_json = dict()
-                dmrs_json = dict()
+        ts = itsdb.TestSuite(root)
+        for response in ts.processed_items():
+            sid=response['i-id']
+            profile = ts.path.name
+            if response['readings'] > 0:
+                try:
+                    first_result=response.result(0)
+                    deriv = first_result.derivation()
+                    mrs_obj=first_result.mrs()
+                    mrs_str = first_result['mrs']
+                    tree = first_result.get('tree', '')
+                    deriv_str = deriv.to_udf(indent=None)
+                    deriv_json = json.dumps(deriv.to_dict(fields=['id','entity','score','form','tokens']))
+                except Exception as e:
+                    log.write("\n\nSomething went wrong getting the result:\n")
+                    log.write("{}: {} {}\n".format(root, profile, sid))
+                    deriv = ''
+                    mrs_obj = None
+                    mrs_str = ''
+                    tree = ''
+                    deriv_str = ''
+                    deriv_json = ''
+                    try:
+                        mrs_obj=first_result.mrs()
+                    except Exception as e:
+                        log.write("\n\nMRS couldn't be retrieved in pydelphin:\n")
+                        log.write("{}: {} {}\n".format(root, profile, sid))
+                        mrs_obj = None
+                try:
+                    dmrs_obj=dmrs.from_mrs(mrs_obj)
+                    mrs_json = mrsjson.encode(mrs_obj)
+                    dmrs_json = dmrsjson.encode(dmrs_obj)
+                except Exception as e:
+                    log.write("\n\nMRS failed to convert in pydelphin:\n")
+                    log.write("{}: {} {}\n".format(root, profile, sid))
+                    log.write(response['i-input']) ### FIXME
+                    log.write("\n\n")
+                    if mrs_obj:
+                        log.write(simplemrs.encode(mrs_obj,indent=True))
+                    log.write("\n\n")
+                    log.write(repr(e))
+                    if hasattr(e, 'message'):
+                        log.write(e.message)
+                    # else:
+                    #     log.write(str(e))
+                    log.write("\n\n")
+                    mrs_json = '{}'
+                    dmrs_json = '{}'
             # STORE gold info IN DB
             try:
-                c.execute("""INSERT INTO gold (sid, sent, comment,
+                c.execute("""INSERT INTO gold (profile, sid, sent, comment,
                               deriv, deriv_json, pst,
                               mrs, mrs_json, dmrs_json,
                               flags)
-                VALUES (?,?,?,?,?,?,?,?,?,?)""", (pid, items[pid][0], items[pid][1],
-                                                  deriv, json.dumps(deriv_json), None,
-                                                  mrs_string, json.dumps(mrs_json),
-                                                  json.dumps(dmrs_json), None))
-                ### ToDo use pydelphin to walk down tree
-                ### leaves
-                m = re.findall(mlex,deriv)
-                lexids=set()
-                if m:
-                    #print('leaves')
-                    #print(m)
-                    wid =0
-                    for (lexid, surf) in m:
-                        lexids.add(lexid)
+                VALUES (?,?,?,?,?,?,?,?,?,?,?)""",
+                          (profile,
+                           sid,
+                           response['i-input'],
+                           response['i-comment'],
+                           deriv_str,
+                           deriv_json,
+                           tree,
+                           mrs_str,
+                           mrs_json,
+                           dmrs_json,
+                           None))
+                ## leaves
+                if deriv:
+                    for (preterminal, terminal) in zip(deriv.preterminals(),deriv.terminals()):
+                        lexid=preterminal.entity
+                        surf=terminal.form
+                        start=preterminal.start
+                        end=preterminal.end
                         lexfreq[lexid][surf] +=1
-                        sent[pid].append((surf, lexid))
+                        sent[(profile, sid)].append((surf, lexid))
                         if ltypes[lexid]:
                             typefreq[ltypes[lexid]] += 1
                             lxidfreq[ltypes[lexid]][lexid] += 1
-                            typind[ltypes[lexid]][pid].add((wid, wid+1))
-                        wid+=1
-                ### rules (store as type)
-                m = re.findall(mrule,deriv)
-                if m:
-                    for (typ, frm, to) in m:
-                        if typ not in lexids: ## counted these!
-                            typefreq[typ] += 1
-                            typind[typ][pid].add((frm, to))
-                    #print('rule')
-                    #print(m)
-                ### Root (treat as another type)
-                m = re.search(mroot,deriv)
-                if m:
-                    #print('root {}'.format(root))
-                    #print(m.groups()[0])
-                    #print(deriv)
-                    #print()
-                    roots[pid] = m.groups()[0]
+                            typind[ltypes[lexid]][(profile, sid)].add((start, end))
+                    ### internal nodes (store as types)
+                    for node in deriv.internals():
+                        typ = node.entity
+                        start= node.start
+                        end= node.end
+                        typefreq[typ] += 1
+                        typind[typ][(profile, sid)].add((start, end))
                 ##print('\n\n\n')
             except sqlite3.Error as e:
                 log.write('ERROR: ({}) of type ({}), {}: {}\n'.format(e,
                                                                       type(e).__name__,
-                                                                      root, pid))
+                                                                      root, sid))
 
-### each sentence should have a root
-for s in sent:
-    allroots.add(roots[s])
-    typind[roots[s]][s].add((0, len(sent[s])))
-    typefreq[roots[s]] += 1
+# ### each sentence should have a root
+# for s in sent:
+#     allroots.add(roots[s])
+#     typind[roots[s]][s].add((0, len(sent[s])))
+#     typefreq[roots[s]] += 1
 
 ### calculate the lexical type frequencies
 for typ in lxidfreq:
@@ -190,20 +197,20 @@
     c.execute("""INSERT INTO lexfreq (lexid, word, freq)
                  VALUES (?,?,?)""", (l, w, lexfreq[l][w]))
 
-for s in sent:
+for p,s in sent:
     ##print(s, " ".join([surf for (surf, lexid) in sent[s]]))
-    for i, (w, l) in enumerate(sent[s]):
+    for i, (w, l) in enumerate(sent[(p,s)]):
         c.execute("""INSERT INTO sent (profile, sid, wid, word, lexid)
-                     VALUES (?,?,?,?,?)""", (pname[s], s, i, w, l))
+                     VALUES (?,?,?,?,?)""", (p, s, i, w, l))
 
 for t in typind:
-    for s in typind[t]:
+    for p,s in typind[t]:
         ##print("%s\t%s\t%s" % (t, s, typind[t][s]))
-        for (k, m) in typind[t][s]:
-            c.execute("""INSERT INTO typind (typ, sid, kara, made)
-                         VALUES (?,?,?,?)""", (t, s, k, m))
+        for (k, m) in typind[t][(p, s)]:
+            c.execute("""INSERT INTO typind (typ, profile, sid, kara, made)
+                         VALUES (?,?,?,?,?)""", (t, p, s, k, m))
diff --git a/html/ltdb.css b/html/ltdb.css
index 91768b3..15b4b5d 100644
--- a/html/ltdb.css
+++ b/html/ltdb.css
@@ -117,10 +117,10 @@ tr {background:#F3FFF3} /* FCB likes green */
 td {padding:4pt}
 caption { font-weight:bold; font-size: 18pt;} /* check colors */
-tr.irule {background:#EEEEEE}
+tr.inf-rule {background:#EEEEEE}
 tr.rule {background:#FFAAAA}
-tr.lrule {background:#AAAAFF}
-tr.ltype {background:#AAFFAA}
+tr.lex-rule {background:#AAAAFF}
+tr.lex-type {background:#AAFFAA}
 tr.root {background:#FFAAFF}
 
 pre.code {
diff --git a/html/ltdb.py b/html/ltdb.py
index b249d02..5f35986 100644
--- a/html/ltdb.py
+++ b/html/ltdb.py
@@ -8,6 +8,7 @@
 import sqlite3, collections
 import cgi, re, urllib, sys
 from collections import defaultdict as dd
+from collections import OrderedDict as od
 import json
 
@@ -22,6 +23,33 @@
 ('nil','nil'):(' ', ' '),
 (None,None):(' ', ' ')}
 
+### the different kinds of things we deal with
+statuses = od()
+
+## things used when parsing
+statuses["lex-rule"] = "Lexical Rules"
+statuses["rule"] = "Syntactic Rules"
+statuses["token-mapping-rule"] = "Rules for token mapping"
+statuses["root"] = "Root conditions for well-formed utterances"
+
+## lexical entries
+statuses["lex-entry"] = "Lexical Entries"
+statuses["generic-lex-entry"] = "Generic Lexical Entries"
+
+## types
+statuses["lex-type"] = "Types for lexical entries (immediate supertypes of lex-entries)"
+statuses["type"] = "Other Internal Types"
+
+## pre- and post-processing
+statuses["lexical-filtering-rule"] = "Lexical filtering rules"
+statuses["post-generation-mapping-rule"] = "Post-generation mapping rules"
+
+## interface
+statuses["labels"] = "Labels for trees (parse-nodes)"
+
+
+
 def getpar (params):
     par=dict()
     try:
@@ -452,19 +480,19 @@ def searchbar():
 
 
-def footer():
+def footer(version):
     return """
 Linguistic Type Database
-    for the grammar %s;
-    By Chikara Hashimoto, Luis Morgado da Costa and Francis Bond;
+    for the grammar {};
+    By Chikara Hashimoto, Luis Morgado da Costa, Michael Goodman and Francis Bond;
 Maintained by Francis Bond <bond@ieee.org>;
 Source code (GitHub)
 
-""" % (par['ver'])
+""".format(version)
 
 
 def munge_desc(typ,description):
diff --git a/html/ltypes.cgi b/html/ltypes.cgi
index 0abf4c9..798faee 100755
--- a/html/ltypes.cgi
+++ b/html/ltypes.cgi
@@ -26,7 +26,7 @@ con = sqlite3.connect(par['db'])
 c = con.cursor()
 c.execute("""SELECT types.typ, lname, words, lfreq, cfreq FROM types
     LEFT JOIN ltypes ON types.typ=ltypes.typ
-    WHERE status ='ltype' ORDER BY types.typ""")
+    WHERE status ='lex-type' ORDER BY types.typ""")
 results = c.fetchall()
 if results:
     print """
@@ -60,5 +60,5 @@ if results:
 print ""
 
-print ltdb.footer()
+print (ltdb.footer(par['ver']))
diff --git a/html/more.cgi b/html/more.cgi
index b684689..291d0ff 100755
--- a/html/more.cgi
+++ b/html/more.cgi
@@ -43,4 +43,4 @@ elif(lextyp):
 else:
     print("More examples of what?")
 print("")
-print ltdb.footer()
+print ltdb.footer(par['ver'])
diff --git a/html/rules.cgi b/html/rules.cgi
index 0659ebe..5fb33a6 100755
--- a/html/rules.cgi
+++ b/html/rules.cgi
@@ -26,7 +26,7 @@ con = sqlite3.connect(par['db'])
 c = con.cursor()
 c.execute("""SELECT types.typ, parents, lname, status, freq, arity, head
     FROM types left join typfreq on types.typ=typfreq.typ
-    WHERE status in ('rule', 'lrule', 'irule', 'root') order by
+    WHERE status in ('rule', 'lex-rule', 'inf-rule', 'root') order by
     types.typ""" )
 results = c.fetchall()
 if results:
@@ -72,5 +72,5 @@
 print ("")
 
-print (ltdb.footer())
+print (ltdb.footer(par['ver']))
diff --git a/html/search.cgi b/html/search.cgi
index 9ef5986..0377e54 100755
--- a/html/search.cgi
+++ b/html/search.cgi
@@ -108,4 +108,4 @@ elif(typ):
 
 """.format(typ, par['ver']))
 
-print ltdb.footer()
+print (ltdb.footer(par['ver']))
diff --git a/html/showtype.cgi b/html/showtype.cgi
index 5c0cb32..bef0169 100755
--- a/html/showtype.cgi
+++ b/html/showtype.cgi
@@ -26,9 +26,9 @@ maxexe = 3
 
 par=ltdb.getpar('params')
 
-print ltdb.header()
+print (ltdb.header())
 
-print ltdb.searchbar()
+print (ltdb.searchbar())
 
@@ -151,5 +151,5 @@
 else:
     print "Please give me a type (or rule or lexeme)"
 
-print ltdb.footer()
+print (ltdb.footer(par['ver']))
diff --git a/make-ltdb.bash b/make-ltdb.bash
index 7460b24..235cc23 100755
--- a/make-ltdb.bash
+++ b/make-ltdb.bash
@@ -7,6 +7,51 @@
 echo Welcome to the Linguistic Type Database
 echo
 
+
+###
+### get the grammar directory
+###
+
+while [ $# -gt 0 -a "${1#-}" != "$1" ]; do
+    case ${1} in
+	--script)
+	    lkbscript=${2};
+	    shift 2;
+	    ;;
+	--grmtdl)
+	    grammartdl=${2};
+	    shift 2;
+	    ;;
+	*)
+	    echo """You need to give a grammar directory or script file (or both)
+  --script path/to/lkb/script
+  --grmtdl path/to/grammar.tdl
+"""
+	    exit 0
+    esac
+done
+
+
+if [ ${lkbscript} ]
+then
+    echo "LKB script file is" ${lkbscript}
+    grammardir=`dirname ${lkbscript}`
+    grammardir=`dirname ${grammardir}`
+    echo "Grammar directory is " ${grammardir}
+elif [ ${grammartdl} ]
+then
+    echo "Grammar file is " ${grammartdl}
+    grammardir=`dirname ${grammartdl}`
+    echo "Grammar directory is " ${grammardir}
+else
+    echo """You need to give a grammar directory or script file
+  --script path/to/lkb/script
+  --grmtdl path/to/grammar.tdl
+"""
+    exit 0
+fi
+
+
 # If you want to use LKB_FOS you must set this variable
 # unset LKBFOS
 LKBFOS=~/delphin/lkb_fos/lkb.linux_x86_64
@@ -22,20 +67,6 @@
 else
 fi
 
-###
-### get the grammar directory
-###
-
-while [ $# -gt 0 -a "${1#-}" != "$1" ]; do
-    case ${1} in
-	--grmdir)
-	    grammardir=${2};
-	    shift 2;
-	    ;;
-    esac
-done
-
-echo "Grammar directory is " ${grammardir}
 
 ###
 ### set things up
@@ -110,17 +141,19 @@
 mkdir -p "${outdir}"
 
 db=${outdir}/${LTDB_FILE}
 
-### dump the lex-types
-echo "Dumping lex-type definitions and lexicon using the LKB (slow but steady)"
-
-
-unset DISPLAY;
-unset LUI;
-
+if [ ${lkbscript} ]
+then
+    ### dump the lex-types
+    echo "Dumping lex-type definitions and lexicon using the LKB (slow but steady)"
+
+
+    unset DISPLAY;
+    unset LUI;
+
 { cat 2>&1 <<- LISP
 	(format t "~%Read Grammar~%")
-	(lkb::read-script-file-aux "${grammardir}/lkb/script")
+	(lkb::read-script-file-aux "${lkbscript}")
 	(lkb::lkb-load-lisp "." "patch-lextypedb.lsp")
 	(format t "~%Output types~%")
 	(lkb::output-types :xml "${outdir}/${TYPES_FILE}")
@@ -135,20 +168,21 @@
 	LISP
 } | ${LISPCOMMAND} 2>${log} >${log}
 # } | cat
 
-###
-### Try to validate the types.xml
-###
-if which xmlstarlet &> /dev/null; then
-    xmlstarlet val -e ${outdir}/${TYPES_FILE}
-    xmlstarlet val -e ${outdir}/${RULES_FILE}
-    xmlstarlet val -e ${outdir}/${LRULES_FILE}
-    xmlstarlet val -e ${outdir}/${ROOTS_FILE}
-else
-    echo
-    echo " types files not validated, please install xmlstarlet."
-    echo "   sudo apt-get install xmlstarlet"
-    echo
+
+    ###
+    ### Try to validate the types.xml
+    ###
+    if which xmlstarlet &> /dev/null; then
+	xmlstarlet val -e ${outdir}/${TYPES_FILE}
+	xmlstarlet val -e ${outdir}/${RULES_FILE}
+	xmlstarlet val -e ${outdir}/${LRULES_FILE}
+	xmlstarlet val -e ${outdir}/${ROOTS_FILE}
+    else
+	echo
+	echo " types files not validated, please install xmlstarlet."
+	echo "   sudo apt-get install xmlstarlet"
+	echo
+    fi
 fi
 ###
 ### make the databases
 ###
 echo
 echo "Creating the databases ..."
 echo
 
-### create the db, write in the
+sqlite3 ${db} < tables.sql
 
-echo "Adding in the info from the lisp"
-echo
-python3 xml2db.py ${outdir} ${db}
+###
+if [ ${lkbscript} ]
+then
+    echo "Adding in the info from the lisp"
+    echo
+    python3 xml2db.py ${outdir} ${db}
+fi
 
-echo "Adding in the info from the tdl with pydelphin"
-echo
-python3 tdl2db.py ${grammardir} ${db} ### add tdl and comments
+if [ ${grammartdl} ]
+then
+    echo "Adding in the info from the tdl with pydelphin"
+    echo
+    python3 tdl2db.py ${grammartdl} ${db} ### add tdl and comments
+fi
+
+#echo "Adding in the info from the gold trees"
+#echo
 python3 gold2db.py ${grammardir} ${db}
diff --git a/makehome.py b/makehome.py
index 047ccc0..baf82d7 100644
--- a/makehome.py
+++ b/makehome.py
@@ -9,7 +9,9 @@
 import sys, os
 import datetime
 from collections import OrderedDict
-#import html/ltdb
+### get some local utilities
+sys.path.append(os.getcwd() + '/html')
+from ltdb import statuses, footer
 
 (script, version, grmdir) = sys.argv
 
@@ -18,6 +20,7 @@
 <title>{0} ltdb</title>
+
 
 <h1>Welcome to {0}</h1>
 
@@ -77,13 +80,30 @@
     print("<tr><td>{}</td><td>{}</td></tr>".format(a,v))
 print("</table>")
 
+###
+### Statuses
+###
+print("""
+<h3>Types and Instances in the Database</h3>
+""")
+print("<table>")
+for (typ, desc) in statuses.items():
+    print(f"<tr><td>{typ}</td><td>{desc}</td></tr>")
+print("</table>")
+
+###
+### Links to Logs
+###
 print("""
 <h3>Logs</h3>
+""")
+
+###
+### Links to ltdb
+###
+print("""
+<h3>Linguistic Type Database</h3>
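
For reference, a minimal sketch of querying the database this patch builds, once ``make-ltdb.bash`` has run. The table and column names follow the INSERT and SELECT statements above; the database path and the type name queried are assumptions for illustration (``${LTDB_FILE}`` is set elsewhere in the script):

    import sqlite3

    con = sqlite3.connect('ltdb.db')   # assumed value of ${outdir}/${LTDB_FILE}
    c = con.cursor()

    # lexical types, as listed by ltypes.cgi (note the new 'lex-type' status)
    for (typ,) in c.execute(
            "SELECT typ FROM types WHERE status = 'lex-type' ORDER BY typ"):
        print(typ)

    # where one type occurs in the gold trees: typind rows are
    # (typ, profile, sid, kara, made), i.e. a span from kara to made in sid
    for row in c.execute(
            "SELECT profile, sid, kara, made FROM typind WHERE typ = ?",
            ('n_pl_olr',)):            # made-up type name
        print(row)

    con.close()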