diff --git a/README.rst b/README.rst index 58231fb..040ec87 100644 --- a/README.rst +++ b/README.rst @@ -12,16 +12,50 @@ documentation in the grammar, a kind of literate programming. There is `more documentation `__ at the DELPH-IN Wiki. + +LTDB assumes that the grammar follows the usual DELPH-IN conventions, +in particular that there is a grammar directory with subdirectories +for ace and lkb config files. + +`` +grammar/ace/config.tdl +grammar/lkb/script +`` + +If your `orth-path` is not `STEM` then you must have it defined in the +**top** ace config file; we do not follow includes for config files (yet). + -------------- Usage ----- -1. Run ``./make-ltdb.bash --grmdir /path/to/grammar`` +0. Prepare the local environment + `` + python3 -m venv .venv + source .venv/bin/activate + python3 -m pip install --upgrade pip + pip install -r requirements.txt + `` + +1. Run ``./make-ltdb.bash --script /path/to/grammar/lkb/script`` + +or (somewhat experimental but gets more docstrings) + +2. Run ``./make-ltdb.bash --acecfg /path/to/ace/config.tdl`` + +3. Add extra lisp to call before the script + ``./make-ltdb.bash --lisp '(push :mal *features*)' --script /path/to/grammar/lkb/script`` + +4. You can tell it to just read the grammar, not gold (mainly useful for debugging) + ``./make-ltdb.bash --acecfg /path/to/ace/config.tdl --nogold`` + +You can load from both the lisp and ace versions of the grammar; it will try to merge information from both. ..
code:: bash - ./make-ltdb.bash --grmdir ~/logon/dfki/jacy + ./make-ltdb.bash --script ~/logon/dfki/jacy/lkb/script + ./make-ltdb.bash --acecfg ~/logon/dfki/jacy/ace/config.tdl Everything is installed to ``~/public_html/`` @@ -33,28 +67,27 @@ Requirements :: - * python 2.7, python 3, pydelphin, docutils, lxml + * python 3, pydelphin, docutils, lxml * Perl * SQLite3 * Apache * LKB/Lisp for db dump * xmlstarlet for validating lisp -We prefer that Sentence IDs are unique, if we see two sentences in the -gold treebank with the same ID, we only store the first one. +We store items as (profile, item-id) pairs, so Sentence IDs do not +need to be unique. -Only the new LKB-FOS (http://moin.delph-in.net/LkbFos) suppoorts the new docstring comments. We assume it is installed in +Only the new LKB-FOS (http://moin.delph-in.net/LkbFos) supports the new docstring comments. We assume it is installed in ``LKBFOS=~/delphin/lkb_fos/lkb.linux_x86_64``. Install dependencies (in ubuntu): .. code:: bash - sudo apt-get install apache2 xmlstarlet - sudo apt-get install python-docutils python3-docutils python3-lxml + sudo apt-get install apache2 xmlstarlet p7zip sqlite3 + sudo apt-get install python3-docutils python3-lxml - sudo pip install pydelphin --upgrade - sudo pip3 install pydelphin --upgrade + pip install pydelphin --upgrade Enable local directories in Apache2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -66,7 +99,7 @@ This may be different on different operating systems sudo a2enmod userdir sudo a2enmod cgi -Put this in ``/etc/apache2/sites-available/000-default.conf`` +Put this at the end of ``/etc/apache2/sites-available/000-default.conf`` .. code:: xml @@ -99,11 +132,44 @@ If the LKB complains it probably means you have a docstring in an instance file, or an old version of the LKB. Make sure you only document types for now. 
+If you are having trouble with apache encodings, set the following in ``/etc/apache2/apache2.conf`` + +:: + + SetEnv PYTHONIOENCODING utf8 + +To make debugging + +On Ubuntu 18.04, if you have updated from an earlier version (so your python defaults to 2.7), do this to get the python3 mod_wsgi working + +.. code:: bash + + sudo apt-get install libapache2-mod-wsgi-py3 + sudo update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1 + sudo update-alternatives --install /usr/bin/python python /usr/bin/python3.6 2 + +Links go to the wrong place +--------------------------- + +ltdb assumes that the code is being served from a machine whose name +is ``hostname -f`` using ``http`` in your ``public_html``. If that is not true, e.g. you +want to change the host or port, or use https, then please change the +appropriate parts of ``params``. + +.. code:: bash + + charset=utf-8 + dbroot=/home/bond/public_html/cgi-bin/ERG_mal_mo + db=/home/bond/public_html/cgi-bin/ERG_mal_mo/lt.db + cssdir=http://mori/~bond/ltdb/ERG_mal_mo + cgidir=http://mori/~bond/cgi-bin/ERG_mal_mo + ver=ERG_mal_mo + + + Todo ---- -- check I am getting lrule/irule right - -------------- Types, instances in the same table, distinguished by status. @@ -114,15 +180,15 @@ Types, instances in the same table, distinguished by status. 
+==========+====================================+===================+======+ |type |normal type | | | +----------+------------------------------------+-------------------+------+ -|ltype |lexical type |type + in lexicon | _lt | +|lex-type |lexical type |type + in lexicon | _lt | +----------+------------------------------------+-------------------+------+ |lex-entry |lexical entry | | _le | +----------+------------------------------------+-------------------+------+ |rule |syntactic construction/grammar rule | LKB:\*RULES | _c | +----------+------------------------------------+-------------------+------+ -|lrule |lexical rule | LKB:\*LRULES | lr | +|lex-rule | lexical rule | LKB:\*LRULES | lr | +----------+------------------------------------+-------------------+------+ -|irule |inflectional rule | LKB:\*LRULES + | ilr | +|inf-rule |inflectional rule | LKB:\*LRULES + | ilr | +----------+------------------------------------+-------------------+------+ | | (inflectional-rule-pid )| | | +----------+------------------------------------+-------------------+------+ @@ -153,5 +219,3 @@ Types, instances in the same table, distinguished by status. 
+--------+--------------------------------------+ | ◬ | Binary, Non-Headed | +--------+--------------------------------------+ - -FIXME: add IDIOMS as a different table diff --git a/ToDo b/ToDo index b7348f2..65d6c3f 100644 --- a/ToDo +++ b/ToDo @@ -1,3 +1,9 @@ + * look at lisp with John + * prettier lisp + * hyperlinked types + * types without glb + + * Better linking to surface form diff --git a/gold2db.py b/gold2db.py index 50cd3d9..70a25d2 100644 --- a/gold2db.py +++ b/gold2db.py @@ -1,7 +1,7 @@ #export PYTHONPATH=~/svn/pydelphin # python3 gold2db.py ## -## takes two paramaters -- directory with the xml and database +## takes two paramaters -- directory with the grammar and database ## ## Actually does the lexicon too :-) ## @@ -10,12 +10,10 @@ ## import sqlite3, sys, re, os from collections import defaultdict as dd -from delphin import itsdb -import delphin.mrs -import delphin.derivation -import delphin.mrs.xmrs -import delphin.mrs.simplemrs +from delphin import itsdb, derivation, dmrs +from delphin.codecs import simplemrs, dmrsjson, mrsjson import json +import warnings if (len(sys.argv) < 3): # prints standard error msg (stderr) @@ -41,114 +39,137 @@ mroot=re.compile(r'^\(([-a-zA-z0-9_+]+?)\s+\(') mrule=re.compile(r'\([0-9]+ ([^ ]+) [-0-9.]+ ([0-9]+) ([0-9]+) ') -mlex=re.compile(r'\([0-9]+ ([^ ]+) [-0-9.]+ [0-9]+ [0-9]+ \("(.*?)" ') +#mlex=re.compile(r'\([0-9]+ ([^ ]+) [-0-9.]+ [0-9]+ [0-9]+ \("(.*?)" ') ### make a log in the same directory as the database log = open(os.path.join(os.path.dirname(dbfile),"gold.log"), 'w') - - golddir = '%s/tsdb/gold' % grmdir typefreq=dd(int) # typefreq[type] = freq lexfreq=dd(lambda: dd(int)) # lexfreq[lexid][surf] = freq lxidfreq=dd(lambda: dd(int)) # lxidfreq[typ][lexid] = freq -typind=dd(lambda: dd(set)) # typind[type][sid]((frm, to), ...) -sent=dd(list) # sent[sid][(surf, lexid)] -pname=dict() # pname[sid]=profile +typind=dd(lambda: dd(set)) # typind[type][(profile, sid)]((frm, to), ...) 
+sent=dd(list) # sent[(profile, sid)][(surf, lexid)] roots=dd(lambda: 'rootless') allroots=set() for root, dirs, files in os.walk(golddir): + #if not root.endswith('e'): for debugging, don't load everything + # continue ### find valid profiles if 'result' in files or 'result.gz' in files: # if 'mrs' not in root: ## debug # continue print("Processing %s" % root, file=sys.stderr) - profile = itsdb.ItsdbProfile(root) - head, profname = os.path.split(root) - items = {} - for row in profile.read_table('item'): - items[row['i-id']] = (row['i-input'], row['i-comment']) - for row in profile.read_table('result'): - pid = row['parse-id'] - pname[pid] = profname - deriv = row['derivation'] # DERIVATION TREE - deriv_json = delphin.derivation.Derivation.from_string(deriv).to_dict(fields=['id','entity','score','form','tokens']) - mrs_string = row['mrs'] - try: - mrs_obj = delphin.mrs.simplemrs.loads(mrs_string, single=True, version=1.1, errors='strict') - # mrs_obj = delphin.mrs.simplemrs.loads(row['mrs'], single=True, version=1.1, strict=False, errors='warn') - # mrs_string = row['mrs'] # CHANGING - mrs_json = delphin.mrs.xmrs.Mrs.to_dict(mrs_obj) - dmrs_json = delphin.mrs.xmrs.Dmrs.to_dict(mrs_obj) - except Exception as e: - log.write("\n\nMRS failed to convert in pydelphin:\n") - log.write("{}: {}\n".format(root, pid)) - log.write(items[pid][0]) - log.write("\n\n") - log.write(str(mrs_string)) - log.write("\n\n") - if hasattr(e, 'message'): - log.write(e.message) - else: - log.write(str(e)) - log.write("\n\n") - mrs_json = dict() - dmrs_json = dict() - - # STORE gold info IN DB - try: - c.execute("""INSERT INTO gold (sid, sent, comment, - deriv, deriv_json, pst, - mrs, mrs_json, dmrs_json, flags) - VALUES (?,?,?,?,?,?,?,?,?,?)""", (pid, items[pid][0], items[pid][1], - deriv, json.dumps(deriv_json), None, - mrs_string, json.dumps(mrs_json), - json.dumps(dmrs_json), None)) - ### ToDo use pydelphin to walk down tree - ### leaves - m = re.findall(mlex,deriv) - lexids=set() - if 
m: - #print('leaves') - #print(m) - wid =0 - for (lexid, surf) in m: - lexids.add(lexid) + ts = itsdb.TestSuite(root) + for response in ts.processed_items(): + sid=response['i-id'] + profile = ts.path.name + if response['results']: + first_result=response.result(0) + deriv = first_result.derivation() + tree = first_result.get('tree', '') + deriv_str = deriv.to_udf(indent=None) + with warnings.catch_warnings(record=True) as caught_warnings: + warnings.simplefilter("always") + try: + deriv_json = json.dumps(deriv.to_dict(fields=['id','entity','score','form','tokens'])) + except Exception as e: + log.write("\n\ncouldn't convert deriv to json:\n") + log.write(f"{root}: {profile} {sid} {e}\n") + deriv_json = '{}' + try: + mrs_obj = first_result.mrs() + mrs_str = simplemrs.encode(mrs_obj,indent=True) + mrs_json = mrsjson.encode(mrs_obj) + except Exception as e: + log.write("\n\nMRS couldn't be retrieved in pydelphin:\n") + log.write(f"{root}: {profile} {sid} {e}\n") + mrs_obj = None + mrs_str = '' + mrs_json = '{}' + try: + dmrs_obj=dmrs.from_mrs(mrs_obj) + except Exception as e: + log.write("\n\nMRS failed to convert to DMRS:\n") + log.write(f"{root}: {profile} {sid} {e}\n") + log.write(response['i-input']) ### FIXME + log.write("\n\n") + log.write(repr(e)) + if hasattr(e, 'message'): + log.write(e.message) + log.write("\n\n") + if mrs_str: + log.write(mrs_str) + dmrs_obj = None + try: + if dmrs_obj: + dmrs_json = dmrsjson.encode(dmrs_obj) + else: + dmrs_json = '{}' + except Exception as e: + log.write("\n\nDMRS failed to serialize to JSON:\n") + log.write(f"{root}: {profile} {sid} {e}\n") + log.write(response['i-input']) ### FIXME + log.write("\n\n") + log.write(repr(e)) + if hasattr(e, 'message'): + log.write(e.message) + log.write("\n\n") + if mrs_str: + log.write(mrs_str) + dmrs_json = '{}' + for warn in caught_warnings: + # STORE gfor warn in caught_warnings: + log.write(f"\n\nWarning: {warn.message}\n") + log.write(f"{root}: {profile} {sid}\n") + 
#log.write(f"{warn.category}\n") + #log.write(f"{str(warn)}\n") + try: + c.execute("""INSERT INTO gold (profile, sid, sent, comment, + deriv, deriv_json, pst, + mrs, mrs_json, dmrs_json, flags) + VALUES (?,?,?,?,?,?,?,?,?,?,?)""", + (profile, + sid, + response['i-input'], + response['i-comment'], + deriv_str, + deriv_json, + tree, + mrs_str, + mrs_json, + dmrs_json, + None)) + except sqlite3.Error as e: + log.write('ERROR: ({}) of type ({}), {}: {} {}\n'.format(e, type(e).__name__, + root, profile, sid)) + + ##leaves + if deriv: + for (preterminal, terminal) in zip(deriv.preterminals(),deriv.terminals()): + lexid=preterminal.entity + surf=terminal.form + start=preterminal.start + end=preterminal.end lexfreq[lexid][surf] +=1 - sent[pid].append((surf, lexid)) + sent[(profile, sid)].append((surf, lexid)) if ltypes[lexid]: typefreq[ltypes[lexid]] += 1 lxidfreq[ltypes[lexid]][lexid] += 1 - typind[ltypes[lexid]][pid].add((wid, wid+1)) - wid+=1 - ### rules (store as type) - m = re.findall(mrule,deriv) - if m: - for (typ, frm, to) in m: - if typ not in lexids: ## counted these! 
- typefreq[typ] += 1 - typind[typ][pid].add((frm, to)) - #print('rule') - #print(m) - ### Root (treat as another type) - m = re.search(mroot,deriv) - if m: - #print('root {}'.format(root)) - #print(m.groups()[0]) - #print(deriv) - #print() - roots[pid] = m.groups()[0] - - ##print('\n\n\n') - except sqlite3.Error as e: - log.write('ERROR: ({}) of type ({}), {}: {}\n'.format(e, type(e).__name__, - root, pid)) - -### each sentence should have a root -for s in sent: - allroots.add(roots[s]) - typind[roots[s]][s].add((0, len(sent[s]))) - typefreq[roots[s]] += 1 + typind[ltypes[lexid]][(profile, sid)].add((start, end)) + ### internal node (store as type) + for node in deriv.internals(): + typ = node.entity + start= node.start + end= node.end + typefreq[typ] += 1 + typind[typ][(profile, sid)].add((start, end)) + +# ### each sentence should have a root +# for s in sent: +# allroots.add(roots[s]) +# typind[roots[s]][s].add((0, len(sent[s]))) +# typefreq[roots[s]] += 1 ### calculate the lexical type frequencies for typ in lxidfreq: @@ -178,7 +199,9 @@ VALUES (?,?,?,?)""", (typ, wrds, lfreq[typ], typefreq[typ])) + + ### Wack these into a database for typ in typefreq: #print("%d\t%s" % (typefreq[typ], typ)) @@ -190,20 +213,22 @@ c.execute("""INSERT INTO lexfreq (lexid, word, freq) VALUES (?,?,?)""", (l, w, lexfreq[l][w])) -for s in sent: +for p,s in sent: ##print(s, " ".join([surf for (surf, lexid) in sent[s]])) - for i, (w, l) in enumerate(sent[s]): - c.execute("""INSERT INTO sent (profile, sid, wid, word, lexid) - VALUES (?,?,?,?,?)""", (pname[s], s, i, w, l)) - - + try: + for i, (w, l) in enumerate(sent[(p,s)]): + c.execute("""INSERT INTO sent (profile, sid, wid, word, lexid) + VALUES (?,?,?,?,?)""", (p, s, i, w, l)) + except sqlite3.Error as e: + log.write('ERROR: ({}) of type ({}), {}: {} {}\n'.format(e, type(e).__name__, + root, profile, sid)) for t in typind: - for s in typind[t]: + for p,s in typind[t]: ##print("%s\t%s\t%s" % (t, s, typind[t][s])) - for (k, m) in 
typind[t][s]: - c.execute("""INSERT INTO typind (typ, sid, kara, made) - VALUES (?,?,?,?)""", (t, s, k, m)) + for (k, m) in typind[t][(p, s)]: + c.execute("""INSERT INTO typind (typ, profile, sid, kara, made) + VALUES (?,?,?,?,?)""", (t, p, s, k, m)) diff --git a/html/lextypedb.css b/html/lextypedb.css deleted file mode 100644 index 3df197d..0000000 --- a/html/lextypedb.css +++ /dev/null @@ -1,124 +0,0 @@ -body{ -background-color: #A4DBFF; -} - -#outline { -margin:0 auto; -} - -#contents{ --moz-border-radius: 10px; -border-style: solid; -border-color: #0078C8; -padding: 1%; -background-color: white; -width : 97%; -clear: both; -} - -#header{ - -} - -#menu{ -text-align: center; --moz-border-radius: 10px; -border-style: solid; -border-color: #0078C8; -padding: 5px; -background-color: white; -margin-bottom: 5px; -width: 25%; -float: left; -height: 25px; -} - -#confusing{ -text-align: center; --moz-border-radius: 10px; -border-style: solid; -border-color: #0078C8; -padding: 5px; -background-color: white; -margin-bottom: 5px; -margin-left: 5px; -width: 70%; -float: left; -height: 25px; -} -.form -{ -margin-left: 10px; -float: left; -} - -#c-both { -height:0px; -clear:both; -} - -.item{ -padding: 5px; -} - -H1 { --moz-border-radius: 10px; - color: #ffffff; - background-color : #005BF2; - border-style: solid; - border-color: #003999; - border-width: 4px; - line-height: 120%; - text-align: center; - font-weight: bolder; -} -H2 { - color: #004BC8; - padding-left: 10px; - border-width: 0px 0px 2px 15px; - border-style: solid; - border-color: #004BC8; - line-height: 100%; -} -H3 { - color: #004BC8; - padding-left: 10px; - border-width: 0px 0px 2px 0px; - border-style: solid; - border-color: #004BC8; - line-height: 100%; -} -H4 { - color: #004BC8; - padding-left: 10px; - border-width: 0px 0px 2px 0px; - border-style: none; - border-color: #004BC8; - line-height: 100%; -} - -.match{ -color: green; - font-weight: bold; -} - -strong{ -color: red; - font-weight: bolder; -} - 
-table { - border-spacing:3pt; - border-collapse: collapse; -} -th {background:#C8E9FF; text-align:left; color:black; -font-size: 14pt; font-weight:bolder; padding:4pt} -/* tr {background:#E3F4FF} */ -tr {background:#F3FFF3} /* FCB likes green */ -td {padding:4pt} -caption { font-weight:bold; font-size: 18pt;} -/* check colors */ -tr.irule {background:#EEEEEE} -tr.rule {background:#FFAAAA} -tr.lrule {background:#AAAAFF} -tr.ltype {background:#AAFFAA} \ No newline at end of file diff --git a/html/ltdb.css b/html/ltdb.css index 91768b3..a8bc838 100644 --- a/html/ltdb.css +++ b/html/ltdb.css @@ -94,8 +94,9 @@ color: green; } .coref{ -# border: 1px solid black; + border: 1px solid black; background: #F3FFFF; + font-size: smaller; } pre{ white-space: pre-wrap; @@ -117,10 +118,13 @@ tr {background:#F3FFF3} /* FCB likes green */ td {padding:4pt} caption { font-weight:bold; font-size: 18pt;} /* check colors */ -tr.irule {background:#EEEEEE} +tr.inf-rule {background:#FFEEEE} +tr.token-mapping-rule {background:#FFEEEE} tr.rule {background:#FFAAAA} -tr.lrule {background:#AAAAFF} -tr.ltype {background:#AAFFAA} +tr.lex-rule {background:#FFDDDD} +tr.lex-type {background:#AAFFAA} +tr.lex-entry {background:#DDFFDD} +tr.generic-lex-entry {background:#DDFFDD} tr.root {background:#FFAAFF} pre.code { diff --git a/html/ltdb.py b/html/ltdb.py index b249d02..59853b6 100644 --- a/html/ltdb.py +++ b/html/ltdb.py @@ -1,15 +1,14 @@ ### --*-- coding: utf-8 --*-- ### shared code for the ltdb ### -from __future__ import unicode_literals -from __future__ import print_function import sqlite3, collections -import cgi, re, urllib, sys +import cgi, re, sys +from html import escape from collections import defaultdict as dd import json - +import urllib.parse as up ### labels for branching: arity, head headedness = {(1,0):('▲', 'unary: headed'), @@ -22,6 +21,34 @@ ('nil','nil'):(' ', ' '), (None,None):(' ', ' ')} +### the different kinds of things we deal with +statuses = dict() + +##things used when 
parsing +statuses["lex-rule"] = "Lexical Rules" +statuses["inf-rule"] = "Inflectional Rules" +statuses["rule"] = "Syntactic Rules" +statuses["token-mapping-rule"] = "Rules for token mapping" +statuses["root"] = "Root Conditions for well formed utterances" + +## Lexical entries +statuses["lex-entry"] = "Lexical Entries" +statuses["generic-lex-entry"] = "Generic Lexical Entries" + +## types +statuses["lex-type"] = "Types for lexical entries (immediate supertypes of lex-entries)" +statuses["type"] = "Other Internal Types" + +## pre and post processing +statuses["lexical-filtering-rule"] = "Lexical filtering rule" +statuses["post-generation-mapping-rule"] = "Post generation mapping rule" + +## interface +statuses["labels"] = "Labels for trees in the (parse-nodes)" + + + + def getpar (params): par=dict() try: @@ -37,13 +64,11 @@ def getpar (params): def hlt (typs): "hyperlink a list of space seperated types" - l = unicode() + linked = [] if typs: for t in typs.split(): - l += "%s " % (par['cgidir'], - urllib.quote(t, ''), - t) - return l + linked.append(f"{t}") + return ' '.join(linked) else: return '
' @@ -59,11 +84,11 @@ def hltyp(match): for typ in c: types.add(typ[0]) #print types - t = unicode(match.group(0)) + t = str(match.group(0)) #print "
%s %s\n" % (t, t in types) if t in types and not t.startswith('#'): return "{}".format(par['cgidir'], - urllib.quote(t,''), + up.quote(t), t) else: return t @@ -72,9 +97,9 @@ def hltyp(match): def hlall (typs): "hyperlink all types in a description or documentation" if typs: - typs = cgi.escape(typs) - ### Definition from http://moin.delph-in.net/TdlRfc - typs=re.sub(r'(#[\w_+*?-]+)', "\\1", typs) + typs = escape(typs) + ### Definition from https://github.com/delph-in/docs/wiki/TdlRFC + typs=re.sub(r'( )(#[\w_+*?-]+)([ ,])', "\\1\\2\\3", typs) return retyp.sub(hltyp, typs) else: return '
' @@ -96,18 +121,18 @@ def showsents (c, typ, lexid, limit, biglimit): total = results[0] sids = dd(set) if lexid: - c.execute("""SELECT sid, wid FROM sent + c.execute("""SELECT profile, sid, wid FROM sent WHERE lexid=? ORDER BY sid LIMIT ?""", (lexid, limit)) - for (sid, wid) in c: - sids[sid].add((wid, wid+1)) + for (profile, sid, wid) in c: + sids[profile, sid].add((wid, wid+1)) else: - c.execute("""SELECT sid, kara, made FROM typind + c.execute("""SELECT profile, sid, kara, made FROM typind WHERE typ=? ORDER BY sid LIMIT ?""", (typ, limit)) - for (sid, kara, made) in c: - sids[sid].add((kara, made)) + for (profile, sid, kara, made) in c: + sids[profile, sid].add((kara, made)) if limit < total and biglimit > limit: limtext= "({:,} out of {:,}: more)".format(limit, total, - urllib.quote(typ,''), + up.quote(typ), lexid, biglimit) elif limit < total: @@ -115,24 +140,22 @@ def showsents (c, typ, lexid, limit, biglimit): else: limtext ='({:,})'.format(total) print("""

Corpus Examples %s

""" % limtext) - c.execute("""SELECT profile, sid, wid, word, lexid FROM SENT - WHERE sid in (%s) order by sid, wid""" % \ - ','.join('?'*len(sids)), - sids.keys()) sents = dd(dict) - profname=dict() - for (prof, sid, wid, word, lexid) in c: - sents[sid][wid] = (word, lexid) - profname[sid]=prof + for profile, sid in sids: + c.execute("""SELECT profile, sid, wid, word, lexid FROM SENT + WHERE profile = ? AND sid = ? order by profile, sid, wid""", + (profile, sid)) + for (prof, sid, wid, word, lexid) in c: + sents[prof, sid][wid] = (word, lexid) print("""""") + +### +### Links to ltdb +### +print("""

Linguistic Type Database