1919
2020import edu .stanford .nlp .util .StringUtils ;
2121import edu .stanford .nlp .util .logging .RedwoodConfiguration ;
22+ import org .w3c .dom .Attr ;
2223import org .w3c .dom .Document ;
2324import org .w3c .dom .Element ;
2425import org .w3c .dom .Node ;
@@ -397,7 +398,7 @@ protected static class SsurgeonArgs {
397398 * whitespace, but retain everything inside quotes, so we can pass
398399 * in hashmaps in String form.
399400 */
400- private static String [] parseArgs (String argsString ) {
401+ private static Map < String , String > parseArgs (String argsString ) {
401402 List <String > retList = new ArrayList <>();
402403 String patternString = "(?:[^\\ s\\ \" ]++|\\ \" [^\\ \" ]*+\\ \" |(\\ \" ))++" ;
403404 Pattern pattern = Pattern .compile (patternString );
@@ -413,59 +414,58 @@ private static String[] parseArgs(String argsString) {
413414 } else
414415 throw new SsurgeonParseException ("Unmatched quote in string to parse" );
415416 }
416- return retList .toArray (StringUtils .EMPTY_STRING_ARRAY );
417+
418+ Map <String , String > parsedArgs = new LinkedHashMap <>();
419+ for (int i = 0 ; i < retList .size () - 1 ; i += 2 ) {
420+ parsedArgs .put (retList .get (i ), retList .get (i + 1 ));
421+ }
422+ return parsedArgs ;
417423 }
418424
419- private static SsurgeonArgs parseArgsBox (String args ) {
425+ private static SsurgeonArgs parseArgsBox (String args , Map < String , String > additionalArgs ) {
420426 SsurgeonArgs argsBox = new SsurgeonArgs ();
421- final String [] argsArray = parseArgs (args );
427+ Map <String , String > argsArray = parseArgs (args );
428+ for (String additional : additionalArgs .keySet ()) {
429+ argsArray .put ("-" + additional , additionalArgs .get (additional ));
430+ }
422431
423- for (int argIndex = 0 ; argIndex < argsArray .length ; ++argIndex ) {
424- switch (argsArray [argIndex ]) {
432+ for (String argsKey : argsArray .keySet ()) {
433+ String argsValue = argsArray .get (argsKey );
434+ switch (argsKey ) {
425435 case GOV_NODENAME_ARG :
426- argsBox .govNodeName = argsArray [argIndex + 1 ];
427- argIndex += 1 ;
436+ argsBox .govNodeName = argsValue ;
428437 break ;
429438 case DEP_NODENAME_ARG :
430- argsBox .dep = argsArray [argIndex + 1 ];
431- argIndex += 1 ;
439+ argsBox .dep = argsValue ;
432440 break ;
433441 case EDGE_NAME_ARG :
434- argsBox .edge = argsArray [argIndex + 1 ];
435- argIndex += 1 ;
442+ argsBox .edge = argsValue ;
436443 break ;
437444 case RELN_ARG :
438- argsBox .reln = argsArray [argIndex + 1 ];
439- argIndex += 1 ;
445+ argsBox .reln = argsValue ;
440446 break ;
441447 case NODENAME_ARG :
442- argsBox .node = argsArray [argIndex + 1 ];
443- argIndex += 1 ;
448+ argsBox .node = argsValue ;
444449 break ;
445450 case NODE_PROTO_ARG :
446- argsBox .nodeString = argsArray [argIndex + 1 ];
447- argIndex += 1 ;
451+ argsBox .nodeString = argsValue ;
448452 break ;
449453 case WEIGHT_ARG :
450- argsBox .weight = Double .valueOf (argsArray [argIndex + 1 ]);
451- argIndex += 1 ;
454+ argsBox .weight = Double .valueOf (argsValue );
452455 break ;
453456 case NAME_ARG :
454- argsBox .name = argsArray [argIndex + 1 ];
455- argIndex += 1 ;
457+ argsBox .name = argsValue ;
456458 break ;
457459 case POSITION_ARG :
458- argsBox .position = argsArray [argIndex + 1 ];
459- argIndex += 1 ;
460+ argsBox .position = argsValue ;
460461 break ;
461462 default :
462- String key = argsArray [ argIndex ] .substring (1 );
463+ String key = argsKey .substring (1 );
463464 Class <? extends CoreAnnotation <?>> annotation = AnnotationLookup .toCoreKey (key );
464465 if (annotation == null ) {
465- throw new SsurgeonParseException ("Parsing Ssurgeon args: unknown flag " + argsArray [ argIndex ] );
466+ throw new SsurgeonParseException ("Parsing Ssurgeon args: unknown flag " + argsKey );
466467 }
467- argsBox .annotations .put (key , argsArray [argIndex + 1 ]);
468- argIndex += 1 ;
468+ argsBox .annotations .put (key , argsValue );
469469 }
470470 }
471471 return argsBox ;
@@ -474,7 +474,7 @@ private static SsurgeonArgs parseArgsBox(String args) {
474474 /**
475475 * Given a string entry, converts it into a SsurgeonEdit object.
476476 */
477- public static SsurgeonEdit parseEditLine (String editLine , Language language ) {
477+ public static SsurgeonEdit parseEditLine (String editLine , Map < String , String > attributeArgs , Language language ) {
478478 try {
479479 // Extract the operation name first
480480 final String [] tuples1 = editLine .split ("\\ s+" , 2 );
@@ -492,7 +492,7 @@ public static SsurgeonEdit parseEditLine(String editLine, Language language) {
492492 }
493493
494494 // Parse the arguments based upon the type of command to execute.
495- final SsurgeonArgs argsBox = parseArgsBox (tuples1 .length == 1 ? "" : tuples1 [1 ]);
495+ final SsurgeonArgs argsBox = parseArgsBox (tuples1 .length == 1 ? "" : tuples1 [1 ], attributeArgs );
496496
497497 if (command .equalsIgnoreCase (AddDep .LABEL )) {
498498 if (argsBox .reln == null ) {
@@ -726,9 +726,23 @@ public static SsurgeonPattern ssurgeonPatternFromXML(Element elt) {
726726 for (int i =0 ; i <editNodes .getLength (); i ++) {
727727 Node node = editNodes .item (i );
728728 if (node .getNodeType () == Node .ELEMENT_NODE ) {
729+ // read all arguments such as `after=" "` off the node
730+ // this way, arguments which can't be parsed via whitespace
731+ // (especially arguments which actually contain whitespace)
732+ // can be passed to an EditLine
733+ // LinkedHashMap so we can preserve insertion order
734+ Map <String , String > attributeArgs = new LinkedHashMap <>();
735+ for (int j = 0 ; j < node .getAttributes ().getLength (); ++j ) {
736+ Node attrNode = node .getAttributes ().item (j );
737+ if (attrNode .getNodeType () == Node .ATTRIBUTE_NODE ) {
738+ Attr attr = (Attr ) attrNode ;
739+ attributeArgs .put (attr .getName (), attr .getValue ());
740+ }
741+ }
742+
729743 Element editElt = (Element ) node ;
730744 String editVal = getEltText (editElt );
731- retPattern .addEdit (Ssurgeon .parseEditLine (editVal , retPattern .getLanguage ()));
745+ retPattern .addEdit (Ssurgeon .parseEditLine (editVal , attributeArgs , retPattern .getLanguage ()));
732746 }
733747 }
734748
0 commit comments