diff --git a/pom.xml b/pom.xml
index 013b1c1..84ad971 100644
--- a/pom.xml
+++ b/pom.xml
@@ -8,14 +8,41 @@
de.uros.citlab
textalignment
- 1.0.2-SNAPSHOT
+ 1.0.3
+
- de.uros.citlab
+ de.uros.citlab
+ ../CITlabParentPom/pom.xml
parent_pom
- 1.5
+ 1.6
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+ 3.2.2
+
+
+ package
+
+ shade
+
+
+
+
+ de.uros.citlab.textalignment.AlignText
+
+
+
+
+
+
+
+
+
diff --git a/src/main/java/de/uros/citlab/textalignment/AlignText.java b/src/main/java/de/uros/citlab/textalignment/AlignText.java
new file mode 100644
index 0000000..c6e2025
--- /dev/null
+++ b/src/main/java/de/uros/citlab/textalignment/AlignText.java
@@ -0,0 +1,141 @@
+package de.uros.citlab.textalignment;
+
+import de.uros.citlab.confmat.CharMap;
+import de.uros.citlab.confmat.ConfMat;
+import de.uros.citlab.textalignment.types.LineMatch;
+//import org.junit.Assert;
+//import org.junit.Test;
+import java.io.File;
+import java.lang.NullPointerException;
+import java.io.FileNotFoundException;
+import java.util.Scanner;
+import java.util.*;
+
+/**
+ * Command-line tool: {@code AlignText <references> <recognitions> <chars>}. Every recognised line is turned
+ * into a synthetic {@link ConfMat} and aligned against the reference lines by a {@link TextAligner}.
+ */
+public class AlignText {
+    /** Fixed seed, so that the generated confidence matrices are reproducible for the same input. */
+    private static final Random RANDOM = new Random(1234);
+    private static final double PROP_NAC = 0.5;
+    private static final double DOUBLE_CHAR = 0.5;
+    private static final double VARIANCE = 0.5;
+    private static final double OFFSET_BP = 10.0;
+
+    /**
+     * Reads both files, runs the alignment and prints every matched line to standard output.
+     * @param args reference file, recognition file and the characters for the {@link CharMap}
+     * @throws IllegalArgumentException if an argument is missing or an input file cannot be opened
+     */
+    public static void main(String[] args) {
+        if (args.length < 3) {
+            throw new IllegalArgumentException("usage: AlignText <references> <recognitions> <chars>");
+        }
+        // threshold 0.1: only very trustful matches, less than 0.01 = caution
+        TextAligner textAligner = new TextAligner(" ", 4.0, 0.2, 6.0, 0);
+        List<String> references = readLines(args[0]);
+        List<String> recos = readLines(args[1]);
+        CharMap cm = getCharMap(args[2]);
+        List<ConfMat> recoConfMatsList = new ArrayList<>();
+        for (String reco : recos) {
+            recoConfMatsList.add(generateConfMat(cm, reco, RANDOM));
+        }
+        List<LineMatch> alignmentResult = textAligner.getAlignmentResult(references, recoConfMatsList);
+        int count = 0;
+        for (LineMatch match : alignmentResult) {
+            if (match != null) {
+                count++;
+            }
+        }
+        System.out.printf("Number of aligned lines : %d out of %d \n", count, alignmentResult.size());
+
+        for (int i = 0; i < alignmentResult.size(); i++) {
+            LineMatch match = alignmentResult.get(i);
+            if (match == null) {
+                continue; // null entry: this line has no match, nothing to print
+            }
+            System.out.printf("line: %d prediction: %s reference: %s confidence: %s\n", i, recos.get(i),
+                    match.getReference(), match.getConfidence());
+        }
+    }
+
+    /** Returns all lines of the file at {@code path}; throws IllegalArgumentException if it cannot be opened. */
+    private static List<String> readLines(String path) {
+        List<String> lines = new ArrayList<>();
+        try (Scanner scanner = new Scanner(new File(path))) {
+            while (scanner.hasNextLine()) {
+                lines.add(scanner.nextLine());
+            }
+        } catch (FileNotFoundException e) {
+            throw new IllegalArgumentException("cannot open input file '" + path + "'", e);
+        }
+        return lines;
+    }
+
+    /**
+     * Builds a {@link CharMap} holding every character of {@code chars}.
+     * Space, tab, apostrophe and double quote are added in any case.
+     * @param chars characters to register
+     * @return the populated character map
+     */
+    public static CharMap getCharMap(String chars) {
+        CharMap res = new CharMap();
+        for (int i = 0; i < chars.length(); i++) {
+            res.add(chars.charAt(i));
+        }
+        res.add(' ');
+        res.add("\t");
+        res.add("'");
+        res.add("\"");
+        return res;
+    }
+
+    /** Generates a synthetic {@link ConfMat} for {@code reference} with the class-level default parameters. */
+    private static ConfMat generateConfMat(CharMap cm, String reference, Random rnd) {
+        return generateConfMat(cm, reference, rnd, PROP_NAC, DOUBLE_CHAR, VARIANCE, OFFSET_BP);
+    }
+
+    /**
+     * Generates a synthetic {@link ConfMat} from a best-path string derived from {@code reference};
+     * a repeated character is always preceded by {@code CharMap.NaC}.
+     * @param propNaC    probability of an extra NaC before each character and at the very end
+     * @param doubleChar probability of a NaC after each character
+     * @param variance   factor applied to the Gaussian noise of every matrix entry
+     * @param offsetBP   value added to the best-path character's entry in each row
+     * @throws RuntimeException if {@code reference} contains a character missing from {@code cm}
+     */
+    private static ConfMat generateConfMat(CharMap cm, String reference, Random rnd, double propNaC, double doubleChar,
+            double variance, double offsetBP) {
+        StringBuilder sb = new StringBuilder();
+        char last = CharMap.NaC;
+        for (int i = 0; i < reference.length(); i++) {
+            char cur = reference.charAt(i);
+            if (cm.get(cur) == null) {
+                throw new RuntimeException("character '" + cur + "' is not in CharMap");
+            }
+            if (cur == last || rnd.nextDouble() < propNaC) {
+                sb.append(CharMap.NaC);
+            }
+            sb.append(cur);
+            if (rnd.nextDouble() < doubleChar) {
+                sb.append(CharMap.NaC);
+            }
+            last = cur;
+        }
+        if (rnd.nextDouble() < propNaC) {
+            sb.append(CharMap.NaC);
+        }
+        // BestPath ready
+        String bp = sb.toString();
+        double[][] mat = new double[bp.length()][cm.size()];
+        for (int i = 0; i < mat.length; i++) {
+            double[] vec = mat[i];
+            for (int j = 0; j < vec.length; j++) {
+                vec[j] = rnd.nextGaussian() * variance;
+            }
+            vec[cm.get(bp.charAt(i))] += offsetBP;
+        }
+        return new ConfMat(cm, mat);
+    }
+}
diff --git a/src/main/java/de/uros/citlab/textalignment/TextAligner.java b/src/main/java/de/uros/citlab/textalignment/TextAligner.java
index d64c26c..f975bc6 100644
--- a/src/main/java/de/uros/citlab/textalignment/TextAligner.java
+++ b/src/main/java/de/uros/citlab/textalignment/TextAligner.java
@@ -106,7 +106,17 @@ private void init() {
impl.addCostCalculator(new CostCalculatorSkipChar(1 + 2, 1));
}
}
+
+ setLoggingLevel(ch.qos.logback.classic.Level.INFO);
+
}
+
+
+    /**
+     * Sets the level of the root logger.
+     *
+     * <p>The level is only applied when the logger returned by SLF4J is a logback logger; with any
+     * other SLF4J binding the call does nothing instead of failing with a {@link ClassCastException}.
+     *
+     * @param level level to apply to the root logger
+     */
+    public static void setLoggingLevel(ch.qos.logback.classic.Level level) {
+        org.slf4j.Logger root = org.slf4j.LoggerFactory.getLogger(ch.qos.logback.classic.Logger.ROOT_LOGGER_NAME);
+        if (root instanceof ch.qos.logback.classic.Logger) {
+            ((ch.qos.logback.classic.Logger) root).setLevel(level);
+        }
+    }
+
public void setNacOffset(double nacOffset) {
this.nacOffset = nacOffset;
@@ -302,4 +312,4 @@ private void setRecognition(List confMats) {
}
}
-}
\ No newline at end of file
+}