From cd55445ec9db426e5b3bcf35833f1bfaca8371b4 Mon Sep 17 00:00:00 2001 From: Manuel Schaaf Date: Thu, 6 Nov 2025 08:46:16 +0100 Subject: [PATCH] feat: Unified source and target view setting for every driver --- .../driver/DUUIPipelineComponent.java | 110 +++++++++++++----- .../driver/DUUIUIMADriver.java | 35 +++--- .../IDUUIInstantiatedPipelineComponent.java | 107 ++++++++--------- 3 files changed, 154 insertions(+), 98 deletions(-) diff --git a/src/main/java/org/texttechnologylab/DockerUnifiedUIMAInterface/driver/DUUIPipelineComponent.java b/src/main/java/org/texttechnologylab/DockerUnifiedUIMAInterface/driver/DUUIPipelineComponent.java index 9864d683..2bdf6c6f 100644 --- a/src/main/java/org/texttechnologylab/DockerUnifiedUIMAInterface/driver/DUUIPipelineComponent.java +++ b/src/main/java/org/texttechnologylab/DockerUnifiedUIMAInterface/driver/DUUIPipelineComponent.java @@ -1,11 +1,13 @@ package org.texttechnologylab.DockerUnifiedUIMAInterface.driver; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import org.apache.commons.compress.compressors.CompressorException; import org.apache.commons.compress.compressors.CompressorInputStream; import org.apache.commons.compress.compressors.CompressorOutputStream; import org.apache.commons.compress.compressors.CompressorStreamFactory; import org.apache.uima.analysis_engine.AnalysisEngineDescription; +import org.apache.uima.cas.CAS; import org.apache.uima.fit.factory.AnalysisEngineFactory; import org.apache.uima.util.InvalidXMLException; import org.json.JSONArray; @@ -28,8 +30,8 @@ * @author Alexander Leonhardt */ public class DUUIPipelineComponent { - private HashMap _options; - private HashMap _parameters; + private Map _options; + private Map _parameters; private AnalysisEngineDescription _engine; private String _finalizedEncoded; @@ -65,7 +67,7 @@ public class DUUIPipelineComponent { private static String versionInformation = "version"; private static String writeToViewName 
= "uimaViewName"; - private static String initialViewFromInitialViewName = "uimaViewInitializeFromInitial"; + private static String initializeTargetView = "uimaViewInitializeFromInitial"; private static String componentName = "name"; @@ -114,6 +116,9 @@ public void finalizeComponent() throws CompressorException, IOException, SAXExce cos.close(); _finalizedEncoded = Base64.getEncoder().encodeToString(out.toByteArray()); _finalizedEncodedHash = _finalizedEncoded.hashCode(); + + _options = ImmutableMap.copyOf(_options); + _parameters = ImmutableMap.copyOf(_parameters); } public String getFinalizedRepresentation() { @@ -466,39 +471,70 @@ public Boolean getDockerImageFetching(Boolean defaultValue) { return Boolean.parseBoolean(result); } + /** + * @deprecated Use {@link #withTargetView(String)} instead + */ + @Deprecated public DUUIPipelineComponent withWriteToView(String viewName) { - return withWriteToView(viewName,false); + System.err.printf("[DEPRECATED] DUUIPipelineComponent.withWriteToView(String) is deprecated, use withTargetView(String) instead.%n"); + this.withTargetView(viewName); + return this; } - public DUUIPipelineComponent withWriteToView(String viewName, boolean createViewFromInitialView) { - if(_finalizedEncoded!=null) { - throw new RuntimeException("DUUIPipelineComponent has already been finalized, it is immutable now!"); + /** + * @deprecated Use {@link #withTargetView(String)} and {@link #withInitializeTargetView(boolean)} instead + */ + @Deprecated + public DUUIPipelineComponent withWriteToView(String viewName, boolean initializeTargetView) { + System.err.printf("[DEPRECATED] DUUIPipelineComponent.withWriteToView(String, boolean) is deprecated, use withTargetView(String) and withInitializeTargetView(boolean) instead.%n"); + if (viewName == null) { + this.withTargetView(CAS.NAME_DEFAULT_SOFA); + this.withInitializeTargetView(false); + return this; } + this.withTargetView(viewName); + this.withInitializeTargetView(initializeTargetView); + return this; 
+ } - if(viewName==null) { - _options.remove(writeToViewName); - _options.remove(initialViewFromInitialViewName); - return this; + /** + * If set {@code true} and the configured {@link #withTargetView(String) target view} does not exist, + * the {@code Sofa}s {@link org.apache.uima.jcas.JCas#setDocumentText(String) document text}, + * {@link org.apache.uima.jcas.JCas#setDocumentLanguage(String) document language} from the + * {@link org.apache.uima.jcas.tcas.DocumentAnnotation DocumentAnnotation}, and—if present—the + * {@link de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData DocumentMetaData} + * will be copied from the {@link #withSourceView(String) source view} ({@code _InitialView} by default). + */ + public DUUIPipelineComponent withInitializeTargetView(boolean bool) { + if(_finalizedEncoded!=null) { + throw new RuntimeException("DUUIPipelineComponent has already been finalized, it is immutable now!"); } - _options.put(writeToViewName,viewName); - _options.put(initialViewFromInitialViewName,String.valueOf(createViewFromInitialView)); + _options.put(initializeTargetView, String.valueOf(bool)); return this; } - public Boolean getCreateViewFromInitialView() { - String value = _options.get(initialViewFromInitialViewName); - if(value == null) return null; + public Boolean getInitializeTargetView() { + String value = _options.get(initializeTargetView); + if (value == null) return false; return Boolean.valueOf(value); } + /** + * @deprecated Use {@link #getTargetView()} instead + */ + @Deprecated public String getViewName() { - return getViewName(null); + System.err.printf("[DEPRECATED] DUUIPipelineComponent.getViewName() is deprecated, use getTargetView() instead.%n"); + return this.getTargetView(); } + /** + * @deprecated Use {@link #getTargetView()} instead + */ + @Deprecated public String getViewName(String defaultValue) { - String value = _options.get(writeToViewName); - if(value==null) return defaultValue; - return value; + 
System.err.printf("[DEPRECATED] DUUIPipelineComponent.getViewName(String) is deprecated, use getTargetView() instead.%n"); + return _options.getOrDefault(targetView, defaultValue); } public String toJson() { @@ -538,24 +574,44 @@ public DUUIPipelineComponent withParameter(String key, String value) { return this; } + /** + * Sets source and target view to {@code viewName}. + */ public DUUIPipelineComponent withView(String viewName){ + if (_finalizedEncoded != null) { + throw new RuntimeException("DUUIPipelineComponent has already been finalized, it is immutable now!"); + } withSourceView(viewName); withTargetView(viewName); return this; } public DUUIPipelineComponent withSourceView(String viewName) { + if (_finalizedEncoded != null) { + throw new RuntimeException("DUUIPipelineComponent has already been finalized, it is immutable now!"); + } + if (viewName == null) { + viewName = CAS.NAME_DEFAULT_SOFA; + } _options.put(sourceView, viewName); return this; } - public DUUIPipelineComponent withTargetView(String viewName) { + if (_finalizedEncoded != null) { + throw new RuntimeException("DUUIPipelineComponent has already been finalized, it is immutable now!"); + } + if (viewName == null) { + viewName = CAS.NAME_DEFAULT_SOFA; + } _options.put(targetView, viewName); return this; } public DUUIPipelineComponent withTimeout(long lLong) { + if (_finalizedEncoded != null) { + throw new RuntimeException("DUUIPipelineComponent has already been finalized, it is immutable now!"); + } _parameters.put(timeout, String.valueOf(lLong)); return this; } @@ -634,19 +690,11 @@ public final Map getParameters() { } public String getSourceView() { - String result = _options.get(sourceView); - if(result == null) { - return "_InitialView"; - } - return result; + return _options.getOrDefault(sourceView, CAS.NAME_DEFAULT_SOFA); } public String getTargetView() { - String result = _options.get(targetView); - if(result == null) { - return "_InitialView"; - } - return result; + return 
_options.getOrDefault(targetView, CAS.NAME_DEFAULT_SOFA); } public DUUIPipelineComponent clearParameters() { diff --git a/src/main/java/org/texttechnologylab/DockerUnifiedUIMAInterface/driver/DUUIUIMADriver.java b/src/main/java/org/texttechnologylab/DockerUnifiedUIMAInterface/driver/DUUIUIMADriver.java index 746f511a..484859ba 100644 --- a/src/main/java/org/texttechnologylab/DockerUnifiedUIMAInterface/driver/DUUIUIMADriver.java +++ b/src/main/java/org/texttechnologylab/DockerUnifiedUIMAInterface/driver/DUUIUIMADriver.java @@ -1,11 +1,11 @@ package org.texttechnologylab.DockerUnifiedUIMAInterface.driver; +import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; import org.apache.commons.compress.compressors.CompressorException; import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.uima.UIMAException; import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.analysis_engine.AnalysisEngineDescription; -import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.cas.CASException; import org.apache.uima.cas.CASRuntimeException; import org.apache.uima.fit.factory.AnalysisEngineFactory; @@ -16,6 +16,7 @@ import org.apache.uima.resource.metadata.ConfigurationParameter; import org.apache.uima.resource.metadata.NameValuePair; import org.apache.uima.resource.metadata.TypeSystemDescription; +import org.apache.uima.util.CasCopier; import org.apache.uima.util.InvalidXMLException; import org.texttechnologylab.DockerUnifiedUIMAInterface.DUUIComposer; import org.texttechnologylab.DockerUnifiedUIMAInterface.exception.PipelineComponentException; @@ -374,18 +375,24 @@ public void run(String uuid, JCas aCas, DUUIPipelineDocumentPerformance perf, DU long mutexEnd = System.nanoTime(); try { long annotatorStart = mutexEnd; - JCas jc; - String viewName = component.getPipelineComponent().getViewName(); - if (viewName == null) { - jc = aCas; - } else { + String sourceViewName = 
component.getPipelineComponent().getSourceView(); + JCas sourceView = aCas.getView(sourceViewName); + + JCas targetView = sourceView; + String targetViewName = component.getPipelineComponent().getTargetView(); + if (targetViewName != null && !Objects.equals(sourceViewName, targetViewName)) { try { - jc = aCas.getView(viewName); + targetView = sourceView.getView(targetViewName); } catch (CASException | CASRuntimeException e) { - if (component.getPipelineComponent().getCreateViewFromInitialView()) { - jc = aCas.createView(viewName); - jc.setDocumentText(aCas.getDocumentText()); - jc.setDocumentLanguage(aCas.getDocumentLanguage()); + if (component.getPipelineComponent().getInitializeTargetView()) { /* create the view only when it is going to be initialized, so the rethrow below leaves the CAS untouched */ + targetView = aCas.createView(targetViewName); + targetView.setDocumentText(sourceView.getDocumentText()); + targetView.setDocumentLanguage(sourceView.getDocumentLanguage()); + try { + DocumentMetaData documentMetaData = DocumentMetaData.get(sourceView); + CasCopier casCopier = new CasCopier(sourceView.getCas(), targetView.getCas()); + casCopier.copyFs(documentMetaData).addToIndexes(); + } catch (IllegalArgumentException ignored) { /* best effort: DocumentMetaData is optional, skip the copy when the source view has none */ } } else { throw e; } @@ -393,15 +400,15 @@ public void run(String uuid, JCas aCas, DUUIPipelineDocumentPerformance perf, DU } // if (composer.shouldShutdown()) return; - engine.process(jc); + engine.process(targetView); long annotatorEnd = System.nanoTime(); - ReproducibleAnnotation ann = new ReproducibleAnnotation(jc); + ReproducibleAnnotation ann = new ReproducibleAnnotation(targetView); ann.setDescription(component.getPipelineComponent().getFinalizedRepresentation()); ann.setCompression(DUUIPipelineComponent.compressionMethod); ann.setTimestamp(System.nanoTime()); ann.setPipelineName(perf.getRunKey()); ann.addToIndexes(); - perf.addData(0, 0, annotatorEnd - annotatorStart, mutexEnd - mutexStart, annotatorEnd - mutexStart, String.valueOf(component.getPipelineComponent().getFinalizedRepresentationHash()), 0, jc, null); + perf.addData(0, 0, 
annotatorEnd - annotatorStart, mutexEnd - mutexStart, annotatorEnd - mutexStart, String.valueOf(component.getPipelineComponent().getFinalizedRepresentationHash()), 0, targetView, null); } catch (Exception e) { // track error docs diff --git a/src/main/java/org/texttechnologylab/DockerUnifiedUIMAInterface/driver/IDUUIInstantiatedPipelineComponent.java b/src/main/java/org/texttechnologylab/DockerUnifiedUIMAInterface/driver/IDUUIInstantiatedPipelineComponent.java index 9aa07a53..b0f9aeb1 100644 --- a/src/main/java/org/texttechnologylab/DockerUnifiedUIMAInterface/driver/IDUUIInstantiatedPipelineComponent.java +++ b/src/main/java/org/texttechnologylab/DockerUnifiedUIMAInterface/driver/IDUUIInstantiatedPipelineComponent.java @@ -4,10 +4,12 @@ import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.uima.cas.CASException; +import org.apache.uima.cas.CASRuntimeException; import org.apache.uima.fit.factory.TypeSystemDescriptionFactory; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.resource.metadata.TypeSystemDescription; +import org.apache.uima.util.CasCopier; import org.javatuples.Triplet; import org.texttechnologylab.DockerUnifiedUIMAInterface.DUUIComposer; import org.texttechnologylab.DockerUnifiedUIMAInterface.IDUUICommunicationLayer; @@ -28,6 +30,7 @@ import java.time.Duration; import java.util.List; import java.util.Map; +import java.util.Objects; /** * The interface for the instance of each component that is executed in a pipeline. 
@@ -110,7 +113,7 @@ public static TypeSystemDescription getTypesystem(String uuid, IDUUIInstantiated * @throws CASException * @throws PipelineComponentException */ - public static void process(JCas jc, IDUUIInstantiatedPipelineComponent comp, DUUIPipelineDocumentPerformance perf) throws CASException, PipelineComponentException { + public static void process(final JCas jc, IDUUIInstantiatedPipelineComponent comp, DUUIPipelineDocumentPerformance perf) throws CASException, PipelineComponentException { Triplet queue = comp.getComponent(); IDUUICommunicationLayer layer = queue.getValue0().getCommunicationLayer(); @@ -118,45 +121,41 @@ public static void process(JCas jc, IDUUIInstantiatedPipelineComponent comp, DUU try { DUUIPipelineComponent pipelineComponent = comp.getPipelineComponent(); - String viewName = pipelineComponent.getViewName(); - JCas viewJc; - if(viewName == null) { - viewJc = jc; - } - else { + + final String sourceViewName = pipelineComponent.getSourceView(); + final JCas sourceView = jc.getView(sourceViewName); + + final String targetViewName = pipelineComponent.getTargetView(); + JCas targetView = sourceView; + if (targetViewName != null && !Objects.equals(sourceViewName, targetViewName)) { try { - viewJc = jc.getView(viewName); - } - catch(CASException e) { - if(pipelineComponent.getCreateViewFromInitialView()) { - viewJc = jc.createView(viewName); - viewJc.setDocumentText(jc.getDocumentText()); - viewJc.setDocumentLanguage(jc.getDocumentLanguage()); - } - else { + targetView = jc.getView(targetViewName); + } catch (CASException | CASRuntimeException e) { + if (pipelineComponent.getInitializeTargetView()) { /* create the view only when it is going to be initialized, so the rethrow below leaves the CAS untouched */ + targetView = jc.createView(targetViewName); + targetView.setDocumentText(sourceView.getDocumentText()); + targetView.setDocumentLanguage(sourceView.getDocumentLanguage()); + try { + DocumentMetaData documentMetaData = DocumentMetaData.get(sourceView); + CasCopier casCopier = new CasCopier(sourceView.getCas(), targetView.getCas()); + 
casCopier.copyFs(documentMetaData).addToIndexes(); + } catch (IllegalArgumentException ignored) { /* best effort: DocumentMetaData is optional, skip the copy when the source view has none */ } + } else { throw e; } } } if (layer.supportsProcess()) { - JCas sourceCas = viewJc.getView(comp.getSourceView()); - JCas targetCas; - try { - targetCas = viewJc.getView(comp.getTargetView()); - } catch (CASException e) { - targetCas = viewJc.createView(comp.getTargetView()); - } - layer.process( - sourceCas, + sourceView, new DUUIHttpRequestHandler(_client, queue.getValue0().generateURL(), pipelineComponent.getTimeout()), comp.getParameters(), - targetCas + targetView ); ReproducibleAnnotation ann = new ReproducibleAnnotation(jc); - ann.setDescription(comp.getPipelineComponent().getFinalizedRepresentation()); + ann.setDescription(pipelineComponent.getFinalizedRepresentation()); ann.setCompression(DUUIPipelineComponent.compressionMethod); ann.setTimestamp(System.nanoTime()); ann.setPipelineName(perf.getRunKey()); @@ -168,7 +167,7 @@ public static void process(JCas jc, IDUUIInstantiatedPipelineComponent comp, DUU ByteArrayOutputStream out = new ByteArrayOutputStream(1024*1024); // Invoke Lua serialize() - layer.serialize(viewJc,out,comp.getParameters(), comp.getSourceView()); + layer.serialize(jc, out, comp.getParameters(), sourceViewName); byte[] ok = out.toByteArray(); long sizeArray = ok.length; @@ -183,7 +182,7 @@ public static void process(JCas jc, IDUUIInstantiatedPipelineComponent comp, DUU try { HttpRequest request = HttpRequest.newBuilder() .uri(URI.create(queue.getValue0().generateURL() + DUUIComposer.V1_COMPONENT_ENDPOINT_PROCESS)) - .timeout(Duration.ofSeconds(comp.getPipelineComponent().getTimeout())) + .timeout(Duration.ofSeconds(pipelineComponent.getTimeout())) .POST(HttpRequest.BodyPublishers.ofByteArray(ok)) .version(HttpClient.Version.HTTP_1_1) .build(); @@ -194,7 +193,7 @@ public static void process(JCas jc, IDUUIInstantiatedPipelineComponent comp, DUU // e.printStackTrace(); System.out.printf("Cannot reach endpoint trying again 
%d/%d...\n",tries+1,postTries); try { - Thread.sleep(comp.getPipelineComponent().getTimeout()); + Thread.sleep(pipelineComponent.getTimeout()); } catch (InterruptedException ex) { throw new RuntimeException(ex); } @@ -213,17 +212,17 @@ public static void process(JCas jc, IDUUIInstantiatedPipelineComponent comp, DUU long annotatorEnd = System.nanoTime(); long deserializeStart = annotatorEnd; - layer.deserialize(viewJc, st, comp.getTargetView()); + layer.deserialize(jc, st, comp.getTargetView()); long deserializeEnd = System.nanoTime(); ReproducibleAnnotation ann = new ReproducibleAnnotation(jc); - ann.setDescription(comp.getPipelineComponent().getFinalizedRepresentation()); + ann.setDescription(pipelineComponent.getFinalizedRepresentation()); ann.setCompression(DUUIPipelineComponent.compressionMethod); ann.setTimestamp(System.nanoTime()); ann.setPipelineName(perf.getRunKey()); ann.addToIndexes(); - perf.addData(serializeEnd-serializeStart,deserializeEnd-deserializeStart,annotatorEnd-annotatorStart,queue.getValue2()-queue.getValue1(),deserializeEnd-queue.getValue1(), String.valueOf(comp.getPipelineComponent().getFinalizedRepresentationHash()), sizeArray, jc, null); + perf.addData(serializeEnd-serializeStart,deserializeEnd-deserializeStart,annotatorEnd-annotatorStart,queue.getValue2()-queue.getValue1(),deserializeEnd-queue.getValue1(), String.valueOf(pipelineComponent.getFinalizedRepresentationHash()), sizeArray, jc, null); } else { ByteArrayInputStream st = new ByteArrayInputStream(resp.body()); @@ -238,7 +237,7 @@ public static void process(JCas jc, IDUUIInstantiatedPipelineComponent comp, DUU String error = "Expected response 200, got " + resp.statusCode() + ": " + responseBody; - perf.addData(serializeEnd - serializeStart, deserializeEnd - deserializeStart, annotatorEnd - annotatorStart, queue.getValue2() - queue.getValue1(), deserializeEnd - queue.getValue1(), String.valueOf(comp.getPipelineComponent().getFinalizedRepresentationHash()), sizeArray, jc, error); + 
perf.addData(serializeEnd - serializeStart, deserializeEnd - deserializeStart, annotatorEnd - annotatorStart, queue.getValue2() - queue.getValue1(), deserializeEnd - queue.getValue1(), String.valueOf(pipelineComponent.getFinalizedRepresentationHash()), sizeArray, jc, error); } if (!pipelineComponent.getIgnoringHTTP200Error()) { @@ -291,28 +290,32 @@ public static void process_handler(JCas jc, DUUIPipelineComponent pipelineComponent = comp.getPipelineComponent(); - String viewName = pipelineComponent.getViewName(); - JCas viewJc; - if(viewName == null) { - viewJc = jc; - } - else { + final String sourceViewName = pipelineComponent.getSourceView(); + final JCas sourceView = jc.getView(sourceViewName); + + final String targetViewName = pipelineComponent.getTargetView(); + JCas targetView = sourceView; + if (targetViewName != null && !Objects.equals(sourceViewName, targetViewName)) { try { - viewJc = jc.getView(viewName); - } - catch(CASException e) { - if(pipelineComponent.getCreateViewFromInitialView()) { - viewJc = jc.createView(viewName); - viewJc.setDocumentText(jc.getDocumentText()); - viewJc.setDocumentLanguage(jc.getDocumentLanguage()); - } - else { + targetView = jc.getView(targetViewName); + } catch (CASException | CASRuntimeException e) { + if (pipelineComponent.getInitializeTargetView()) { /* create the view only when it is going to be initialized, so the rethrow below leaves the CAS untouched */ + targetView = jc.createView(targetViewName); + targetView.setDocumentText(sourceView.getDocumentText()); + targetView.setDocumentLanguage(sourceView.getDocumentLanguage()); + try { + DocumentMetaData documentMetaData = DocumentMetaData.get(sourceView); + CasCopier casCopier = new CasCopier(sourceView.getCas(), targetView.getCas()); + casCopier.copyFs(documentMetaData).addToIndexes(); + } catch (IllegalArgumentException ignored) { /* best effort: DocumentMetaData is optional, skip the copy when the source view has none */ } + } else { throw e; } } } + // lua serialize call() - layer.serialize(viewJc,out,comp.getParameters(), comp.getSourceView()); + layer.serialize(jc,out,comp.getParameters(), comp.getSourceView()); // ok is the message. 
byte[] ok = out.toByteArray(); @@ -324,8 +327,6 @@ public static void process_handler(JCas jc, if (handler.getClass() == DUUIWebsocketAlt.class){ String error = null; - JCas finalViewJc = viewJc; - List results = handler.send(ok); long annotatorEnd = System.nanoTime(); @@ -340,7 +341,7 @@ public static void process_handler(JCas jc, * Merging results before deserializing. */ result = layer.merge(results); - layer.deserialize(finalViewJc, result, comp.getTargetView()); + layer.deserialize(jc, result, comp.getTargetView()); } catch(Exception e) { e.printStackTrace();