diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/Graph.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/Graph.java index c8293a21..ebe264f5 100644 --- a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/Graph.java +++ b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/Graph.java @@ -69,12 +69,12 @@ public interface Graph extends AnnotationCollection { /** * Return a list of nodes that belong to the same cluster as the given startId. - * @param startNodes the start nodes + * @param start the start nodes * @param end the maximum rank of the cluster * @param threshold the clustering threshold * @return a list representing the cluster */ - Map> getAllClusters(List startNodes, int end, int threshold); + Map> getAllClusters(int start, int end, int threshold); /** * Sets the interestingness strategy which calculates the interestingness when diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/Neo4jGraph.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/Neo4jGraph.java index 6b5e2a23..943a821e 100644 --- a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/Neo4jGraph.java +++ b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/Neo4jGraph.java @@ -169,9 +169,9 @@ public int getRankFromBasePair(int base) { } @Override - public Map> getAllClusters(List startNodes, + public Map> getAllClusters(int start, int end, int threshold) { - return query(new AllClustersQuery(startNodes, end, threshold, is)); + return query(new AllClustersQuery(start, end, threshold, is)); } @Override @@ -268,8 +268,12 @@ public void setInterestingnessStrategy(InterestingnessStrategy is) { * order, to assign ranks and scores to nodes. */ protected void analyze() { - // Rank the graph. - execute(e -> new AnalyzeCommand(rootIterator()).execute(e)); + ResourceIterator roots; + try (Transaction tx = service.beginTx()) { + roots = rootIterator(); + new AnalyzeCommand(roots).execute(service); + tx.success(); + } } @Override diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/NodeLabels.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/NodeLabels.java index 8cf0c9bb..5baf00b3 100644 --- a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/NodeLabels.java +++ b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/NodeLabels.java @@ -10,5 +10,6 @@ public enum NodeLabels implements Label { ANNOTATION, DRMUTATION, SOURCE, - NODE + NODE, + BUBBLE_SOURCE, } diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/RelTypes.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/RelTypes.java index 7b3bc85e..0da49130 100644 --- a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/RelTypes.java +++ b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/RelTypes.java @@ -10,4 +10,6 @@ public enum RelTypes implements RelationshipType { ANNOTATED, NEXT, SOURCE, + MUTATION, + BUBBLE_SOURCE_OF } diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/AnalyzeCommand.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/AnalyzeCommand.java index 0ef4187d..d4e357a8 100644 --- a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/AnalyzeCommand.java +++ b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/AnalyzeCommand.java @@ -10,15 +10,12 @@ import nl.tudelft.dnainator.graph.impl.properties.SourceProperties; import nl.tudelft.dnainator.graph.interestingness.Scores; -import org.neo4j.collection.primitive.Primitive; -import org.neo4j.collection.primitive.PrimitiveLongSet; import org.neo4j.graphdb.Direction; import org.neo4j.graphdb.GraphDatabaseService; import org.neo4j.graphdb.Node; import org.neo4j.graphdb.Relationship; import org.neo4j.graphdb.ResourceIterator; -import org.neo4j.graphdb.Transaction; -import org.neo4j.graphdb.traversal.InitialBranchState.State; +import org.neo4j.graphdb.traversal.InitialBranchState; import org.neo4j.graphdb.traversal.Uniqueness; import static nl.tudelft.dnainator.graph.impl.properties.SequenceProperties.BASE_DIST; @@ -30,7 +27,6 @@ * ranks the nodes in the Neo4j database accordingly. */ public class AnalyzeCommand implements Command { - private static final int INIT_CAP = 4096; private static final String LABEL = "n"; private static final String GET_NODES_BASEDIST = "MATCH (n:" + NodeLabels.NODE.name() + ")-[:" + RelTypes.SOURCE.name() + "]-s, " @@ -55,16 +51,12 @@ public AnalyzeCommand(ResourceIterator roots) { * @param service the database service * @return a topological ordering, starting from the roots */ + @SuppressWarnings("unchecked") public Iterable topologicalOrder(GraphDatabaseService service) { - return topologicalOrder(service, Primitive.longSet()); - } - - private Iterable topologicalOrder(GraphDatabaseService service, - PrimitiveLongSet processed) { return service.traversalDescription() + // Depth first order, for creating bubbles. .depthFirst() - .expand(new TopologicalPathExpander() - , new State<>(processed, null)) + .expand(new TopologicalPathExpander(), InitialBranchState.NO_STATE) // We manage uniqueness for ourselves. .uniqueness(Uniqueness.NONE) .traverse(loop(roots)) @@ -73,18 +65,10 @@ private Iterable topologicalOrder(GraphDatabaseService service, @Override public void execute(GraphDatabaseService service) { - try ( - Transaction tx = service.beginTx(); - // Our set is located "off heap", i.e. not managed by the garbage collector. - // It is automatically closed after the try block, which frees the allocated memory. - PrimitiveLongSet processed = Primitive.offHeapLongSet(INIT_CAP) - ) { - for (Node n : topologicalOrder(service, processed)) { - rankDest(n); - } - scoreDRMutations(service); - tx.success(); + for (Node n : topologicalOrder(service)) { + rankDest(n); } + scoreDRMutations(service); } /** diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/TopologicalPathExpander.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/TopologicalPathExpander.java index ee44c515..609aaa22 100644 --- a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/TopologicalPathExpander.java +++ b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/TopologicalPathExpander.java @@ -1,7 +1,10 @@ package nl.tudelft.dnainator.graph.impl.command; +import nl.tudelft.dnainator.graph.impl.NodeLabels; import nl.tudelft.dnainator.graph.impl.RelTypes; -import org.neo4j.collection.primitive.PrimitiveLongSet; +import nl.tudelft.dnainator.graph.impl.properties.BubbleProperties; +import nl.tudelft.dnainator.graph.impl.properties.SequenceProperties; + import org.neo4j.graphdb.Direction; import org.neo4j.graphdb.Node; import org.neo4j.graphdb.Path; @@ -9,42 +12,153 @@ import org.neo4j.graphdb.Relationship; import org.neo4j.graphdb.traversal.BranchState; +import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; /** * PathExpander for determining the topological ordering. */ -public class TopologicalPathExpander implements PathExpander { - private boolean hasUnprocessedIncoming(PrimitiveLongSet processed, Node n) { +public class TopologicalPathExpander implements PathExpander { + private static final String PROCESSED = "PROCESSED"; + private Map> relIDtoSourceIDs; + private Map> bubbleSourceIDtoEndIDs; + + /** + * Constructs a new {@link TopologicalPathExpander}. + */ + public TopologicalPathExpander() { + this.relIDtoSourceIDs = new HashMap<>(); + this.bubbleSourceIDtoEndIDs = new HashMap<>(); + } + + private boolean hasUnprocessedIncoming(Node n) { Iterable in = n.getRelationships(RelTypes.NEXT, Direction.INCOMING); for (Relationship r : in) { - if (!processed.contains(r.getId())) { + if (!r.hasProperty(PROCESSED)) { return true; } } + // Clean up after ourselves. + in.forEach(rel -> rel.removeProperty(PROCESSED)); // All incoming edges have been processed. return false; } @Override public Iterable expand(Path path, - BranchState state) { + BranchState noState) { Node from = path.endNode(); + // Propagate all unclosed bubbles and the newly created ones. + Set toPropagate = getSourcesToPropagate(from); + + // For each unclosed bubble source, remove the current node from the endings and + // add outgoing nodes to the ending nodes, thereby advancing the bubble endings. + toPropagate.forEach(e -> advanceEnds(e, from)); + // Store in this node the bubbles in which it is nested. + storeOuterBubbles(from, toPropagate); + // Create a new bubblesource, that will have its own bubble endings. + createBubbleSource(from, toPropagate); + + // Encode the unclosed propagated bubbles on the edges. + from.getRelationships(RelTypes.NEXT, Direction.OUTGOING) + .forEach(out -> propagateSourceIDs(toPropagate, out)); + List expand = new LinkedList<>(); - for (Relationship r : from.getRelationships(RelTypes.NEXT, Direction.OUTGOING)) { - PrimitiveLongSet processed = state.getState(); - processed.add(r.getId()); - if (!hasUnprocessedIncoming(processed, r.getEndNode())) { + for (Relationship out : from.getRelationships(RelTypes.NEXT, Direction.OUTGOING)) { + setNumStrainsThrough(out); + out.setProperty(PROCESSED, true); + if (!hasUnprocessedIncoming(out.getEndNode())) { + createBubbleSink(out.getEndNode()); // All of the dependencies of this node have been added to the result. - expand.add(r); + expand.add(out); } } return expand; } + private void storeOuterBubbles(Node from, Set toPropagate) { + // Set the source id of the bubbles to which this node belongs. Excludes its own + // source id if it's a source. + from.setProperty(BubbleProperties.BUBBLE_SOURCE_IDS.name(), + toPropagate.stream().mapToLong(l -> l).toArray()); + } + + private Set getSourcesToPropagate(Node from) { + Iterable ins = from.getRelationships(RelTypes.NEXT, Direction.INCOMING); + + // This function accumulates unclosed bubble sources from a mapping of incoming edge ids. + Set propagatedSources = new HashSet<>(); + for (Relationship in : ins) { + propagatedSources.addAll(relIDtoSourceIDs.remove(Long.valueOf(in.getId())).stream() + .filter(source -> bubbleSourceIDtoEndIDs.get(source) != null) + .collect(Collectors.toList())); + } + return propagatedSources; + } + + private void advanceEnds(Long bubbleSource, Node endnode) { + Set pathEndIDs = bubbleSourceIDtoEndIDs.get(bubbleSource); + if (pathEndIDs != null) { + pathEndIDs.remove(Long.valueOf(endnode.getId())); + + // FIXME: we add twice here in most cases. + endnode.getRelationships(RelTypes.NEXT, Direction.OUTGOING) + .forEach(rel -> pathEndIDs.add(Long.valueOf(rel.getEndNode().getId()))); + } + } + + private void createBubbleSource(Node n, Set toPropagate) { + int outDegree = n.getDegree(RelTypes.NEXT, Direction.OUTGOING); + if (outDegree >= 2) { + Set pathEnds = new HashSet<>(outDegree); + toPropagate.add(Long.valueOf(n.getId())); + + n.addLabel(NodeLabels.BUBBLE_SOURCE); + n.getRelationships(RelTypes.NEXT, Direction.OUTGOING) + .forEach(rel -> pathEnds.add(Long.valueOf(rel.getEndNode().getId()))); + + bubbleSourceIDtoEndIDs.put(Long.valueOf(n.getId()), pathEnds); + } + } + + private void propagateSourceIDs(Set propagatedUnique, Relationship out) { + relIDtoSourceIDs.put(Long.valueOf(out.getId()), propagatedUnique); + } + + private void createBubbleSink(Node n) { + int degree = n.getDegree(RelTypes.NEXT, Direction.INCOMING); + if (degree >= 2) { + Set bubbleSourceID = new HashSet<>(); + for (Relationship in : n.getRelationships(RelTypes.NEXT, Direction.INCOMING)) { + for (Long sourceID : relIDtoSourceIDs.get(Long.valueOf(in.getId()))) { + if (bubbleSourceIDtoEndIDs.get(sourceID).size() == 1) { + bubbleSourceID.add(sourceID); + } + } + } + bubbleSourceID.forEach(id -> { + if (bubbleSourceIDtoEndIDs.get(id).size() == 1) { + bubbleSourceIDtoEndIDs.remove(id); + } + Node bubbleSource = n.getGraphDatabase().getNodeById(id.longValue()); + bubbleSource.createRelationshipTo(n, RelTypes.BUBBLE_SOURCE_OF); + }); + } + } + + private void setNumStrainsThrough(Relationship r) { + r.setProperty(SequenceProperties.EDGE_NUM_STRAINS.name(), Math.abs( + r.getStartNode().getDegree(RelTypes.SOURCE) + - r.getEndNode().getDegree(RelTypes.SOURCE))); + } + @Override - public PathExpander reverse() { + public PathExpander reverse() { throw new UnsupportedOperationException(); } diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/properties/BubbleProperties.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/properties/BubbleProperties.java new file mode 100644 index 00000000..6493a1f9 --- /dev/null +++ b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/properties/BubbleProperties.java @@ -0,0 +1,8 @@ +package nl.tudelft.dnainator.graph.impl.properties; + +/** + * Properties of nodes within bubbles. + */ +public enum BubbleProperties { + BUBBLE_SOURCE_IDS +} diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/properties/SequenceProperties.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/properties/SequenceProperties.java index 9110c1c1..48733c7a 100644 --- a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/properties/SequenceProperties.java +++ b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/properties/SequenceProperties.java @@ -10,5 +10,6 @@ public enum SequenceProperties { SEQUENCE, BASE_DIST, RANK, - INTERESTINGNESS + INTERESTINGNESS, + EDGE_NUM_STRAINS } diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/query/AllClustersQuery.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/query/AllClustersQuery.java index 7aac2fa2..52d4c265 100644 --- a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/query/AllClustersQuery.java +++ b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/query/AllClustersQuery.java @@ -2,128 +2,264 @@ import nl.tudelft.dnainator.core.EnrichedSequenceNode; import nl.tudelft.dnainator.core.impl.Cluster; +import nl.tudelft.dnainator.graph.impl.Neo4jScoreContainer; import nl.tudelft.dnainator.graph.impl.Neo4jSequenceNode; import nl.tudelft.dnainator.graph.impl.NodeLabels; import nl.tudelft.dnainator.graph.impl.RelTypes; +import nl.tudelft.dnainator.graph.impl.properties.BubbleProperties; import nl.tudelft.dnainator.graph.impl.properties.SequenceProperties; import nl.tudelft.dnainator.graph.interestingness.InterestingnessStrategy; +import org.neo4j.collection.primitive.Primitive; +import org.neo4j.collection.primitive.PrimitiveLongSet; +import org.neo4j.graphalgo.GraphAlgoFactory; import org.neo4j.graphdb.Direction; import org.neo4j.graphdb.GraphDatabaseService; import org.neo4j.graphdb.Node; -import org.neo4j.graphdb.traversal.TraversalDescription; +import org.neo4j.graphdb.Path; +import org.neo4j.graphdb.PathExpander; +import org.neo4j.graphdb.PathExpanders; +import org.neo4j.graphdb.Relationship; +import org.neo4j.graphdb.ResourceIterable; +import org.neo4j.helpers.collection.IteratorUtil; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.LinkedList; +import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.PriorityQueue; -import java.util.Queue; import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; /** * The {@link AllClustersQuery} creates {@link Cluster}s from all nodes, - * starting at the startNodes, and ending when the maximum specified start rank is reached. + * between the given ranks using the given threshold value. */ public class AllClustersQuery implements Query>> { - private Set visited; - private List startNodes; - private int threshold; + private int minRank; private int maxRank; + private int threshold; private InterestingnessStrategy is; + private Map nodesInBubbleParameters; + private List startNodes; + private Set bubbleSourcesToKeepIntact; + private PrimitiveLongSet visited; + private static final String GET_NODES_IN_BUBBLE = "match (n: " + NodeLabels.NODE.name() + ") " + + "where n." + SequenceProperties.RANK.name() + " > {sourceRank} " + + "and n." + SequenceProperties.RANK.name() + " < {sinkRank} " + + "and {sourceID} in n." + BubbleProperties.BUBBLE_SOURCE_IDS.name() + " return n"; /** * Create a new {@link AllClustersQuery}, which will:. - * - start clustering at the specified startNodes - * - stop clustering when the end rank is reached + * - get all clusters between the given ranks * - use the specified clustering threshold - * @param startNodes the start nodes + * @param minRank the minimum rank * @param maxRank the maximum rank * @param threshold the clustering threshold * @param is the interestingness strategy, which determines how the * interestingness score is calculated. */ - public AllClustersQuery(List startNodes, int maxRank, int threshold, + public AllClustersQuery(int minRank, int maxRank, int threshold, InterestingnessStrategy is) { - this.startNodes = startNodes; + this.minRank = minRank; this.maxRank = maxRank; this.threshold = threshold; - this.visited = new HashSet<>(); this.is = is; + this.nodesInBubbleParameters = new HashMap<>(2 + 1); + this.startNodes = new ArrayList<>(1); + this.bubbleSourcesToKeepIntact = new HashSet<>(); + this.visited = Primitive.longSet(); + } + + private Iterable getNodesInRank(GraphDatabaseService service, int rank) { + return IteratorUtil.loop(service.findNodes(NodeLabels.NODE, + SequenceProperties.RANK.name(), rank)); + } + + private ResourceIterable withinRange(GraphDatabaseService service, + int startRank, int endRank) { + return withinRange(service, getNodesInRank(service, startRank), endRank, + PathExpanders.forTypeAndDirection(RelTypes.NEXT, Direction.OUTGOING)); + } + + private ResourceIterable withinRange(GraphDatabaseService service, + Iterable start, int endRank, PathExpander pe) { + return service.traversalDescription() + .breadthFirst() + .expand(pe) + .evaluator(new UntilRankEvaluator(endRank)) + .traverse(start).nodes(); } @Override public Map> execute(GraphDatabaseService service) { - Queue rootClusters = new PriorityQueue<>((e1, e2) -> - e1.getStartRank() - e2.getStartRank() - ); - Map> result = new HashMap>(); - - rootClusters.addAll(clustersFrom(service, startNodes)); - - // Find adjacent clusters as long as there are root clusters in the queue - int minrank = rootClusters.stream().mapToInt(e -> e.getStartRank()).min().orElse(0); - while (!rootClusters.isEmpty()) { - Cluster c = rootClusters.poll(); - if (c.getStartRank() < minrank || c.getStartRank() > maxRank) { - continue; + // First determine which nodes are interesting. + for (Node n : withinRange(service, minRank, maxRank)) { + int interestingness = is.compute(new Neo4jScoreContainer(n)); + n.setProperty(SequenceProperties.INTERESTINGNESS.name(), interestingness); + if (interestingness > threshold) { + for (long sourceID : getBubbleIDs(n)) { + bubbleSourcesToKeepIntact.add(sourceID); + } } - result.putIfAbsent(c.getStartRank(), new ArrayList<>()); - result.get(c.getStartRank()).add(c); - - c.getNodes().forEach(sn -> { - rootClusters.addAll(clustersFrom(service, sn.getOutgoing())); - }); } + // Then cluster everything, except for the bubbles in bubbleSourcesToKeepIntact. + return cluster(service, minRank, maxRank); + } - return result; + private long[] getBubbleIDs(Node n) { + return (long[]) n.getProperty(BubbleProperties.BUBBLE_SOURCE_IDS.name()); } - private Queue clustersFrom(GraphDatabaseService service, List startNodes) { - Queue rootClusters = new LinkedList(); + private Map> cluster(GraphDatabaseService service, + int startRank, int endRank) { + return cluster(service, getNodesInRank(service, startRank), endRank); + } + + private Map> cluster(GraphDatabaseService service, + Iterable startNodes, int endRank) { + Map> result = new HashMap>(); + cluster(service, startNodes, endRank, result, new HashSet<>()); + return result; + } - for (String sn : startNodes) { - // Continue if this startNode was consumed by another cluster - if (visited.contains(sn)) { + // For debugging purposes. + private int recursionLevelGlobal = 0; + private void cluster(GraphDatabaseService service, Iterable startNodes, + int endRank, Map> acc, Set visitedSinks) { + int recursionLevel = recursionLevelGlobal;; + System.out.println("--> Begin Recursion level: " + recursionLevelGlobal++);; + for (Node n : withinRange(service, startNodes, endRank, BubbleSkipper.get())) { + if (visited.contains(n.getId())) { continue; } + visited.add(n.getId()); + if (isSource(n)) { + Node sink = getSinkFromSource(n); + if (!isSink(n)) { + putClusterInto(createSingletonCluster(service, n), acc); + } + if (!visitedSinks.contains(sink.getId())) { + visitedSinks.add(sink.getId()); + putClusterInto(createSingletonCluster(service, sink), acc); + } + if (bubbleSourcesToKeepIntact.contains(n.getId())) { + System.out.println("Intact bubble: " + n.getProperty("ID"));; + int sinkRank = (int) sink.getProperty(SequenceProperties.RANK.name()); + this.startNodes.clear(); + n.getRelationships(RelTypes.NEXT, Direction.OUTGOING) + .forEach(rel -> { + // FIXME: is this necessary? + if (rel.getEndNode() != sink) { + this.startNodes.add(rel.getEndNode()); + } + }); + cluster(service, this.startNodes, sinkRank - 1, acc, visitedSinks); + } else { + System.out.println("Collapsed bubble: " + n.getProperty("ID"));; + // Cluster the bubble. + putClusterInto(collapseBubble(service, n, sink), acc); + } + } else if (!n.hasRelationship(RelTypes.BUBBLE_SOURCE_OF)) { + System.out.println("Singleton: " + n.getProperty("ID"));; + putClusterInto(createSingletonCluster(service, n), acc); + } + } + recursionLevelGlobal = recursionLevel;; + System.out.println("--> End Recursion level: " + recursionLevel);; + } - // Otherwise get the cluster starting from this startNode - rootClusters.add(cluster(service, sn)); + private boolean isSource(Node n) { + return n.hasLabel(NodeLabels.BUBBLE_SOURCE); + } + + private boolean isSink(Node n) { + for (Relationship in : getSourcesFromSink(n)) { + if ((int) in.getStartNode().getProperty(SequenceProperties.RANK.name()) >= minRank) { + // If the source is outside of the range, pretend it's not there. + return true; + } } + return false; + } - return rootClusters; + private static Node getSinkFromSource(Node source) { + return source.getSingleRelationship(RelTypes.BUBBLE_SOURCE_OF, Direction.OUTGOING) + .getEndNode(); } - private Cluster cluster(GraphDatabaseService service, String start) { - Cluster cluster = null; - Node startNode = service.findNode(NodeLabels.NODE, SequenceProperties.ID.name(), start); - List result = new ArrayList<>(); + private static Iterable getSourcesFromSink(Node sink) { + return sink.getRelationships(RelTypes.BUBBLE_SOURCE_OF, Direction.INCOMING); + } - // A depth first traversal traveling along both incoming and outgoing edges. - TraversalDescription clusterDesc = service.traversalDescription() - .depthFirst() - .relationships(RelTypes.NEXT, Direction.BOTH) - .evaluator(new ClusterEvaluator(threshold, visited, is)); - // Traverse the cluster starting from the startNode. - int rankStart = (int) startNode.getProperty(SequenceProperties.RANK.name()); - for (Node end : clusterDesc.traverse(startNode).nodes()) { - result.add(end); + private Cluster createSingletonCluster(GraphDatabaseService service, Node n) { + EnrichedSequenceNode sn = new Neo4jSequenceNode(service, n); + return new Cluster((int) n.getProperty(SequenceProperties.RANK.name()), + Collections.singletonList(sn)); + } - // Update this cluster's start rank according to the lowest node rank. - int endRank = (int) startNode.getProperty(SequenceProperties.RANK.name()); - if (endRank < rankStart) { - rankStart = endRank; - } - } - // Might want to internally pass nodes. - List retrieve = result.stream() - .map(e -> new Neo4jSequenceNode(service, e)) + private Cluster collapseBubble(GraphDatabaseService service, + Node source, Node sink) { + int sourceRank = (int) source.getProperty(SequenceProperties.RANK.name()); + int sinkRank = (int) sink.getProperty(SequenceProperties.RANK.name()); + // Set the rank of the cluster to be in the middle. + int clusterRank = sourceRank + (sinkRank - sourceRank) / 2; + + List nodes = + nodesWithinBubble(service, sourceRank, sinkRank, source, sink) + .map(n -> new Neo4jSequenceNode(service, n)) .collect(Collectors.toList()); - cluster = new Cluster(rankStart, retrieve); + Cluster cluster = new Cluster(clusterRank, nodes); return cluster; } + + private Iterable trimPath(Path path) { + if (path.length() < 2) { + return Collections.emptyList(); + } + Iterator nodes = path.nodes().iterator(); + List res = new ArrayList<>(path.length() - 1); + nodes.next(); + for (int i = 1; i <= path.length() - 1; i++) { + res.add(nodes.next()); + } + return res; + } + + private Stream nodesWithinBubble(GraphDatabaseService service, + int sourceRank, int sinkRank, Node source, Node sink) { + if (sinkRank - sourceRank > 30) { + nodesInBubbleParameters.put("sourceRank", sourceRank); + nodesInBubbleParameters.put("sinkRank", sinkRank); + nodesInBubbleParameters.put("sourceID", source.getId()); + return stream(IteratorUtil.loop( + service.execute(GET_NODES_IN_BUBBLE, nodesInBubbleParameters).columnAs("n") + )); + } else { + return stream(GraphAlgoFactory.allSimplePaths( + PathExpanders.forTypeAndDirection(RelTypes.NEXT, Direction.OUTGOING), + sinkRank - sourceRank).findAllPaths(source, sink)) + .flatMap(path -> stream(trimPath(path))) + .distinct(); + } + } + + private void putClusterInto(Cluster c, Map> into) { + if (into.containsKey(c.getStartRank())) { + into.get(c.getStartRank()).add(c); + } else { + List cs = new ArrayList<>(); + cs.add(c); + into.put(c.getStartRank(), cs); + } + } + + private static Stream stream(Iterable in) { + // Quick utility method, for converting iterables to streams. + return StreamSupport.stream(in.spliterator(), false); + } } diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/query/BubbleSkipper.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/query/BubbleSkipper.java new file mode 100644 index 00000000..4870ae62 --- /dev/null +++ b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/query/BubbleSkipper.java @@ -0,0 +1,41 @@ +package nl.tudelft.dnainator.graph.impl.query; + +import nl.tudelft.dnainator.graph.impl.NodeLabels; +import nl.tudelft.dnainator.graph.impl.RelTypes; + +import org.neo4j.graphdb.Direction; +import org.neo4j.graphdb.Node; +import org.neo4j.graphdb.Path; +import org.neo4j.graphdb.PathExpander; +import org.neo4j.graphdb.Relationship; +import org.neo4j.graphdb.traversal.BranchState; + +/** + * A {@link PathExpander} which skips all nodes in bubbles on its path. + */ +public enum BubbleSkipper implements PathExpander { + + INSTANCE { + @Override + public Iterable expand(Path path, BranchState state) { + Node from = path.endNode(); + if (from.hasLabel(NodeLabels.BUBBLE_SOURCE)) { + return from.getRelationships(RelTypes.BUBBLE_SOURCE_OF, Direction.OUTGOING); + } else { + return from.getRelationships(RelTypes.NEXT, Direction.OUTGOING); + } + } + + @Override + public PathExpander reverse() { + throw new UnsupportedOperationException(); + } + }; + + /** + * @return The {@link BubbleSkipper} instance. + */ + public static BubbleSkipper get() { + return INSTANCE; + } +} diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/query/ClusterEvaluator.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/query/ClusterEvaluator.java deleted file mode 100644 index d971f905..00000000 --- a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/query/ClusterEvaluator.java +++ /dev/null @@ -1,66 +0,0 @@ -package nl.tudelft.dnainator.graph.impl.query; - -import nl.tudelft.dnainator.graph.impl.Neo4jScoreContainer; -import nl.tudelft.dnainator.graph.impl.properties.SequenceProperties; -import nl.tudelft.dnainator.graph.interestingness.InterestingnessStrategy; - -import org.neo4j.graphdb.Node; -import org.neo4j.graphdb.Path; -import org.neo4j.graphdb.traversal.Evaluation; -import org.neo4j.graphdb.traversal.Evaluator; - -import java.util.Set; - -import static nl.tudelft.dnainator.graph.impl.properties.SequenceProperties.ID; - -/** - * Evaluates whether a node is part of a cluster based on the given threshold. - */ -public class ClusterEvaluator implements Evaluator { - private int threshold; - private Set visited; - private InterestingnessStrategy is; - - /** - * Create a new {@link ClusterEvaluator}, which will:. - *
    - *
  • only cluster nodes that haven't been visited yet
  • - *
  • use the specified threshold
  • - *
- * @param threshold the clustering threshold - * @param visited the visited nodes - * @param is the strategy for calculating the interestingness score. - */ - public ClusterEvaluator(int threshold, Set visited, InterestingnessStrategy is) { - this.threshold = threshold; - this.visited = visited; - this.is = is; - } - - /** - * Evaluates a node and determines whether to include and / or continue. - * Continues on and returns exactly those nodes that: - *
    - *
  • haven't been visited yet and
  • - *
  • are the start node - *
      - *
    • have a sequence < threshold (and thus belong to the same cluster)
    • - *
    - *
- */ - @Override - public Evaluation evaluate(Path path) { - Node end = path.endNode(); - int score = is.compute(new Neo4jScoreContainer(end)); - end.setProperty(SequenceProperties.INTERESTINGNESS.name(), score); - String id = (String) end.getProperty(ID.name()); - - if (!visited.contains(id) - && (path.startNode().getId() == path.endNode().getId() - || score < threshold)) { - visited.add(id); - return Evaluation.INCLUDE_AND_CONTINUE; - } - return Evaluation.EXCLUDE_AND_PRUNE; - } -} \ No newline at end of file diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/query/UntilRankEvaluator.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/query/UntilRankEvaluator.java new file mode 100644 index 00000000..0a985c5c --- /dev/null +++ b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/query/UntilRankEvaluator.java @@ -0,0 +1,38 @@ +package nl.tudelft.dnainator.graph.impl.query; + +import nl.tudelft.dnainator.graph.impl.properties.SequenceProperties; + +import org.neo4j.graphdb.Node; +import org.neo4j.graphdb.Path; +import org.neo4j.graphdb.traversal.Evaluation; +import org.neo4j.graphdb.traversal.Evaluator; + +/** + * Makes the traverser go up to a certain rank, and manages uniqueness of nodes. + */ +public class UntilRankEvaluator implements Evaluator { + private int endRank; + + /** + * Constructs a new {@link UntilRankEvaluator}. + * @param endRank the rank to stop at. + */ + public UntilRankEvaluator(int endRank) { + this.endRank = endRank; + } + + private int getRank(Node n) { + return (int) n.getProperty(SequenceProperties.RANK.name()); + } + + @Override + public Evaluation evaluate(Path path) { + Node from = path.endNode(); + if (getRank(from) <= endRank) { + return Evaluation.INCLUDE_AND_CONTINUE; + } else { + return Evaluation.EXCLUDE_AND_PRUNE; + } + } + +} diff --git a/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/Neo4jClusterTest.java b/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/Neo4jClusterTest.java index 296c6c63..2982a0e9 100644 --- a/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/Neo4jClusterTest.java +++ b/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/Neo4jClusterTest.java @@ -1,6 +1,7 @@ package nl.tudelft.dnainator.graph.impl; import nl.tudelft.dnainator.annotation.impl.AnnotationCollectionImpl; +import nl.tudelft.dnainator.core.EnrichedSequenceNode; import nl.tudelft.dnainator.core.impl.Cluster; import nl.tudelft.dnainator.core.impl.SequenceNodeFactoryImpl; import nl.tudelft.dnainator.graph.interestingness.Scores; @@ -21,13 +22,12 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.util.Arrays; import java.util.List; import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; +import java.util.stream.Stream; -import static org.junit.Assert.assertEquals; +import static nl.tudelft.dnainator.graph.impl.Neo4jTestUtils.assertUnorderedIDEquals; +import static org.junit.Assert.assertTrue; /** * Test clustering in a DNA sequence graph. @@ -47,8 +47,6 @@ public static void setUp() { FileUtils.deleteRecursively(new File(DB_PATH)); nodeFile = Neo4jGraphTest.class.getResourceAsStream("/strains/cluster.node.graph"); edgeFile = Neo4jGraphTest.class.getResourceAsStream("/strains/cluster.edge.graph"); -// nodeFile = new File("10_strains_graph/simple_graph.node.graph"); -// edgeFile = new File("10_strains_graph/simple_graph.edge.graph"); NodeParser np = new NodeParserImpl(new SequenceNodeFactoryImpl(), new BufferedReader(new InputStreamReader(nodeFile, "UTF-8"))); EdgeParser ep = new EdgeParserImpl(new BufferedReader( @@ -61,38 +59,110 @@ public static void setUp() { } } + private Stream getAllNodes(Map> clusters) { + return clusters.values().stream() + .flatMap(list -> list.stream()) + .flatMap(cluster -> cluster.getNodes().stream()); + } + /** * Test returning various clusters from the sample graph. */ @Test - public void test() { - Set expected; + public void testSingleNestedBubble() { + // CHECKSTYLE.OFF: MagicNumber + Map> clusters = db.getAllClusters(0, 6, 11); + System.out.println(clusters); + assertProperClustering(clusters, 10); + testSingleNestedBubbleNodes(clusters); + // CHECKSTYLE.ON: MagicNumber + } + + private void testSingleNestedBubbleNodes(Map> clusters) { + // CHECKSTYLE.OFF: MagicNumber + // The root node is not associated with a bubble, so it should be a singleton cluster. + assertUnorderedIDEquals(Sets.newSet("0"), clusters.get(0).get(0).getNodes()); + + // first bubble is not clustered because one node has length greater than 11. + assertUnorderedIDEquals(Sets.newSet("1"), clusters.get(1).get(0).getNodes()); + + // 2 and 3 Expected on rank 1 + assertUnorderedIDEquals(Sets.newSet("2"), clusters.get(2).get(1).getNodes()); + assertUnorderedIDEquals(Sets.newSet("3"), clusters.get(2).get(0).getNodes()); + + // Source node is not collapsed. + assertUnorderedIDEquals(Sets.newSet("4"), clusters.get(3).get(0).getNodes()); + + // Collapsed bubble. + assertUnorderedIDEquals(Sets.newSet("5"), clusters.get(4).get(1).getNodes()); + assertUnorderedIDEquals(Sets.newSet("6"), clusters.get(4).get(0).getNodes()); + + // Multiple bubble sink. + assertUnorderedIDEquals(Sets.newSet("7", "20"), clusters.get(5).get(0).getNodes()); + + // Sink node is not collapsed. + assertUnorderedIDEquals(Sets.newSet("8"), clusters.get(6).get(0).getNodes()); + } + + /** + * Test the part of the graph that has multiple bubbles nested. + */ + @Test + public void testMultipleNestedBubbles() { + // CHECKSTYLE.OFF: MagicNumber + Map> clusters = db.getAllClusters(7, 13, 11); + System.out.println(clusters); + assertProperClustering(clusters, 11); + testMultipleNestedBubbleNodes(clusters); + } + + private void testMultipleNestedBubbleNodes(Map> clusters) { + // CHECKSTYLE.OFF: MagicNumber + // Source node of new bubble is not collapsed. + assertUnorderedIDEquals(Sets.newSet("9"), clusters.get(7).get(0).getNodes()); + + // Source node of nested bubble is not collapsed. + assertUnorderedIDEquals(Sets.newSet("18"), clusters.get(8).get(0).getNodes()); + assertUnorderedIDEquals(Sets.newSet("10"), clusters.get(8).get(1).getNodes()); + + // 15 and 16 have sequencelength of 8. + assertUnorderedIDEquals(Sets.newSet("16"), clusters.get(9).get(0).getNodes()); + assertUnorderedIDEquals(Sets.newSet("15"), clusters.get(9).get(1).getNodes()); + // Source node of nested nested bubble is not collapsed. + assertUnorderedIDEquals(Sets.newSet("11"), clusters.get(9).get(2).getNodes()); - List start = Arrays.asList("1"); + // 12 and 13 are not clustered, because 13 has sequencelength of 12. + assertUnorderedIDEquals(Sets.newSet("12", "13"), clusters.get(10).get(0).getNodes()); + + // 14, 17, and 19 are sink nodes, so the'yre not clustered. + assertUnorderedIDEquals(Sets.newSet("14"), clusters.get(11).get(0).getNodes()); + assertUnorderedIDEquals(Sets.newSet("17"), clusters.get(12).get(0).getNodes()); + assertUnorderedIDEquals(Sets.newSet("19"), clusters.get(13).get(0).getNodes()); + // CHECKSTYLE.ON: MagicNumber + } + + /** + * Test the entire graph. + */ + @Test + public void testEntireGraph() { // Set the interestingness strategy to return the sequence length. db.setInterestingnessStrategy(container -> container.getScore(Scores.SEQ_LENGTH)); // CHECKSTYLE.OFF: MagicNumber - Map> clusters = db.getAllClusters(start, Integer.MAX_VALUE, 11); - expected = Sets.newSet("1", "3", "4", "5", "6", "7"); - assertEquals(expected, clusters.get(0).get(0).getNodes() - .stream() - .map(sn -> sn.getId()) - .collect(Collectors.toSet())); - // 2 Expected on rank 1 - expected = Sets.newSet("2"); - assertEquals(expected, clusters.get(1).get(0).getNodes() - .stream() - .map(sn -> sn.getId()) - .collect(Collectors.toSet())); - // 8 Expected on rank 5 - expected = Sets.newSet("8"); - assertEquals(expected, clusters.get(5).get(0).getNodes() - .stream() - .map(sn -> sn.getId()) - .collect(Collectors.toSet())); + Map> clusters = db.getAllClusters(0, 13, 11); + System.out.println(clusters); + assertProperClustering(clusters, 21); + testSingleNestedBubbleNodes(clusters); + testMultipleNestedBubbleNodes(clusters); // CHECKSTYLE.ON: MagicNumber } + private void assertProperClustering(Map> clustering, int numNodes) { + // Assert that all elements occur only once, no duplicates. + assertTrue(getAllNodes(clustering).count() == getAllNodes(clustering).distinct().count()); + // Assert that no elements are missing. + assertTrue(getAllNodes(clustering).count() == numNodes); + } /** * Clean up after ourselves. * @throws IOException when the database could not be deleted diff --git a/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/Neo4jGraphTest.java b/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/Neo4jGraphTest.java index 4a843406..e6920b57 100644 --- a/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/Neo4jGraphTest.java +++ b/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/Neo4jGraphTest.java @@ -3,12 +3,12 @@ import nl.tudelft.dnainator.annotation.Annotation; import nl.tudelft.dnainator.annotation.impl.AnnotationCollectionImpl; import nl.tudelft.dnainator.annotation.impl.AnnotationImpl; -import nl.tudelft.dnainator.core.EnrichedSequenceNode; import nl.tudelft.dnainator.core.SequenceNode; import nl.tudelft.dnainator.core.impl.Edge; import nl.tudelft.dnainator.core.impl.SequenceNodeFactoryImpl; import nl.tudelft.dnainator.core.impl.SequenceNodeImpl; import nl.tudelft.dnainator.graph.impl.command.AnalyzeCommand; +import nl.tudelft.dnainator.graph.impl.properties.SequenceProperties; import nl.tudelft.dnainator.graph.query.GraphQueryDescription; import nl.tudelft.dnainator.parser.EdgeParser; import nl.tudelft.dnainator.parser.NodeParser; @@ -22,7 +22,10 @@ import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; +import org.neo4j.graphdb.Direction; +import org.neo4j.graphdb.GraphDatabaseService; import org.neo4j.graphdb.Node; +import org.neo4j.helpers.collection.IteratorUtil; import org.neo4j.io.fs.FileUtils; import java.io.BufferedReader; @@ -37,10 +40,9 @@ import java.util.HashSet; import java.util.LinkedList; import java.util.Set; -import java.util.stream.Collectors; +import static nl.tudelft.dnainator.graph.impl.Neo4jTestUtils.assertUnorderedIDEquals; import static nl.tudelft.dnainator.graph.impl.properties.SequenceProperties.ID; - import static org.hamcrest.Matchers.lessThan; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -111,9 +113,9 @@ private static File getTreeFile() throws URISyntaxException { @Test public void testNodeLookup() { // CHECKSTYLE.OFF: MagicNumber - SequenceNode node1 = new SequenceNodeImpl("2", Arrays.asList("ASDF", "ASD"), 1, 5, "TATA"); - SequenceNode node2 = new SequenceNodeImpl("3", Arrays.asList("ASDF"), 5, 9, "TATA"); - SequenceNode node3 = new SequenceNodeImpl("5", Arrays.asList("ASDF"), 4, 8, "TATA"); + SequenceNode node1 = new SequenceNodeImpl("2", Arrays.asList("A", "B", "C"), 2, 6, "TATA"); + SequenceNode node2 = new SequenceNodeImpl("3", Arrays.asList("C"), 5, 9, "TATA"); + SequenceNode node3 = new SequenceNodeImpl("5", Arrays.asList("A", "B", "C"), 4, 8, "TATA"); assertEquals(node1, db.getNode("2")); assertEquals(node2, db.getNode("3")); assertEquals(node3, db.getNode("5")); @@ -126,7 +128,8 @@ public void testNodeLookup() { @Test public void testRootLookup() { // CHECKSTYLE.OFF: MagicNumber - SequenceNode root = new SequenceNodeImpl("5", Arrays.asList("ASDF"), 4, 8, "TATA"); + SequenceNode root = new SequenceNodeImpl("1", Arrays.asList("A", "B", "C", "D"), + 1, 5, "TATA"); assertEquals(root, db.getRootNode()); // CHECKSTYLE.ON: MagicNumber } @@ -169,17 +172,26 @@ public void testTopologicalOrder() { */ @Test public void testRanks() { + // CHECKSTYLE.OFF: MagicNumber Set rank0Expect = new HashSet<>(); - Collections.addAll(rank0Expect, "7", "5", "3"); + Collections.addAll(rank0Expect, "1"); assertUnorderedIDEquals(rank0Expect, db.getRank(0)); - Set rank1Expect = new HashSet<>(); - Collections.addAll(rank1Expect, "11", "8"); + Collections.addAll(rank1Expect, "11", "2"); assertUnorderedIDEquals(rank1Expect, db.getRank(1)); - Set rank2Expect = new HashSet<>(); - Collections.addAll(rank2Expect, "2", "9", "10"); + Collections.addAll(rank2Expect, "13", "14", "15", "3", "7"); assertUnorderedIDEquals(rank2Expect, db.getRank(2)); + Set rank3Expect = new HashSet<>(); + Collections.addAll(rank3Expect, "12", "4", "8", "10"); + assertUnorderedIDEquals(rank3Expect, db.getRank(3)); + Set rank4Expect = new HashSet<>(); + Collections.addAll(rank4Expect, "5"); + assertUnorderedIDEquals(rank4Expect, db.getRank(4)); + Set rank5Expect = new HashSet<>(); + Collections.addAll(rank5Expect, "6"); + assertUnorderedIDEquals(rank5Expect, db.getRank(5)); + // CHECKSTYLE.ON: MagicNumber } /** @@ -192,7 +204,7 @@ public void testQueryRanks() { .fromRank(0) .toRank(2); Set expect = new HashSet<>(); - Collections.addAll(expect, "7", "5", "3", "11", "8"); + Collections.addAll(expect, "1", "11", "2"); assertUnorderedIDEquals(expect, db.queryNodes(qd)); } @@ -229,7 +241,7 @@ public void testQueryFilter() { .filter((sn) -> Integer.parseInt(sn.getId()) > 8); // CHECKSTYLE.ON: MagicNumber Set expect = new HashSet<>(); - Collections.addAll(expect, "9", "10", "11"); + Collections.addAll(expect, "10", "11", "12", "13", "14", "15"); assertUnorderedIDEquals(expect, db.queryNodes(qd)); } @@ -239,19 +251,19 @@ public void testQueryFilter() { @Test public void testQuerySources() { GraphQueryDescription qd = new GraphQueryDescription() - .containsSource("ASDF"); + .containsSource("A"); Set expect = new HashSet<>(); - Collections.addAll(expect, "2", "5", "3", "7", "8", "11"); + Collections.addAll(expect, "1", "2", "5", "6", "7", "8"); assertUnorderedIDEquals(expect, db.queryNodes(qd)); // Also test for multiple sources (reusing the old one) - qd = qd.containsSource("ASD"); - Collections.addAll(expect, "9", "10"); + qd = qd.containsSource("B"); + Collections.addAll(expect, "10"); assertUnorderedIDEquals(expect, db.queryNodes(qd)); // Search non-existing source. qd = new GraphQueryDescription() - .containsSource("FOO"); + .containsSource("NONEXISTINGSOURCE"); // Expect an empty result expect = new HashSet<>(); assertUnorderedIDEquals(expect, db.queryNodes(qd)); @@ -283,10 +295,29 @@ public void testGetAnnotationsRangeInclusive() { assertTrue(as.contains(last)); } - private static void assertUnorderedIDEquals(Collection expected, - Collection actual) { - assertEquals(expected.stream().collect(Collectors.toSet()), - actual.stream().map(sn -> sn.getId()).collect(Collectors.toSet())); + /** + * Test bubble creation. + */ + @Test + public void testBubbles() { + db.execute(service -> { + assertBubble(service, "1", "6"); + assertBubble(service, "2", "5"); + // Tests for one source node across multiple bubbles, not able to implement right now. + //assertBubble(service, "2", "4"); + assertBubble(service, "7", "5"); + assertBubble(service, "11", "12"); + }); + } + + private void assertBubble(GraphDatabaseService service, String source, String sink) { + Node sourceN = service.findNode(NodeLabels.BUBBLE_SOURCE, + SequenceProperties.ID.name(), source); + Node sinkN = service.findNode(NodeLabels.NODE, SequenceProperties.ID.name(), sink); + assertTrue(IteratorUtil.asCollection(sourceN.getRelationships(RelTypes.BUBBLE_SOURCE_OF, + Direction.OUTGOING)).stream() + .map(rel -> rel.getEndNode()) + .anyMatch(n -> n.getId() == sinkN.getId())); } /** diff --git a/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/Neo4jTestUtils.java b/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/Neo4jTestUtils.java new file mode 100644 index 00000000..3f01a977 --- /dev/null +++ b/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/Neo4jTestUtils.java @@ -0,0 +1,30 @@ +package nl.tudelft.dnainator.graph.impl; + +import java.util.Collection; +import java.util.stream.Collectors; + +import org.junit.Assert; + +import nl.tudelft.dnainator.core.EnrichedSequenceNode; + +/** + * Test utility methods for graph tests. + */ +public final class Neo4jTestUtils { + + private Neo4jTestUtils() { + + } + + /** + * assert in unordered manner. + * @param expected + * @param actual + */ + protected static void assertUnorderedIDEquals(Collection expected, + Collection actual) { + Assert.assertEquals(expected.stream().collect(Collectors.toSet()), + actual.stream().map(sn -> sn.getId()).collect(Collectors.toSet())); + } + +} diff --git a/dnainator-core/src/test/resources/strains/cluster.edge.graph b/dnainator-core/src/test/resources/strains/cluster.edge.graph index b70f5de4..e87b261f 100644 --- a/dnainator-core/src/test/resources/strains/cluster.edge.graph +++ b/dnainator-core/src/test/resources/strains/cluster.edge.graph @@ -1,3 +1,4 @@ +0 1 1 2 1 3 2 4 @@ -6,5 +7,21 @@ 4 6 5 8 6 7 -6 8 -7 8 \ No newline at end of file +6 20 +7 8 +20 8 +8 9 +9 10 +9 18 +18 19 +10 11 +10 15 +10 16 +11 12 +11 13 +12 14 +13 14 +14 17 +15 17 +16 17 +17 19 \ No newline at end of file diff --git a/dnainator-core/src/test/resources/strains/cluster.node.graph b/dnainator-core/src/test/resources/strains/cluster.node.graph index 1e6f2d37..0ccd6f09 100644 --- a/dnainator-core/src/test/resources/strains/cluster.node.graph +++ b/dnainator-core/src/test/resources/strains/cluster.node.graph @@ -1,3 +1,5 @@ +> 0 | LENGTH 20 | 0 | 0 +TATATATATATATATATATA > 1 | LENGTH 10 | 0 | 0 TATATATATA > 2 | LENGTH 12 | 0 | 0 @@ -6,11 +8,35 @@ TATATATATATA TATAT > 4 | LENGTH 10 | 0 | 0 TATATATATA -> 5 | LENGTH 8 | 0 | 0 -TATATATA +> 5 | LENGTH 12 | 0 | 0 +TATATATATATA > 6 | LENGTH 8 | 0 | 0 TATATATA > 7 | LENGTH 8 | 0 | 0 TATATATA +> 20 | LENGTH 8 | 0 | 0 +TATATATA > 8 | LENGTH 12 | 0 | 0 +TATATATATATA +> 9 | LENGTH 12 | 0 | 0 +TATATATATATA +> 10 | LENGTH 12 | 0 | 0 +TATATATATATA +> 11 | LENGTH 12 | 0 | 0 +TATATATATATA +> 12 | LENGTH 8 | 0 | 0 +TATATATA +> 13 | LENGTH 8 | 0 | 0 +TATATATA +> 14 | LENGTH 12 | 0 | 0 +TATATATATATA +> 15 | LENGTH 8 | 0 | 0 +TATATATA +> 16 | LENGTH 8 | 0 | 0 +TATATATA +> 17 | LENGTH 12 | 0 | 0 +TATATATATATA +> 18 | LENGTH 8 | 0 | 0 +TATATATATATA +> 19 | LENGTH 12 | 0 | 0 TATATATATATA \ No newline at end of file diff --git a/dnainator-core/src/test/resources/strains/indel.edge.graph b/dnainator-core/src/test/resources/strains/indel.edge.graph new file mode 100644 index 00000000..e685ac7d --- /dev/null +++ b/dnainator-core/src/test/resources/strains/indel.edge.graph @@ -0,0 +1,3 @@ +1 2 +1 3 +2 3 \ No newline at end of file diff --git a/dnainator-core/src/test/resources/strains/indel.node.graph b/dnainator-core/src/test/resources/strains/indel.node.graph new file mode 100644 index 00000000..e9bf4d2e --- /dev/null +++ b/dnainator-core/src/test/resources/strains/indel.node.graph @@ -0,0 +1,6 @@ +> 1 | LENGTH 10 | 0 | 0 +TATATATATA +> 2 | LENGTH 8 | 0 | 0 +TATATATA +> 3 | LENGTH 5 | 0 | 0 +TATAT \ No newline at end of file diff --git a/dnainator-core/src/test/resources/strains/topo.edge.graph b/dnainator-core/src/test/resources/strains/topo.edge.graph index 7230e1df..3c6a47f9 100644 --- a/dnainator-core/src/test/resources/strains/topo.edge.graph +++ b/dnainator-core/src/test/resources/strains/topo.edge.graph @@ -1,9 +1,18 @@ -7 11 +1 2 +2 3 +3 4 +4 5 +5 6 +2 7 7 8 -5 11 -3 8 -3 10 -11 2 -11 9 -11 10 -8 9 \ No newline at end of file +8 5 +7 10 +10 5 +1 11 +12 6 +11 13 +11 14 +13 12 +14 12 +2 15 +15 4 diff --git a/dnainator-core/src/test/resources/strains/topo.node.graph b/dnainator-core/src/test/resources/strains/topo.node.graph index d89b3d73..8bd5e44c 100644 --- a/dnainator-core/src/test/resources/strains/topo.node.graph +++ b/dnainator-core/src/test/resources/strains/topo.node.graph @@ -1,16 +1,28 @@ -> 2 | ASDF,ASD | 1 | 5 +> 1 | A,B,C,D | 1 | 5 TATA -> 9 | ASD | 2 | 6 +> 2 | A,B,C | 2 | 6 TATA -> 10 | ASD | 3 | 7 +> 3 | C | 5 | 9 TATA -> 5 | ASDF | 4 | 8 +> 4 | C | 4 | 8 TATA -> 3 | ASDF | 5 | 9 +> 5 | A,B,C | 4 | 8 TATA -> 7 | ASDF | 6 | 10 +> 6 | A,B,C,D | 6 | 10 TATA -> 11 | ASD,FDSA,ASDF | 7 | 11 +> 7 | A,B | 7 | 11 +TATA +> 8 | A | 8 | 12 +TATA +> 10 | B | 8 | 12 +TATA +> 11 | D | 8 | 12 +TATA +> 12 | D | 8 | 12 +TATA +> 13 | D | 8 | 12 +TATA +> 14 | D | 8 | 12 +TATA +> 15 | D | 8 | 12 TATA -> 8 | ASDF | 8 | 12 -TATA \ No newline at end of file diff --git a/dnainator-core/src/test/resources/strains/topo.nwk b/dnainator-core/src/test/resources/strains/topo.nwk index b39014db..13ea36d6 100644 --- a/dnainator-core/src/test/resources/strains/topo.nwk +++ b/dnainator-core/src/test/resources/strains/topo.nwk @@ -1 +1 @@ -(FDSA:0.1,(ASDF:0.2,ASD:0.3)) \ No newline at end of file +(A:0.1,(B:0.2,C:0.3)) \ No newline at end of file diff --git a/dnainator-javafx/src/main/java/nl/tudelft/dnainator/javafx/drawables/strains/Strain.java b/dnainator-javafx/src/main/java/nl/tudelft/dnainator/javafx/drawables/strains/Strain.java index e39f8397..c58f11c9 100644 --- a/dnainator-javafx/src/main/java/nl/tudelft/dnainator/javafx/drawables/strains/Strain.java +++ b/dnainator-javafx/src/main/java/nl/tudelft/dnainator/javafx/drawables/strains/Strain.java @@ -5,7 +5,6 @@ import javafx.scene.Group; import nl.tudelft.dnainator.annotation.Annotation; import nl.tudelft.dnainator.annotation.Range; -import nl.tudelft.dnainator.core.SequenceNode; import nl.tudelft.dnainator.core.impl.Cluster; import nl.tudelft.dnainator.graph.Graph; import nl.tudelft.dnainator.javafx.ColorServer; @@ -83,12 +82,8 @@ protected void loadContent(Range ranks, double zoom) { + " with zoom level " + zoom + " (" + interestingness + ": " + lastThreshold + ")"); List annotations = getSortedAnnotations(ranks); - List roots = graph.getRank(lastLoaded.getX()).stream() - .map(SequenceNode::getId) - .sorted((s1, s2) -> s1.compareTo(s2)) - .collect(Collectors.toList()); - Map> result = graph.getAllClusters(roots, lastLoaded.getY(), - lastThreshold.get()); + Map> result = graph.getAllClusters(lastLoaded.getX(), + lastLoaded.getY(), lastThreshold.get()); content.getChildren().clear(); clusters.clear();