package nl.knaw.huygens.alexandria.service;
/*
* #%L
* alexandria-service
* =======
* Copyright (C) 2015 - 2017 Huygens ING (KNAW)
* =======
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-3.0.html>.
* #L%
*/
import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.joining;
import static java.util.stream.Collectors.toList;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.tinkerpop.gremlin.process.traversal.Order;
import org.apache.tinkerpop.gremlin.process.traversal.P;
import org.apache.tinkerpop.gremlin.process.traversal.Traverser;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
import org.apache.tinkerpop.gremlin.structure.Direction;
import org.apache.tinkerpop.gremlin.structure.Edge;
import org.apache.tinkerpop.gremlin.structure.T;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import org.apache.tinkerpop.gremlin.structure.VertexProperty;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import nl.knaw.huygens.alexandria.api.model.text.TextRangeAnnotation;
import nl.knaw.huygens.alexandria.api.model.text.TextRangeAnnotation.AbsolutePosition;
import nl.knaw.huygens.alexandria.storage.EdgeLabels;
import nl.knaw.huygens.alexandria.storage.Storage;
import nl.knaw.huygens.alexandria.storage.VertexLabels;
import nl.knaw.huygens.alexandria.storage.frames.TextRangeAnnotationVF;
import nl.knaw.huygens.alexandria.textgraph.ParseResult;
import nl.knaw.huygens.alexandria.textgraph.TextAnnotation;
import nl.knaw.huygens.alexandria.textgraph.TextGraphSegment;
import nl.knaw.huygens.alexandria.textgraph.XmlAnnotation;
import nl.knaw.huygens.alexandria.util.StreamUtil;
import peapod.FramedGraphTraversal;
public class TextGraphService {
// Class-wide SLF4J logger.
private static final Logger LOG = LoggerFactory.getLogger(TextGraphService.class);
// Graph storage used by every method of this class, including the static helpers and the nested
// insertion context. NOTE(review): the field is static but is assigned by the instance constructor,
// so the most recently constructed TextGraphService decides which Storage all instances use.
private static Storage storage;
/**
 * Creates the service and registers the storage it works on.
 * <p>
 * The storage is written to a static field, so constructing another instance replaces the storage
 * used by earlier instances as well.
 *
 * @param storage
 *          the graph storage to read from and write to
 */
public TextGraphService(Storage storage) {
TextGraphService.storage = storage;
}
/**
 * Stores the parsed text of a resource as a text graph, inside a single storage transaction.
 * <p>
 * A new text graph vertex is attached to the resource vertex, the resource is flagged with
 * {@code hasText = true}, the text segments and xml annotations of the parse result are stored
 * below the text graph vertex, and finally the resource is reindexed.
 *
 * @param resourceUUID
 *          identifier of the resource the text belongs to
 * @param result
 *          the parse result providing the text segments and xml annotations
 */
public void storeTextGraph(UUID resourceUUID, ParseResult result) {
  storage.runInTransaction(() -> {
    Vertex resourceVertex = getResourceVertex(resourceUUID);
    Vertex textGraphVertex = storage.addVertex(T.label, VertexLabels.TEXTGRAPH);
    resourceVertex.addEdge(EdgeLabels.HAS_TEXTGRAPH, textGraphVertex);
    resourceVertex.property("hasText", true);

    List<Vertex> segmentVertices = storeTextSegments(result.getTextSegments(), textGraphVertex);
    storeTextAnnotations(result.getXmlAnnotations(), textGraphVertex, segmentVertices);

    reindex(resourceUUID);
  });
}
/**
 * Streams the text graph segments of a resource, in text segment order.
 *
 * @param resourceUUID
 *          identifier of the resource
 * @param orderedLayerDefinitions
 *          lists of annotation names, one list per layer, passed on to the segment conversion
 * @return a stream with one {@link TextGraphSegment} per text segment vertex
 */
public Stream<TextGraphSegment> getTextGraphSegmentStream(UUID resourceUUID, List<List<String>> orderedLayerDefinitions) {
  Stream<Vertex> segmentVertices = getTextSegmentVertexStream(resourceUUID);
  return segmentVertices.map(segmentVertex -> toTextGraphSegment(segmentVertex, orderedLayerDefinitions));
}
/**
 * Streams the text annotations of a resource, following the annotation chain from the first annotation.
 *
 * @param resourceUUID
 *          identifier of the resource
 * @return a stream with one {@link TextAnnotation} per text annotation vertex
 */
public Stream<TextAnnotation> getTextAnnotationStream(UUID resourceUUID) {
  Stream<Vertex> annotationVertices = getTextAnnotationVertexStream(resourceUUID);
  return annotationVertices.map(annotationVertex -> toTextAnnotation(annotationVertex));
}
/**
 * Returns the first vertex produced by the resource traversal for the given identifier.
 * The traversal's {@code next()} is called unguarded, so a missing resource makes it fail.
 */
private Vertex getResourceVertex(UUID resourceUUID) {
  GraphTraversal<Vertex, Vertex> resourceTraversal = getVertexTraversalFromResource(resourceUUID);
  return resourceTraversal.next();
}
/**
 * Builds a traversal over the resource vertices whose identifier property equals the given UUID.
 */
private GraphTraversal<Vertex, Vertex> getVertexTraversalFromResource(UUID resourceUUID) {
  GraphTraversal<Vertex, Vertex> resourceVertices = storage.getResourceVertexTraversal();
  return resourceVertices.has(Storage.IDENTIFIER_PROPERTY, resourceUUID.toString());
}
/**
 * Streams the text segment vertices of a resource by starting at the segment the text graph points
 * to with FIRST_TEXT_SEGMENT and following the outgoing NEXT edges until there is none left.
 * <p>
 * The first segment is looked up eagerly with an unguarded {@code next()}, so this method fails
 * when the resource has no text graph or the text graph has no first text segment.
 *
 * @param resourceUUID
 *          identifier of the resource
 * @return the text segment vertices, in chain order
 */
public Stream<Vertex> getTextSegmentVertexStream(UUID resourceUUID) {
  Iterator<Vertex> textSegmentIterator = new Iterator<Vertex>() {
    // the vertex that the following next() call will return; null once the chain is exhausted
    Vertex textSegment = getVertexTraversalFromResource(resourceUUID)//
        .out(EdgeLabels.HAS_TEXTGRAPH)//
        .out(EdgeLabels.FIRST_TEXT_SEGMENT)//
        .next();// because there can be only one

    @Override
    public boolean hasNext() {
      return textSegment != null;
    }

    @Override
    public Vertex next() {
      if (textSegment == null) {
        // honour the Iterator contract instead of failing with a NullPointerException below
        throw new java.util.NoSuchElementException("no more text segments");
      }
      Vertex next = textSegment;
      Iterator<Vertex> nextVertices = textSegment.vertices(Direction.OUT, EdgeLabels.NEXT);
      textSegment = nextVertices.hasNext() ? nextVertices.next() : null;
      return next;
    }
  };
  return StreamUtil.stream(textSegmentIterator);
}
/**
 * Streams the text annotation vertices of a resource by starting at the annotation the text graph
 * points to with FIRST_ANNOTATION and following the outgoing NEXT edges until there is none left.
 * <p>
 * The first annotation is looked up eagerly with an unguarded {@code next()}, so this method fails
 * when the resource has no text graph or the text graph has no first annotation.
 *
 * @param resourceUUID
 *          identifier of the resource
 * @return the text annotation vertices, in chain order
 */
private Stream<Vertex> getTextAnnotationVertexStream(UUID resourceUUID) {
  Iterator<Vertex> textAnnotationIterator = new Iterator<Vertex>() {
    // the vertex that the following next() call will return; null once the chain is exhausted
    Vertex textAnnotationVertex = getVertexTraversalFromResource(resourceUUID)//
        .out(EdgeLabels.HAS_TEXTGRAPH)//
        .out(EdgeLabels.FIRST_ANNOTATION)//
        .next();// because there can only be one

    @Override
    public boolean hasNext() {
      return textAnnotationVertex != null;
    }

    @Override
    public Vertex next() {
      if (textAnnotationVertex == null) {
        // honour the Iterator contract instead of failing with a NullPointerException below
        throw new java.util.NoSuchElementException("no more text annotations");
      }
      Vertex next = textAnnotationVertex;
      Iterator<Vertex> nextVertices = textAnnotationVertex.vertices(Direction.OUT, EdgeLabels.NEXT);
      textAnnotationVertex = nextVertices.hasNext() ? nextVertices.next() : null;
      return next;
    }
  };
  return StreamUtil.stream(textAnnotationIterator);
}
/**
 * Writes the name, attributes and depth of the given text annotation to its stored vertex.
 *
 * @param textAnnotation
 *          the annotation to persist; its id is used to look up the vertex
 */
public void updateTextAnnotation(TextAnnotation textAnnotation) {
  update(getTextAnnotationVertex(textAnnotation), textAnnotation);
}
/**
 * Stores a new child annotation that covers exactly the same text segments as the given parent
 * annotation, and places it directly after the parent in the annotation chain.
 * <p>
 * The child is always linked behind the parent, also when the parent is the last annotation of the
 * chain (previously the child was left unlinked in that case).
 *
 * @param parentTextAnnotation
 *          the stored annotation whose content is wrapped; its id and depth are used
 * @param newChildTextAnnotation
 *          the annotation to store as the new child
 */
public void wrapContentInChildTextAnnotation(TextAnnotation parentTextAnnotation, TextAnnotation newChildTextAnnotation) {
  Vertex parentVertex = getTextAnnotationVertex(parentTextAnnotation);
  Iterator<Edge> parentOutEdges = parentVertex.edges(Direction.OUT, EdgeLabels.FIRST_TEXT_SEGMENT, EdgeLabels.LAST_TEXT_SEGMENT, EdgeLabels.NEXT);
  Vertex childVertex = toVertex(newChildTextAnnotation);

  // copy FIRST_TEXT_SEGMENT, LAST_TEXT_SEGMENT, NEXT edges from parentVertex to childVertex
  while (parentOutEdges.hasNext()) {
    Edge outEdge = parentOutEdges.next();
    childVertex.addEdge(outEdge.label(), outEdge.inVertex());
  }

  // remove the existing NEXT edge of parentVertex (if any); the child took over its target above
  Iterator<Edge> parentNextEdgeIterator = parentVertex.edges(Direction.OUT, EdgeLabels.NEXT);
  boolean parentHadSuccessor = parentNextEdgeIterator.hasNext();
  if (parentHadSuccessor) {
    parentNextEdgeIterator.next().remove();
  }

  // the child always follows the parent, also when the parent was the last annotation in the chain
  parentVertex.addEdge(EdgeLabels.NEXT, childVertex);

  if (parentHadSuccessor) {
    // increase the depth of the next textannotations as long as the textrange they annotate overlaps with the textrange of the parent annotation
    updateDepths(parentVertex, childVertex, parentTextAnnotation.getDepth());
  }
}
/**
 * Increments the depth of the annotations that start inside the text range of the parent and are
 * deeper than the parent, to make room for a newly inserted child annotation.
 * <p>
 * Walks the text segments from the parent's first to its last text segment (following NEXT edges).
 * For every TextAnnotation vertex that starts at a visited segment, is neither the parent nor the
 * child, and has a depth greater than {@code parentDepth}, the depth is increased by one. Each
 * vertex is adjusted at most once.
 *
 * @param parentVertex
 *          the annotation vertex whose text range is walked
 * @param childVertex
 *          the newly inserted annotation vertex, excluded from the adjustment
 * @param parentDepth
 *          depth of the parent; only deeper annotations are adjusted
 */
private static void updateDepths(Vertex parentVertex, Vertex childVertex, int parentDepth) {
  // TODO: use start/end textSegmentIndex to determine the relevant annotations to adjust.
  Vertex rangeStart = firstTextSegment(parentVertex);
  Vertex rangeEnd = lastTextSegment(parentVertex);

  Set<Vertex> handled = Sets.newHashSet();
  handled.add(parentVertex);
  handled.add(childVertex);

  Vertex current = rangeStart;
  while (true) {
    StreamUtil.stream(current.vertices(Direction.IN, EdgeLabels.FIRST_TEXT_SEGMENT))//
        .filter(candidate -> VertexLabels.TEXTANNOTATION.equals(candidate.label()) && !handled.contains(candidate))//
        .forEach(candidate -> {
          int depth = getIntValue(candidate, TextAnnotation.Properties.depth);
          if (depth > parentDepth) {
            candidate.property(TextAnnotation.Properties.depth, depth + 1);
            handled.add(candidate);
          }
        });

    if (current.equals(rangeEnd)) {
      break;
    }
    Iterator<Vertex> successors = current.vertices(Direction.OUT, EdgeLabels.NEXT);
    if (!successors.hasNext()) {
      // the last text segment of the parent was never reached
      LOG.error("There seems to be something wrong with the graph.");
      break;
    }
    current = successors.next();
  }
}
/**
 * (Re)creates the TextAnnotation vertex that represents a text range annotation inside the text
 * graph of a resource.
 * <p>
 * Steps: remove the TextAnnotation currently linked to the range annotation (if there is one),
 * create a fresh TextAnnotation vertex, locate the annotation carrying the xml:id of the absolute
 * position, link the new annotation to its text segments (milestone or regular handling), reindex
 * if the segments were changed, insert the new annotation, and reindex once more.
 *
 * @param vf
 *          frame of the stored text range annotation
 * @param textRangeAnnotation
 *          the text range annotation to represent
 * @param resourceUUID
 *          identifier of the resource whose text graph is modified
 */
public void updateTextAnnotationLink(TextRangeAnnotationVF vf, TextRangeAnnotation textRangeAnnotation, UUID resourceUUID) {
  // if the TextRangeAnnotationVF is already linked to a TextAnnotation, remove that TextAnnotation
  FramedGraphTraversal<TextRangeAnnotationVF, Vertex> linkedAnnotations = vf.out(TextRangeAnnotationVF.EdgeLabels.HAS_TEXTANNOTATION);
  if (linkedAnnotations.hasNext()) {
    Vertex obsoleteAnnotation = linkedAnnotations.next();
    obsoleteAnnotation.remove();
  }

  Vertex insertedVertex = createNewTextAnnotation(vf, textRangeAnnotation);
  TextAnnotationInsertionContext context = new TextAnnotationInsertionContext(insertedVertex, textRangeAnnotation);

  // walk the annotation chain until the TextAnnotation with the xml:id from annotation.position is found
  String anchorXmlId = textRangeAnnotation.getAbsolutePosition().getXmlId();
  Vertex anchorVertex = getVertexTraversalFromResource(resourceUUID)//
      .out(EdgeLabels.HAS_TEXTGRAPH)//
      .out(EdgeLabels.FIRST_ANNOTATION)//
      .until(__.has(TextAnnotation.Properties.xmlid, anchorXmlId))//
      .repeat(__.out(EdgeLabels.NEXT))//
      .next();

  if (!context.annotationIsMilestone) {
    handleRegularAnnotation(resourceUUID, context, anchorVertex);
  } else {
    handleMilestoneAnnotation(insertedVertex, context, anchorVertex);
  }

  // reindex before the insertion when text segments were added or split
  if (context.reindexNeeded) {
    reindex(resourceUUID);
  }
  context.insertNewTextAnnotationVertex();
  reindex(resourceUUID);

  if (LOG.isDebugEnabled()) {
    LOG.debug("newTextAnnotationVertex={}", visualizeVertex(insertedVertex));
  }
}
/**
 * Links a non-milestone annotation to the text segments its range starts and ends in.
 * <p>
 * Starting at the first text segment of the given parent annotation, the traversal follows NEXT
 * edges until the context reports that the range starts in the current segment, lets the context
 * process that segment, continues until the context reports that the range ends in the current
 * segment, and lets the context process that one as well. The linking itself happens in the
 * context callbacks. An error is logged when the traversal does not end with exactly one vertex.
 *
 * @param resourceUUID
 *          identifier of the resource (not used by this method)
 * @param context
 *          insertion context holding the range and the new annotation vertex
 * @param parentTextAnnotationVertex
 *          the annotation vertex whose first text segment is the starting point of the search
 */
private void handleRegularAnnotation(UUID resourceUUID, TextAnnotationInsertionContext context, Vertex parentTextAnnotationVertex) {
  // the debug message used to say "handleMilestoneAnnotation" (copy/paste slip)
  LOG.debug("handleRegularAnnotation");
  List<Vertex> list = storage.getVertexTraversal(parentTextAnnotationVertex)//
      .out(EdgeLabels.FIRST_TEXT_SEGMENT)//
      // find the textsegment where the textrange from annotation.position starts
      .until(context::rangeStartsInThisTextSegment)//
      .repeat(__.out(EdgeLabels.NEXT))//
      .sideEffect(context::processFirstTextSegmentInRange)//
      // iterate over the next TextSegments until you find the one the textrange from annotation.position ends in
      .until(context::rangeEndsInThisTextSegment)//
      .repeat(__.out(EdgeLabels.NEXT))//
      .sideEffect(context::processLastTextSegmentInRange)//
      .toList();
  if (list.size() != 1) {
    LOG.error("listsize should be 1, is {}", list.size());
  }
}
/**
 * Links a milestone annotation (an annotation with a range of length 0) to a text segment.
 * <p>
 * When the parent annotation is itself a milestone, the new annotation is linked to the parent's
 * (empty) text segment. Otherwise a new empty text segment is created for the new annotation and
 * inserted into the segment chain: before the segment found by the offset search, after it, or in
 * between the two halves of that segment after splitting it. In all cases the context is flagged
 * as needing a reindex.
 *
 * @param newTextAnnotationVertex
 *          the annotation vertex to link
 * @param context
 *          insertion context; provides the range and the edge/segment helpers, and receives the
 *          starting/ending text segment
 * @param parentTextAnnotationVertex
 *          the annotation vertex whose first text segment is the starting point of the search
 */
private void handleMilestoneAnnotation(Vertex newTextAnnotationVertex, TextAnnotationInsertionContext context, Vertex parentTextAnnotationVertex) {
LOG.debug("handleMilestoneAnnotation");
// TODO: refactor this mess!
// startOffset: summed text length of the segments before the current one
int startOffset = 0;
// endOffset: summed text length of all segments visited so far, including the current one
int endOffset = 0;
String text = "";
Vertex textSegment = parentTextAnnotationVertex.vertices(Direction.OUT, EdgeLabels.FIRST_TEXT_SEGMENT).next();
boolean parentTextAnnotationIsMilestone = parentTextAnnotationIsMilestone(parentTextAnnotationVertex, textSegment);
if (parentTextAnnotationIsMilestone) {
// the parent has a single, empty text segment: share it
context.linkTextAnnotationToTextSegment(newTextAnnotationVertex, textSegment);
} else {
Vertex emptyTextSegment = newTextSegmentVertex("");
context.linkTextAnnotationToTextSegment(newTextAnnotationVertex, emptyTextSegment);
// find the textsegment where the textrange from annotation.position starts
boolean goOn = true;
while (goOn) {
text = getStringValue(textSegment, TextSegment.Properties.text);
int segmentSize = text.length();
// continue while the current segment still ends before rangeStart
goOn = startOffset + segmentSize < context.rangeStart;
endOffset += segmentSize;
if (goOn) {
Iterator<Vertex> nextTextSegments = textSegment.vertices(Direction.OUT, EdgeLabels.NEXT);
if (nextTextSegments.hasNext()) {
startOffset += segmentSize;
textSegment = nextTextSegments.next();
} else {
// end of the segment chain reached: stay on the last segment
goOn = false;
}
}
}
if (startOffset == context.rangeStart) {
// put the empty segment in front of the current segment; annotations that started at the
// current segment now start at the empty segment
context.insertNewBeforeCurrent(emptyTextSegment, textSegment);
StreamUtil.stream(textSegment.edges(Direction.IN, EdgeLabels.FIRST_TEXT_SEGMENT))//
.forEach(e -> context.moveEdge(e, EdgeLabels.FIRST_TEXT_SEGMENT, emptyTextSegment));
} else if (endOffset == context.rangeEnd) {
// put the empty segment behind the current segment; annotations that ended at the
// current segment now end at the empty segment
context.insertNewAfterCurrent(emptyTextSegment, textSegment);
StreamUtil.stream(textSegment.edges(Direction.IN, EdgeLabels.LAST_TEXT_SEGMENT))//
.forEach(e -> context.moveEdge(e, EdgeLabels.LAST_TEXT_SEGMENT, emptyTextSegment));
} else {
// split up textSegment?
// the current segment keeps the head of its text, the tail goes to a new segment
int headLength = context.rangeStart - startOffset - 1;
String headText = text.substring(0, headLength);
String tailText = text.substring(headLength);
textSegment.property(TextSegment.Properties.text, headText);
Vertex newTail = newTextSegmentVertex(tailText);
StreamUtil.stream(textSegment.edges(Direction.IN, EdgeLabels.LAST_TEXT_SEGMENT))//
.forEach(e -> context.moveEdge(e, EdgeLabels.LAST_TEXT_SEGMENT, newTail));
// both are inserted directly after textSegment, so the resulting order is:
// textSegment (head) -> emptyTextSegment -> newTail
context.insertNewAfterCurrent(newTail, textSegment);
context.insertNewAfterCurrent(emptyTextSegment, textSegment);
}
}
context.reindexNeeded = true;
}
/**
 * Tells whether the parent annotation is a milestone: the given (first) text segment has empty
 * text and is also the parent's last text segment.
 *
 * @param parentTextAnnotationVertex
 *          the annotation vertex to check
 * @param textSegment
 *          the first text segment of that annotation
 */
private boolean parentTextAnnotationIsMilestone(Vertex parentTextAnnotationVertex, Vertex textSegment) {
  Vertex closingSegment = parentTextAnnotationVertex.vertices(Direction.OUT, EdgeLabels.LAST_TEXT_SEGMENT).next();
  String openingText = getStringValue(textSegment, TextSegment.Properties.text);
  if (!openingText.isEmpty()) {
    return false;
  }
  return textSegment.equals(closingSegment);
}
/**
 * Creates and stores a TextAnnotation vertex for the given text range annotation and links the
 * range annotation's vertex to it with a HAS_TEXTANNOTATION edge.
 * <p>
 * The new annotation gets the name and attributes of the range annotation, plus a responsibility
 * attribute holding {@code "#" + annotator}. Its depth is a placeholder (1000) that is to be
 * adjusted once its place in the annotation list has been determined.
 * NOTE(review): Guava's ImmutableMap builder rejects duplicate keys, so this presumably fails when
 * the attributes already contain the responsibility attribute - confirm that callers prevent that.
 *
 * @return the vertex of the new TextAnnotation
 */
private Vertex createNewTextAnnotation(TextRangeAnnotationVF textRangeAnnotationVF, TextRangeAnnotation textRangeAnnotation) {
  // collect the attributes of the new TextAnnotation
  ImmutableMap.Builder<String, String> attributeBuilder = ImmutableMap.<String, String> builder();
  attributeBuilder.putAll(textRangeAnnotation.getAttributes());
  attributeBuilder.put(TextRangeAnnotation.RESPONSIBILITY_ATTRIBUTE, "#" + textRangeAnnotation.getAnnotator());
  Map<String, String> attributes = attributeBuilder.build();

  // 1000 is a temporary depth
  TextAnnotation created = new TextAnnotation(textRangeAnnotation.getName(), attributes, 1000);
  Vertex createdVertex = toVertex(created);

  // link TextAnnotation to TextRangeAnnotation
  textRangeAnnotationVF.vertex().addEdge(TextRangeAnnotationVF.EdgeLabels.HAS_TEXTANNOTATION, createdVertex);
  return createdVertex;
}
/**
 * Mutable working state for inserting one new TextAnnotation vertex into a text graph.
 * <p>
 * It holds the range to annotate, keeps a running total of the text lengths seen by the traversal
 * callbacks, splits text segments where the range starts or ends inside a segment, links the new
 * annotation to its first and last text segment, and finally determines the depth of the new
 * annotation.
 */
private static class TextAnnotationInsertionContext {
// running total of the text lengths of the segments counted so far (see incTextSize / detachTail)
private int textSize;
// start and end of the range, compared against the running textSize;
// presumably 1-based character positions relative to the start of the parent's text - TODO confirm
private int rangeStart;
private int rangeEnd;
// the annotation vertex that is being inserted
private Vertex newTextAnnotationVertex;
// the text segments the new annotation starts and ends with; set while processing the range
private Vertex startingTextSegment;
private Vertex endingTextSegment;
// set to true as soon as text segments were added or split
private boolean reindexNeeded = false;
// chooses between insertUsingOffset() and insertAfterParent()
private boolean useOffset;
// xml:id taken from the absolute position of the range annotation
private String parentXmlId;
// true when the range has length 0
private boolean annotationIsMilestone = false;
/**
 * Derives the range from the absolute position of the text range annotation:
 * rangeStart = offset, rangeEnd = offset + length - 1. A range of length 0 marks a milestone.
 */
public TextAnnotationInsertionContext(Vertex newTextAnnotationVertex, TextRangeAnnotation textRangeAnnotation) {
this.newTextAnnotationVertex = newTextAnnotationVertex;
this.textSize = 0;
this.useOffset = textRangeAnnotation.getUseOffset();
AbsolutePosition absolutePosition = textRangeAnnotation.getAbsolutePosition();
this.parentXmlId = absolutePosition.getXmlId();
this.rangeStart = absolutePosition.getOffset();
Integer length = absolutePosition.getLength();
this.rangeEnd = this.rangeStart + length - 1;
// a range that ends at position 0 is made to start at position 0 as well
if (this.rangeEnd == 0) {
this.rangeStart = 0;
}
this.annotationIsMilestone = length == 0;
}
/**
 * Uses the given text segment as both starting and ending segment, and adds the FIRST_TEXT_SEGMENT
 * and LAST_TEXT_SEGMENT edges from the given annotation vertex to it.
 */
private void linkTextAnnotationToTextSegment(Vertex newTextAnnotationVertex, Vertex emptyTextSegment) {
this.startingTextSegment = emptyTextSegment;
this.endingTextSegment = emptyTextSegment;
newTextAnnotationVertex.addEdge(EdgeLabels.FIRST_TEXT_SEGMENT, this.startingTextSegment);
newTextAnnotationVertex.addEdge(EdgeLabels.LAST_TEXT_SEGMENT, this.endingTextSegment);
}
/**
 * Determines the place (depth) of the new annotation, either based on its text segments
 * (useOffset) or directly after the annotation with the parent xml:id.
 */
void insertNewTextAnnotationVertex() {
if (useOffset) {
insertUsingOffset();
} else {
// then straight after the parent annotation
insertAfterParent();
}
}
/**
 * Inserts the new annotation into the NEXT chain directly after the parent annotation, gives it
 * the parent's depth + 1 and updates the depths of the deeper annotations in the parent's range.
 * The parent is the TextAnnotation with the parent xml:id that starts at startingTextSegment.
 */
private void insertAfterParent() {
Vertex parentVertex = storage.getVertexTraversal(startingTextSegment)//
.in(EdgeLabels.FIRST_TEXT_SEGMENT)//
.hasLabel(VertexLabels.TEXTANNOTATION)//
.has(TextAnnotation.Properties.xmlid, parentXmlId)//
.next();
// the former successor of the parent (if any) becomes the successor of the new annotation
Iterator<Edge> edges = parentVertex.edges(Direction.OUT, EdgeLabels.NEXT);
if (edges.hasNext()) {
Edge oldNextEdge = edges.next();
Vertex nextTextAnnotation = oldNextEdge.inVertex();
newTextAnnotationVertex.addEdge(EdgeLabels.NEXT, nextTextAnnotation);
oldNextEdge.remove();
}
parentVertex.addEdge(EdgeLabels.NEXT, newTextAnnotationVertex);
int parentDepth = getDepth(parentVertex);
newTextAnnotationVertex.property(TextAnnotation.Properties.depth, parentDepth + 1);
updateDepths(parentVertex, newTextAnnotationVertex, parentDepth);
}
/**
 * Finds the deepest existing annotation that starts at (or before) startingTextSegment and does
 * not end before endingTextSegment, gives the new annotation that annotation's depth + 1 and
 * updates the depths of the deeper annotations in that annotation's range.
 * <p>
 * Relies on the index property of the text segments.
 * NOTE(review): this method does not add a NEXT edge for the new annotation (the linking code was
 * commented out); presumably the reindex done by the caller takes care of the ordering - confirm.
 */
private void insertUsingOffset() {
int endingTextSegmentIndex = getIntValue(endingTextSegment, TextAnnotation.Properties.index);
Vertex parentTextAnnotationVertex = null;
Vertex textSegment = startingTextSegment;
while (parentTextAnnotationVertex == null) {
GraphTraversal<Vertex, Vertex> tailTraversal = storage.getVertexTraversal(textSegment)//
// find TextAnnotations that start here
.in(EdgeLabels.FIRST_TEXT_SEGMENT)//
.hasLabel(VertexLabels.TEXTANNOTATION)//
// that are not the newTextAnnotation
.not(__.hasId(newTextAnnotationVertex.id()))//
// and that don't end before endingTextSegment
.not(__.out(EdgeLabels.LAST_TEXT_SEGMENT).has(TextSegment.Properties.index, P.lt(endingTextSegmentIndex)))//
// sort by depth
.order().by(TextAnnotation.Properties.depth, Order.incr)//
// get the deepest
.tail();
if (tailTraversal.hasNext()) {
// We found the parent!
parentTextAnnotationVertex = tailTraversal.next();
} else {
// go to the previous textSegment, and start over
// (the unguarded next() fails when there is no previous text segment)
textSegment = textSegment.vertices(Direction.IN, EdgeLabels.NEXT).next();
}
}
int parentDepth = getDepth(parentTextAnnotationVertex);
setDepth(newTextAnnotationVertex, parentDepth + 1);
updateDepths(parentTextAnnotationVertex, newTextAnnotationVertex, parentDepth);
}
/** Sets the depth property of the given vertex. */
private VertexProperty<Integer> setDepth(Vertex v, int value) {
return v.property(TextAnnotation.Properties.depth, value);
}
/** Reads the depth property of the given vertex. */
private int getDepth(Vertex v) {
return getIntValue(v, TextAnnotation.Properties.depth);
}
/**
 * Traversal predicate: adds the text length of the current segment to the running total and
 * reports whether the total has reached rangeStart.
 */
boolean rangeStartsInThisTextSegment(Traverser<Vertex> t) {
incTextSize(t);
return textSize >= rangeStart;
}
/**
 * Traversal side effect for the segment the range starts in: splits off the part of the segment
 * that belongs to the range (when needed) and adds the FIRST_TEXT_SEGMENT edge of the new annotation.
 */
void processFirstTextSegmentInRange(Traverser<Vertex> t) {
LOG.debug("processFirstTextSegmentInRange()");
Vertex textSegmentVertex = t.get();
checkVertexLabel(textSegmentVertex, VertexLabels.TEXTSEGMENT);
// if needed, split up the textsegment, preserving the TextAnnotation links
int tailLength = Math.min(textSize, textSize - rangeStart + 1);
LOG.debug("textSize = {}, tailLength = {}", textSize, tailLength);
// link the new TextAnnotation to the tail if detaching was necessary, to the firstTextSegment otherwise
this.startingTextSegment = detachTail(textSegmentVertex, tailLength);
newTextAnnotationVertex.addEdge(EdgeLabels.FIRST_TEXT_SEGMENT, this.startingTextSegment);
}
/**
 * Traversal predicate: adds the text length of the current segment to the running total and
 * reports whether the total has reached rangeEnd.
 */
boolean rangeEndsInThisTextSegment(Traverser<Vertex> t) {
incTextSize(t);
LOG.debug("textSize:{},rangeEnd:{}", textSize, rangeEnd);
return textSize >= rangeEnd;
}
/**
 * Traversal side effect for the segment the range ends in: splits off the part of the segment
 * that belongs to the range (when needed) and adds the LAST_TEXT_SEGMENT edge of the new annotation.
 */
void processLastTextSegmentInRange(Traverser<Vertex> t) {
LOG.debug("processLastTextSegmentInRange");
Vertex textSegmentVertex = t.get();
checkVertexLabel(textSegmentVertex, VertexLabels.TEXTSEGMENT);
// if needed, split up the textsegment, preserving the TextAnnotation links
int tailLength = textSize - rangeEnd;
LOG.debug("textSize = {}, tailLength = {}", textSize, tailLength);
// link the new TextAnnotation to the head if detaching was necessary, to the lastTextSegment otherwise
this.endingTextSegment = detachHead(textSegmentVertex, tailLength);
newTextAnnotationVertex.addEdge(EdgeLabels.LAST_TEXT_SEGMENT, this.endingTextSegment);
}
/** Adds the text length of the traverser's current text segment to the running total. */
private void incTextSize(Traverser<Vertex> t) {
Vertex textSegmentVertex = t.get();
checkVertexLabel(textSegmentVertex, VertexLabels.TEXTSEGMENT);
String text = getStringValue(textSegmentVertex, TextSegment.Properties.text);
textSize += text.length();
}
/**
 * Splits the last tailLength characters of the segment off into a new segment that is inserted
 * directly after it, and moves the incoming LAST_TEXT_SEGMENT edges to the new tail segment.
 * No split is done when the tail is the whole text. In both cases the full length of the original
 * text is subtracted from the running textSize.
 *
 * @return the segment holding the tail text: the new segment, or the given one when not split
 */
private Vertex detachTail(Vertex textSegment, int tailLength) {
Preconditions.checkArgument(textSegment.label().equals(VertexLabels.TEXTSEGMENT));
String text = getStringValue(textSegment, TextSegment.Properties.text);
int length = text.length();
textSize = textSize - length;
int headLength = length - tailLength;
String headText = text.substring(0, headLength);
String tailText = text.substring(headLength);
LOG.debug("detachTail(): head = [{}], tail = [{}]", headText, tailText);
if (headLength == 0) {
// no detachment necessary?
return textSegment;
} else {
textSegment.property(TextSegment.Properties.text, headText);
Vertex tailTextSegment = newTextSegmentVertex(tailText);
insertNewAfterCurrent(tailTextSegment, textSegment);
// move LAST_TEXT_SEGMENT edges to tailTextSegment
StreamUtil.stream(textSegment.edges(Direction.IN, EdgeLabels.LAST_TEXT_SEGMENT))//
.forEach(e -> moveEdge(e, EdgeLabels.LAST_TEXT_SEGMENT, tailTextSegment));
reindexNeeded = true;
return tailTextSegment;
}
}
/**
 * Inserts newVertex into the NEXT chain directly after currentVertex; the former successor of
 * currentVertex (if any) becomes the successor of newVertex.
 */
private void insertNewAfterCurrent(Vertex newVertex, Vertex currentVertex) {
Iterator<Edge> edges = currentVertex.edges(Direction.OUT, EdgeLabels.NEXT);
if (edges.hasNext()) {
Edge nextEdge = edges.next();
Vertex nextVertex = nextEdge.inVertex();
nextEdge.remove();
newVertex.addEdge(EdgeLabels.NEXT, nextVertex);
}
currentVertex.addEdge(EdgeLabels.NEXT, newVertex);
}
/**
 * Splits everything but the last tailLength characters of the segment off into a new segment that
 * is inserted directly before it, and moves the incoming FIRST_TEXT_SEGMENT edges to the new head
 * segment. No split is done when tailLength is 0.
 *
 * @return the segment holding the head text: the new segment, or the given one when not split
 */
private Vertex detachHead(Vertex textSegment, int tailLength) {
Preconditions.checkArgument(textSegment.label().equals(VertexLabels.TEXTSEGMENT));
String text = getStringValue(textSegment, TextSegment.Properties.text);
int length = text.length();
int headLength = length - tailLength;
String headText = text.substring(0, headLength);
String tailText = text.substring(headLength);
LOG.debug("detachHead(): head = [{}], tail = [{}]", headText, tailText);
if (tailLength == 0) {
// no detachment necessary
return textSegment;
} else {
textSegment.property(TextSegment.Properties.text, tailText);
Vertex headTextSegment = newTextSegmentVertex(headText);
insertNewBeforeCurrent(headTextSegment, textSegment);
// move FIRST_TEXT_SEGMENT edges to headTextSegment
StreamUtil.stream(textSegment.edges(Direction.IN, EdgeLabels.FIRST_TEXT_SEGMENT))//
.forEach(e -> moveEdge(e, EdgeLabels.FIRST_TEXT_SEGMENT, headTextSegment));
reindexNeeded = true;
return headTextSegment;
}
}
/**
 * Redirects an edge: adds an edge with the given label from the edge's out-vertex to the given
 * target vertex and removes the original edge. Despite the parameter names it is used for both
 * FIRST_TEXT_SEGMENT and LAST_TEXT_SEGMENT edges, and for any target segment.
 */
private void moveEdge(Edge e, String firstTextSegment, Vertex headTextSegment) {
Vertex textAnnotationVertex = e.outVertex();
textAnnotationVertex.addEdge(firstTextSegment, headTextSegment);
e.remove();
}
/**
 * Inserts newVertex into the NEXT chain directly before currentVertex; the former predecessor of
 * currentVertex (if any) becomes the predecessor of newVertex.
 */
private void insertNewBeforeCurrent(Vertex newVertex, Vertex currentVertex) {
Iterator<Edge> edges = currentVertex.edges(Direction.IN, EdgeLabels.NEXT);
if (edges.hasNext()) {
Edge prevEdge = edges.next();
Vertex prevVertex = prevEdge.outVertex();
prevEdge.remove();
prevVertex.addEdge(EdgeLabels.NEXT, newVertex);
}
newVertex.addEdge(EdgeLabels.NEXT, currentVertex);
}
}
// private methods //
/** Reads the named vertex property and casts its value to int. */
private static int getIntValue(Vertex vertex, String propertyName) {
return (int) vertex.value(propertyName);
}
/** Reads the named vertex property and casts its value to String. */
private static String getStringValue(Vertex vertex, String propertyName) {
return (String) vertex.value(propertyName);
}
/**
 * Returns the vertex at the other end of the first outgoing NEXT edge of the given vertex.
 * The unguarded next() fails when there is no such edge.
 * NOTE(review): no caller is visible in this part of the file - check whether it is still used.
 */
private Vertex nextTextAnnotation(Vertex childVertex) {
return childVertex.vertices(Direction.OUT, EdgeLabels.NEXT).next();
}
/**
 * Returns the vertex the given vertex points to with a LAST_TEXT_SEGMENT edge.
 * The unguarded next() fails when there is no such edge.
 */
private static Vertex lastTextSegment(Vertex parentVertex) {
return parentVertex.vertices(Direction.OUT, EdgeLabels.LAST_TEXT_SEGMENT).next();
}
/**
 * Returns the vertex the given vertex points to with a FIRST_TEXT_SEGMENT edge.
 * The unguarded next() fails when there is no such edge.
 */
private static Vertex firstTextSegment(Vertex parentVertex) {
return parentVertex.vertices(Direction.OUT, EdgeLabels.FIRST_TEXT_SEGMENT).next();
}
/**
 * Looks up the stored vertex of a text annotation by the annotation's id.
 * The unguarded next() fails when the storage has no vertex for that id.
 */
private Vertex getTextAnnotationVertex(TextAnnotation textAnnotation) {
Object id = textAnnotation.getId();
return storage.getVertexTraversal(id).next();
}
/**
 * Stores the given texts as a chain of text segment vertices.
 * <p>
 * The text graph vertex points to the first segment with a FIRST_TEXT_SEGMENT edge; every other
 * segment is reached from its predecessor through a NEXT edge.
 *
 * @param textSegments
 *          the segment texts, in document order
 * @param text
 *          the text graph vertex
 * @return the created vertices, in the same order as the given texts
 */
private List<Vertex> storeTextSegments(List<String> textSegments, Vertex text) {
  List<Vertex> storedSegments = new ArrayList<>();
  Vertex predecessor = null;
  for (String segmentText : textSegments) {
    Vertex segmentVertex = newTextSegmentVertex(segmentText);
    if (predecessor != null) {
      predecessor.addEdge(EdgeLabels.NEXT, segmentVertex);
    } else {
      text.addEdge(EdgeLabels.FIRST_TEXT_SEGMENT, segmentVertex);
    }
    storedSegments.add(segmentVertex);
    predecessor = segmentVertex;
  }
  return storedSegments;
}
/**
 * Stores the given xml annotations as a chain of text annotation vertices.
 * <p>
 * Every annotation vertex is linked to its first and last text segment (looked up by segment index
 * in the given list). The text graph vertex points to the first annotation with a FIRST_ANNOTATION
 * edge; every other annotation is reached from its predecessor through a NEXT edge.
 *
 * @param xmlAnnotations
 *          the annotations to store, in iteration order of the set
 * @param text
 *          the text graph vertex
 * @param textSegments
 *          the stored text segment vertices, addressed by the annotations' segment indexes
 */
private void storeTextAnnotations(Set<XmlAnnotation> xmlAnnotations, Vertex text, List<Vertex> textSegments) {
  Vertex predecessor = null;
  for (XmlAnnotation annotation : xmlAnnotations) {
    Vertex annotationVertex = toVertex(annotation);

    Vertex firstSegment = textSegments.get(annotation.getFirstSegmentIndex());
    annotationVertex.addEdge(EdgeLabels.FIRST_TEXT_SEGMENT, firstSegment);
    Vertex lastSegment = textSegments.get(annotation.getLastSegmentIndex());
    annotationVertex.addEdge(EdgeLabels.LAST_TEXT_SEGMENT, lastSegment);

    if (predecessor != null) {
      predecessor.addEdge(EdgeLabels.NEXT, annotationVertex);
    } else {
      text.addEdge(EdgeLabels.FIRST_ANNOTATION, annotationVertex);
    }
    predecessor = annotationVertex;
  }
}
/**
 * Adds a new TEXTANNOTATION vertex to the storage and fills it with the data of the given annotation.
 *
 * @return the new vertex
 */
private static Vertex toVertex(TextAnnotation textAnnotation) {
  Vertex annotationVertex = storage.addVertex(T.label, VertexLabels.TEXTANNOTATION);
  update(annotationVertex, textAnnotation);
  return annotationVertex;
}
/**
 * Copies the data of a text annotation to a vertex.
 * <p>
 * Written properties: name, the attribute keys and attribute values as two parallel String arrays
 * (in entry set order), depth, and - only when the attributes contain an "xml:id" - xmlid.
 * An xmlid property that is already present is left untouched when there is no "xml:id" attribute.
 *
 * @param v
 *          the vertex to write to
 * @param textAnnotation
 *          the annotation to read from
 */
private static void update(Vertex v, TextAnnotation textAnnotation) {
  Map<String, String> attributes = textAnnotation.getAttributes();
  int attributeCount = attributes.size();
  String[] keys = new String[attributeCount];
  String[] values = new String[attributeCount];

  Iterator<Entry<String, String>> entries = attributes.entrySet().iterator();
  for (int slot = 0; entries.hasNext(); slot++) {
    Entry<String, String> entry = entries.next();
    keys[slot] = entry.getKey();
    values[slot] = entry.getValue();
  }

  v.property(TextAnnotation.Properties.name, textAnnotation.getName());
  v.property(TextAnnotation.Properties.attribute_keys, keys);
  v.property(TextAnnotation.Properties.attribute_values, values);
  v.property(TextAnnotation.Properties.depth, textAnnotation.getDepth());
  if (attributes.containsKey("xml:id")) {
    v.property(TextAnnotation.Properties.xmlid, attributes.get("xml:id"));
  }
}
/**
 * Converts a text segment vertex to a {@link TextGraphSegment}: its text (when the vertex has a
 * text property) plus the annotations that open and close at this segment.
 * <p>
 * When the segment has no text and the last annotation to open equals the first annotation to
 * close, that annotation is reported as the milestone annotation of the segment instead of being
 * listed as opening and closing.
 *
 * @param textSegment
 *          the text segment vertex
 * @param orderedLayerDefinitions
 *          lists of annotation names, one list per layer, used for ordering the annotations
 */
private TextGraphSegment toTextGraphSegment(Vertex textSegment, List<List<String>> orderedLayerDefinitions) {
  TextGraphSegment segment = new TextGraphSegment();
  boolean hasTextProperty = textSegment.keys().contains(TextSegment.Properties.text);
  if (hasTextProperty) {
    segment.setTextSegment(textSegment.value(TextSegment.Properties.text));
  }

  List<TextAnnotation> opening = getTextAnnotationsToOpen(textSegment, orderedLayerDefinitions);
  List<TextAnnotation> closing = getTextAnnotationsToClose(textSegment, orderedLayerDefinitions);

  // an empty segment at which annotations both open and close may hold a milestone
  if (StringUtils.isEmpty(segment.getTextSegment()) && !opening.isEmpty() && !closing.isEmpty()) {
    TextAnnotation innermostOpened = opening.get(opening.size() - 1);
    TextAnnotation firstClosed = closing.get(0);
    if (innermostOpened.equals(firstClosed)) {
      opening.remove(innermostOpened);
      segment.setMilestoneAnnotation(innermostOpened);
      closing.remove(firstClosed);
    }
  }

  segment.setAnnotationsToOpen(opening);
  segment.setAnnotationsToClose(closing);
  return segment;
}
/**
 * Returns the annotations that start at the given text segment (those pointing to it with a
 * FIRST_TEXT_SEGMENT edge), ordered by increasing depth.
 */
private List<TextAnnotation> getTextAnnotationsToOpen(Vertex textSegment, List<List<String>> orderedLayerDefinitions) {
  List<TextAnnotation> startingHere = getTextAnnotations(textSegment, EdgeLabels.FIRST_TEXT_SEGMENT, orderedLayerDefinitions);
  return startingHere;
}
/**
 * Returns the annotations that end at the given text segment (those pointing to it with a
 * LAST_TEXT_SEGMENT edge), ordered by decreasing depth: a reversed view of the increasing-depth list.
 */
private List<TextAnnotation> getTextAnnotationsToClose(Vertex textSegment, List<List<String>> orderedLayerDefinitions) {
  List<TextAnnotation> endingHere = getTextAnnotations(textSegment, EdgeLabels.LAST_TEXT_SEGMENT, orderedLayerDefinitions);
  return Lists.reverse(endingHere);
}
// Orders text annotations from the lowest depth value to the highest.
private static final Comparator<TextAnnotation> BY_INCREASING_DEPTH = Comparator.comparing(TextAnnotation::getDepth);
/**
 * Returns the text annotations linked to a text segment through the given edge label, ordered by
 * increasing depth, where the depth may be overridden to honour the requested layer order.
 * <p>
 * The annotation vertices are first divided into layers: one layer per entry of
 * {@code orderedLayerDefinitions} (annotations whose name is in that entry), followed by the
 * remaining annotations grouped by decreasing depth. When more than one annotation is involved,
 * the vertex pairs built from these layers that share the same text range get their depths
 * swapped whenever the left vertex is less deep than the right one. The swap only affects the
 * returned TextAnnotation objects, not the stored vertices.
 *
 * @param textSegment
 *          the text segment vertex
 * @param edgeLabel
 *          label of the incoming edges to follow (first or last text segment)
 * @param orderedLayerDefinitions
 *          lists of annotation names, one list per layer
 */
private List<TextAnnotation> getTextAnnotations(Vertex textSegment, String edgeLabel, List<List<String>> orderedLayerDefinitions) {
  List<Vertex> annotationVertices = StreamUtil.stream(textSegment.vertices(Direction.IN, edgeLabel))//
      .filter(v -> v.label().equals(VertexLabels.TEXTANNOTATION))//
      .collect(toList());

  List<List<Vertex>> layers = new ArrayList<>();
  List<Vertex> unassigned = Lists.newArrayList(annotationVertices);
  int relevantCount = 0;

  // process tags defined in orderedLayerDefinitions
  for (List<String> layerTags : orderedLayerDefinitions) {
    List<Vertex> layer = annotationVertices.stream()//
        .filter(v -> layerTags.contains(v.value(TextAnnotation.Properties.name)))//
        .collect(toList());
    layers.add(layer);
    relevantCount += layer.size();
    unassigned.removeAll(layer);
  }

  // put all other tags in separate layers, grouped by depth
  layers.addAll(groupByDecreasingDepth(unassigned));
  relevantCount += unassigned.size();

  Map<Vertex, Integer> overriddenDepth = new HashMap<>();
  if (relevantCount > 1) {
    for (Pair<Vertex, Vertex> pair : createPairs(layers)) {
      if (!hasSameTextRange(pair)) {
        continue;
      }
      Vertex left = pair.getLeft();
      Vertex right = pair.getRight();
      Integer leftDepth = getDepth(left, overriddenDepth);
      Integer rightDepth = getDepth(right, overriddenDepth);
      if (leftDepth < rightDepth) {
        overriddenDepth.put(left, rightDepth);
        overriddenDepth.put(right, leftDepth);
      }
    }
  }

  List<TextAnnotation> annotations = new ArrayList<>();
  for (Vertex vertex : annotationVertices) {
    TextAnnotation annotation = toTextAnnotation(vertex);
    if (overriddenDepth.containsKey(vertex)) {
      annotation.setDepth(overriddenDepth.get(vertex));
    }
    annotations.add(annotation);
  }
  annotations.sort(BY_INCREASING_DEPTH);
  return annotations;
}
/**
 * Determines the depth to use for a vertex: the entry in {@code overriddenDepth}
 * when the map contains the vertex as key, otherwise the vertex's own
 * {@code TextAnnotation.Properties.depth} property.
 *
 * @param leftVertex      the vertex whose depth is wanted
 * @param overriddenDepth depths that take precedence over the stored property
 * @return the overridden depth if present, else the stored depth
 */
private Integer getDepth(Vertex leftVertex, Map<Vertex, Integer> overriddenDepth) {
  if (overriddenDepth.containsKey(leftVertex)) {
    return overriddenDepth.get(leftVertex);
  }
  return (Integer) leftVertex.value(TextAnnotation.Properties.depth);
}
/**
 * Groups the given vertices by their {@code TextAnnotation.Properties.depth} property
 * and returns the groups ordered from the largest depth to the smallest.
 *
 * @param vertexList the vertices to group
 * @return one list of vertices per distinct depth, largest depth first
 */
List<List<Vertex>> groupByDecreasingDepth(List<Vertex> vertexList) {
  Map<Integer, List<Vertex>> verticesPerDepth = vertexList.stream()//
      .collect(groupingBy(vertex -> (Integer) vertex.value(TextAnnotation.Properties.depth)));

  List<Integer> depths = new ArrayList<>(verticesPerDepth.keySet());
  depths.sort(Comparator.reverseOrder());

  List<List<Vertex>> layers = new ArrayList<>(depths.size());
  for (Integer depth : depths) {
    layers.add(verticesPerDepth.get(depth));
  }
  return layers;
}
/**
 * Builds all pairs of vertices that come from two different layers. For each
 * combination of an earlier layer and a later layer, every vertex of the earlier
 * layer is paired (as left element) with every vertex of the later layer (as right element).
 *
 * @param vertexListPerLayer the vertices of each layer, in layer order
 * @return the pairs, ordered by earlier layer index and then by later layer index
 */
List<Pair<Vertex, Vertex>> createPairs(List<List<Vertex>> vertexListPerLayer) {
  List<Pair<Vertex, Vertex>> allPairs = Lists.newArrayList();
  int layerCount = vertexListPerLayer.size();
  for (int earlier = 0; earlier + 1 < layerCount; earlier++) {
    List<Vertex> earlierLayer = vertexListPerLayer.get(earlier);
    for (List<Vertex> laterLayer : vertexListPerLayer.subList(earlier + 1, layerCount)) {
      allPairs.addAll(createPairs(earlierLayer, laterLayer));
    }
  }
  return allPairs;
}
/**
 * Builds the cross product of two layers: each vertex of the first layer is
 * paired with each vertex of the second layer.
 *
 * @param vertexLayer1 supplies the left element of every pair
 * @param vertexLayer2 supplies the right element of every pair
 * @return all (left, right) combinations, iterating the first layer in the outer loop
 */
private List<Pair<Vertex, Vertex>> createPairs(List<Vertex> vertexLayer1, List<Vertex> vertexLayer2) {
  List<Pair<Vertex, Vertex>> pairs = new ArrayList<>();
  for (Vertex left : vertexLayer1) {
    for (Vertex right : vertexLayer2) {
      pairs.add(Pair.of(left, right));
    }
  }
  return pairs;
}
/**
 * Tells whether the two vertices of the pair point to the same text segments:
 * the targets of their outgoing {@code EdgeLabels.FIRST_TEXT_SEGMENT} edges must be
 * equal, and so must the targets of their outgoing {@code EdgeLabels.LAST_TEXT_SEGMENT} edges.
 * All four segment vertices are fetched with {@code Iterator.next()} before comparing,
 * so a vertex lacking one of these edges makes that call fail.
 *
 * @param vertexPair the two vertices to compare
 * @return {@code true} when both the first and the last text segments are equal
 */
Boolean hasSameTextRange(Pair<Vertex, Vertex> vertexPair) {
  Vertex leftAnnotation = vertexPair.getLeft();
  Vertex leftStart = leftAnnotation.vertices(Direction.OUT, EdgeLabels.FIRST_TEXT_SEGMENT).next();
  Vertex leftEnd = leftAnnotation.vertices(Direction.OUT, EdgeLabels.LAST_TEXT_SEGMENT).next();

  Vertex rightAnnotation = vertexPair.getRight();
  Vertex rightStart = rightAnnotation.vertices(Direction.OUT, EdgeLabels.FIRST_TEXT_SEGMENT).next();
  Vertex rightEnd = rightAnnotation.vertices(Direction.OUT, EdgeLabels.LAST_TEXT_SEGMENT).next();

  if (!leftStart.equals(rightStart)) {
    return false;
  }
  return leftEnd.equals(rightEnd);
}
/**
 * Converts a vertex into a {@code TextAnnotation}, using the vertex's name and depth
 * properties, the attribute map built by {@code getAttributeMap}, and the vertex id
 * as the annotation id.
 *
 * @param vertex the vertex to convert
 * @return a new text annotation mirroring the vertex
 */
private TextAnnotation toTextAnnotation(Vertex vertex) {
  Map<String, String> attributes = getAttributeMap(vertex);
  String name = vertex.value(TextAnnotation.Properties.name);
  Integer depth = vertex.value(TextAnnotation.Properties.depth);

  TextAnnotation converted = new TextAnnotation(name, attributes, depth);
  converted.setId(vertex.id());
  return converted;
}
/**
 * Reads the attributes of a vertex into an insertion-ordered map. The attributes are
 * stored as two parallel String arrays, {@code attribute_keys} and {@code attribute_values};
 * the value at position i belongs to the key at position i.
 *
 * @param vertex the vertex to read from
 * @return the attributes in stored order; empty when the vertex has no {@code attribute_keys} property
 */
private Map<String, String> getAttributeMap(Vertex vertex) {
  Map<String, String> attributes = Maps.newLinkedHashMap();
  if (!vertex.keys().contains(TextAnnotation.Properties.attribute_keys)) {
    return attributes;
  }
  String[] keys = vertex.value(TextAnnotation.Properties.attribute_keys);
  String[] values = vertex.value(TextAnnotation.Properties.attribute_values);
  int position = 0;
  for (String key : keys) {
    attributes.put(key, values[position]);
    position++;
  }
  return attributes;
}
/**
 * Adds a vertex with label {@code VertexLabels.TEXTSEGMENT} to {@code storage},
 * with the given string as its {@code TextSegment.Properties.text} property.
 *
 * @param s the text to store on the new vertex
 * @return the vertex returned by {@code storage.addVertex}
 */
private static Vertex newTextSegmentVertex(String s) {
return storage.addVertex(T.label, VertexLabels.TEXTSEGMENT, TextSegment.Properties.text, s);
}
/**
 * Verifies that the vertex carries the expected label.
 *
 * @param vertex the vertex to check
 * @param label  the label the vertex must have
 * @throws IllegalArgumentException when the vertex's label differs from {@code label}
 */
private static void checkVertexLabel(Vertex vertex, String label) {
  String actualLabel = vertex.label();
  if (actualLabel.equals(label)) {
    return;
  }
  throw new IllegalArgumentException("vertex label should be '" + label + "', but is '" + actualLabel + "'.");
}
/**
 * Renumbers the index properties for the given resource by delegating to
 * {@code reindexTextSegments} and then {@code reindexTextAnnotations}.
 *
 * @param resourceUUID identifies the resource to reindex
 */
private void reindex(UUID resourceUUID) {
reindexTextSegments(resourceUUID);
reindexTextAnnotations(resourceUUID);
}
/**
 * Sets the {@code TextSegment.Properties.index} property on every vertex delivered by
 * {@code getTextSegmentVertexStream}, numbering them 1, 2, 3, ... in the order
 * in which they are processed.
 *
 * @param resourceUUID identifies the resource whose text segments are renumbered
 */
private void reindexTextSegments(UUID resourceUUID) {
  AtomicInteger nextIndex = new AtomicInteger(1);
  getTextSegmentVertexStream(resourceUUID).forEach(textSegment -> {
    int index = nextIndex.getAndIncrement();
    textSegment.property(TextSegment.Properties.index, index);
  });
}
/**
 * Sets the {@code TextAnnotation.Properties.index} property on every vertex delivered by
 * {@code getTextAnnotationVertexStream}, numbering them 1, 2, 3, ... in the order
 * in which they are processed.
 *
 * @param resourceUUID identifies the resource whose text annotations are renumbered
 */
private void reindexTextAnnotations(UUID resourceUUID) {
  AtomicInteger nextIndex = new AtomicInteger(1);
  getTextAnnotationVertexStream(resourceUUID).forEach(textAnnotation -> {
    int index = nextIndex.getAndIncrement();
    textAnnotation.property(TextAnnotation.Properties.index, index);
  });
}
/**
 * Renders a vertex and its direct neighbourhood as multi-line text: first the
 * vertex itself, then one line per incoming edge ({@code <-[:label]-(source)}),
 * then one line per outgoing edge ({@code -[:label]->(target)}). Every line is
 * preceded by a newline character.
 *
 * @param v the vertex to visualize
 * @return the textual rendering
 */
private String visualizeVertex(Vertex v) {
  String self = "\n" + vertexRepresentation(v);
  String incoming = StreamUtil.stream(v.edges(Direction.IN))//
      .map(edge -> "\n<-[:" + edge.label() + "]-" + vertexRepresentation(edge.outVertex()))//
      .collect(joining());
  String outgoing = StreamUtil.stream(v.edges(Direction.OUT))//
      .map(edge -> "\n-[:" + edge.label() + "]->" + vertexRepresentation(edge.inVertex()))//
      .collect(joining());
  return self + incoming + outgoing;
}
/**
 * Renders a single vertex as {@code (:label{id:<id>, key:"value", ...})}, where the
 * key/value part is the concatenation of {@code propertyRepresentation} for each property.
 *
 * @param v the vertex to render
 * @return the one-line rendering of the vertex
 */
private String vertexRepresentation(Vertex v) {
  String renderedProperties = StreamUtil.stream(v.properties())//
      .map(this::propertyRepresentation)//
      .collect(joining());
  StringBuilder representation = new StringBuilder("(:");
  representation.append(v.label());
  representation.append("{id:").append(v.id());
  representation.append(renderedProperties);
  representation.append("})");
  return representation.toString();
}
/**
 * Renders a vertex property as {@code , key:"value"}. Newline characters in the
 * value's string form are replaced by the two characters backslash and n.
 *
 * @param vp the property to render
 * @return the rendering, starting with a comma and a space
 */
private String propertyRepresentation(VertexProperty<Object> vp) {
  String key = vp.key();
  String singleLineValue = vp.value().toString().replace("\n", "\\n");
  return ", " + key + ":\"" + singleLineValue + "\"";
}
}