package nl.knaw.huygens.alexandria.textgraph;
/*
* #%L
* alexandria-main
* =======
* Copyright (C) 2015 - 2017 Huygens ING (KNAW)
* =======
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-3.0.html>.
* #L%
*/
import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.toList;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.Stack;
import java.util.UUID;
import java.util.function.Consumer;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.StreamingOutput;
import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import nl.knaw.huygens.alexandria.api.model.text.view.AttributePreCondition;
import nl.knaw.huygens.alexandria.api.model.text.view.ElementView;
import nl.knaw.huygens.alexandria.api.model.text.view.ElementView.AttributeFunction;
import nl.knaw.huygens.alexandria.api.model.text.view.ElementView.AttributeMode;
import nl.knaw.huygens.alexandria.api.model.text.view.ElementView.ElementMode;
import nl.knaw.huygens.alexandria.api.model.text.view.TextView;
import nl.knaw.huygens.alexandria.api.model.text.view.TextViewDefinition;
import nl.knaw.huygens.alexandria.exception.NotFoundException;
import nl.knaw.huygens.alexandria.service.AlexandriaService;
import nl.knaw.huygens.tei.Document;
public class TextGraphUtil {
/**
 * Parses the given XML string and collects the outcome in a fresh {@link ParseResult}.
 * The XML is turned into a {@link Document}, which is then walked by an {@link XmlVisitor}
 * that was handed the result object.
 *
 * @param xml
 *          the XML source to parse
 * @return the ParseResult that was passed to the visitor
 */
public static ParseResult parse(String xml) {
  // TODO: verify xml:ids are unique
  final ParseResult parseResult = new ParseResult();
  Document.createFromXml(xml, true).accept(new XmlVisitor(parseResult));
  return parseResult;
}
/**
 * Builds a {@link StreamingOutput} that writes every text graph segment of the resource,
 * unfiltered, through a buffered UTF-8 writer.
 *
 * @param service
 *          the service that supplies the segment stream
 * @param resourceId
 *          the id of the resource to stream
 * @return a StreamingOutput that does the work when its output stream is supplied
 */
public static StreamingOutput streamXML(AlexandriaService service, UUID resourceId) {
  return outputStream -> {
    final Writer utf8Writer = createBufferedUTF8OutputStreamWriter(outputStream);
    // no view involved, so no layer ordering is requested
    final List<List<String>> noLayerOrdering = new ArrayList<List<String>>();
    stream(service, resourceId, utf8Writer, segment -> streamTextGraphSegment(utf8Writer, segment), noLayerOrdering);
  };
}
/**
 * Writes one segment to the writer: opening tags, milestone tag, text, closing tags, in that order.
 *
 * @param writer
 *          the writer to write to
 * @param segment
 *          the segment to serialize
 * @throws RuntimeException
 *           wrapping any IOException raised while writing
 */
public static void streamTextGraphSegment(Writer writer, TextGraphSegment segment) {
  try {
    writeOpenTags(writer, segment);
    writeMilestoneTags(writer, segment);
    writeText(writer, segment);
    writeCloseTags(writer, segment);

  } catch (IOException writeFailure) {
    // callers use this from lambdas, so the checked exception is rethrown unchecked
    throw new RuntimeException(writeFailure);
  }
}
/**
 * Convenience overload of {@link #xmlOutputStream(AlexandriaService, UUID, String, Map)}
 * that passes an empty map of view parameters.
 */
public static StreamingOutput xmlOutputStream(AlexandriaService service, UUID resourceId, String viewName) {
  return xmlOutputStream(service, resourceId, viewName, new HashMap<String, String>());
}
/**
 * Returns the XML output for a resource, optionally filtered through a named view.
 * With a blank view name the unfiltered XML is streamed. Otherwise the view is looked up,
 * the given parameters are substituted into it, and the view-filtered XML is streamed.
 *
 * @param service
 *          the service used to look up the view and the text
 * @param resourceId
 *          the id of the resource
 * @param viewName
 *          the name of the view; may be blank
 * @param viewParameters
 *          the parameters handed to {@code TextView.substitute}
 * @return the StreamingOutput producing the XML
 * @throws NotFoundException
 *           when a view name is given but the service has no such view for the resource
 */
public static StreamingOutput xmlOutputStream(AlexandriaService service, UUID resourceId, String viewName, Map<String, String> viewParameters) {
  if (StringUtils.isBlank(viewName)) {
    return streamXML(service, resourceId);
  }

  final TextView view = service.getTextView(resourceId, viewName)//
      .orElseThrow(() -> new NotFoundException("No view '" + viewName + "' found for this resource."));
  view.substitute(viewParameters);
  return streamTextViewXML(service, resourceId, view);
}
/**
 * Writes an opening tag, with all its attributes, for every annotation the segment opens.
 */
private static void writeOpenTags(Writer writer, TextGraphSegment segment) throws IOException {
  for (TextAnnotation opening : segment.getTextAnnotationsToOpen()) {
    writer.write(getOpenTag(opening.getName(), opening.getAttributes()));
  }
}
/**
 * Writes the segment's milestone annotation, if it has one, as a self-closing tag with all its attributes.
 */
private static void writeMilestoneTags(Writer writer, TextGraphSegment segment) throws IOException {
  final Optional<TextAnnotation> milestone = segment.getMilestoneTextAnnotation();
  if (!milestone.isPresent()) {
    return;
  }
  final TextAnnotation annotation = milestone.get();
  writer.write(getMilestoneTag(annotation.getName(), annotation.getAttributes()));
}
/**
 * Writes the segment's text to the writer exactly as the segment returns it.
 */
private static void writeText(Writer writer, TextGraphSegment segment) throws IOException {
  final String text = segment.getTextSegment();
  writer.write(text);
}
/**
 * Writes a closing tag for every annotation the segment closes.
 */
private static void writeCloseTags(Writer writer, TextGraphSegment segment) throws IOException {
  for (TextAnnotation closing : segment.getTextAnnotationsToClose()) {
    writer.write(getCloseTag(closing.getName()));
  }
}
/**
 * Builds a {@link StreamingOutput} that writes the resource's text graph segments as seen
 * through the given view. A new {@link TextViewContext} is created for each write, and the
 * view's ordered layer tags are passed on to the segment stream.
 *
 * @param service
 *          the service that supplies the segment stream
 * @param resourceId
 *          the id of the resource to stream
 * @param textView
 *          the view to apply
 * @return a StreamingOutput that does the work when its output stream is supplied
 */
public static StreamingOutput streamTextViewXML(AlexandriaService service, UUID resourceId, TextView textView) {
  return outputStream -> {
    final Writer utf8Writer = createBufferedUTF8OutputStreamWriter(outputStream);
    final TextViewContext viewContext = new TextViewContext(textView);
    stream(service, resourceId, utf8Writer, //
        segment -> streamTextGraphSegment(utf8Writer, segment, viewContext), //
        textView.getOrderedLayerTags());
  };
}
/**
 * Runs the given {@link StreamingOutput} against an in-memory buffer and returns what it wrote as a String.
 * The bytes are decoded as UTF-8, the encoding used by the writers this class creates
 * (see createBufferedUTF8OutputStreamWriter), rather than the platform default charset.
 *
 * @param outputStream
 *          the StreamingOutput to run
 * @return everything the StreamingOutput wrote, decoded as UTF-8
 * @throws RuntimeException
 *           wrapping any WebApplicationException or IOException raised while writing
 */
public static String asString(StreamingOutput outputStream) {
  try (ByteArrayOutputStream output = new ByteArrayOutputStream()) {
    outputStream.write(output);
    return output.toString("UTF-8");

  } catch (WebApplicationException | IOException e) {
    // the cause is kept in the rethrown exception, so no separate stack trace is printed
    throw new RuntimeException(e);
  }
}
protected static class TextViewContext {
private Map<String, ElementView> elementViewMap;
private Stack<TextAnnotation> ignoredAnnotationStack = new Stack<>();
private List<TextAnnotation> overruledTextAnnotations = Lists.newArrayList();
/**
 * Creates a context that filters by the element views of the given text view.
 *
 * @param textView
 *          the view whose element view map is used
 */
public TextViewContext(TextView textView) {
  this.elementViewMap = textView.getElementViewMap();
}
/**
 * Decides whether the tag of the given annotation should be written.
 * The element view registered under {@code name} is used, or the default element view when there is none.
 * When the view's precondition is met, the tag is shown if that view's element mode is {@code show}.
 * When it is not met, a {@code firstOf} precondition suppresses the tag, and any other precondition
 * falls back to the element mode of the default view.
 * Nothing is included while inside an ignored element.
 *
 * @param name
 *          the element name used to look up the element view
 * @param textAnnotation
 *          the annotation the precondition is checked against
 * @return true when the tag should be written
 */
public boolean includeTag(String name, TextAnnotation textAnnotation) {
  final ElementView fallbackView = elementViewMap.get(TextViewDefinition.DEFAULT_ATTRIBUTENAME);
  final ElementView view = elementViewMap.getOrDefault(name, fallbackView);
  final ElementMode ownMode = view.getElementMode();
  final ElementMode fallbackMode = fallbackView.getElementMode();
  final Optional<AttributePreCondition> preCondition = view.getPreCondition();

  boolean show = ownMode.equals(ElementMode.show);
  if (!preConditionIsMet(preCondition, textAnnotation)) {
    final boolean failedFirstOf = preCondition.isPresent() //
        && AttributeFunction.firstOf.equals(preCondition.get().getFunction());
    show = failedFirstOf ? false : fallbackMode.equals(ElementMode.show);
  }
  return notInsideIgnoredElement() && show;
}
/**
 * Checks the optional attribute precondition against the annotation.
 * An absent precondition counts as met. An annotation currently registered as overruled never meets
 * a precondition. For {@code is} / {@code isNot} the annotation's value for the precondition's
 * attribute must be, respectively must not be, among the precondition's values.
 * {@code firstOf} and any other function count as met here.
 */
private boolean preConditionIsMet(Optional<AttributePreCondition> preCondition, TextAnnotation textAnnotation) {
  if (!preCondition.isPresent()) {
    return true;
  }
  if (overruledTextAnnotations.contains(textAnnotation)) {
    return false;
  }

  final AttributePreCondition condition = preCondition.get();
  final String attributeName = condition.getAttribute();
  final List<String> acceptedValues = condition.getValues();
  final String annotationValue = textAnnotation.getAttributes().get(attributeName);
  switch (condition.getFunction()) {
    case is:
      return acceptedValues.contains(annotationValue);
    case isNot:
      return !acceptedValues.contains(annotationValue);
    default:
      // TODO: firstOf and any other function are not evaluated here
      return true;
  }
}
/**
 * Selects the attributes of the annotation that should be written, according to the attribute mode
 * of the annotation's element view (or of the default element view when none is registered for its name).
 * <ul>
 * <li>{@code showAll}: the annotation's own attribute map is returned as is</li>
 * <li>{@code hideAll}: an empty map</li>
 * <li>{@code showOnly}: only attributes listed as relevant in the element view</li>
 * <li>{@code hideOnly}: only attributes not listed as relevant in the element view</li>
 * </ul>
 *
 * @param textAnnotation
 *          the annotation whose attributes are filtered
 * @return the attributes to write
 * @throws RuntimeException
 *           on any other attribute mode
 */
public Map<String, String> includedAttributes(TextAnnotation textAnnotation) {
  final ElementView view = elementViewMap.getOrDefault(textAnnotation.getName(), elementViewMap.get(TextViewDefinition.DEFAULT_ATTRIBUTENAME));
  final Map<String, String> all = textAnnotation.getAttributes();
  final Map<String, String> selected = Maps.newHashMap();
  final AttributeMode mode = view.getAttributeMode();
  switch (mode) {
    case showAll:
      return all;

    case hideAll:
      return selected;

    case showOnly:
    case hideOnly: {
      // showOnly keeps the listed attributes, hideOnly keeps the unlisted ones
      final boolean keepListed = mode == AttributeMode.showOnly;
      for (Entry<String, String> attribute : all.entrySet()) {
        if (view.getRelevantAttributes().contains(attribute.getKey()) == keepListed) {
          selected.put(attribute.getKey(), attribute.getValue());
        }
      }
      return selected;
    }

    default:
      throw new RuntimeException("unexpected attributemode: " + mode);
  }
}
/**
 * Pushes the annotation on the ignored-annotation stack when the view hides its element.
 *
 * @param textAnnotation
 *          the annotation being opened
 */
public void pushWhenIgnoring(TextAnnotation textAnnotation) {
  if (determineIgnoring(textAnnotation)) {
    ignoredAnnotationStack.push(textAnnotation);
  }
}
/**
 * Tells whether the element of the given annotation is hidden by the view.
 * When the precondition of the applicable element view is met, that view's element mode decides;
 * otherwise the element mode of the default element view decides.
 */
private boolean determineIgnoring(TextAnnotation textAnnotation) {
  final ElementView fallbackView = elementViewMap.get(TextViewDefinition.DEFAULT_ATTRIBUTENAME);
  final ElementView view = elementViewMap.getOrDefault(textAnnotation.getName(), fallbackView);
  final ElementMode ownMode = view.getElementMode();
  final ElementMode fallbackMode = fallbackView.getElementMode();
  final Optional<AttributePreCondition> preCondition = view.getPreCondition();

  final boolean conditionMet = preConditionIsMet(preCondition, textAnnotation);
  final boolean hiddenByOwnMode = ownMode.equals(ElementMode.hide);
  return conditionMet ? hiddenByOwnMode : fallbackMode.equals(ElementMode.hide);
}
/**
 * Pops the top of the ignored-annotation stack when the view hides the element of the given annotation
 * and the stack is not empty.
 *
 * @param textAnnotation
 *          the annotation being closed
 */
public void popWhenIgnoring(TextAnnotation textAnnotation) {
  final boolean hidden = determineIgnoring(textAnnotation);
  if (!hidden || ignoredAnnotationStack.isEmpty()) {
    return;
  }
  ignoredAnnotationStack.pop();
}
/**
 * @return true when no ignored annotation is currently on the stack
 */
public boolean notInsideIgnoredElement() {
  return ignoredAnnotationStack.empty();
}
/**
 * Determines which TextAnnotations opened or closed by this segment are overruled, so that
 * includeTag can leave them out. Only annotations whose element name has a firstOf
 * precondition in the view definition are considered. They are grouped (currently by name only)
 * and each group is reduced to its overruled members. The result replaces the previously
 * registered overruled annotations.
 *
 * @param segment
 *          the TextGraphSegment to analyze
 */
public void registerCompetingTextAnnotations(TextGraphSegment segment) {
  final List<String> firstOfElementNames = elementViewMap.entrySet().stream()//
      .filter(this::hasFirstOfAttributeFunction)//
      .map(Entry::getKey)//
      .collect(toList());

  final Set<TextAnnotation> openedOrClosed = Sets.newHashSet(segment.textAnnotationsToOpen);
  openedOrClosed.addAll(segment.textAnnotationsToClose);

  final Map<String, List<TextAnnotation>> competitors = openedOrClosed.stream()//
      .filter(ta -> firstOfElementNames.contains(ta.getName()))//
      .collect(groupingBy(this::textAnnotationGrouping));

  final List<TextAnnotation> overruled = new ArrayList<>();
  for (List<TextAnnotation> group : competitors.values()) {
    overruled.addAll(overruledTextAnnotations(group));
  }
  overruledTextAnnotations = overruled;
}
// TODO: handle consecutive milestones
/**
 * Tells whether the element view in the entry has a precondition that uses the firstOf function.
 */
private boolean hasFirstOfAttributeFunction(Entry<String, ElementView> entry) {
  return entry.getValue().getPreCondition()//
      .filter(condition -> ElementView.AttributeFunction.firstOf.equals(condition.getFunction()))//
      .isPresent();
}
/**
 * Returns the key under which competing annotations are grouped; at present this is the annotation name only.
 */
private String textAnnotationGrouping(TextAnnotation textAnnotation) {
  // TODO: include annotated textRange
  final String groupKey = textAnnotation.getName();
  return groupKey;
}
/**
 * Reduces a group of same-named annotations to those that are overruled.
 * The values of the element's precondition are tried in order. The annotations whose attribute
 * equals the first value that matches any of them are removed from the group, and what remains
 * is returned. If no value matches, the whole group is returned.
 * The given list is modified in place and returned.
 */
private List<TextAnnotation> overruledTextAnnotations(List<TextAnnotation> group) {
  final String elementName = group.get(0).getName();
  final AttributePreCondition firstOfCondition = elementViewMap.get(elementName)//
      .getPreCondition().get();
  final String attribute = firstOfCondition.getAttribute();
  final List<String> prioritizedValues = Lists.newArrayList(firstOfCondition.getValues());
  final int sizeBefore = group.size();

  int next = 0;
  do {
    final String candidate = prioritizedValues.get(next++);
    group.removeIf(ta -> candidate.equals(ta.getAttributes().get(attribute)));
  } while (group.size() == sizeBefore && next < prioritizedValues.size());
  return group;
}
/**
 * Returns the annotations the segment opens, in the order the segment supplies them.
 * No reordering is applied here; the former pass over the list with an empty action did nothing
 * and has been removed.
 *
 * @param segment
 *          the segment whose opening annotations are requested
 * @return the list returned by the segment's getTextAnnotationsToOpen
 */
public List<TextAnnotation> getOrderedTextAnnotationsToOpen(TextGraphSegment segment) {
  return segment.getTextAnnotationsToOpen();
}
/**
 * Returns the annotations the segment closes, in the order the segment supplies them.
 *
 * @param segment
 *          the segment whose closing annotations are requested
 * @return the list returned by the segment's getTextAnnotationsToClose
 */
public List<TextAnnotation> getOrderedTextAnnotationsToClose(TextGraphSegment segment) {
  return segment.getTextAnnotationsToClose();
}
}
/**
 * Writes one segment to the writer as seen through the view context.
 * The competing annotations of the segment are registered with the context first, then the
 * opening tags, milestone tag, text and closing tags are written in that order, each filtered by the context.
 *
 * @param writer
 *          the writer to write to
 * @param segment
 *          the segment to serialize
 * @param textViewContext
 *          the view state used for filtering
 * @throws RuntimeException
 *           wrapping any IOException raised while writing
 */
public static void streamTextGraphSegment(Writer writer, TextGraphSegment segment, TextViewContext textViewContext) {
  try {
    // must happen before any tag is written: includeTag relies on the registered annotations
    textViewContext.registerCompetingTextAnnotations(segment);

    writeOpenTags(writer, segment, textViewContext);
    writeMilestoneTags(writer, segment, textViewContext);
    writeText(writer, segment, textViewContext);
    writeCloseTags(writer, segment, textViewContext);

  } catch (IOException writeFailure) {
    throw new RuntimeException(writeFailure);
  }
}
/**
 * Writes the closing tags of the segment that the view context includes, and lets the context
 * pop its ignore stack for each closed annotation afterwards.
 */
private static void writeCloseTags(Writer writer, TextGraphSegment segment, TextViewContext textViewContext) throws IOException {
  for (TextAnnotation closing : segment.getTextAnnotationsToClose()) {
    final String tagName = closing.getName();
    final boolean included = textViewContext.includeTag(tagName, closing);
    if (included) {
      writer.write(getCloseTag(tagName));
    }
    // the pop comes after the include check, which still has to see this element as ignored
    textViewContext.popWhenIgnoring(closing);
  }
}
/**
 * Writes the segment's text unless the view context is currently inside an ignored element.
 */
private static void writeText(Writer writer, TextGraphSegment segment, TextViewContext textViewContext) throws IOException {
  if (!textViewContext.notInsideIgnoredElement()) {
    return;
  }
  writer.write(segment.getTextSegment());
}
/**
 * Writes the segment's milestone annotation as a self-closing tag when the segment has one and
 * the view context includes it. Only the attributes selected by the context are written.
 */
private static void writeMilestoneTags(Writer writer, TextGraphSegment segment, TextViewContext textViewContext) throws IOException {
  final Optional<TextAnnotation> milestone = segment.getMilestoneTextAnnotation();
  if (!milestone.isPresent()) {
    return;
  }
  final TextAnnotation annotation = milestone.get();
  final String tagName = annotation.getName();
  if (!textViewContext.includeTag(tagName, annotation)) {
    return;
  }
  writer.write(getMilestoneTag(tagName, textViewContext.includedAttributes(annotation)));
}
/**
 * Writes the opening tags of the segment that the view context includes, with the attributes the
 * context selects, and lets the context push onto its ignore stack for each opened annotation afterwards.
 */
private static void writeOpenTags(Writer writer, TextGraphSegment segment, TextViewContext textViewContext) throws IOException {
  for (TextAnnotation opening : textViewContext.getOrderedTextAnnotationsToOpen(segment)) {
    final String tagName = opening.getName();
    final boolean included = textViewContext.includeTag(tagName, opening);
    if (included) {
      writer.write(getOpenTag(tagName, textViewContext.includedAttributes(opening)));
    }
    // the push comes after the include check, which must not yet see this element as ignored
    textViewContext.pushWhenIgnoring(opening);
  }
}
/**
 * Builds a self-closing tag, e.g. {@code <name a="b"/>}.
 *
 * @param name
 *          the element name
 * @param attributes
 *          the attributes to write into the tag
 * @return the tag as a String
 */
public static String getMilestoneTag(String name, Map<String, String> attributes) {
  final StringBuilder tag = openingTagBuilder(name, attributes);
  tag.append("/>");
  return tag.toString();
}
/**
 * Builds an opening tag, e.g. {@code <name a="b">}.
 *
 * @param name
 *          the element name
 * @param attributes
 *          the attributes to write into the tag
 * @return the tag as a String
 */
public static String getOpenTag(String name, Map<String, String> attributes) {
  final StringBuilder tag = openingTagBuilder(name, attributes);
  tag.append(">");
  return tag.toString();
}
/**
 * Builds a closing tag, e.g. {@code </name>}.
 *
 * @param name
 *          the element name
 * @return the tag as a String
 */
public static String getCloseTag(String name) {
  return new StringBuilder("</").append(name).append(">").toString();
}
/**
 * Appends every attribute to the builder as {@code  key="value"}, preceded by a space.
 * The value is written through appendAttributeValue; the key is written as is.
 *
 * @param builder
 *          the builder to append to
 * @param attributes
 *          the attributes to append, in the map's iteration order
 */
public static void appendAttributes(StringBuilder builder, Map<String, String> attributes) {
  for (Map.Entry<String, String> attribute : attributes.entrySet()) {
    builder.append(' ').append(attribute.getKey()).append("=\"");
    appendAttributeValue(builder, attribute.getValue());
    builder.append('"');
  }
}
/* private methods */
/**
 * Wraps the output stream in a buffered writer that encodes characters as UTF-8.
 */
private static Writer createBufferedUTF8OutputStreamWriter(OutputStream output) throws UnsupportedEncodingException {
  final OutputStreamWriter utf8Writer = new OutputStreamWriter(output, "UTF-8");
  return new BufferedWriter(utf8Writer);
}
/**
 * Feeds every text graph segment of the resource to the given action inside a service transaction,
 * then flushes the writer.
 *
 * @param orderedLayerTags
 *          passed on to the service when requesting the segment stream
 */
private static void stream(AlexandriaService service, UUID resourceId, Writer writer, Consumer<TextGraphSegment> action, List<List<String>> orderedLayerTags) throws IOException {
  service.runInTransaction(() -> service//
      .getTextGraphSegmentStream(resourceId, orderedLayerTags)//
      .forEach(action));
  // the writer is buffered, so push out whatever the action left in the buffer
  writer.flush();
}
/**
 * Starts a tag: {@code <name} followed by the attributes, without the closing {@code >} or {@code />}.
 */
private static StringBuilder openingTagBuilder(String name, Map<String, String> attributes) {
  final StringBuilder tag = new StringBuilder();
  tag.append("<").append(name);
  appendAttributes(tag, attributes);
  return tag;
}
/**
 * Appends an attribute value to the builder, escaped for use inside a double-quoted XML attribute.
 * The characters {@code <}, {@code >}, {@code &} and {@code "} are replaced by their predefined
 * XML entities; all other characters are copied unchanged.
 *
 * @param builder
 *          the builder to append to
 * @param value
 *          the raw attribute value
 */
private static void appendAttributeValue(StringBuilder builder, String value) {
  final int n = value.length();
  for (int i = 0; i < n; i++) {
    final char c = value.charAt(i);
    switch (c) {
      case '<':
        builder.append("&lt;");
        break;
      case '>':
        builder.append("&gt;");
        break;
      case '&':
        builder.append("&amp;");
        break;
      case '"':
        // the value is delimited by double quotes, so a literal quote would end it early
        builder.append("&quot;");
        break;
      default:
        builder.append(c);
        break;
    }
  }
}
}