/*******************************************************************************
* Copyright 2013
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package de.tudarmstadt.ukp.csniper.webapp.analysis.uima;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.component.CasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.CasUtil;
import org.apache.uima.resource.ResourceInitializationException;
import org.dom4j.io.SAXContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
/**
 * A simple consumer which produces an HTML file. Annotations of the configured types are wrapped
 * in {@code span} elements whose background is set to the configured color.
 * <p>
 * The output document is started in {@link #initialize(UimaContext)}, extended by every call to
 * {@link #process(CAS)} and written to the output file in {@link #collectionProcessComplete()}.
 * Spans are opened and closed at token boundaries only, so a marked annotation is rendered only
 * if it begins at the start of a token covered by a ROOT annotation.
 *
 * @author Erik-Lân Do Dinh
 *
 */
public class HTMLColorMarkerConsumer
    extends CasAnnotator_ImplBase
{
    /** Names of the annotation types to be marked; the i-th type uses the i-th marker color. */
    public static final String PARAM_MARKED_TYPES = "MarkedTypes";
    @ConfigurationParameter(name = PARAM_MARKED_TYPES, mandatory = true)
    private String[] markedTypes;

    /** Colors (inserted verbatim as CSS {@code background} values), one per marked type. */
    public static final String PARAM_MARKER_COLORS = "MarkerColors";
    @ConfigurationParameter(name = PARAM_MARKER_COLORS, mandatory = true)
    private String[] markerColors;

    /** The file the generated markup is written to. */
    public static final String PARAM_OUTPUT_FILE = "OutputFile";
    @ConfigurationParameter(name = PARAM_OUTPUT_FILE, mandatory = true)
    private File outputFile;

    /** Character encoding used when writing the output file. */
    public static final String PARAM_ENCODING = "Encoding";
    @ConfigurationParameter(name = PARAM_ENCODING, mandatory = true, defaultValue = "UTF-8")
    private String encoding;

    /**
     * Value used as the {@code id} attribute of the wrapping {@code div} and in the selector of
     * the embedded stylesheet.
     */
    public static final String PARAM_CONTAINER_CSS_CLASS = "OutputClass";
    @ConfigurationParameter(name = PARAM_CONTAINER_CSS_CLASS, mandatory = true, defaultValue = "markedOutput")
    private String outputClass;

    /** Collects the SAX events from which the output document is built. */
    private SAXContentHandler handler;
    /** Maps the name of a marked type to its color. */
    private Map<String, String> colors;
    private Type tokenType;
    private Type rootType;
    /** Number of (type, color) pairs actually used; the shorter of the two parameter arrays. */
    private int typeCount;

    /**
     * Looks up the token and ROOT types which drive the iteration in {@link #process(CAS)}.
     *
     * @param ts
     *            the type system of the CAS about to be processed.
     */
    @Override
    public void typeSystemInit(TypeSystem ts)
    {
        tokenType = ts.getType("de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token");
        rootType = ts.getType("de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.ROOT");
        // TODO use sentence type instead; blocked by a bug most probably in the tsurgeon AE
        // sentenceType =
        // ts.getType("de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence");
    }

    /**
     * Builds the type-to-color map and starts the output document: a wrapping {@code div}
     * containing a small stylesheet for the marker spans.
     *
     * @param context
     *            the UIMA context, passed on to the superclass.
     * @throws ResourceInitializationException
     *             if the superclass fails to initialize or the document header cannot be
     *             created.
     */
    @Override
    public void initialize(UimaContext context)
        throws ResourceInitializationException
    {
        super.initialize(context);

        // TODO notify user that the number of arguments is not equal
        // Surplus types or colors are silently ignored.
        typeCount = Math.min(markerColors.length, markedTypes.length);

        // build map: type->color
        colors = new HashMap<String, String>();
        for (int i = 0; i < typeCount; i++) {
            colors.put(markedTypes[i], markerColors[i]);
        }

        handler = new SAXContentHandler();
        try {
            handler.startDocument();

            AttributesImpl attr = new AttributesImpl();
            attr.addAttribute("", "", "id", "CDATA", outputClass);
            handler.startElement("", "div", "", attr);

            // add stylesheet information
            AttributesImpl style = new AttributesImpl();
            style.addAttribute("", "", "type", "CDATA", "text/css");
            handler.startElement("", "style", "", style);

            StringBuilder headerCss = new StringBuilder();
            headerCss.append("div#");
            headerCss.append(outputClass);
            headerCss.append(" span {");
            // non-zero CSS lengths need a unit, otherwise the declaration is invalid
            headerCss.append(" display:inline-block; padding:0 1px 0 1px;");
            headerCss.append(" margin:1px; border:solid 1px #FFFFFF; }");
            char[] css = headerCss.toString().toCharArray();
            handler.characters(css, 0, css.length);

            handler.endElement("", "style", "");
        }
        catch (SAXException e) {
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Appends the tokens of every ROOT annotation of every view to the output document, each
     * followed by a blank, and wraps marked annotations in colored {@code span} elements. A
     * {@code br} element is emitted after each ROOT.
     *
     * @param cas
     *            the CAS to render.
     * @throws AnalysisEngineProcessException
     *             if the markup cannot be generated.
     */
    @Override
    public void process(CAS cas)
        throws AnalysisEngineProcessException
    {
        Iterator<CAS> viewIterator = cas.getViewIterator();
        while (viewIterator.hasNext()) {
            CAS view = viewIterator.next();

            // Offsets are compared against the tokens of this view, so the marked annotations
            // have to be collected per view as well.
            List<AnnotationFS> filtered = new ArrayList<AnnotationFS>();
            for (int i = 0; i < typeCount; i++) {
                try {
                    Type type = CasUtil.getType(view, markedTypes[i]);
                    filtered.addAll(CasUtil.select(view, type));
                }
                catch (IllegalArgumentException ignored) {
                    // TODO at the moment, don't do anything when a type is not found
                }
            }

            // Annotations for which a span has been opened but not yet closed.
            List<AnnotationFS> open = new ArrayList<AnnotationFS>();
            try {
                for (AnnotationFS root : CasUtil.select(view, rootType)) {
                    for (AnnotationFS token : CasUtil.selectCovered(view, tokenType, root)) {
                        for (AnnotationFS a : getAnnotationsBeginningAt(token.getBegin(),
                                filtered)) {
                            String color = "background:"
                                    + getColor(view.getTypeSystem(), a.getType())
                                    + "; color:#DDDDDD;";
                            AttributesImpl attr = new AttributesImpl();
                            attr.addAttribute("", "", "style", "CDATA", color);
                            handler.startElement("", "span", "", attr);
                            open.add(a);
                        }

                        char[] t = token.getCoveredText().toCharArray();
                        handler.characters(t, 0, t.length);

                        for (AnnotationFS a : getAnnotationsEndingAt(token.getEnd(), filtered)) {
                            // Only close spans which were actually opened; closing more would
                            // pop the enclosing elements off the handler's element stack.
                            if (open.remove(a)) {
                                handler.endElement("", "span", "");
                            }
                        }

                        handler.characters(new char[] { ' ' }, 0, 1);
                    }

                    // newline for each sentence
                    handler.startElement("", "br", "", new AttributesImpl());
                    handler.endElement("", "br", "");
                }

                // Close spans whose annotation did not end at a token boundary, so that they do
                // not swallow the output of following views.
                for (int i = open.size(); i > 0; i--) {
                    handler.endElement("", "span", "");
                }
            }
            catch (SAXException e) {
                throw new AnalysisEngineProcessException(e);
            }
        }
    }

    /**
     * Determine the color for an annotation type. Selecting a marked type also yields
     * annotations of its subtypes, so the type hierarchy is walked upwards until a configured
     * type is found.
     *
     * @param ts
     *            the type system the type belongs to.
     * @param type
     *            the type of the annotation to be colored.
     * @return the color configured for the type or its closest configured supertype, or
     *         {@code null} if there is none.
     */
    private String getColor(TypeSystem ts, Type type)
    {
        for (Type t = type; t != null; t = ts.getParent(t)) {
            String color = colors.get(t.getName());
            if (color != null) {
                return color;
            }
        }
        return null;
    }

    /**
     * Get annotations which start at a specified index.
     *
     * @param i
     *            the index where an annotation should start.
     * @param filtered
     *            the List to take the Annotations from.
     * @return a List of AnnotationFS contained in filtered which start at index i.
     */
    private List<AnnotationFS> getAnnotationsBeginningAt(int i, List<AnnotationFS> filtered)
    {
        List<AnnotationFS> annotations = new ArrayList<AnnotationFS>();
        for (AnnotationFS a : filtered) {
            if (a.getBegin() == i) {
                annotations.add(a);
            }
        }
        return annotations;
    }

    /**
     * Get annotations which end at a specified index.
     *
     * @param i
     *            the index where an annotation should end.
     * @param filtered
     *            the List to take the Annotations from.
     * @return a List of AnnotationFS contained in filtered which end at index i.
     */
    private List<AnnotationFS> getAnnotationsEndingAt(int i, List<AnnotationFS> filtered)
    {
        List<AnnotationFS> annotations = new ArrayList<AnnotationFS>();
        for (AnnotationFS a : filtered) {
            if (a.getEnd() == i) {
                annotations.add(a);
            }
        }
        return annotations;
    }

    /**
     * Closes the wrapping {@code div}, finishes the document and writes it to the output file
     * using the configured encoding.
     *
     * @throws AnalysisEngineProcessException
     *             if the document cannot be finished or the file cannot be written.
     */
    @Override
    public void collectionProcessComplete()
        throws AnalysisEngineProcessException
    {
        try {
            handler.endElement("", "div", "");
            handler.endDocument();

            String xml = handler.getDocument().asXML();
            FileUtils.writeStringToFile(outputFile, xml, encoding);
        }
        catch (SAXException e) {
            // Report the failure to the framework instead of silently producing no output.
            throw new AnalysisEngineProcessException(e);
        }
        catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
}