package org.bbaw.wsp.cms.transform;
import java.util.ArrayList;
import org.bbaw.wsp.cms.lucene.IndexHandler;
import org.xml.sax.*;
import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.util.StringUtils;
public class HighlightContentHandler implements ContentHandler {
private String xmlnsString = "";
private String highlightElemName;
private int highlightElemPos = 1;
private int currentHighlightElemPos = 0;
private boolean highlightElemMode = false;
private int highlightElemModeOpenTags = 0;
private String highlightQueryType = "orig"; // orig, reg, norm or morph
private String highlightQuery; // complex Lucene query
private String highlightQueryForms; // highlight terms separated by a blank
private boolean highlightHitMode = false;
private int highlightHitModeOpenTags = 0;
private boolean firstPageBreakReachedMode = false; // in a page fragment: if a page break element is surrounded by an element (e.g. "s") then this element should not increment the currentHighlightElemPos
private boolean firstPageBreakReached = true;
private StringBuilder result = new StringBuilder();
public HighlightContentHandler() throws ApplicationException {
}
public HighlightContentHandler(String highlightElemName, int highlightElemPos) throws ApplicationException {
this.highlightElemName = highlightElemName;
this.highlightElemPos = highlightElemPos;
}
public HighlightContentHandler(String highlightElemName, int highlightElemPos, String highlightQueryType, String highlightQuery, String language) throws ApplicationException {
this.highlightElemName = highlightElemName;
this.highlightElemPos = highlightElemPos;
this.highlightQueryType = highlightQueryType;
this.highlightQuery = highlightQuery;
if (highlightQuery != null) {
IndexHandler indexHandler = IndexHandler.getInstance();
ArrayList<String> queryTerms = indexHandler.fetchTerms(highlightQuery, language); // all query terms in query (also morphological terms)
highlightQueryForms = toString(queryTerms);
}
}
public void setFirstPageBreakReachedMode(boolean firstPageBreakReachedMode) {
this.firstPageBreakReachedMode = firstPageBreakReachedMode;
if (firstPageBreakReachedMode)
this.firstPageBreakReached = false; // is first set to false and later if a page break is found (by startElement) it is set to true
}
public StringBuilder getResult() {
return result;
}
public void startDocument() throws SAXException {
}
public void endDocument() throws SAXException {
}
public void characters(char[] c, int start, int length) throws SAXException {
char[] cCopy = new char[length];
System.arraycopy(c, start, cCopy, 0, length);
String charactersStr = String.valueOf(cCopy);
if (charactersStr != null && ! charactersStr.equals("")) {
charactersStr = StringUtils.deresolveXmlEntities(charactersStr);
write(charactersStr);
}
}
public void ignorableWhitespace(char[] c, int start, int length) throws SAXException {
}
public void processingInstruction(String target, String data) throws SAXException {
}
public void setDocumentLocator(Locator locator) {
}
public void startPrefixMapping(String prefix, String uri) throws SAXException {
xmlnsString += "xmlns:" + prefix + "=\"" + uri + "\" ";
if (prefix != null && prefix.equals(""))
xmlnsString = "xmlns" + "=\"" + uri + "\" ";
}
public void endPrefixMapping(String prefix) throws SAXException {
}
public void skippedEntity(String name) throws SAXException {
}
public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
int attrSize = attrs.getLength();
String attrString = "";
for (int i=0; i<attrSize; i++) {
String attrQName = attrs.getQName(i);
String attrValue = attrs.getValue(i);
attrValue = StringUtils.forXML(attrValue);
attrString = attrString + " " + attrQName + "=\"" + attrValue + "\"";
}
if (attrString != null && ! attrString.isEmpty()) {
attrString = attrString.trim();
}
if (xmlnsString != null && ! xmlnsString.isEmpty()) {
xmlnsString = xmlnsString.trim();
}
if (localName.equals("pb"))
firstPageBreakReached = true;
// start highlight element at position
if (highlightElemName != null && highlightElemName.equals(localName) && firstPageBreakReached) {
currentHighlightElemPos++;
if (currentHighlightElemPos == highlightElemPos && highlightElemModeOpenTags == 0) {
highlightElemMode = true;
write("<hi type=\"elem\">");
}
}
if (highlightElemMode) {
highlightElemModeOpenTags++;
}
// start highlight query
if (highlightQuery != null && localName.equals("w")) {
boolean matched = false;
String attrQName = "form";
if (highlightQueryType.equals("orig"))
attrQName = "form";
else if (highlightQueryType.equals("reg"))
attrQName = "formRegularized";
else if (highlightQueryType.equals("norm"))
attrQName = "formNormalized";
else if (highlightQueryType.equals("morph"))
attrQName = "lemmas";
String attrValue = getAttrValue(attrs, attrQName);
if (attrValue != null) {
String[] forms = highlightQueryForms.split(" ");
for (int i=0; i<forms.length; i++) {
if (! matched) {
String form = forms[i];
if (form.endsWith("*")) { // TODO support middle wildcard queries: bla*bla bla?bla
form = form.replace("*", "");
matched = attrValue.startsWith(form);
} else {
matched = attrValue.equals(form);
}
}
}
}
if ((highlightElemName == null && matched && highlightHitModeOpenTags == 0) || (highlightElemName != null && highlightElemMode && matched && highlightHitModeOpenTags == 0)) {
highlightHitMode = true;
write("<hi type=\"hit\">");
}
}
if (highlightHitMode) {
highlightHitModeOpenTags++;
}
write("<" + name);
if (xmlnsString != null && ! xmlnsString.isEmpty())
write(" " + xmlnsString);
if (attrString != null && ! attrString.isEmpty())
write(" " + attrString);
write(">");
xmlnsString = "";
}
public void endElement(String uri, String localName, String name) throws SAXException {
write("</" + name + ">");
// end highlight element at position
if (highlightElemMode) {
if (highlightElemModeOpenTags == 1) {
highlightElemMode = false;
write("</hi>");
}
highlightElemModeOpenTags--;
}
// end highlight query
if (highlightHitMode) {
if (highlightHitModeOpenTags == 1) {
highlightHitMode = false;
write("</hi>");
}
highlightHitModeOpenTags--;
}
}
private String toString(ArrayList<String> queryForms) {
String queryFormsStr = "";
for (int i=0; i<queryForms.size(); i++) {
String form = queryForms.get(i);
queryFormsStr = queryFormsStr + form + " ";
}
if (queryForms == null || queryForms.size() == 0)
return null;
else
return queryFormsStr.substring(0, queryFormsStr.length() -1);
}
private void write(String outStr) throws SAXException {
result.append(outStr);
}
private String getAttrValue(Attributes attrs, String attrQName) {
String retValue = null;
int attrSize = attrs.getLength();
for (int i=0; i<attrSize; i++) {
String attrQNameTmp = attrs.getQName(i);
String attrValue = attrs.getValue(i);
if (attrQNameTmp.equals(attrQName))
return attrValue;
}
return retValue;
}
}