/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.searcher.response.xml;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.HashSet;
import java.util.Set;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.html.Entities;
import org.apache.nutch.searcher.Hit;
import org.apache.nutch.searcher.HitDetails;
import org.apache.nutch.searcher.Summary;
import org.apache.nutch.searcher.response.ResponseWriter;
import org.apache.nutch.searcher.response.SearchResults;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
/**
 * A ResponseWriter implementation that returns search results in XML format.
 *
 * <p>The results are assembled into a DOM document, serialized into an
 * in-memory buffer and then written to the servlet response together with
 * cache-control headers.</p>
 */
public class XMLResponseWriter
  implements ResponseWriter {

  private String contentType = null;
  private Configuration conf;
  private int maxAgeInSeconds;
  private boolean prettyPrint;

  /**
   * Creates and returns a new node within the XML document.
   *
   * @param doc The XML document.
   * @param parent The parent Node.
   * @param name The name of the new node.
   *
   * @return The newly created node Element.
   */
  private static Element addNode(Document doc, Node parent, String name) {
    Element child = doc.createElement(name);
    parent.appendChild(child);
    return child;
  }

  /**
   * Creates a new node within the XML document. The node contains the text
   * supplied as a child text node, after characters that are not legal in XML
   * have been removed from it.
   *
   * @param doc The XML document.
   * @param parent The parent Node.
   * @param name The name of the new node.
   * @param text A text string to append as a child node.
   */
  private static void addNode(Document doc, Node parent, String name,
    String text) {
    Element child = doc.createElement(name);
    child.appendChild(doc.createTextNode(getLegalXml(text)));
    parent.appendChild(child);
  }

  /**
   * Adds an attribute name and value to a node Element in the XML document.
   * Characters that are not legal in XML are removed from the value.
   *
   * @param doc The XML document.
   * @param node The node Element on which to attach the attribute.
   * @param name The name of the attribute.
   * @param value The value of the attribute.
   */
  private static void addAttribute(Document doc, Element node, String name,
    String value) {
    Attr attribute = doc.createAttribute(name);
    attribute.setValue(getLegalXml(value));
    node.getAttributes().setNamedItem(attribute);
  }

  /**
   * Transforms and returns the text string as legal XML text by dropping every
   * code point that is not a legal XML character.
   *
   * <p>The text is scanned by Unicode code point rather than by UTF-16 code
   * unit, so supplementary characters encoded as a valid surrogate pair are
   * kept, while unpaired surrogates are removed.</p>
   *
   * @param text The text to transform, may be null.
   *
   * @return The text string in the form of legal XML text; the same instance
   * when nothing had to be removed, or null if the input was null.
   */
  protected static String getLegalXml(String text) {
    if (text == null) {
      return null;
    }
    final int length = text.length();
    // allocated lazily so that fully legal input is returned without copying
    StringBuilder buffer = null;
    int i = 0;
    while (i < length) {
      // an unpaired surrogate is returned as its own (illegal) code point
      int codePoint = text.codePointAt(i);
      if (isLegalXml(codePoint)) {
        if (buffer != null) {
          buffer.appendCodePoint(codePoint);
        }
      }
      else if (buffer == null) {
        // first illegal character found: keep the legal prefix seen so far
        buffer = new StringBuilder(length);
        buffer.append(text, 0, i);
      }
      i += Character.charCount(codePoint);
    }
    return (buffer != null) ? buffer.toString() : text;
  }

  /**
   * Determines if the code point is a legal XML character.
   *
   * @param codePoint The Unicode code point to check.
   *
   * @return True if the code point is legal xml, false otherwise.
   */
  private static boolean isLegalXml(final int codePoint) {
    return codePoint == 0x9 || codePoint == 0xa || codePoint == 0xd
      || (codePoint >= 0x20 && codePoint <= 0xd7ff)
      || (codePoint >= 0xe000 && codePoint <= 0xfffd)
      || (codePoint >= 0x10000 && codePoint <= 0x10ffff);
  }

  /**
   * Sets the content type that is sent with the response.
   *
   * @param contentType The content type to set on the servlet response.
   */
  public void setContentType(String contentType) {
    this.contentType = contentType;
  }

  /**
   * Returns the configuration previously passed to {@link #setConf}.
   *
   * @return The current configuration, or null if none has been set.
   */
  public Configuration getConf() {
    return conf;
  }

  /**
   * Stores the configuration and reads the writer settings from it:
   * <code>searcher.response.maxage</code> (seconds, default 86400) and
   * <code>searcher.response.prettyprint</code> (default true).
   *
   * @param conf The configuration to read the settings from.
   */
  public void setConf(Configuration conf) {
    this.conf = conf;
    this.maxAgeInSeconds = conf.getInt("searcher.response.maxage", 86400);
    this.prettyPrint = conf.getBoolean("searcher.response.prettyprint", true);
  }

  /**
   * Writes the search results to the servlet response as an XML document and
   * sets the content type, Cache-Control and Expires headers.
   *
   * @param results The search results to write.
   * @param request The servlet request (not used by this writer).
   * @param response The servlet response to write the XML to.
   *
   * @throws IOException If the document cannot be built, serialized or
   * written; any non-IO failure is wrapped as the cause.
   */
  public void writeResponse(SearchResults results, HttpServletRequest request,
    HttpServletResponse response)
    throws IOException {

    try {

      // build and serialize the whole document before touching the response
      Document xmldoc = buildDocument(results);
      byte[] content = serialize(xmldoc);

      // cache control headers; long arithmetic so that a max age of more than
      // ~24 days does not overflow int when converted to milliseconds
      long expiresAtMillis = System.currentTimeMillis()
        + (1000L * maxAgeInSeconds);
      response.setContentType(contentType);
      response.setHeader("Cache-Control", "max-age=" + maxAgeInSeconds);
      // takes epoch milliseconds, so no time zone or locale dependent
      // formatting is done here
      response.setDateHeader("Expires", expiresAtMillis);

      // write out the content to the response
      response.getOutputStream().write(content);
      response.flushBuffer();
    }
    catch (IOException e) {
      // already the declared type, do not wrap it a second time
      throw e;
    }
    catch (Exception e) {
      throw new IOException(e);
    }
  }

  /**
   * Builds the XML document holding the search metadata and the documents.
   *
   * @param results The search results to convert.
   *
   * @return The populated XML document.
   *
   * @throws ParserConfigurationException If no document builder can be
   * created.
   */
  private static Document buildDocument(SearchResults results)
    throws ParserConfigurationException {

    // create the xml document and add the results and search nodes
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    Document xmldoc = factory.newDocumentBuilder().newDocument();
    Element resEl = addNode(xmldoc, xmldoc, "results");
    Element searchEl = addNode(xmldoc, resEl, "search");

    // add common nodes; totalhits is written exactly once
    addNode(xmldoc, searchEl, "query", results.getQuery());
    addNode(xmldoc, searchEl, "totalhits",
      String.valueOf(results.getTotalHits()));
    String lang = results.getLang();
    if (lang != null) {
      addNode(xmldoc, searchEl, "lang", lang);
    }
    String sort = results.getSort();
    if (sort != null) {
      addNode(xmldoc, searchEl, "sort", sort);
    }
    addNode(xmldoc, searchEl, "reverse", results.isReverse() ? "true"
      : "false");
    addNode(xmldoc, searchEl, "start", String.valueOf(results.getStart()));
    addNode(xmldoc, searchEl, "end", String.valueOf(results.getEnd()));
    addNode(xmldoc, searchEl, "rows", String.valueOf(results.getRows()));
    addNode(xmldoc, searchEl, "withSummary",
      String.valueOf(results.isWithSummary()));

    // an empty field set means "return every field"
    String[] searchFields = results.getFields();
    Set<String> fieldSet = new HashSet<String>();
    if (searchFields != null && searchFields.length > 0) {
      addNode(xmldoc, searchEl, "fields", StringUtils.join(searchFields, ","));
      for (String searchField : searchFields) {
        fieldSet.add(searchField);
      }
    }

    // add documents; hits, details and summaries are read at the same index
    Element documents = addNode(xmldoc, resEl, "documents");
    HitDetails[] details = results.getDetails();
    Hit[] hits = results.getHits();
    Summary[] summaries = results.getSummaries();
    for (int i = 0; i < details.length; i++) {

      // every document has an indexno and an indexkey
      Element document = addNode(xmldoc, documents, "document");
      addAttribute(xmldoc, document, "indexno",
        String.valueOf(hits[i].getIndexNo()));
      addAttribute(xmldoc, document, "indexkey",
        String.valueOf(hits[i].getUniqueKey()));

      // only add the summary if summaries were requested and are available
      if (summaries != null && results.isWithSummary()) {
        String encSumm = Entities.encode(summaries[i].toString());
        addNode(xmldoc, document, "summary", encSumm);
      }

      // add the fields from hit details
      Element fields = addNode(xmldoc, document, "fields");
      HitDetails detail = details[i];
      for (int j = 0; j < detail.getLength(); j++) {
        String fieldName = detail.getField(j);
        String[] fieldValues = detail.getValues(fieldName);

        // if we specified fields to return, only return those fields
        if (fieldSet.size() == 0 || fieldSet.contains(fieldName)) {
          Element field = addNode(xmldoc, fields, "field");
          addAttribute(xmldoc, field, "name", fieldName);
          for (int k = 0; k < fieldValues.length; k++) {
            String encFieldVal = Entities.encode(fieldValues[k]);
            addNode(xmldoc, field, "value", encFieldVal);
          }
        }
      }
    }
    return xmldoc;
  }

  /**
   * Serializes the XML document into a byte array, indented when pretty
   * printing is enabled in the configuration.
   *
   * @param xmldoc The document to serialize.
   *
   * @return The serialized document.
   *
   * @throws TransformerException If the transformer cannot be created or the
   * transformation fails.
   */
  private byte[] serialize(Document xmldoc)
    throws TransformerException {

    // get the xml source and a transformer to print it out
    DOMSource source = new DOMSource(xmldoc);
    TransformerFactory transFactory = TransformerFactory.newInstance();
    Transformer transformer = transFactory.newTransformer();

    // pretty printing can be set through configuration
    if (prettyPrint) {
      transformer.setOutputProperty(OutputKeys.INDENT, "yes");
      transformer.setOutputProperty(
        "{http://xml.apache.org/xslt}indent-amount", "2");
    }

    // write out the content to a byte array
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    transformer.transform(source, new StreamResult(baos));
    return baos.toByteArray();
  }
}