package lux.solr;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.xml.transform.TransformerException;
import javax.xml.transform.sax.SAXSource;
import lux.Compiler;
import lux.DocWriter;
import lux.Evaluator;
import lux.QueryStats;
import lux.TransformErrorListener;
import lux.exception.LuxException;
import lux.exception.ResourceExhaustedException;
import lux.search.LuxSearcher;
import lux.solr.LuxDispatchFilter.Request;
import lux.xml.QName;
import net.sf.saxon.Configuration;
import net.sf.saxon.expr.instruct.GlobalVariable;
import net.sf.saxon.om.FingerprintedQName;
import net.sf.saxon.om.NamespaceBinding;
import net.sf.saxon.om.SequenceTool;
import net.sf.saxon.om.StructuredQName;
import net.sf.saxon.s9api.Axis;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.XQueryExecutable;
import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmNodeKind;
import net.sf.saxon.s9api.XdmSequenceIterator;
import net.sf.saxon.s9api.XdmValue;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.tree.linked.LinkedTreeBuilder;
import net.sf.saxon.tree.tiny.TinyElementImpl;
import net.sf.saxon.type.AnyType;
import net.sf.saxon.type.BuiltInAtomicType;
import net.sf.saxon.value.AtomicValue;
import net.sf.saxon.value.DecimalValue;
import net.sf.saxon.value.GDateValue;
import net.sf.saxon.value.GDayValue;
import net.sf.saxon.value.GMonthDayValue;
import net.sf.saxon.value.GMonthValue;
import net.sf.saxon.value.GYearMonthValue;
import net.sf.saxon.value.GYearValue;
import net.sf.saxon.value.QNameValue;
import net.sf.saxon.value.TextFragmentValue;
import nu.validator.htmlparser.sax.HtmlParser;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.QueryComponent;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.SearchHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.DocSlice;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.update.CommitUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessorChain;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;
/**
* This component executes searches expressed as XPath or XQuery. Its queries
* will match documents that have been indexed using XmlIndexer.
*/
public class XQueryComponent extends QueryComponent implements SolrCoreAware {
/** Request parameter read in prepare() to obtain the query path, which is used as the base URI when compiling. */
public static final String LUX_XQUERY = "lux.xquery";
/** Request parameter whose value is reported in the path-info element of the lux:http variable. */
public static final String LUX_PATH_INFO = "lux.pathInfo";
/** Name of the external variable that receives the hand-built http parameter document. */
private static final QName LUX_HTTP = new QName(Evaluator.LUX_NAMESPACE, "http");
// TODO: expose via configuration
// Clamp before narrowing: Runtime.maxMemory() may be Long.MAX_VALUE (no limit) or exceed 64GB,
// in which case a bare (int) cast wraps around and can yield a negative limit.
private static final int MAX_RESULT_SIZE = (int) Math.min(Integer.MAX_VALUE, Runtime.getRuntime().maxMemory() / 32);
protected Set<String> fields = new HashSet<String>();
protected SolrIndexConfig solrIndexConfig;
protected SearchHandler searchHandler;
protected String queryPath;
private SolrURIResolver uriResolver;
// Shared by all component instances; created exactly once so that constructing another
// instance cannot discard the evaluators that running threads have already stored.
private static final ThreadLocal<Evaluator> evalHolder = new ThreadLocal<Evaluator>();
private Serializer serializer;
private Logger logger;
private SolrCore core;
// running total of (approximate) bytes added to the current response; reset in prepare()
private int resultByteSize;
// In theory this is per-request state, but changes infrequently, so we just grab it as it flies by?
private String[] shards;
private String[] slices;
/**
 * Creates the component. Only the logger is set up here; everything that
 * depends on the Solr core is initialized in inform().
 */
public XQueryComponent() {
    logger = LoggerFactory.getLogger(XQueryComponent.class);
}
/**
 * Called by Solr once the core is available: registers the index configuration
 * for the core and installs a SolrURIResolver (wrapping Saxon's system
 * resolver) on the Saxon configuration.
 *
 * @param solrCore the core this component belongs to
 */
@Override
public void inform(SolrCore solrCore) {
    solrIndexConfig = SolrIndexConfig.registerIndexConfiguration(solrCore);
    core = solrCore;
    Compiler indexCompiler = solrIndexConfig.getCompiler();
    Configuration config = indexCompiler.getProcessor().getUnderlyingConfiguration();
    SolrURIResolver resolver = new SolrURIResolver(this, config.getSystemURIResolver());
    uriResolver = resolver;
    config.setURIResolver(resolver);
}
/**
 * Scans the core's request handlers for SearchHandlers whose component list
 * contains this very instance (identity comparison) and remembers the handler
 * in {@code searchHandler}. If several handlers reference this component, the
 * one visited last is kept.
 */
private void findSearchHandler () {
    for (SolrRequestHandler candidate : core.getRequestHandlers().values()) {
        if (!(candidate instanceof SearchHandler)) {
            continue;
        }
        SearchHandler handler = (SearchHandler) candidate;
        List<SearchComponent> components = handler.getComponents();
        if (components == null) {
            continue;
        }
        boolean containsThis = false;
        Iterator<SearchComponent> it = components.iterator();
        while (!containsThis && it.hasNext()) {
            containsThis = (it.next() == this);
        }
        if (containsThis) {
            searchHandler = handler;
        }
    }
}
/**
 * Per-request setup: locates the owning search handler on first use, defaults
 * the query string from the "q" parameter, checks out a serializer and picks
 * its output method, resolves the query path and resets the result-size counter.
 */
@Override
public void prepare(ResponseBuilder rb) throws IOException {
    // The handler is not linked to this component until after all inform() calls
    // have run, so the lookup through the core registry is deferred to the first request.
    if (searchHandler == null) {
        findSearchHandler ();
    }
    SolrParams requestParams = rb.req.getParams();
    if (rb.getQueryString() == null) {
        rb.setQueryString(requestParams.get(CommonParams.Q));
    }
    String requestedType = requestParams.get("lux.contentType");
    // TODO: make this a local variable in or near #addResult, not an instance variable: it's not threadsafe
    serializer = solrIndexConfig.checkoutSerializer();
    if (requestedType == null) {
        serializer.setOutputProperty(Serializer.Property.METHOD, getDefaultSerialization());
    } else if ("text/html".equals(requestedType)) {
        serializer.setOutputProperty(Serializer.Property.METHOD, "html");
    } else if ("text/xml".equals(requestedType)) {
        serializer.setOutputProperty(Serializer.Property.METHOD, "xml");
    }
    // any other explicit content type leaves the serializer's method untouched
    if (queryPath == null) {
        // allow subclasses to override...
        queryPath = requestParams.get(LUX_XQUERY);
    }
    resultByteSize = 0;
}
/**
 * Lists the http URLs of all replicas of the shards captured from the most
 * recent distributed request.
 *
 * @param includeSelf when false, the replicas of the shard this core belongs to
 * (as reported by its cloud descriptor) are left out
 * @return the replica URLs; empty if no distributed request has been seen yet
 */
public ArrayList<String> getShardURLs (boolean includeSelf) {
    ArrayList<String> urls = new ArrayList<String> ();
    // Work on snapshots: the fields are replaced by whichever request thread last
    // ran a distributed query, and are null until the first one arrives.
    String[] currentShards = shards;
    String[] currentSlices = slices;
    if (currentShards == null) {
        return urls;
    }
    // the cloud descriptor is only consulted when our own shard has to be filtered out
    String shardId = includeSelf ? null : core.getCoreDescriptor().getCloudDescriptor().getShardId();
    for (int i = 0; i < currentShards.length; i++) {
        boolean isOwnShard = shardId != null
                && currentSlices != null
                && i < currentSlices.length
                && shardId.equals(currentSlices[i]);
        if (isOwnShard || currentShards[i] == null) {
            // exclude this shard
            continue;
        }
        for (String replica : StrUtils.splitSmart(currentShards[i], "|", true)) {
            urls.add("http://" + replica);
        }
    }
    return urls;
}
/**
 * @return the serializer output method ("xml") that prepare() applies when the
 * request carries no lux.contentType parameter
 */
public String getDefaultSerialization() {
return "xml";
}
/**
 * Evaluates the request's XQuery and always hands the serializer checked out
 * in prepare() back to the index configuration, whatever the outcome.
 *
 * @throws SolrException (BAD_REQUEST) if result grouping was requested
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
    try {
        if (rb.grouping()) {
            throw new SolrException(ErrorCode.BAD_REQUEST, "grouping not supported for XQuery");
        }
        SolrParams params = rb.req.getParams();
        if (!params.getBool(XQUERY_COMPONENT_NAME, true)) {
            // TODO -- what is this for? who would pass xquery=false??
            return;
        }
        int start = params.getInt(CommonParams.START, 1);
        int len = params.getInt(CommonParams.ROWS, -1);
        evaluateQuery(rb, start, len);
    } finally {
        // The early exits above used to bypass this, leaking the serializer checked out in prepare().
        if (serializer != null) {
            solrIndexConfig.returnSerializer(serializer);
        }
    }
}
/**
 * Distributed-search entry point, invoked once per stage of a request.
 *
 * At ResponseBuilder.STAGE_PARSE_QUERY the XQuery is parsed, optimized,
 * compiled and executed via process(). When evaluation reaches a lux:search
 * call, a SearchResultIterator builds a Lucene Query and calls back into the
 * SearchHandler; such nested requests arrive here as CloudQueryRequest and
 * are passed on to the regular QueryComponent stages. Every other stage is
 * delegated to the superclass.
 *
 * @return the next stage for this component
 */
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
    if (rb.grouping()) {
        throw new SolrException(ErrorCode.BAD_REQUEST, "Solr grouping not supported for XQuery");
    }
    if (rb.stage != ResponseBuilder.STAGE_PARSE_QUERY) {
        return super.distributedProcess(rb);
    }
    if (!(rb.req instanceof CloudQueryRequest)) {
        process(rb);
        return ResponseBuilder.STAGE_DONE;
    }
    // the sort spec has already been generated
    CloudQueryRequest nested = (CloudQueryRequest) rb.req;
    rb.setSortSpec(nested.getSortSpec());
    return ResponseBuilder.STAGE_EXECUTE_QUERY;
}
/**
 * Compiles and runs the request's query and writes the outcome to the response.
 *
 * Results are added under "xpath-results"; compile or run-time problems are
 * reported under "xpath-error". If no document list was produced by earlier
 * processing, an empty one carrying the evaluator's document count is added
 * as "response". A pending commit is performed only when no error occurred.
 *
 * @param rb the response builder holding the request, response and query string
 * @param start 1-based position of the first result to return; earlier items are skipped
 * @param len maximum number of results to collect; values &lt;= 0 mean no limit
 */
protected void evaluateQuery(ResponseBuilder rb, int start, int len) {
String query = rb.getQueryString();
SolrQueryRequest req = rb.req;
SolrQueryResponse rsp = rb.rsp;
if (StringUtils.isBlank(query)) {
rsp.add("xpath-error", "query was blank");
return;
}
SolrParams params = req.getParams();
// compared below against elapsed milliseconds; values <= 0 disable the time check
long timeAllowed = (long) params.getInt(CommonParams.TIME_ALLOWED, -1);
XQueryExecutable expr;
LuxSearcher searcher = new LuxSearcher (rb.req.getSearcher());
DocWriter docWriter = new SolrDocWriter(this, rb.req.getCore());
Compiler compiler = solrIndexConfig.getCompiler();
Evaluator eval = new Evaluator(compiler, searcher, docWriter);
// make the evaluator reachable from this thread through getEvaluator()
evalHolder.set (eval);
TransformErrorListener errorListener = eval.getErrorListener();
try {
URI baseURI = queryPath == null ? null : java.net.URI.create(queryPath);
expr = compiler.compile(query, errorListener, baseURI, null);
} catch (LuxException ex) {
// ex.printStackTrace();
// prefer the errors collected by the listener; fall back to the exception message
String err = formatError(query, errorListener);
if (StringUtils.isEmpty(err)) {
err = ex.getMessage();
}
rsp.add("xpath-error", err);
// don't close: this forces a commit()
// evaluator.close();
return;
}
// SolrIndexSearcher.QueryResult result = new
// SolrIndexSearcher.QueryResult();
NamedList<Object> xpathResults = new NamedList<Object>();
long tstart = System.currentTimeMillis();
int count = 0;
SolrQueryContext context = new SolrQueryContext(this, req);
if (rb.shards != null && rb.req.getParams().getBool("distrib", true)) {
// This is a distributed request; pass in the ResponseBuilder so it will be
// available to a subquery.
context.setResponseBuilder(rb);
// also capture the current set of shards
shards = rb.shards;
slices = rb.slices;
}
bindRequestVariables(rb, req, expr, compiler, eval, context);
Iterator<XdmItem> queryResults = eval.iterator(expr, context);
String err = null;
while (queryResults.hasNext()) {
XdmItem xpathResult = queryResults.next();
// skip items that precede the requested 1-based start position
if (++count < start) {
continue;
}
// A first item that is an EXPath http:response element is unpacked into status,
// headers and body instead of being serialized as an ordinary result.
if (count == 1 && !xpathResult.isAtomicValue()) {
net.sf.saxon.s9api.QName name = ((XdmNode)xpathResult).getNodeName();
if (name != null && name.getNamespaceURI().equals(EXPATH_HTTP_NS) &&
name.getLocalName().equals("response")) {
err = handleEXPathResponse(req, rsp, xpathResults, xpathResult);
if (queryResults.hasNext()) {
logger.warn ("Ignoring results following http:response, which should be the sole item in its result");
}
break;
}
}
err = safeAddResult(xpathResults, xpathResult);
if (err != null) {
xpathResult = null;
break;
}
// stop once the row limit is reached or the time budget is used up
if ((len > 0 && xpathResults.size() >= len)
|| (timeAllowed > 0 && (System.currentTimeMillis() - tstart) > timeAllowed)) {
break;
}
}
// errors recorded by the listener during evaluation replace any earlier error text
ArrayList<TransformerException> errors = eval.getErrorListener().getErrors();
if (!errors.isEmpty()) {
err = formatError(query, errors, eval.getQueryStats());
if (xpathResults.size() == 0) {
xpathResults = null; // throw a 400 error; don't return partial
// results
}
}
if (err != null) {
rsp.add("xpath-error", err);
}
if (rb.getResults() == null) {
// create a dummy doc list if previous query processing didn't retrieve any docs
// In distributed operation, there will be doc results, otherwise none.
SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult();
result.setDocList(new DocSlice(0, 0, null, null, eval.getQueryStats().docCount, 0));
rb.setResult(result);
rsp.add("response", rb.getResults().docList);
}
if (xpathResults != null) {
rsp.add("xpath-results", xpathResults);
if (logger.isDebugEnabled()) {
logger.debug("retrieved: " + eval.getDocReader().getCacheMisses() + " docs, "
+ xpathResults.size() + " results, " + (System.currentTimeMillis() - tstart) + "ms");
}
} else {
logger.warn ("xquery evaluation error: " + eval.getDocReader().getCacheMisses() + " docs, " +
"0 results, " + (System.currentTimeMillis() - tstart) + "ms");
}
// only commit pending writes when the query finished without error
if (err == null && context.isCommitPending()) {
doCommit();
}
}
/**
 * Issues a soft commit. When more than one shard was captured from a
 * distributed request, the commit is sent through the "lux-update-chain"
 * update processor chain; otherwise it goes straight to the local update
 * handler.
 *
 * @throws LuxException wrapping any IOException raised by the commit
 */
protected void doCommit () {
    String[] currentShards = shards;
    boolean isCloud = currentShards != null && currentShards.length > 1;
    SolrQueryRequest req = new SolrQueryRequestBase (core, new ModifiableSolrParams()) {};
    try {
        CommitUpdateCommand cmd = new CommitUpdateCommand(req, false);
        cmd.softCommit = true;
        // Render the array explicitly: handing a String[] to the varargs logger spreads it
        // over the format arguments, so only the first shard would be printed.
        LoggerFactory.getLogger(getClass()).debug ("commit {}", (Object) java.util.Arrays.toString(currentShards));
        if (isCloud) {
            SolrQueryResponse rsp = new SolrQueryResponse();
            UpdateRequestProcessorChain updateChain = core.getUpdateProcessingChain("lux-update-chain");
            updateChain.createProcessor(req, rsp).processCommit(cmd);
        } else {
            // commit locally
            core.getUpdateHandler().commit(cmd);
        }
    } catch (IOException e) {
        throw new LuxException(e);
    } finally {
        // release whatever the synthetic request may have acquired
        req.close();
    }
}
/**
 * Applies an EXPath http:response element to the servlet response and
 * collects its body content as query results.
 *
 * The status/message attributes set the HTTP status (sendError for codes
 * >= 400), header children become response headers, and the children of a
 * body child are added to {@code xpathResults}. The effective content type is
 * stored in the request context under "lux.contentType", and the response
 * element itself under "expath:response".
 *
 * @param req the current request; its context must hold the servlet request
 * @param rsp the Solr response (currently unused)
 * @param xpathResults receives the serialized body content
 * @param xpathResult the http:response element
 * @return an error message if a body item could not be added, otherwise null
 * @throws LuxException for a non-numeric status, body/@src, or a multipart child
 */
private String handleEXPathResponse(SolrQueryRequest req, SolrQueryResponse rsp, NamedList<Object> xpathResults, XdmItem xpathResult) {
    XdmNode expathResponse = (XdmNode) xpathResult;
    HttpServletRequest httpReq = (HttpServletRequest) req.getContext().get(SolrQueryContext.LUX_HTTP_SERVLET_REQUEST);
    HttpServletResponse httpResp = (HttpServletResponse) httpReq.getAttribute(SolrQueryContext.LUX_HTTP_SERVLET_RESPONSE);
    // Read attributes through the s9api wrapper so this works for any tree model,
    // not just Saxon's TinyTree.
    String status = expathResponse.getAttributeValue(qnameFor("status"));
    String message = expathResponse.getAttributeValue(qnameFor("message"));
    if (status != null) {
        int istatus;
        try {
            istatus = Integer.parseInt(status);
        } catch (NumberFormatException e) {
            throw new LuxException ("Non-numeric response status code: " + status);
        }
        if (istatus >= 400) {
            try {
                if (message != null) {
                    httpResp.sendError(istatus, message);
                } else {
                    httpResp.sendError(istatus);
                }
            } catch (IOException e) {
                logger.error("sendError failed: " + e.getMessage());
            }
        }
        // if an error is generated by the query, call sendError?
        httpResp.setStatus(istatus);
    }
    // Get the body, its charset and content-type and return the body to be used as the result
    XdmSequenceIterator children = expathResponse.axisIterator(Axis.CHILD);
    while (children.hasNext()) {
        XdmNode child = (XdmNode) children.next();
        if (child.getNodeKind() != XdmNodeKind.ELEMENT) {
            // whitespace text, comments etc. have no usable node name; skip them rather than fail
            continue;
        }
        net.sf.saxon.s9api.QName childName = child.getNodeName();
        if (! childName.getNamespaceURI().equals(EXPATH_HTTP_NS)) {
            logger.warn("ignoring unknown response child element: " + childName.getClarkName());
            continue;
        }
        String localName = childName.getLocalName();
        if (localName.equals("body")) {
            if (child.getAttributeValue(qnameFor("src")) != null) {
                throw new LuxException ("The body/@src attribute is not supported");
            }
            String characterSet = child.getAttributeValue(qnameFor("charset"));
            if (characterSet == null) {
                characterSet = "utf-8";
            }
            String contentType = child.getAttributeValue(qnameFor("content-type"));
            if (contentType != null) {
                contentType += "; charset=" + characterSet;
            } else {
                // fall back to the request's content type, overriding any charset it declares
                contentType = req.getParams().get("lux.contentType");
                if (contentType != null) {
                    // quote the replacement so '$' or '\' in the charset are taken literally
                    contentType = contentType.replaceFirst ("(?<=; charset=).*",
                            java.util.regex.Matcher.quoteReplacement(characterSet));
                }
            }
            if (contentType != null) {
                req.getContext().put("lux.contentType", contentType);
            }
            XdmSequenceIterator bodyKids = child.axisIterator(Axis.CHILD);
            while (bodyKids.hasNext()) {
                String err = safeAddResult(xpathResults, (XdmNode) bodyKids.next());
                if (err != null) {
                    return err;
                }
            }
        }
        else if (localName.equals("header")) {
            String header = child.getAttributeValue(qnameFor("name"));
            String value = child.getAttributeValue(qnameFor("value"));
            if (header == null || value == null) {
                logger.warn("ignoring response header lacking a name or value attribute");
            } else {
                httpResp.addHeader(header, value);
            }
        }
        else if (localName.equals("multipart")) {
            throw new LuxException ("Multipart HTTP responses not implemented");
        }
    }
    // TODO: pass the expathResponse to the LuxResponseWriter -- why?
    req.getContext().put("expath:response", expathResponse);
    return null;
}
/**
 * Binds the request-describing external variables that the compiled query
 * declares: the lux http variable (a hand-built parameter document) and/or the
 * EXPath "input" variable (the full request description).
 *
 * @throws LuxException if building the EXPath request fails
 */
private void bindRequestVariables(ResponseBuilder rb, SolrQueryRequest req,
        XQueryExecutable expr, Compiler compiler, Evaluator evaluator,
        SolrQueryContext context) {
    StructuredQName expathInputName = new StructuredQName("", EXPATH_HTTP_NS, "input");
    StructuredQName luxHttpName = new StructuredQName("", LUX_HTTP.getNamespaceURI(), LUX_HTTP.getLocalPart());
    boolean wantsLuxHttp = false;
    boolean wantsEXPathInput = false;
    // find out which of the two variables the query's modules actually declare
    Iterator<GlobalVariable> declared = expr.getUnderlyingCompiledQuery().getStaticContext().getModuleVariables();
    while (declared.hasNext()) {
        StructuredQName declaredName = declared.next().getVariableQName();
        if (declaredName.equals(expathInputName)) {
            wantsEXPathInput = true;
        } else if (declaredName.equals(luxHttpName)) {
            wantsLuxHttp = true;
        }
    }
    if (wantsLuxHttp) {
        String path;
        if (queryPath != null) {
            path = queryPath;
        } else {
            path = "/xquery";
        }
        context.bindVariable(LUX_HTTP, buildHttpParams (evaluator, req, path));
    }
    if (wantsEXPathInput) {
        try {
            XdmValue input = buildEXPathRequest(compiler, evaluator, req);
            context.bindVariable(new QName(EXPATH_HTTP_NS, "input", ""), input);
        } catch (XPathException e) {
            throw new LuxException (e);
        }
    }
}
/**
 * Formats the errors gathered by the given listener, quoting the original
 * query text (no query statistics are consulted).
 */
private String formatError(String query, TransformErrorListener errorListener) {
    return formatError(query, errorListener.getErrors(), null);
}
/**
 * Builds a report for the given errors and logs each of them. For every error
 * the report holds any additional location text, the message with location, a
 * newline and - when a locator is available - an excerpt of the offending
 * source line.
 *
 * @param query the query text as submitted
 * @param errors the errors to report
 * @param queryStats if it carries an optimized query, excerpts are taken from
 * that text instead of {@code query}; may be null
 * @return the concatenated report
 */
private String formatError(String query, List<TransformerException> errors, QueryStats queryStats) {
    String source = query;
    if (queryStats != null && queryStats.optimizedQuery != null) {
        source = queryStats.optimizedQuery;
    }
    StringBuilder report = new StringBuilder();
    for (TransformerException error : errors) {
        if (error instanceof XPathException) {
            String extra = ((XPathException) error).getAdditionalLocationText();
            if (extra != null) {
                report.append(extra);
            }
        }
        report.append(error.getMessageAndLocation()).append("\n");
        if (error.getLocator() != null) {
            appendQueryExcerpt(report, source,
                    error.getLocator().getLineNumber(),
                    error.getLocator().getColumnNumber());
        }
        logger.error("XQuery exception", error);
    }
    return report.toString();
}

/** Appends up to 100 characters either side of the given column of the given 1-based line, if that line exists. */
private void appendQueryExcerpt(StringBuilder report, String source, int lineNumber, int column) {
    String[] lines = source.split("\r?\n");
    if (lineNumber < 1 || lineNumber > lines.length) {
        return;
    }
    String line = lines[lineNumber - 1];
    int from = Math.min(Math.max(0, column - 100), line.length());
    int to = Math.min(line.length(), column + 100);
    report.append(line, from, to);
}
/**
 * Parses the XML text produced by buildHttpInfo(req) into a node using the
 * evaluator, passing {@code path} along as the second build argument
 * (presumably the document URI - confirm against Evaluator.build).
 */
private XdmNode buildHttpParams(Evaluator evaluator, SolrQueryRequest req, String path) {
return (XdmNode) evaluator.build(new StringReader(buildHttpInfo(req)), path);
}
/**
 * Adds an item to the results, converting serialization failures, result-size
 * overruns and out-of-memory conditions into an error message instead of
 * letting them propagate.
 *
 * @return null on success, otherwise a non-null description of the failure
 */
protected String safeAddResult (NamedList<Object> xpathResults, XdmItem item) {
    try {
        addResult (xpathResults, item);
        return null;
    } catch (SaxonApiException e) {
        return describeFailure(e);
    } catch (ResourceExhaustedException e) {
        return describeFailure(e);
    } catch (OutOfMemoryError e) {
        return describeFailure(e);
    }
}

/**
 * Callers treat a null return from safeAddResult as success, so a failure
 * whose message is missing must still yield a non-empty description.
 */
private String describeFailure(Throwable failure) {
    String message = failure.getMessage();
    return StringUtils.isEmpty(message) ? failure.toString() : message;
}
/**
 * Appends one query result to {@code xpathResults} and charges its
 * approximate size against the per-request result budget.
 *
 * Atomic values are stored under their item type name as Java values; nodes
 * are serialized with the current serializer and stored under their
 * lower-cased node kind.
 *
 * @throws SaxonApiException if serializing a node fails
 * @throws ResourceExhaustedException (from addResultBytes) if the size budget is exceeded
 */
protected void addResult(NamedList<Object> xpathResults, XdmItem item) throws SaxonApiException {
if (item.isAtomicValue()) {
// We need to get Java primitive values that Solr knows how to
// marshal
XdmAtomicValue xdmValue = (XdmAtomicValue) item;
AtomicValue value = (AtomicValue) xdmValue.getUnderlyingValue();
try {
String typeName = value.getItemType().toString();
Object javaValue;
if (value instanceof DecimalValue) {
// decimals are returned as doubles
javaValue = ((DecimalValue) value).getDoubleValue();
addResultBytes(8);
} else if (value instanceof QNameValue) {
javaValue = ((QNameValue) value).getClarkName();
addResultBytes(((String) javaValue).length() * 2); // close
// enough,
// modulo
// surrogates
} else if (value instanceof GDateValue) {
// the partial date types are returned as their string form; other GDateValues are converted by Saxon
if (value instanceof GMonthValue) {
javaValue = ((GMonthValue) value).getPrimitiveStringValue().toString();
} else if (value instanceof GYearValue) {
javaValue = ((GYearValue) value).getPrimitiveStringValue().toString();
} else if (value instanceof GDayValue) {
javaValue = ((GDayValue) value).getPrimitiveStringValue().toString();
} else if (value instanceof GMonthDayValue) {
javaValue = ((GMonthDayValue) value).getPrimitiveStringValue().toString();
} else if (value instanceof GYearMonthValue) {
javaValue = ((GYearMonthValue) value).getPrimitiveStringValue().toString();
} else {
javaValue = SequenceTool.convertToJava(value);
}
addResultBytes(javaValue.toString().length() * 2);
} else {
javaValue = SequenceTool.convertToJava(value);
addResultBytes (javaValue.toString().length() * 2);
}
// TODO hexBinary and base64Binary
xpathResults.add(typeName, javaValue);
} catch (XPathException e) {
// conversion failed: fall back to the primitive type's display name and the value's string form
xpathResults.add(value.getPrimitiveType().getDisplayName(), value.toString());
}
} else {
XdmNode node = (XdmNode) item;
XdmNodeKind nodeKind = node.getNodeKind();
StringWriter buf = new StringWriter ();
// TODO: tinybin serialization!
serializer.setOutputWriter(buf);
serializer.serializeNode(node);
String xml = buf.toString();
// two bytes per char, as for the string cases above
addResultBytes(xml.length() * 2);
xpathResults.add(nodeKind.toString().toLowerCase(), xml);
}
}
/**
 * Charges {@code count} bytes against the per-request result budget.
 *
 * @param count approximate size of the result just produced
 * @throws ResourceExhaustedException if the total would exceed MAX_RESULT_SIZE
 */
private void addResultBytes(int count) {
    // Sum in long arithmetic: the int sum can wrap negative near the limit and slip past the
    // check. A negative count means the caller's length*2 estimate itself overflowed.
    if (count < 0 || (long) resultByteSize + count > MAX_RESULT_SIZE) {
        throw new ResourceExhaustedException("Maximum result size exceeded, returned result has been truncated");
    }
    resultByteSize += count;
}
// Hand-coded serialization may be a bit fragile, but the only alternative
// using Saxon is too inconvenient
/**
 * Renders the request as an XML string of the form
 * {@code <http><params><param name="..."><value>...</value></param>...</params>
 * <path-info>...</path-info><context-path>...</context-path></http>}.
 * Parameters whose names start with "lux." are omitted; path-info is present
 * only when the lux.pathInfo parameter is. All request-supplied text is
 * escaped, since it is inserted into markup that is parsed afterwards.
 */
private String buildHttpInfo(SolrQueryRequest req) {
    StringBuilder buf = new StringBuilder();
    buf.append("<http>");
    buf.append("<params>");
    SolrParams params = req.getParams();
    Iterator<String> paramNames = params.getParameterNamesIterator();
    while (paramNames.hasNext()) {
        String param = paramNames.next();
        if (param.startsWith("lux.")) {
            continue;
        }
        // the name is client-controlled too: escape it like the values
        buf.append("<param name=\"").append(xmlEscape(param)).append("\">");
        String[] values = params.getParams(param);
        if (values != null) {
            for (String value : values) {
                buf.append("<value>").append(xmlEscape(value)).append("</value>");
            }
        }
        buf.append("</param>");
    }
    buf.append("</params>");
    String pathInfo = params.get(LUX_PATH_INFO);
    if (pathInfo != null) {
        buf.append("<path-info>").append(xmlEscape(pathInfo)).append("</path-info>");
    }
    Map<Object, Object> context = req.getContext();
    String webapp = (String) context.get("webapp");
    if (webapp == null) {
        webapp = "";
    }
    buf.append("<context-path>").append(xmlEscape(webapp)).append("</context-path>");
    // TODO: headers, path, etc?
    buf.append("</http>");
    return buf.toString();
}
/** Namespace of the request/response elements and of the "input" variable handled by this component. */
private static final String EXPATH_HTTP_NS = "http://expath.org/ns/webapp";
/**
 * Builds the value bound to the EXPath "input" variable: a document holding
 * an http:request element (method, servlet, path and optional path-info
 * attributes; authority, url, context-root, path, param and header children,
 * plus one body element per content stream).
 *
 * @return the request document alone when the request has no content
 * streams; otherwise a sequence of the request document followed by one item
 * per content stream
 * @throws XPathException if the tree builder rejects an event
 */
private XdmValue buildEXPathRequest (Compiler compiler, Evaluator evaluator, SolrQueryRequest req) throws XPathException {
LinkedTreeBuilder builder = new LinkedTreeBuilder (compiler.getProcessor().getUnderlyingConfiguration().makePipelineConfiguration());
builder.startDocument(0);
builder.startElement(fQNameFor("http", EXPATH_HTTP_NS, "request"), AnyType.getInstance(), 0, 0);
builder.namespace(new NamespaceBinding("http", EXPATH_HTTP_NS), 0);
// NOTE(review): the cast assumes the request came through LuxDispatchFilter - confirm for other entry points
Request requestWrapper = (Request) req.getContext().get(SolrQueryContext.LUX_HTTP_SERVLET_REQUEST);
addAttribute(builder, "method", requestWrapper.getMethod());
addAttribute(builder, "servlet", requestWrapper.getServletPath());
// the wrapped (original) servlet request supplies the path, query string and headers below
HttpServletRequest httpReq = (HttpServletRequest)requestWrapper.getRequest();
addAttribute(builder, "path", httpReq.getServletPath());
String pathInfo = requestWrapper.getPathInfo();
if (pathInfo != null) {
addAttribute(builder, "path-info", pathInfo);
}
builder.startContent();
// child elements
StringBuilder buf = new StringBuilder();
// authority
buf.append (requestWrapper.getScheme()).
append("://").
append(requestWrapper.getServerName()).
append(':').
append (requestWrapper.getServerPort());
String authority = buf.toString();
addSimpleElement(builder, "authority", authority);
// url
// the url is the authority followed by the servlet path and, if present, the query string
buf.append (httpReq.getServletPath());
if (httpReq.getQueryString() != null) {
buf.append ('?').append(httpReq.getQueryString());
}
String url = buf.toString();
addSimpleElement(builder, "url", url);
// context-root
addSimpleElement(builder, "context-root", httpReq.getContextPath());
// path - just one part: we don't do any parsing of the path
builder.startElement(fQNameFor("http", EXPATH_HTTP_NS, "path"), BuiltInAtomicType.UNTYPED_ATOMIC, 0, 0);
builder.startContent();
addSimpleElement(builder, "part", httpReq.getServletPath());
builder.endElement();
// params
// one param element per value, so multi-valued parameters repeat the name
Iterator<String> paramNames = req.getParams().getParameterNamesIterator();
while (paramNames.hasNext()) {
String param = paramNames.next();
String[] values = req.getParams().getParams(param);
for (String value : values) {
builder.startElement(fQNameFor("http", EXPATH_HTTP_NS, "param"), BuiltInAtomicType.UNTYPED_ATOMIC, 0, 0);
addAttribute (builder, "name", param);
addAttribute (builder, "value", value);
builder.startContent();
builder.endElement();
}
}
// headers
// one header element per value, as for params
Enumeration<String> headerNames = httpReq.getHeaderNames();
while (headerNames.hasMoreElements()) {
String headerName = headerNames.nextElement();
Enumeration<String> headerValues = httpReq.getHeaders(headerName);
while (headerValues.hasMoreElements()) {
String value = headerValues.nextElement();
builder.startElement(fQNameFor("http", EXPATH_HTTP_NS, "header"), BuiltInAtomicType.UNTYPED_ATOMIC, 0, 0);
addAttribute (builder, "name", headerName);
addAttribute (builder, "value", value);
builder.startContent();
builder.endElement();
}
}
// body descriptors are written into the request element while the parsed bodies are collected separately
ArrayList<XdmItem> resultSequence = null;
if (req.getContentStreams() != null) {
resultSequence = new ArrayList<XdmItem>();
handleContentStreams (builder, req, resultSequence, evaluator);
}
builder.endElement(); // end request
builder.endDocument();
XdmNode expathReq = new XdmNode (builder.getCurrentRoot());
if (resultSequence == null) {
return expathReq;
}
// the request description always comes first, followed by the bodies
resultSequence.add(0, expathReq);
return new XdmValue (resultSequence);
}
/**
 * Converts each content stream of the request into an item appended to
 * {@code result} and writes a matching http:body descriptor element to the
 * builder.
 *
 * XML (and, with a warning, non-text) content is parsed as XML; if that fails
 * it is treated as plain text. HTML content is parsed with the HTML parser;
 * anything else, or unparseable HTML, becomes a text node.
 *
 * @throws LuxException if a stream cannot be read or declares an unsupported charset
 * @throws XPathException if the tree builder rejects an event
 */
private void handleContentStreams (LinkedTreeBuilder builder, SolrQueryRequest req, ArrayList<XdmItem> result, Evaluator evaluator) throws XPathException {
    // parts
    int i = 0;
    for (ContentStream stream : req.getContentStreams()) {
        // previously the index was never advanced, so every part was labelled "#part0"
        String partURI = "#part" + i;
        String contentType = stream.getContentType();
        if (contentType == null) {
            // no declared type: handle like any other non-text content below
            contentType = "application/octet-stream";
        }
        byte[] partBytes = readFully(stream);
        String charset = ContentStreamBase.getCharsetFromContentType(contentType);
        if (charset == null) {
            charset = "utf-8";
        }
        if (!isText(contentType)) {
            logger.warn ("Binary values not supported; treating " + contentType + " as xml, or text");
        }
        XdmItem part = null;
        if (isXML(contentType) || !isText(contentType)) {
            try {
                part = evaluator.build(new ByteArrayInputStream(partBytes), partURI);
            } catch (LuxException e) {
                // failed to parse
                logger.warn("Caught an exception while parsing XML: " + e.getMessage() + ", treating it as plain text");
                contentType = "text/plain; charset=" + charset;
            }
        }
        if (part == null) {
            String text;
            try {
                text = new String (partBytes, charset);
            } catch (UnsupportedEncodingException e1) {
                throw new LuxException (e1);
            }
            if (isHTML(contentType)) {
                HtmlParser parser = new HtmlParser();
                SAXSource source = new SAXSource (parser, new InputSource (new StringReader (text)));
                try {
                    part = evaluator.getDocBuilder().build(source);
                } catch (SaxonApiException e) {
                    logger.warn ("failed to parse HTML; treating as plain text: " + e.getMessage(), e);
                }
            }
            if (part == null) {
                TextFragmentValue node = new TextFragmentValue(text, partURI);
                node.setConfiguration(builder.getConfiguration());
                part = new XdmNode (node);
            }
        }
        result.add (part);
        builder.startElement(fQNameFor("http", EXPATH_HTTP_NS, "body"), BuiltInAtomicType.UNTYPED_ATOMIC, 0, 0);
        // 1-based index of this body among the parts (still "1" for a single body)
        // NOTE(review): confirm the numbering expected by consumers of body/@position
        addAttribute(builder, "position", String.valueOf(i + 1));
        addAttribute(builder, "content-type", contentType);
        builder.startContent();
        builder.endElement();
        i++;
    }
}

/** Reads the whole content stream into memory and closes it; a missing or negative size means "read to end". */
private byte[] readFully (ContentStream stream) {
    java.io.InputStream in = null;
    try {
        in = stream.getStream();
        Long size = stream.getSize();
        if (size == null || size.longValue() < 0) {
            return IOUtils.toByteArray(in);
        }
        return IOUtils.toByteArray(in, size.longValue());
    } catch (IOException e) {
        throw new LuxException (e);
    } finally {
        // the caller of getStream() is responsible for closing it
        IOUtils.closeQuietly(in);
    }
}
/**
 * @return true if the content type is textual: a text/* type (compared
 * case-insensitively), HTML or XML; false for a null content type
 */
private boolean isText (String contentType) {
    if (contentType == null) {
        return false;
    }
    // media type names are case-insensitive
    return contentType.regionMatches(true, 0, "text/", 0, 5) || isHTML(contentType) || isXML(contentType);
}
/**
 * @return true if the content type's subtype is "html", with or without
 * trailing parameters such as "; charset=utf-8"; false for null
 */
private boolean isHTML (String contentType) {
    // The old pattern required the string to end right after "/html" (or one space), so the
    // common "text/html; charset=..." form was not recognized.
    return contentType != null && contentType.matches("(?i).*/html\\s*(;.*)?");
}
/**
 * @return true if the content type's subtype is "xml" or ends in "+xml", with
 * or without trailing parameters such as "; charset=utf-8"; false for null
 */
private boolean isXML (String contentType) {
    // Accept media type parameters after the subtype; the old patterns only matched
    // when the string ended right after "xml" (or one space).
    return contentType != null && contentType.matches("(?i).*[/+]xml\\s*(;.*)?");
}
/**
 * Writes a complete element named http:{@code name} (in the EXPath namespace)
 * whose only content is the given text.
 */
private void addSimpleElement(LinkedTreeBuilder builder, String name, String text)
throws XPathException {
builder.startElement (fQNameFor("http", EXPATH_HTTP_NS, name), BuiltInAtomicType.STRING, 0, 0);
builder.startContent();
builder.characters(text, 0, 0);
builder.endElement();
}
/**
 * Adds a no-namespace attribute to the element currently being started on the
 * builder; it is called here between startElement and startContent.
 */
private void addAttribute(LinkedTreeBuilder builder, String name, String value)
throws XPathException {
builder.attribute(fQNameFor("", "", name), BuiltInAtomicType.UNTYPED_ATOMIC, value, 0, 0);
}
/** @return the index configuration registered in inform(); null before inform() has run */
public SolrIndexConfig getSolrIndexConfig() {
return solrIndexConfig;
}
// TODO cache these
/** Creates a new Saxon FingerprintedQName from the given prefix, namespace URI and local name. */
protected FingerprintedQName fQNameFor (String prefix, String namespace, String name) {
return new FingerprintedQName(prefix, namespace, name);
}
/** Creates a new s9api QName from the given namespace and local name. */
protected net.sf.saxon.s9api.QName qnameFor (String namespace, String localName) {
return new net.sf.saxon.s9api.QName (namespace, localName);
}
/** Creates a new s9api QName from a local name only; used here to look up no-namespace attributes. */
protected net.sf.saxon.s9api.QName qnameFor (String localName) {
return new net.sf.saxon.s9api.QName (localName);
}
/** @return the core passed to inform(); null before inform() has run */
public SolrCore getCore () {
return core;
}
/**
 * @return the evaluator most recently stored for the calling thread by
 * evaluateQuery(), or null if this thread has not evaluated a query
 */
public Evaluator getEvaluator () {
return evalHolder.get();
}
/** @return the search handler located by prepare(); null until a handler containing this component has been found */
public SearchHandler getSearchHandler() {
return searchHandler;
}
/** @return the shards captured from the most recent distributed request seen by evaluateQuery(); null if there was none */
public String[] getCurrentShards() {
return shards;
}
/** @return the slices captured from the most recent distributed request seen by evaluateQuery(); null if there was none */
public String[] getCurrentSlices() {
return slices;
}
private String xmlEscape(String value) {
return value.replaceAll("&", "&").replaceAll("<", "<").replaceAll("\"", """);
}
/** Boolean request parameter (default true) checked by process(); when false, the query is not evaluated. */
public static final String XQUERY_COMPONENT_NAME = "xquery";
/** @return the fixed description string "XQuery" */
@Override
public String getDescription() {
return "XQuery";
}
/** @return the URL of the project's source repository */
@Override
public String getSource() {
return "http://github.com/msokolov/lux";
}
/** @return an empty string; no version is reported */
@Override
public String getVersion() {
return "";
}
}
/*
* This Source Code Form is subject to the terms of the Mozilla Public License,
* v. 2.0. If a copy of the MPL was not distributed with this file, You can
* obtain one at http://mozilla.org/MPL/2.0/.
*/