/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.response;

import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.XML;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TextField;

import java.io.Writer;
import java.io.IOException;
import java.util.*;

import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.Document;

/**
 * Writes a {@link SolrQueryResponse} to a {@link Writer} as XML.
 *
 * <p>The static {@link #writeResponse} entry point creates one writer instance per
 * request; the instance holds the mutable state (nesting level, indentation flags,
 * scratch buffers) and is therefore not meant to be shared between threads.
 *
 * @version $Id: XMLWriter.java 950207 2010-06-01 19:06:17Z yonik $
 */
final public class XMLWriter {

  /** Response format version assumed when the request does not specify one. */
  public static float CURRENT_VERSION=2.2f;

  //
  // static thread safe part
  //
  private static final char[] XML_START1="<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n".toCharArray();

  private static final char[] XML_STYLESHEET="<?xml-stylesheet type=\"text/xsl\" href=\"/admin/".toCharArray();
  private static final char[] XML_STYLESHEET_END=".xsl\"?>\n".toCharArray();

  // Schema-qualified root element. Currently never written (see writeResponse);
  // kept for the day a response schema becomes available.
  private static final char[] XML_START2_SCHEMA=(
    "<response xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
    +" xsi:noNamespaceSchemaLocation=\"http://pi.cnet.com/cnet-search/response.xsd\">\n"
  ).toCharArray();

  private static final char[] XML_START2_NOSCHEMA=(
    "<response>\n"
  ).toCharArray();

  /**
   * Writes the complete XML response document for a request.
   *
   * <p>Honours the request parameters {@code version}, {@code stylesheet},
   * {@code indent} and {@code omitHeader}. When {@code omitHeader} is true the
   * {@code responseHeader} entry is removed from the response's value list.
   *
   * @param writer destination of the XML text
   * @param req    the request; supplies parameters, schema and searcher
   * @param rsp    the response whose values are serialized
   * @throws IOException if the underlying writer fails
   */
  public static void writeResponse(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {

    String ver = req.getParams().get(CommonParams.VERSION);

    writer.write(XML_START1);

    String stylesheet = req.getParams().get("stylesheet");
    if (stylesheet != null && stylesheet.length() > 0) {
      writer.write(XML_STYLESHEET);
      // The stylesheet name comes straight from the request, so it must be
      // escaped before being placed inside the href pseudo-attribute.
      XML.escapeAttributeValue(stylesheet, writer);
      writer.write(XML_STYLESHEET_END);
    }

    // todo - change when schema becomes available?
    // The schema-qualified root element is disabled, so the plain one is always used.
    writer.write(XML_START2_NOSCHEMA);

    // create an instance for each request to handle
    // non-thread safe stuff (indentation levels, etc)
    // and to encapsulate writer, schema, and searcher so
    // they don't have to be passed around in every function.
    //
    XMLWriter xw = new XMLWriter(writer, req.getSchema(), req, ver);
    xw.defaultFieldList = rsp.getReturnFields();

    // "indent" present and neither empty nor "off" turns indentation on;
    // an absent parameter leaves the default (off) untouched.
    String indent = req.getParams().get("indent");
    if (indent != null) {
      boolean indentOff = "".equals(indent) || "off".equals(indent);
      xw.setIndent(!indentOff);
    }

    // dump response values
    NamedList lst = rsp.getValues();
    Boolean omitHeader = req.getParams().getBool(CommonParams.OMIT_HEADER);
    if (omitHeader != null && omitHeader) {
      lst.remove("responseHeader");
    }
    int sz = lst.size();
    int start = 0;

    // special case the response header if the version is 2.1 or less
    if (xw.version<=2100 && sz>0) {
      Object header = lst.getVal(0);
      if (header instanceof NamedList && "responseHeader".equals(lst.getName(0))) {
        writer.write("<responseHeader>");
        xw.incLevel();
        NamedList nl = (NamedList)header;
        for (int i=0; i<nl.size(); i++) {
          String name = nl.getName(i);
          Object val = nl.getVal(i);
          if ("status".equals(name) || "QTime".equals(name)) {
            // written as <status>..</status> / <QTime>..</QTime>: the entry name is the tag
            xw.writePrim(name,null,val.toString(),false);
          } else {
            xw.writeVal(name,val);
          }
        }
        xw.decLevel();
        writer.write("</responseHeader>");
        start = 1;  // header already written; skip it below
      }
    }

    for (int i=start; i<sz; i++) {
      xw.writeVal(lst.getName(i),lst.getVal(i));
    }

    writer.write("\n</response>\n");
  }


  ////////////////////////////////////////////////////////////
  // request instance specific (non-static, not shared between threads)
  ////////////////////////////////////////////////////////////

  private final Writer writer;
  private final IndexSchema schema; // needed to write fields of docs
  private final SolrQueryRequest request; // the request

  private int level;
  private boolean defaultIndent=false;
  private boolean doIndent=false;

  // fieldList... the set of fields to return for each document
  private Set<String> defaultFieldList;

  // if a list smaller than this threshold is encountered, elements
  // will be written on the same line.
  // maybe constructed types should always indent first?
  private final int indentThreshold=0;

  /** Requested response version multiplied by 1000 (e.g. 2.2 is stored as 2200). */
  final int version;

  // temporary working objects...
  // be careful not to use these recursively...
  private final List<Fieldable> tlst = new ArrayList<Fieldable>();
  private final Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.US);
  private final StringBuilder sb = new StringBuilder();

  /**
   * Creates a writer bound to one request.
   *
   * @param writer  destination of the XML text
   * @param schema  schema used to look up field definitions when writing documents
   * @param req     the request (used to obtain the searcher for {@link DocList}s)
   * @param version response version as a decimal string, or null for {@link #CURRENT_VERSION}
   * @throws NumberFormatException if {@code version} is not a parsable float
   */
  public XMLWriter(Writer writer, IndexSchema schema, SolrQueryRequest req, String version) {
    this.writer = writer;
    this.schema = schema;
    this.request = req;

    float ver = version==null ? CURRENT_VERSION : Float.parseFloat(version);
    // Round rather than truncate so float representation error cannot
    // yield a value one below the intended one.
    this.version = Math.round(ver*1000);
  }

  //
  // Functions to manipulate the current logical nesting level.
  // Any indentation will be partially based on level.
  //

  /** Sets the current nesting level. */
  public void setLevel(int level) { this.level = level; }

  /** Returns the current nesting level. */
  public int level() { return level; }

  /** Increments the nesting level and returns the new value. */
  public int incLevel() { return ++level; }

  /** Decrements the nesting level and returns the new value. */
  public int decLevel() { return --level; }

  /** Turns indentation on or off, both for the current state and as the default to restore to. */
  public void setIndent(boolean doIndent) {
    this.doIndent = doIndent;
    defaultIndent = doIndent;
  }

  /**
   * Writes the XML attribute name/val, escaping the value.
   * A null val means that the attribute is missing.
   */
  public void writeAttr(String name, String val) throws IOException {
    writeAttr(name, val, true);
  }

  /**
   * Writes the XML attribute name/val preceded by a space.
   * A null val means that the attribute is missing and nothing is written.
   *
   * @param name   attribute name, written as is
   * @param val    attribute value, or null to skip the attribute
   * @param escape whether the value is passed through {@link XML#escapeAttributeValue}
   * @throws IOException if the underlying writer fails
   */
  public void writeAttr(String name, String val, boolean escape) throws IOException {
    if (val == null) {
      return;
    }
    writer.write(' ');
    writer.write(name);
    writer.write("=\"");
    if (escape) {
      XML.escapeAttributeValue(val, writer);
    } else {
      writer.write(val);
    }
    writer.write('"');
  }

  /**
   * Writes a tag with attributes.
   *
   * @param tag        element name
   * @param attributes attribute name/value pairs; null is treated as no attributes
   * @param closeTag   true to write a self-closing tag ({@code />}), false for an open tag
   * @param escape     whether attribute values are escaped
   * @throws IOException if the underlying writer fails
   */
  public void startTag(String tag, Map<String,String> attributes, boolean closeTag, boolean escape) throws IOException {
    if (doIndent) indent();
    writer.write('<');
    writer.write(tag);
    if (attributes != null) {
      for (Map.Entry<String, String> entry : attributes.entrySet()) {
        writeAttr(entry.getKey(), entry.getValue(), escape);
      }
    }
    writer.write(closeTag ? "/>" : ">");
  }

  /**
   * Writes a complete element with attributes and character data
   * (the data is not enclosed in a {@code <![CDATA[ ]]>} section).
   *
   * @param tag         element name
   * @param attributes  attribute name/value pairs; null is treated as no attributes
   * @param cdata       element content; null or empty writes an empty element body
   * @param escapeCdata whether the content is passed through {@link XML#escapeCharData}
   * @param escapeAttr  whether attribute values are escaped
   * @throws IOException if the underlying writer fails
   */
  public void writeCdataTag(String tag, Map<String,String> attributes, String cdata, boolean escapeCdata, boolean escapeAttr) throws IOException {
    if (doIndent) indent();
    writer.write('<');
    writer.write(tag);
    if (attributes != null) {
      for (Map.Entry<String, String> entry : attributes.entrySet()) {
        writeAttr(entry.getKey(), entry.getValue(), escapeAttr);
      }
    }
    writer.write('>');
    if (cdata != null && cdata.length() > 0) {
      if (escapeCdata) {
        XML.escapeCharData(cdata, writer);
      } else {
        writer.write(cdata, 0, cdata.length());
      }
    }
    writer.write("</");
    writer.write(tag);
    writer.write('>');
  }

  /**
   * Writes a tag with an optional {@code name} attribute.
   *
   * @param tag      element name
   * @param name     value of the {@code name} attribute, or null for none
   * @param closeTag true to write a self-closing tag ({@code />}), false for an open tag
   * @throws IOException if the underlying writer fails
   */
  public void startTag(String tag, String name, boolean closeTag) throws IOException {
    if (doIndent) indent();

    writer.write('<');
    writer.write(tag);
    writeAttr("name", name);  // no-op when name is null
    writer.write(closeTag ? "/>" : ">");
  }

  // One newline followed by 80 spaces: at two spaces per level,
  // indentation is capped at 40 levels.
  static final char[] indentChars = new char[81];
  static {
    Arrays.fill(indentChars,' ');
    indentChars[0] = '\n';  // start with a newline
  }

  /** Writes a newline followed by the indentation for the current level. */
  public void indent() throws IOException {
    indent(level);
  }

  /**
   * Writes a newline followed by two spaces per level, capped at the size of
   * the shared indentation buffer.
   *
   * @param lev nesting level to indent for
   * @throws IOException if the underlying writer fails
   */
  public void indent(int lev) throws IOException {
    writer.write(indentChars, 0, Math.min((lev<<1)+1, indentChars.length));
  }

  // Orders fields by name only.
  // note - the sort is stable, so this should not affect the ordering
  // of fields with the same name w.r.t each other.
  private static final Comparator<Fieldable> fieldnameComparator = new Comparator<Fieldable>() {
    public int compare(Fieldable f1, Fieldable f2) {
      return f1.name().compareTo(f2.name());
    }
  };

  /**
   * Writes a Lucene {@link Document} as a {@code <doc>} element.
   *
   * @param name         value of the {@code name} attribute, or null for none
   * @param doc          the document whose fields are written
   * @param returnFields names of the fields to write, or null to write all fields of {@code doc}
   * @param score        score to write when {@code includeScore} is true
   * @param includeScore whether a {@code score} float is written first
   * @throws IOException if the underlying writer fails
   */
  public final void writeDoc(String name, Document doc, Set<String> returnFields, float score, boolean includeScore) throws IOException {
    startTag("doc", name, false);
    incLevel();

    if (includeScore) {
      writeFloat("score", score);
    }

    // Lucene Documents have multivalued types as multiple fields
    // with the same name.
    // The XML needs to represent these as
    // an array.  The fastest way to detect multiple fields
    // with the same name is to sort them first.

    // using global tlst here, so we shouldn't call any other
    // function that uses it until we are done.
    tlst.clear();
    for (Object obj : doc.getFields()) {
      Fieldable ff = (Fieldable)obj;
      // skip this field if it is not a field to be returned.
      if (returnFields!=null && !returnFields.contains(ff.name())) {
        continue;
      }
      tlst.add(ff);
    }
    Collections.sort(tlst, fieldnameComparator);

    int sz = tlst.size();
    int fidx1 = 0;
    while (fidx1 < sz) {
      Fieldable f1 = tlst.get(fidx1);
      String fname = f1.name();

      // find the end of fields with this name
      int fidx2 = fidx1+1;
      while (fidx2 < sz && fname.equals(tlst.get(fidx2).name())) {
        fidx2++;
      }

      // Fields unknown to the schema are written as plain text fields.
      // (schema.getFieldType(fname) might be cheaper for dynamic fields
      // since no SchemaField would have to be created on the fly.)
      SchemaField sf = schema.getFieldOrNull(fname);
      if (sf == null) {
        sf = new SchemaField(fname, new TextField());
      }

      if (fidx1+1 == fidx2) {
        // single field value
        if (version>=2100 && sf.multiValued()) {
          // schema says multi-valued: wrap even a single value in <arr>
          startTag("arr",fname,false);
          doIndent=false;
          sf.write(this, null, f1);
          writer.write("</arr>");
          doIndent=defaultIndent;
        } else {
          sf.write(this, f1.name(), f1);
        }
      } else {
        // multiple fields with same name detected
        startTag("arr",fname,false);
        incLevel();
        doIndent=false;
        int cnt=0;
        for (int i=fidx1; i<fidx2; i++) {
          if (defaultIndent && ++cnt==4) { // only indent every 4th item
            indent();
            cnt=0;
          }
          sf.write(this, null, tlst.get(i));
        }
        decLevel();
        writer.write("</arr>");
        doIndent=defaultIndent;
      }
      fidx1 = fidx2;
    }

    decLevel();
    if (doIndent) indent();
    writer.write("</doc>");
  }

  /**
   * Writes a {@link SolrDocument} as a {@code <doc>} element.
   *
   * <p>The {@code returnFields} set is only read, never modified.
   *
   * @param name         value of the {@code name} attribute, or null for none
   * @param doc          the document whose fields are written
   * @param returnFields names of the fields to write, or null to write all fields of {@code doc}
   * @param includeScore when true, a field named {@code score} is written even if
   *                     {@code returnFields} does not list it
   * @throws IOException if the underlying writer fails
   * @since solr 1.3
   */
  final void writeDoc(String name, SolrDocument doc, Set<String> returnFields, boolean includeScore) throws IOException {
    startTag("doc", name, false);
    incLevel();

    for (String fname : doc.getFieldNames()) {
      // "score" is let through when requested, without touching the caller's set.
      boolean scoreRequested = includeScore && "score".equals(fname);
      if (returnFields!=null && !scoreRequested && !returnFields.contains(fname)) {
        continue;
      }

      Object val = doc.getFieldValue(fname);

      if (val instanceof Collection) {
        writeVal(fname, val);
      } else {
        // single valued... figure out if we should put <arr> tags around it anyway
        SchemaField sf = schema.getFieldOrNull(fname);
        if (version>=2100 && sf!=null && sf.multiValued()) {
          startTag("arr",fname,false);
          doIndent=false;
          // NOTE(review): the wrapped value is written with the field name, so the
          // child element carries a name attribute too, whereas the Lucene Document
          // variant passes null here - confirm whether this is intended.
          writeVal(fname, val);
          writer.write("</arr>");
          doIndent=defaultIndent;
        } else {
          writeVal(fname, val);
        }
      }
    }

    decLevel();
    if (doIndent) indent();
    writer.write("</doc>");
  }


  /** Common view over the document list types that can be written as a {@code <result>} element. */
  private static interface DocumentListInfo {
    Float getMaxScore();
    int getCount();
    long getNumFound();
    long getStart();
    void writeDocs( boolean includeScore, Set<String> fields ) throws IOException;
  }

  /**
   * Writes a {@code <result>} element with its {@code name}, {@code numFound},
   * {@code start} and (when scores are requested and available) {@code maxScore}
   * attributes, followed by the documents.
   *
   * @param name   value of the {@code name} attribute, or null for none
   * @param docs   the documents and their list-level statistics
   * @param fields requested field names; an empty set, a set holding only
   *               {@code score}, or a set containing {@code *} means all stored fields
   * @throws IOException if the underlying writer fails
   */
  private final void writeDocuments(
      String name,
      DocumentListInfo docs,
      Set<String> fields) throws IOException
  {
    boolean includeScore=false;
    if (fields!=null) {
      includeScore = fields.contains("score");
      if (fields.size()==0 || (fields.size()==1 && includeScore) || fields.contains("*")) {
        fields=null;  // null means return all stored fields
      }
    }

    int sz=docs.getCount();

    if (doIndent) indent();
    writer.write("<result");
    writeAttr("name",name);
    writeAttr("numFound",Long.toString(docs.getNumFound()));  // TODO: change to long
    writeAttr("start",Long.toString(docs.getStart()));        // TODO: change to long

    Float maxScore = includeScore ? docs.getMaxScore() : null;
    if (maxScore != null) {
      writeAttr("maxScore",Float.toString(maxScore));
    }

    if (sz==0) {
      // no documents: self-closing element, nothing more to write
      writer.write("/>");
      return;
    }
    writer.write('>');

    incLevel();
    docs.writeDocs(includeScore, fields);
    decLevel();

    if (doIndent) indent();
    writer.write("</result>");
  }

  /**
   * Writes a {@link SolrDocumentList} as a {@code <result>} element.
   *
   * @param name   value of the {@code name} attribute, or null for none
   * @param docs   the documents to write
   * @param fields requested field names, or null for all fields
   * @throws IOException if the underlying writer fails
   */
  public final void writeSolrDocumentList(String name, final SolrDocumentList docs, Set<String> fields) throws IOException
  {
    this.writeDocuments( name, new DocumentListInfo()
    {
      public int getCount() {
        return docs.size();
      }

      public Float getMaxScore() {
        return docs.getMaxScore();
      }

      public long getNumFound() {
        return docs.getNumFound();
      }

      public long getStart() {
        return docs.getStart();
      }

      public void writeDocs(boolean includeScore, Set<String> fields) throws IOException {
        for( SolrDocument doc : docs ) {
          writeDoc(null, doc, fields, includeScore);
        }
      }
    }, fields );
  }

  /**
   * Writes a {@link DocList} as a {@code <result>} element, loading each
   * document through the request's searcher.
   *
   * @param name   value of the {@code name} attribute, or null for none
   * @param ids    the internal document ids (and optionally scores) to write
   * @param fields requested field names, or null for all stored fields
   * @throws IOException if the underlying writer or the searcher fails
   */
  public final void writeDocList(String name, final DocList ids, Set<String> fields) throws IOException
  {
    this.writeDocuments( name, new DocumentListInfo()
    {
      public int getCount() {
        return ids.size();
      }

      public Float getMaxScore() {
        return ids.maxScore();
      }

      public long getNumFound() {
        return ids.matches();
      }

      public long getStart() {
        return ids.offset();
      }

      public void writeDocs(boolean includeScore, Set<String> fields) throws IOException {
        SolrIndexSearcher searcher = request.getSearcher();
        DocIterator iterator = ids.iterator();
        int sz = ids.size();
        // scores can only be written if the list actually carries them
        boolean withScore = includeScore && ids.hasScores();
        for (int i=0; i<sz; i++) {
          int id = iterator.nextDoc();
          Document doc = searcher.doc(id, fields);
          writeDoc(null, doc, fields, (withScore ? iterator.score() : 0.0f), withScore);
        }
      }
    }, fields );
  }


  /**
   * Writes an arbitrary value, dispatching on its runtime type.
   *
   * <p>A {@link DocSet} is silently skipped (nothing is written). Values of
   * unrecognized types are written as a string of the form
   * {@code className:toString()}.
   *
   * @param name value of the {@code name} attribute, or null for none
   * @param val  the value to write; null is written as a {@code <null>} element
   * @throws IOException if the underlying writer fails
   */
  public void writeVal(String name, Object val) throws IOException {

    // if there get to be enough types, perhaps hashing on the type
    // to get a handler might be faster (but types must be exact to do that...)

    // go in order of most common to least common
    if (val==null) {
      writeNull(name);
    } else if (val instanceof String) {
      writeStr(name, (String)val);
    } else if (val instanceof Integer) {
      // it would be slower to pass the int ((Integer)val).intValue()
      writeInt(name, val.toString());
    } else if (val instanceof Boolean) {
      // could be optimized... only two vals
      writeBool(name, val.toString());
    } else if (val instanceof Long) {
      writeLong(name, val.toString());
    } else if (val instanceof Date) {
      writeDate(name,(Date)val);
    } else if (val instanceof Float) {
      // we pass the float instead of using toString() because
      // it may need special formatting. same for double.
      writeFloat(name, ((Float)val).floatValue());
    } else if (val instanceof Double) {
      writeDouble(name, ((Double)val).doubleValue());
    } else if (val instanceof Document) {
      writeDoc(name, (Document)val, defaultFieldList, 0.0f, false);
    } else if (val instanceof DocList) {
      // requires access to IndexReader
      writeDocList(name, (DocList)val, defaultFieldList);
    } else if (val instanceof SolrDocumentList) {
      writeSolrDocumentList(name, (SolrDocumentList)val, defaultFieldList);
    } else if (val instanceof DocSet) {
      // how do we know what fields to read?
      // todo: have a DocList/DocSet wrapper that
      // restricts the fields to write...?
    } else if (val instanceof Map) {
      writeMap(name, (Map)val);
    } else if (val instanceof NamedList) {
      writeNamedList(name, (NamedList)val);
    } else if (val instanceof Iterable) {
      writeArray(name,((Iterable)val).iterator());
    } else if (val instanceof Object[]) {
      writeArray(name,(Object[])val);
    } else if (val instanceof Iterator) {
      writeArray(name,(Iterator)val);
    } else {
      // default...
      writeStr(name, val.getClass().getName() + ':' + val.toString());
    }
  }

  //
  // Generic compound types
  //

  /**
   * Writes a {@link NamedList} as a {@code <lst>} element; an empty list is
   * written as a self-closing tag.
   *
   * @param name value of the {@code name} attribute, or null for none
   * @param val  the list to write
   * @throws IOException if the underlying writer fails
   */
  public void writeNamedList(String name, NamedList val) throws IOException {
    int sz = val.size();
    startTag("lst", name, sz<=0);

    // Unreachable while indentThreshold is 0.
    // NOTE(review): if the threshold is ever raised, doIndent is not restored afterwards.
    if (sz<indentThreshold) {
      doIndent=false;
    }

    incLevel();
    for (int i=0; i<sz; i++) {
      writeVal(val.getName(i),val.getVal(i));
    }
    decLevel();

    if (sz > 0) {
      if (doIndent) indent();
      writer.write("</lst>");
    }
  }

  /**
   * writes a Map in the same format as a NamedList, using the
   * stringification of the key Object when it's non-null.
   *
   * @param name value of the {@code name} attribute, or null for none
   * @param map  the map to write; an empty map is written as a self-closing tag
   * @throws IOException if the underlying writer fails
   * @see <a href="http://lucene.apache.org/solr/api/org/apache/solr/response/SolrQueryResponse.html#returnable_data">SolrQueryResponse returnable data</a>
   */
  public void writeMap(String name, Map<Object,Object> map) throws IOException {
    int sz = map.size();
    startTag("lst", name, sz<=0);
    incLevel();
    for (Map.Entry<Object,Object> entry : map.entrySet()) {
      Object k = entry.getKey();
      Object v = entry.getValue();
      writeVal( null == k ? null : k.toString(), v);
    }
    decLevel();
    if (sz > 0) {
      if (doIndent) indent();
      writer.write("</lst>");
    }
  }

  /** Writes an object array as an {@code <arr>} element of unnamed values. */
  public void writeArray(String name, Object[] val) throws IOException {
    writeArray(name, Arrays.asList(val).iterator());
  }

  /**
   * Writes the remaining elements of an iterator as an {@code <arr>} element of
   * unnamed values; an exhausted iterator yields a self-closing tag.
   *
   * @param name value of the {@code name} attribute, or null for none
   * @param iter source of the values
   * @throws IOException if the underlying writer fails
   */
  public void writeArray(String name, Iterator iter) throws IOException {
    if (!iter.hasNext()) {
      startTag("arr", name, true);
      return;
    }

    startTag("arr", name, false);
    incLevel();
    while (iter.hasNext()) {
      writeVal(null, iter.next());
    }
    decLevel();
    if (doIndent) indent();
    writer.write("</arr>");
  }

  //
  // Primitive types
  //

  /** Writes an empty {@code <null>} element. */
  public void writeNull(String name) throws IOException {
    writePrim("null",name,"",false);
  }

  /** Writes a {@code <str>} element; the value is escaped. */
  public void writeStr(String name, String val) throws IOException {
    writePrim("str",name,val,true);
  }

  /** Writes an {@code <int>} element; the value is written unescaped. */
  public void writeInt(String name, String val) throws IOException {
    writePrim("int",name,val,false);
  }

  /** Writes an {@code <int>} element. */
  public void writeInt(String name, int val) throws IOException {
    writeInt(name,Integer.toString(val));
  }

  /** Writes a {@code <long>} element; the value is written unescaped. */
  public void writeLong(String name, String val) throws IOException {
    writePrim("long",name,val,false);
  }

  /** Writes a {@code <long>} element. */
  public void writeLong(String name, long val) throws IOException {
    writeLong(name,Long.toString(val));
  }

  /** Writes a {@code <bool>} element; the value is written unescaped. */
  public void writeBool(String name, String val) throws IOException {
    writePrim("bool",name,val,false);
  }

  /** Writes a {@code <bool>} element. */
  public void writeBool(String name, boolean val) throws IOException {
    writeBool(name,Boolean.toString(val));
  }

  /** Writes a {@code <short>} element; the value is written unescaped. */
  public void writeShort(String name, String val) throws IOException {
    writePrim("short",name,val,false);
  }

  /**
   * Writes the value as an {@code <int>} element.
   * NOTE(review): unlike the String overload this emits {@code <int>}, not
   * {@code <short>} - left unchanged because it affects the wire format; confirm intent.
   */
  public void writeShort(String name, short val) throws IOException {
    writeInt(name,Short.toString(val));
  }

  /** Writes a {@code <byte>} element; the value is written unescaped. */
  public void writeByte(String name, String val) throws IOException {
    writePrim("byte",name,val,false);
  }

  /**
   * Writes the value as an {@code <int>} element.
   * NOTE(review): unlike the String overload this emits {@code <int>}, not
   * {@code <byte>} - left unchanged because it affects the wire format; confirm intent.
   */
  public void writeByte(String name, byte val) throws IOException {
    writeInt(name,Byte.toString(val));
  }

  /** Writes a {@code <float>} element; the value is written unescaped. */
  public void writeFloat(String name, String val) throws IOException {
    writePrim("float",name,val,false);
  }

  /** Writes a {@code <float>} element using {@link Float#toString(float)}. */
  public void writeFloat(String name, float val) throws IOException {
    writeFloat(name,Float.toString(val));
  }

  /** Writes a {@code <double>} element; the value is written unescaped. */
  public void writeDouble(String name, String val) throws IOException {
    writePrim("double",name,val,false);
  }

  /** Writes a {@code <double>} element using {@link Double#toString(double)}. */
  public void writeDouble(String name, double val) throws IOException {
    writeDouble(name,Double.toString(val));
  }

  /**
   * Writes a {@code <date>} element formatted in GMT as
   * {@code year-MM-ddTHH:mm:ss[.fraction]Z}. The fractional part is present
   * only when the milliseconds are non-zero and never ends in '0'.
   *
   * @param name value of the {@code name} attribute, or null for none
   * @param val  the date to write
   * @throws IOException if the underlying writer fails
   */
  public void writeDate(String name, Date val) throws IOException {
    // using a stringBuilder for numbers can be nice since
    // a temporary string isn't used (it's added directly to the
    // builder's buffer).
    cal.setTime(val);

    sb.setLength(0);
    int i = cal.get(Calendar.YEAR);
    sb.append(i);
    sb.append('-');
    i = cal.get(Calendar.MONTH) + 1;  // 0 based, so add 1
    if (i<10) sb.append('0');
    sb.append(i);
    sb.append('-');
    i = cal.get(Calendar.DAY_OF_MONTH);
    if (i<10) sb.append('0');
    sb.append(i);
    sb.append('T');
    i = cal.get(Calendar.HOUR_OF_DAY); // 24 hour time format
    if (i<10) sb.append('0');
    sb.append(i);
    sb.append(':');
    i = cal.get(Calendar.MINUTE);
    if (i<10) sb.append('0');
    sb.append(i);
    sb.append(':');
    i = cal.get(Calendar.SECOND);
    if (i<10) sb.append('0');
    sb.append(i);
    i = cal.get(Calendar.MILLISECOND);
    if (i != 0) {
      sb.append('.');
      if (i<100) sb.append('0');
      if (i<10) sb.append('0');
      sb.append(i);

      // handle canonical format specifying fractional
      // seconds shall not end in '0'.  Given the slowness of
      // integer div/mod, simply checking the last character
      // is probably the fastest way to check.
      int lastIdx = sb.length()-1;
      if (sb.charAt(lastIdx)=='0') {
        lastIdx--;
        if (sb.charAt(lastIdx)=='0') {
          lastIdx--;
        }
        sb.setLength(lastIdx+1);
      }
    }
    sb.append('Z');
    writeDate(name, sb.toString());
  }

  /** Writes a {@code <date>} element; the value is written unescaped. */
  public void writeDate(String name, String val) throws IOException {
    writePrim("date",name,val,false);
  }


  //
  // OPT - specific writeInt, writeFloat, methods might be faster since
  // there would be less write calls (write("<int name=\"" + name + ... + </int>)
  //

  /**
   * Writes a simple element {@code <tag name="name">val</tag>}.
   *
   * <p>An empty {@code val} produces a self-closing element. A null {@code val}
   * (e.g. a field type calling {@link #writeStr} with null instead of
   * {@link #writeNull}) is written as an empty {@code <null>} element, the same
   * output {@link #writeNull} produces.
   *
   * @param tag    element name
   * @param name   value of the {@code name} attribute, or null for none
   * @param val    element content
   * @param escape whether the content is passed through {@link XML#escapeCharData}
   * @throws IOException if the underlying writer fails
   */
  public void writePrim(String tag, String name, String val, boolean escape) throws IOException {
    // OPT - we could use a temp char[] (or a StringBuilder) and if the
    // size was small enough to fit (if escape==false we can calc exact size)
    // then we could put things directly in the temp buf.
    // need to see what percent of CPU this takes up first though...
    // Could test a reusable StringBuilder...

    if (val == null) {
      startTag("null", name, true);
      return;
    }

    int contentLen = val.length();

    startTag(tag, name, contentLen==0);
    if (contentLen==0) return;

    if (escape) {
      XML.escapeCharData(val,writer);
    } else {
      writer.write(val,0,contentLen);
    }

    writer.write("</");
    writer.write(tag);
    writer.write('>');
  }

}