package org.apache.solr.handler;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeReflector;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap; import java.util.Map; /** * * @deprecated Use {@link org.apache.solr.handler.DocumentAnalysisRequestHandler} instead. **/ @Deprecated public class AnalysisRequestHandler extends RequestHandlerBase { public static Logger log = LoggerFactory.getLogger(AnalysisRequestHandler.class); private XMLInputFactory inputFactory; @Override public void init(NamedList args) { super.init(args); inputFactory = XMLInputFactory.newInstance(); try { // The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe // XMLInputFactory, as that implementation tries to cache and reuse the // XMLStreamReader. Setting the parser-specific "reuse-instance" property to false // prevents this. // All other known open-source stax parsers (and the bea ref impl) // have thread-safe factories. inputFactory.setProperty("reuse-instance", Boolean.FALSE); } catch (IllegalArgumentException ex) { // Other implementations will likely throw this exception since "reuse-instance" // isimplementation specific. log.debug("Unable to set the 'reuse-instance' property for the input factory: " + inputFactory); } } @Override public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { SolrParams params = req.getParams(); Iterable<ContentStream> streams = req.getContentStreams(); if (streams != null) { for (ContentStream stream : req.getContentStreams()) { InputStream is = null; XMLStreamReader parser = null; try { is = stream.getStream(); final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType()); parser = (charset == null) ? 
inputFactory.createXMLStreamReader(is) : inputFactory.createXMLStreamReader(is, charset); NamedList<Object> result = processContent(parser, req.getSchema()); rsp.add("response", result); } finally { if (parser != null) parser.close(); IOUtils.closeQuietly(is); } } } } NamedList<Object> processContent(XMLStreamReader parser, IndexSchema schema) throws XMLStreamException, IOException { NamedList<Object> result = new SimpleOrderedMap<Object>(); while (true) { int event =; switch (event) { case XMLStreamConstants.END_DOCUMENT: { parser.close(); return result; } case XMLStreamConstants.START_ELEMENT: { String currTag = parser.getLocalName(); if ("doc".equals(currTag)) { log.trace("Tokenizing doc..."); SolrInputDocument doc = readDoc(parser); SchemaField uniq = schema.getUniqueKeyField(); NamedList<NamedList<NamedList<Object>>> theTokens = new SimpleOrderedMap<NamedList<NamedList<Object>>>(); result.add(doc.getFieldValue(uniq.getName()).toString(), theTokens); for (String name : doc.getFieldNames()) { FieldType ft = schema.getFieldType(name); Analyzer analyzer = ft.getAnalyzer(); Collection<Object> vals = doc.getFieldValues(name); for (Object val : vals) { Reader reader = new StringReader(val.toString()); TokenStream tstream = analyzer.tokenStream(name, reader); NamedList<NamedList<Object>> tokens = getTokens(tstream); theTokens.add(name, tokens); } } } break; } } } } // a static mapping of the reflected attribute keys to the names used in Solr 1.3/1.4 static Map<String,String> ATTRIBUTE_MAPPING = Collections.unmodifiableMap(new HashMap<String,String>() {{ put(CharTermAttribute.class.getName() + "#term", "value"); put(OffsetAttribute.class.getName() + "#startOffset", "start"); put(OffsetAttribute.class.getName() + "#endOffset", "end"); put(PositionIncrementAttribute.class.getName() + "#positionIncrement", "posInc"); put(TypeAttribute.class.getName() + "#type", "type"); }}); static NamedList<NamedList<Object>> getTokens(TokenStream tstream) throws IOException { // outer 
is namedList since order of tokens is important NamedList<NamedList<Object>> tokens = new NamedList<NamedList<Object>>(); while (tstream.incrementToken()) { final NamedList<Object> token = new SimpleOrderedMap<Object>(); tokens.add("token", token); tstream.reflectWith(new AttributeReflector() { public void reflect(Class<? extends Attribute> attClass, String key, Object value) { String k = attClass.getName() + '#' + key; // map keys for "standard attributes": if (ATTRIBUTE_MAPPING.containsKey(k)) { k = ATTRIBUTE_MAPPING.get(k); } token.add(k, value); } }); } return tokens; } SolrInputDocument readDoc(XMLStreamReader parser) throws XMLStreamException { SolrInputDocument doc = new SolrInputDocument(); StringBuilder text = new StringBuilder(); String name = null; String attrName = ""; float boost = 1.0f; boolean isNull = false; while (true) { int event =; switch (event) { // Add everything to the text case XMLStreamConstants.SPACE: case XMLStreamConstants.CDATA: case XMLStreamConstants.CHARACTERS: text.append(parser.getText()); break; case XMLStreamConstants.END_ELEMENT: if ("doc".equals(parser.getLocalName())) { return doc; } else if ("field".equals(parser.getLocalName())) { if (!isNull) { doc.addField(name, text.toString(), boost); boost = 1.0f; } } break; case XMLStreamConstants.START_ELEMENT: text.setLength(0); String localName = parser.getLocalName(); if (!"field".equals(localName)) { log.warn("unexpected XML tag doc/" + localName); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "unexpected XML tag doc/" + localName); } String attrVal = ""; for (int i = 0; i < parser.getAttributeCount(); i++) { attrName = parser.getAttributeLocalName(i); attrVal = parser.getAttributeValue(i); if ("name".equals(attrName)) { name = attrVal; } } break; } } } //////////////////////// SolrInfoMBeans methods ////////////////////// @Override public String getDescription() { return "Provide Analysis of text"; } @Override public String getVersion() { return "$Revision:$"; } 
@Override public String getSourceId() { return "$Id:$"; } @Override public String getSource() { return "$URL:$"; } }