package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.document.Fieldable;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

/**
 * Holds state for inverting all occurrences of a single
 * field in the document.  This class doesn't do anything
 * itself; instead, it forwards the tokens produced by
 * analysis to its own consumer
 * (InvertedDocConsumerPerField).  It also interacts with an
 * endConsumer (InvertedDocEndConsumerPerField).
 */
final class DocInverterPerField extends DocFieldConsumerPerField {

  final private DocInverterPerThread perThread;
  final private FieldInfo fieldInfo;
  final InvertedDocConsumerPerField consumer;
  final InvertedDocEndConsumerPerField endConsumer;
  final DocumentsWriter.DocState docState;
  final FieldInvertState fieldState;

  public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo) {
    this.perThread = perThread;
    this.fieldInfo = fieldInfo;
    docState = perThread.docState;
    fieldState = perThread.fieldState;
    this.consumer = perThread.consumer.addField(this, fieldInfo);
    this.endConsumer = perThread.endConsumer.addField(this, fieldInfo);
  }

  @Override
  void abort() {
    consumer.abort();
    endConsumer.abort();
  }

  @Override
  public void processFields(final Fieldable[] fields,
                            final int count) throws IOException {

    fieldState.reset(docState.doc.getBoost());

    final int maxFieldLength = docState.maxFieldLength;

    final boolean doInvert = consumer.start(fields, count);

    for(int i=0;i<count;i++) {

      final Fieldable field = fields[i];

      // TODO FI: this should be "genericized" to querying
      // consumer if it wants to see this particular field
      // tokenized.
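      // Only invert this field if it is marked as indexed and the
      // consumer asked (via start(fields, count) above) to see the
      // inverted fields of this document.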
      if (field.isIndexed() && doInvert) {

        final boolean anyToken;

        if (fieldState.length > 0)
          fieldState.position += docState.analyzer.getPositionIncrementGap(fieldInfo.name);

        if (!field.isTokenized()) {               // un-tokenized field
          String stringValue = field.stringValue();
          final int valueLength = stringValue.length();
          perThread.singleToken.reinit(stringValue, 0, valueLength);
          fieldState.attributeSource = perThread.singleToken;
          consumer.start(field);

          boolean success = false;
          try {
            consumer.add();
            success = true;
          } finally {
            if (!success)
              docState.docWriter.setAborting();
          }
          fieldState.offset += valueLength;
          fieldState.length++;
          fieldState.position++;
          anyToken = valueLength > 0;
        } else {                                  // tokenized field
          final TokenStream stream;
          final TokenStream streamValue = field.tokenStreamValue();

          if (streamValue != null)
            stream = streamValue;
          else {
            // the field does not have a TokenStream,
            // so we have to obtain one from the analyzer
            final Reader reader;                  // find or make Reader
            final Reader readerValue = field.readerValue();

            if (readerValue != null)
              reader = readerValue;
            else {
              String stringValue = field.stringValue();
              if (stringValue == null) {
                throw new IllegalArgumentException("field must have either TokenStream, String or Reader value");
              }
              perThread.stringReader.init(stringValue);
              reader = perThread.stringReader;
            }

            // Tokenize field and add to postingTable
            stream = docState.analyzer.reusableTokenStream(fieldInfo.name, reader);
          }

          // reset the TokenStream to the first token
          stream.reset();

          final int startLength = fieldState.length;

          try {
            boolean hasMoreTokens = stream.incrementToken();

            fieldState.attributeSource = stream;

            OffsetAttribute offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
            PositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.addAttribute(PositionIncrementAttribute.class);

            consumer.start(field);

            for(;;) {

              // If we hit an exception in stream.next below
              // (which is fairly common, eg if analyzer
              // chokes on a given document), then it's
              // non-aborting and (above) this one document
              // will be marked as deleted, but still
              // consume a docID

              if (!hasMoreTokens) break;

              final int posIncr = posIncrAttribute.getPositionIncrement();
              fieldState.position += posIncr;
              if (fieldState.position > 0) {
                fieldState.position--;
              }

              if (posIncr == 0)
                fieldState.numOverlap++;

              boolean success = false;
              try {
                // If we hit an exception in here, we abort
                // all buffered documents since the last
                // flush, on the likelihood that the
                // internal state of the consumer is now
                // corrupt and should not be flushed to a
                // new segment:
                consumer.add();
                success = true;
              } finally {
                if (!success)
                  docState.docWriter.setAborting();
              }
              fieldState.position++;
              if (++fieldState.length >= maxFieldLength) {
                if (docState.infoStream != null)
                  docState.infoStream.println("maxFieldLength " +maxFieldLength+ " reached for field " + fieldInfo.name + ", ignoring following tokens");
                break;
              }

              hasMoreTokens = stream.incrementToken();
            }

            // trigger streams to perform end-of-stream operations
            stream.end();

            fieldState.offset += offsetAttribute.endOffset();
            anyToken = fieldState.length > startLength;
          } finally {
            stream.close();
          }
        }

        if (anyToken)
          fieldState.offset += docState.analyzer.getOffsetGap(field);
        fieldState.boost *= field.getBoost();
      }

      // LUCENE-2387: don't hang onto the field, so GC can
      // reclaim
      fields[i] = null;
    }

    consumer.finish();
    endConsumer.finish();
  }
}