package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Fieldable;
// TODO: break into separate freq and prox writers as
// codecs; make separate container (tii/tis/skip/*) that can
// be configured as any number of files 1..N
/**
 * Per-field consumer in the TermsHash chain that buffers postings for one
 * field in RAM: doc-ID deltas, term frequencies, position deltas, and
 * payloads, all VInt-encoded into the byte streams managed by
 * {@link TermsHashPerField}.
 *
 * <p>Stream 0 carries doc deltas (and freqs, unless the field omits term
 * freq/positions); stream 1 carries position deltas and payloads. Instances
 * compare by field name so fields can be processed in a deterministic order.
 */
final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implements Comparable<FreqProxTermsWriterPerField> {
  final FreqProxTermsWriterPerThread perThread;
  final TermsHashPerField termsHashPerField;
  final FieldInfo fieldInfo;
  final DocumentsWriter.DocState docState;
  final FieldInvertState fieldState;

  // Snapshot of fieldInfo.omitTermFreqAndPositions; re-read in reset() so the
  // in-RAM encoding tracks the field's current setting.
  boolean omitTermFreqAndPositions;

  // Non-null only while the current token stream exposes a PayloadAttribute;
  // cached in start(Fieldable) and consulted by writeProx for every position.
  PayloadAttribute payloadAttribute;

  // Flipped to true the first time any term in this field carries a payload.
  boolean hasPayloads;

  public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo) {
    this.termsHashPerField = termsHashPerField;
    this.perThread = perThread;
    this.fieldInfo = fieldInfo;
    docState = termsHashPerField.docState;
    fieldState = termsHashPerField.fieldState;
    omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
  }

  /**
   * One stream (doc deltas only) when term freqs/positions are omitted,
   * two (docs+freqs, positions+payloads) otherwise.
   */
  @Override
  int getStreamCount() {
    return fieldInfo.omitTermFreqAndPositions ? 1 : 2;
  }

  @Override
  void finish() {}

  @Override
  void skippingLongTerm() throws IOException {}

  /** Orders per-field writers alphabetically by field name. */
  public int compareTo(FreqProxTermsWriterPerField other) {
    return fieldInfo.name.compareTo(other.fieldInfo.name);
  }

  void reset() {
    // Record, up front, whether our in-RAM format will be
    // with or without term freqs:
    omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
    payloadAttribute = null;
  }

  /** Returns true if at least one of the given field instances is indexed. */
  @Override
  boolean start(Fieldable[] fields, int count) {
    for (int i = 0; i < count; i++) {
      if (fields[i].isIndexed()) {
        return true;
      }
    }
    return false;
  }

  @Override
  void start(Fieldable f) {
    // Cache the payload attribute (if any) for this field's token stream so
    // writeProx need not look it up per position.
    if (fieldState.attributeSource.hasAttribute(PayloadAttribute.class)) {
      payloadAttribute = fieldState.attributeSource.getAttribute(PayloadAttribute.class);
    } else {
      payloadAttribute = null;
    }
  }

  /**
   * Appends one position entry for {@code termID} to stream 1.
   *
   * @param termID   index of the term in the parallel postings arrays
   * @param proxCode position delta from this term's previous occurrence
   *                 (or the absolute position for a new term/doc)
   *
   * The code is shifted left one bit; the low bit flags a payload, in which
   * case the payload length and bytes follow.
   */
  final void writeProx(final int termID, int proxCode) {
    final Payload payload = payloadAttribute == null ? null : payloadAttribute.getPayload();

    if (payload != null && payload.length > 0) {
      termsHashPerField.writeVInt(1, (proxCode << 1) | 1);
      termsHashPerField.writeVInt(1, payload.length);
      termsHashPerField.writeBytes(1, payload.data, payload.offset, payload.length);
      hasPayloads = true;
    } else {
      termsHashPerField.writeVInt(1, proxCode << 1);
    }

    FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
    postings.lastPositions[termID] = fieldState.position;
  }

  /**
   * Called the first time a term is seen since the last flush. Seeds the
   * per-term state; the doc code is not written to stream 0 yet — it is
   * flushed lazily by addTerm once the term's freq in this doc is known.
   */
  @Override
  final void newTerm(final int termID) {
    // First time we're seeing this term since the last
    // flush
    assert docState.testPoint("FreqProxTermsWriterPerField.newTerm start");
    FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
    postings.lastDocIDs[termID] = docState.docID;
    if (omitTermFreqAndPositions) {
      // No freq: the code is the plain doc delta (docID, since this is the
      // first doc for the term).
      postings.lastDocCodes[termID] = docState.docID;
    } else {
      // With freqs the delta is shifted left one bit; the low bit will be
      // set later iff the freq turns out to be 1 (so the freq VInt can be
      // skipped on disk).
      postings.lastDocCodes[termID] = docState.docID << 1;
      postings.docFreqs[termID] = 1;
      writeProx(termID, fieldState.position);
    }
  }

  /**
   * Called for each subsequent occurrence of a term. When the term crosses
   * into a new doc, the previous doc's pending code (and freq, if &gt; 1) is
   * written to stream 0 and the per-term state is re-seeded for this doc;
   * otherwise only the freq is bumped and a position delta is appended.
   */
  @Override
  final void addTerm(final int termID) {
    assert docState.testPoint("FreqProxTermsWriterPerField.addTerm start");
    FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
    assert omitTermFreqAndPositions || postings.docFreqs[termID] > 0;

    if (omitTermFreqAndPositions) {
      if (docState.docID != postings.lastDocIDs[termID]) {
        assert docState.docID > postings.lastDocIDs[termID];
        // Flush the previous doc's delta, then stage this doc's delta.
        termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
        postings.lastDocCodes[termID] = docState.docID - postings.lastDocIDs[termID];
        postings.lastDocIDs[termID] = docState.docID;
      }
    } else {
      if (docState.docID != postings.lastDocIDs[termID]) {
        assert docState.docID > postings.lastDocIDs[termID];
        // Term not yet seen in the current doc but previously
        // seen in other doc(s) since the last flush

        // Now that we know doc freq for previous doc,
        // write it & lastDocCode
        if (1 == postings.docFreqs[termID]) {
          // Freq 1 is folded into the code's low bit; no freq VInt written.
          termsHashPerField.writeVInt(0, postings.lastDocCodes[termID] | 1);
        } else {
          termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
          termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
        }
        postings.docFreqs[termID] = 1;
        postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
        postings.lastDocIDs[termID] = docState.docID;
        writeProx(termID, fieldState.position);
      } else {
        // Same doc: just bump the freq and write a position delta.
        postings.docFreqs[termID]++;
        writeProx(termID, fieldState.position - postings.lastPositions[termID]);
      }
    }
  }

  @Override
  ParallelPostingsArray createPostingsArray(int size) {
    return new FreqProxPostingsArray(size);
  }

  /**
   * Parallel per-term arrays holding the freq/prox bookkeeping that
   * newTerm/addTerm/writeProx maintain, keyed by termID.
   */
  static final class FreqProxPostingsArray extends ParallelPostingsArray {
    public FreqProxPostingsArray(int size) {
      super(size);
      docFreqs = new int[size];
      lastDocIDs = new int[size];
      lastDocCodes = new int[size];
      lastPositions = new int[size];
    }

    int[] docFreqs;                                     // # times this term occurs in the current doc
    int[] lastDocIDs;                                   // Last docID where this term occurred
    int[] lastDocCodes;                                 // Code for prior doc
    int[] lastPositions;                                // Last position where this term occurred

    @Override
    ParallelPostingsArray newInstance(int size) {
      return new FreqProxPostingsArray(size);
    }

    @Override
    void copyTo(ParallelPostingsArray toArray, int numToCopy) {
      assert toArray instanceof FreqProxPostingsArray;
      FreqProxPostingsArray to = (FreqProxPostingsArray) toArray;

      super.copyTo(toArray, numToCopy);

      System.arraycopy(docFreqs, 0, to.docFreqs, 0, numToCopy);
      System.arraycopy(lastDocIDs, 0, to.lastDocIDs, 0, numToCopy);
      System.arraycopy(lastDocCodes, 0, to.lastDocCodes, 0, numToCopy);
      System.arraycopy(lastPositions, 0, to.lastPositions, 0, numToCopy);
    }

    @Override
    int bytesPerPosting() {
      // Base posting cost plus the four parallel int arrays above.
      return ParallelPostingsArray.BYTES_PER_POSTING + 4 * DocumentsWriter.INT_NUM_BYTE;
    }
  }

  public void abort() {}
}