package org.apache.lucene.index.codecs.standard;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Comparator;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.PostingsConsumer;
import org.apache.lucene.index.codecs.TermsConsumer;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.CodecUtil;

/**
 * Writes the terms dict and interacts with the docs/positions
 * consumers to write the postings files.
 *
 * The [new] terms dict format is field-centric: each field
 * has its own section in the file.  Fields are written in
 * UTF16 string comparison order.  Within each field, each
 * term's text is written in UTF16 string comparison order.
 *
 * @lucene.experimental
 */
public class StandardTermsDictWriter extends FieldsConsumer {

  final static String CODEC_NAME = "STANDARD_TERMS_DICT";

  // Initial format
  public static final int VERSION_START = 0;
  public static final int VERSION_CURRENT = VERSION_START;

  private final DeltaBytesWriter termWriter;

  protected final IndexOutput out;
  final StandardPostingsWriter postingsWriter;
  final FieldInfos fieldInfos;
  FieldInfo currentField;
  private final StandardTermsIndexWriter termsIndexWriter;
  private final List<TermsConsumer> fields = new ArrayList<TermsConsumer>();
  private final Comparator<BytesRef> termComp;

  public StandardTermsDictWriter(StandardTermsIndexWriter termsIndexWriter, SegmentWriteState state,
                                 StandardPostingsWriter postingsWriter, Comparator<BytesRef> termComp) throws IOException {
    final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, "", StandardCodec.TERMS_EXTENSION);
    this.termsIndexWriter = termsIndexWriter;
    this.termComp = termComp;
    out = state.directory.createOutput(termsFileName);
    termsIndexWriter.setTermsOutput(out);
    state.flushedFiles.add(termsFileName);

    fieldInfos = state.fieldInfos;
    writeHeader(out);
    termWriter = new DeltaBytesWriter(out);
    currentField = null;
    this.postingsWriter = postingsWriter;

    postingsWriter.start(out);                 // have consumer write its format/header
  }

  protected void writeHeader(IndexOutput out) throws IOException {
    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);

    out.writeLong(0);                          // leave space for end index pointer
  }

  @Override
  public TermsConsumer addField(FieldInfo field) {
    assert currentField == null || currentField.name.compareTo(field.name) < 0;
    currentField = field;
    StandardTermsIndexWriter.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field);
    TermsConsumer terms = new TermsWriter(fieldIndexWriter, field, postingsWriter);
    fields.add(terms);
    return terms;
  }

  @Override
  public void close() throws IOException {
    try {
      // Write the per-field directory: for each field, its number,
      // term count and the file pointer where its terms begin
      final int fieldCount = fields.size();
      final long dirStart = out.getFilePointer();

      out.writeInt(fieldCount);
      for(int i=0;i<fieldCount;i++) {
        TermsWriter field = (TermsWriter) fields.get(i);
        out.writeInt(field.fieldInfo.number);
        out.writeLong(field.numTerms);
        out.writeLong(field.termsStartPointer);
      }
      writeTrailer(dirStart);
    } finally {
      try {
        out.close();
      } finally {
        try {
          postingsWriter.close();
        } finally {
          termsIndexWriter.close();
        }
      }
    }
  }

  protected void writeTrailer(long dirStart) throws IOException {
    // Back-fill the pointer reserved in writeHeader with the directory start
    out.seek(CodecUtil.headerLength(CODEC_NAME));
    out.writeLong(dirStart);
  }

  class TermsWriter extends TermsConsumer {
    private final FieldInfo fieldInfo;
    private final StandardPostingsWriter postingsWriter;
    private final long termsStartPointer;
    private long numTerms;
    private final StandardTermsIndexWriter.FieldWriter fieldIndexWriter;

    TermsWriter(StandardTermsIndexWriter.FieldWriter fieldIndexWriter, FieldInfo fieldInfo, StandardPostingsWriter postingsWriter) {
      this.fieldInfo = fieldInfo;
      this.fieldIndexWriter = fieldIndexWriter;
      termWriter.reset();
      termsStartPointer = out.getFilePointer();
      postingsWriter.setField(fieldInfo);
      this.postingsWriter = postingsWriter;
    }

    @Override
    public Comparator<BytesRef> getComparator() {
      return termComp;
    }

    @Override
    public PostingsConsumer startTerm(BytesRef text) throws IOException {
      postingsWriter.startTerm();
      return postingsWriter;
    }

    @Override
    public void finishTerm(BytesRef text, int numDocs) throws IOException {
      assert numDocs > 0;
      final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, numDocs);

      termWriter.write(text);
      out.writeVInt(numDocs);
      postingsWriter.finishTerm(numDocs, isIndexTerm);
      numTerms++;
    }

    // Finishes all terms in this field
    @Override
    public void finish() throws IOException {
      fieldIndexWriter.finish();
    }
  }
}
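
/**
 * A minimal sketch, for illustration only, of how a consumer could locate the
 * field directory laid out by {@link StandardTermsDictWriter}: writeHeader
 * reserves a long after the codec header, writeTrailer back-fills it with the
 * directory start, and close writes the directory itself (field number, term
 * count and terms start pointer per field).  The real consumer is
 * StandardTermsDictReader in this package; the class and method names below
 * are assumptions made for this example, not part of the codec.
 */
class TermsDictDirectorySketch {
  static void readDirectory(org.apache.lucene.store.IndexInput in) throws IOException {
    // Skip the codec header, then read the pointer back-filled by writeTrailer
    in.seek(CodecUtil.headerLength(StandardTermsDictWriter.CODEC_NAME));
    final long dirStart = in.readLong();
    in.seek(dirStart);

    final int fieldCount = in.readInt();
    for(int i=0;i<fieldCount;i++) {
      final int fieldNumber = in.readInt();          // FieldInfo.number
      final long numTerms = in.readLong();           // terms written for this field
      final long termsStartPointer = in.readLong();  // where this field's terms begin
      // a real reader would resolve fieldNumber to its FieldInfo here and
      // remember numTerms/termsStartPointer for that field
    }
  }
}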