/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.memory;

import java.io.IOException;
import java.util.Iterator;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.LegacyDocValuesIterables;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.BYTES;
import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.NUMBER;
import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED;
import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED_NUMERIC;
import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED_NUMERIC_SINGLETON;
import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED_SET;
import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED_SET_SINGLETON;
import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.VERSION_CURRENT;

/**
 * Writer for {@link DirectDocValuesFormat}
 */
class DirectDocValuesConsumer extends DocValuesConsumer {
  IndexOutput data, meta;
  final int maxDoc;

  DirectDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension,
                          String metaCodec, String metaExtension) throws IOException {
    maxDoc = state.segmentInfo.maxDoc();
    boolean success = false;
    try {
      String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
      data = state.directory.createOutput(dataName, state.context);
      CodecUtil.writeIndexHeader(data, dataCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
      String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
      meta = state.directory.createOutput(metaName, state.context);
      CodecUtil.writeIndexHeader(meta, metaCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(this);
      }
    }
  }

  @Override
  public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    meta.writeVInt(field.number);
    meta.writeByte(NUMBER);
    addNumericFieldValues(field, LegacyDocValuesIterables.numericIterable(field, valuesProducer, maxDoc));
  }

  private void addNumericFieldValues(FieldInfo field, Iterable<Number> values) throws IOException {
    meta.writeLong(data.getFilePointer());
    long minValue = Long.MAX_VALUE;
    long maxValue = Long.MIN_VALUE;
    boolean missing = false;

    long count = 0;
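    // First pass over the values: track min/max so the narrowest byte width
    // can be chosen below, record whether any document is missing a value,
    // and enforce the maximum value count.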
    for (Number nv : values) {
      if (nv != null) {
        long v = nv.longValue();
        minValue = Math.min(minValue, v);
        maxValue = Math.max(maxValue, v);
      } else {
        missing = true;
      }
      count++;
      if (count >= DirectDocValuesFormat.MAX_SORTED_SET_ORDS) {
        throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + DirectDocValuesFormat.MAX_SORTED_SET_ORDS + " values/total ords");
      }
    }
    meta.writeInt((int) count);

    if (missing) {
      long start = data.getFilePointer();
      writeMissingBitset(values);
      meta.writeLong(start);
      meta.writeLong(data.getFilePointer() - start);
    } else {
      meta.writeLong(-1L);
    }

    byte byteWidth;
    if (minValue >= Byte.MIN_VALUE && maxValue <= Byte.MAX_VALUE) {
      byteWidth = 1;
    } else if (minValue >= Short.MIN_VALUE && maxValue <= Short.MAX_VALUE) {
      byteWidth = 2;
    } else if (minValue >= Integer.MIN_VALUE && maxValue <= Integer.MAX_VALUE) {
      byteWidth = 4;
    } else {
      byteWidth = 8;
    }
    meta.writeByte(byteWidth);

    for (Number nv : values) {
      long v;
      if (nv != null) {
        v = nv.longValue();
      } else {
        v = 0;
      }

      switch (byteWidth) {
        case 1:
          data.writeByte((byte) v);
          break;
        case 2:
          data.writeShort((short) v);
          break;
        case 4:
          data.writeInt((int) v);
          break;
        case 8:
          data.writeLong(v);
          break;
      }
    }
  }

  @Override
  public void close() throws IOException {
    boolean success = false;
    try {
      if (meta != null) {
        meta.writeVInt(-1); // write EOF marker
        CodecUtil.writeFooter(meta); // write checksum
      }
      if (data != null) {
        CodecUtil.writeFooter(data);
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(data, meta);
      } else {
        IOUtils.closeWhileHandlingException(data, meta);
      }
      data = meta = null;
    }
  }

  @Override
  public void addBinaryField(FieldInfo field, final DocValuesProducer valuesProducer) throws IOException {
    meta.writeVInt(field.number);
    meta.writeByte(BYTES);
    addBinaryFieldValues(field, LegacyDocValuesIterables.binaryIterable(field, valuesProducer, maxDoc));
  }

  private void addBinaryFieldValues(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
    // write the byte[] data
    final long startFP = data.getFilePointer();
    boolean missing = false;
    long totalBytes = 0;
    int count = 0;
    for (BytesRef v : values) {
      if (v != null) {
        data.writeBytes(v.bytes, v.offset, v.length);
        totalBytes += v.length;
        if (totalBytes > DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH) {
          throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, cannot have more than DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH (" + DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH + ") bytes");
        }
      } else {
        missing = true;
      }
      count++;
    }

    meta.writeLong(startFP);
    meta.writeInt((int) totalBytes);
    meta.writeInt(count);
    if (missing) {
      long start = data.getFilePointer();
      writeMissingBitset(values);
      meta.writeLong(start);
      meta.writeLong(data.getFilePointer() - start);
    } else {
      meta.writeLong(-1L);
    }

    int addr = 0;
    for (BytesRef v : values) {
      data.writeInt(addr);
      if (v != null) {
        addr += v.length;
      }
    }
    data.writeInt(addr);
  }

  // TODO: in some cases representing missing with minValue-1 wouldn't take up additional space and so on,
  // but this is very simple, and algorithms only check this for values of 0 anyway (doesn't slow down normal decode)
  void writeMissingBitset(Iterable<?> values) throws IOException {
    long bits = 0;
    int count = 0;
    for (Object v : values) {
      if (count == 64) {
        data.writeLong(bits);
        count = 0;
        bits = 0;
      }
      if (v != null) {
        bits |= 1L << (count & 0x3f);
      }
      count++;
    }
    if (count > 0) {
      data.writeLong(bits);
    }
  }
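
  // A SORTED field is written as two consecutive parts: the per-document
  // ordinals (using the numeric encoding above) followed by the sorted,
  // deduplicated values (using the binary encoding above).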
  @Override
  public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    meta.writeVInt(field.number);
    meta.writeByte(SORTED);

    // write the ordinals as numerics
    addNumericFieldValues(field, LegacyDocValuesIterables.sortedOrdIterable(valuesProducer, field, maxDoc));

    // write the values as binary
    addBinaryFieldValues(field, LegacyDocValuesIterables.valuesIterable(valuesProducer.getSorted(field)));
  }

  @Override
  public void addSortedNumericField(FieldInfo field, final DocValuesProducer valuesProducer) throws IOException {
    final Iterable<Number> docToValueCount = LegacyDocValuesIterables.sortedNumericToDocCount(valuesProducer, field, maxDoc);
    final Iterable<Number> values = LegacyDocValuesIterables.sortedNumericToValues(valuesProducer, field);
    meta.writeVInt(field.number);
    if (isSingleValued(docToValueCount)) {
      meta.writeByte(SORTED_NUMERIC_SINGLETON);
      addNumericFieldValues(field, singletonView(docToValueCount, values, null));
    } else {
      meta.writeByte(SORTED_NUMERIC);

      // First write docToValueCounts, except we "aggregate" the
      // counts so they turn into addresses, and add a final
      // value = the total aggregate:
      addNumericFieldValues(field, countToAddressIterator(docToValueCount));

      // Write values for all docs, appended into one big
      // numerics:
      addNumericFieldValues(field, values);
    }
  }

  // note: this might not be the most efficient... but it's fairly simple
  @Override
  public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    Iterable<BytesRef> values = LegacyDocValuesIterables.valuesIterable(valuesProducer.getSortedSet(field));
    Iterable<Number> docToOrdCount = LegacyDocValuesIterables.sortedSetOrdCountIterable(valuesProducer, field, maxDoc);
    Iterable<Number> ords = LegacyDocValuesIterables.sortedSetOrdsIterable(valuesProducer, field);
    meta.writeVInt(field.number);

    if (isSingleValued(docToOrdCount)) {
      meta.writeByte(SORTED_SET_SINGLETON);

      // Write ordinals for all docs, appended into one big
      // numerics:
      addNumericFieldValues(field, singletonView(docToOrdCount, ords, -1L));

      // write the values as binary
      addBinaryFieldValues(field, values);
    } else {
      meta.writeByte(SORTED_SET);

      // First write docToOrdCounts, except we "aggregate" the
      // counts so they turn into addresses, and add a final
      // value = the total aggregate:
      addNumericFieldValues(field, countToAddressIterator(docToOrdCount));

      // Write ordinals for all docs, appended into one big
      // numerics:
      addNumericFieldValues(field, ords);

      // write the values as binary
      addBinaryFieldValues(field, values);
    }
  }

  /**
   * Just aggregates the count values so they become
   * "addresses", and adds one more value in the end
   * (the final sum). For example, the counts 2, 1, 3
   * become the addresses 0, 2, 3, 6.
   */
  private Iterable<Number> countToAddressIterator(final Iterable<Number> counts) {
    return new Iterable<Number>() {
      @Override
      public Iterator<Number> iterator() {
        final Iterator<Number> iter = counts.iterator();

        return new Iterator<Number>() {

          long sum;
          boolean ended;

          @Override
          public boolean hasNext() {
            return iter.hasNext() || !ended;
          }

          @Override
          public Number next() {
            long toReturn = sum;

            if (iter.hasNext()) {
              Number n = iter.next();
              if (n != null) {
                sum += n.longValue();
              }
            } else if (!ended) {
              ended = true;
            } else {
              assert false;
            }

            return toReturn;
          }

          @Override
          public void remove() {
            throw new UnsupportedOperationException();
          }
        };
      }
    };
  }
}
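
// Summary of the metadata stream this consumer produces: each field entry
// starts with the field number (vint) and an entry type byte (NUMBER, BYTES,
// SORTED, SORTED_SET, SORTED_SET_SINGLETON, SORTED_NUMERIC,
// SORTED_NUMERIC_SINGLETON); the per-type details (data file pointers,
// counts, missing-bitset address or -1, numeric byte width) are written by
// addNumericFieldValues/addBinaryFieldValues above. close() terminates the
// stream with a vint -1 marker followed by the codec footer.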