package org.apache.lucene.codecs.simpletext;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to You under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.DOC;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.HEADER;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.VALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.VALUE_SIZE;

import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;
import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;

import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.PerDocProducerBase;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.packed.PackedInts.Reader;

/**
 * Reads plain-text DocValues.
 * <p>
 * <b><font color="red">FOR RECREATIONAL USE ONLY</font></b>
 *
 * @lucene.experimental
 */
public class SimpleTextPerDocProducer extends PerDocProducerBase {
  /** Per-field {@link DocValues}; empty when the segment has no DocValues fields. */
  protected final TreeMap<String, DocValues> docValues;
  /** Comparator handed to every bytes-typed source created by this producer. */
  private final Comparator<BytesRef> comp;
  /** Suffix used when building the per-field file name. */
  private final String segmentSuffix;

  /**
   * Creates a new {@link SimpleTextPerDocProducer} instance and loads all
   * {@link DocValues} instances for this segment and codec.
   *
   * @param state         segment state supplying field infos, segment name,
   *                      document count, directory and IO context
   * @param comp          comparator used for bytes-typed values
   * @param segmentSuffix suffix appended to each per-field file name
   * @throws IOException if loading the DocValues fails
   */
  public SimpleTextPerDocProducer(SegmentReadState state,
      Comparator<BytesRef> comp, String segmentSuffix) throws IOException {
    // comp and segmentSuffix are assigned first: loadDocValues() below reads
    // both of them when it builds each SimpleTextDocValues instance.
    this.comp = comp;
    this.segmentSuffix = segmentSuffix;
    if (anyDocValuesFields(state.fieldInfos)) {
      docValues = load(state.fieldInfos, state.segmentInfo.name,
          state.segmentInfo.getDocCount(), state.dir, state.context);
    } else {
      docValues = new TreeMap<String, DocValues>();
    }
  }

  /** Returns the map of DocValues built by the constructor. */
  @Override
  protected Map<String, DocValues> docValues() {
    return docValues;
  }

  /**
   * Creates the plain-text {@link DocValues} for a single field.
   *
   * @param docCount number of documents whose values are stored
   * @param dir      directory to open the field's file from
   * @param id       identifier used as the base of the file name
   * @param type     DocValues type of the field
   * @param context  IO context used to open the file
   * @return a new {@link DocValues} backed by the field's file
   * @throws IOException if the file cannot be opened or its header read
   */
  protected DocValues loadDocValues(int docCount, Directory dir, String id,
      DocValues.Type type, IOContext context) throws IOException {
    return new SimpleTextDocValues(dir, context, type, id, docCount, comp,
        segmentSuffix);
  }

  /** Closes all given resources via {@link IOUtils#close}. */
  @Override
  protected void closeInternal(Collection<? extends Closeable> closeables)
      throws IOException {
    IOUtils.close(closeables);
  }

  /**
   * {@link DocValues} implementation that parses one field's values from a
   * text file. The file header is read once at construction; the per-document
   * values are parsed on every {@link #load()} call from a clone of the input.
   */
  private static class SimpleTextDocValues extends DocValues {

    private final int docCount;
    private final Type type;
    private final Comparator<BytesRef> comp;
    private final int valueSize;
    /** Kept open for the lifetime of this instance; {@link #load()} reads from clones of it. */
    private final IndexInput input;

    /**
     * Opens the field's file and reads its header.
     *
     * @param dir           directory containing the file
     * @param ctx           IO context used to open the file
     * @param type          DocValues type of the field
     * @param id            base of the file name
     * @param docCount      number of documents stored in the file
     * @param comp          comparator for bytes-typed values
     * @param segmentSuffix suffix appended to the file name
     * @throws IOException if opening the file or reading the header fails
     */
    public SimpleTextDocValues(Directory dir, IOContext ctx, Type type,
        String id, int docCount, Comparator<BytesRef> comp,
        String segmentSuffix) throws IOException {
      this.type = type;
      this.docCount = docCount;
      this.comp = comp;
      final String fileName = IndexFileNames.segmentFileName(id, "",
          segmentSuffix);
      IndexInput in = null;
      int size = 0;
      boolean success = false;
      try {
        in = dir.openInput(fileName, ctx);
        size = readHeader(in);
        success = true;
      } finally {
        // Do not leak the file handle when the header cannot be read.
        if (!success) {
          IOUtils.closeWhileHandlingException(in);
        }
      }
      input = in;
      valueSize = size;
    }

    /**
     * Closes the superclass state and then the underlying input. If the
     * superclass close fails, the input is still closed and any secondary
     * exception from it is suppressed so the original failure propagates.
     */
    @Override
    public void close() throws IOException {
      boolean success = false;
      try {
        super.close();
        success = true;
      } finally {
        if (success) {
          IOUtils.close(input);
        } else {
          IOUtils.closeWhileHandlingException(input);
        }
      }
    }

    /**
     * Parses all per-document values into a new in-memory {@link Source}.
     *
     * @return the fully populated source
     * @throws IOException              if reading the values fails
     * @throws IllegalArgumentException if the type is not one handled here
     */
    @Override
    public Source load() throws IOException {
      boolean success = false;
      // Read from a clone so the shared input itself is never advanced here.
      // NOTE(review): relies on the clone starting where the constructor left
      // the input (just past the header) -- confirm against IndexInput.clone().
      IndexInput in = input.clone();
      try {
        Source source = null;
        switch (type) {
          case BYTES_FIXED_DEREF:
          case BYTES_FIXED_SORTED:
          case BYTES_FIXED_STRAIGHT:
          case BYTES_VAR_DEREF:
          case BYTES_VAR_SORTED:
          case BYTES_VAR_STRAIGHT:
            // Bytes types need the comparator.
            source = read(in, new ValueReader(type, docCount, comp));
            break;
          case FIXED_INTS_16:
          case FIXED_INTS_32:
          case VAR_INTS:
          case FIXED_INTS_64:
          case FIXED_INTS_8:
          case FLOAT_32:
          case FLOAT_64:
            // Numeric types are array-backed and take no comparator.
            source = read(in, new ValueReader(type, docCount, null));
            break;
          default:
            throw new IllegalArgumentException("unknown type: " + type);
        }
        assert source != null;
        success = true;
        return source;
      } finally {
        if (!success) {
          IOUtils.closeWhileHandlingException(in);
        } else {
          IOUtils.close(in);
        }
      }
    }

    /**
     * Reads the two header lines and returns the value size parsed from the
     * second one.
     *
     * @param in input positioned at the start of the file
     * @return the integer following the {@code VALUE_SIZE} prefix
     * @throws IOException if reading fails
     */
    private int readHeader(IndexInput in) throws IOException {
      BytesRef scratch = new BytesRef();
      SimpleTextUtil.readLine(in, scratch);
      assert StringHelper.startsWith(scratch, HEADER);
      SimpleTextUtil.readLine(in, scratch);
      assert StringHelper.startsWith(scratch, VALUE_SIZE);
      // readString() already adds scratch.offset, so pass only the prefix
      // length; adding scratch.offset here as well would count it twice.
      return Integer.parseInt(readString(VALUE_SIZE.length, scratch));
    }

    /**
     * Reads {@code docCount} (DOC line, VALUE line) pairs followed by the END
     * line, feeding each value to the given reader.
     *
     * @param in     input positioned at the first DOC line
     * @param reader accumulator that parses and stores each value
     * @return the source produced by the reader
     * @throws IOException if reading fails
     */
    private Source read(IndexInput in, ValueReader reader) throws IOException {
      BytesRef scratch = new BytesRef();
      for (int i = 0; i < docCount; i++) {
        SimpleTextUtil.readLine(in, scratch);
        assert StringHelper.startsWith(scratch, DOC) : scratch.utf8ToString();
        SimpleTextUtil.readLine(in, scratch);
        assert StringHelper.startsWith(scratch, VALUE);
        // The offset is relative to scratch.offset; fromString() adds the
        // base offset itself.
        reader.fromString(i, scratch, VALUE.length);
      }
      SimpleTextUtil.readLine(in, scratch);
      assert scratch.equals(END);
      return reader.getSource();
    }

    /** Returns the same source as {@link #getSource()}. */
    @Override
    public Source getDirectSource() throws IOException {
      return this.getSource();
    }

    /** Returns the value size read from the file header. */
    @Override
    public int getValueSize() {
      return valueSize;
    }

    /** Returns the DocValues type given at construction. */
    @Override
    public Type getType() {
      return type;
    }
  }

  /**
   * Decodes the tail of a {@link BytesRef} as a UTF-8 string.
   *
   * @param offset  number of leading bytes to skip, relative to
   *                {@code scratch.offset} (not an absolute index into
   *                {@code scratch.bytes})
   * @param scratch the bytes to decode
   * @return the string formed by the bytes after the skipped prefix
   */
  public static String readString(int offset, BytesRef scratch) {
    return new String(scratch.bytes, scratch.offset + offset, scratch.length
        - offset, IOUtils.CHARSET_UTF_8);
  }

  /**
   * Accumulates parsed per-document values for one DocValues type. Exactly one
   * backing store is allocated, chosen by the type: a primitive array for
   * numeric types, or a {@link BytesRefHash} plus a doc-to-entry table for
   * bytes types.
   */
  private static final class ValueReader {
    private final Type type;
    private byte[] bytes;
    private short[] shorts;
    private int[] ints;
    private long[] longs;
    private float[] floats;
    private double[] doubles;
    private Source source;
    private BytesRefHash hash;
    private BytesRef scratch;

    /**
     * @param type    DocValues type to read
     * @param maxDocs number of documents; sizes the backing store
     * @param comp    comparator for bytes types; asserted non-null for them
     */
    public ValueReader(Type type, int maxDocs, Comparator<BytesRef> comp) {
      this.type = type;
      Source docValuesArray = null;
      switch (type) {
        case FIXED_INTS_16:
          shorts = new short[maxDocs];
          docValuesArray = DocValuesArraySource.forType(type)
              .newFromArray(shorts);
          break;
        case FIXED_INTS_32:
          ints = new int[maxDocs];
          docValuesArray = DocValuesArraySource.forType(type)
              .newFromArray(ints);
          break;
        case FIXED_INTS_64:
          longs = new long[maxDocs];
          docValuesArray = DocValuesArraySource.forType(type)
              .newFromArray(longs);
          break;
        case VAR_INTS:
          longs = new long[maxDocs];
          docValuesArray = new VarIntsArraySource(type, longs);
          break;
        case FIXED_INTS_8:
          bytes = new byte[maxDocs];
          docValuesArray = DocValuesArraySource.forType(type)
              .newFromArray(bytes);
          break;
        case FLOAT_32:
          floats = new float[maxDocs];
          docValuesArray = DocValuesArraySource.forType(type)
              .newFromArray(floats);
          break;
        case FLOAT_64:
          doubles = new double[maxDocs];
          docValuesArray = DocValuesArraySource.forType(type)
              .newFromArray(doubles);
          break;
        case BYTES_FIXED_DEREF:
        case BYTES_FIXED_SORTED:
        case BYTES_FIXED_STRAIGHT:
        case BYTES_VAR_DEREF:
        case BYTES_VAR_SORTED:
        case BYTES_VAR_STRAIGHT:
          assert comp != null;
          hash = new BytesRefHash();
          BytesSource bytesSource = new BytesSource(type, comp, maxDocs, hash);
          // Share the source's doc-to-entry table so fromString() fills it
          // in place.
          ints = bytesSource.docIdToEntry;
          source = bytesSource;
          scratch = new BytesRef();
          break;
      }
      if (docValuesArray != null) {
        assert source == null;
        this.source = docValuesArray;
      }
    }

    /**
     * Parses one value and stores it for the given document.
     *
     * @param ord    document index to store the value at
     * @param ref    line containing the value
     * @param offset number of leading bytes of {@code ref} to skip, relative
     *               to {@code ref.offset}
     */
    public void fromString(int ord, BytesRef ref, int offset) {
      switch (type) {
        case FIXED_INTS_16:
          assert shorts != null;
          shorts[ord] = Short.parseShort(readString(offset, ref));
          break;
        case FIXED_INTS_32:
          assert ints != null;
          ints[ord] = Integer.parseInt(readString(offset, ref));
          break;
        case FIXED_INTS_64:
        case VAR_INTS:
          assert longs != null;
          longs[ord] = Long.parseLong(readString(offset, ref));
          break;
        case FIXED_INTS_8:
          assert bytes != null;
          bytes[ord] = (byte) Integer.parseInt(readString(offset, ref));
          break;
        case FLOAT_32:
          assert floats != null;
          floats[ord] = Float.parseFloat(readString(offset, ref));
          break;
        case FLOAT_64:
          assert doubles != null;
          doubles[ord] = Double.parseDouble(readString(offset, ref));
          break;
        case BYTES_FIXED_DEREF:
        case BYTES_FIXED_SORTED:
        case BYTES_FIXED_STRAIGHT:
        case BYTES_VAR_DEREF:
        case BYTES_VAR_SORTED:
        case BYTES_VAR_STRAIGHT:
          // Point the reusable scratch at the value bytes within ref's array
          // instead of copying them.
          scratch.bytes = ref.bytes;
          scratch.length = ref.length - offset;
          scratch.offset = ref.offset + offset;
          int key = hash.add(scratch);
          // A negative key is mapped back to a non-negative entry id.
          // NOTE(review): presumably add() returns -(id + 1) for a value
          // that is already present -- confirm against BytesRefHash.
          ints[ord] = key < 0 ? (-key) - 1 : key;
          break;
      }
    }

    /**
     * Returns the populated source; a {@link BytesSource} is given the chance
     * to build its sort tables first.
     */
    public Source getSource() {
      if (source instanceof BytesSource) {
        ((BytesSource) source).maybeSort();
      }
      return source;
    }
  }

  /**
   * Bytes-typed source backed by a {@link BytesRefHash}. For the two sorted
   * types it additionally maintains ord tables built by {@link #maybeSort()}.
   */
  private static final class BytesSource extends SortedSource {

    private final BytesRefHash hash;
    /** Hash entry id for each document; filled by {@link ValueReader}. */
    final int[] docIdToEntry;
    /** Entry ids in sorted order; only set for sorted types. */
    private int[] sortedEntries;
    /** Sorted position (ord) for each entry id; only set for sorted types. */
    private int[] addresses;
    private final boolean isSorted;

    protected BytesSource(Type type, Comparator<BytesRef> comp, int maxDoc,
        BytesRefHash hash) {
      super(type, comp);
      docIdToEntry = new int[maxDoc];
      this.hash = hash;
      isSorted = type == Type.BYTES_FIXED_SORTED
          || type == Type.BYTES_VAR_SORTED;
    }

    /**
     * For sorted types, sorts the hash entries and builds the inverse
     * entry-to-ord table. Does nothing for other types.
     */
    void maybeSort() {
      if (isSorted) {
        // size() is captured before sort() is called on the hash.
        addresses = new int[hash.size()];
        sortedEntries = hash.sort(getComparator());
        for (int i = 0; i < addresses.length; i++) {
          int entry = sortedEntries[i];
          addresses[entry] = i;
        }
      }
    }

    @Override
    public BytesRef getBytes(int docID, BytesRef ref) {
      if (isSorted) {
        return hash.get(sortedEntries[ord(docID)], ref);
      } else {
        return hash.get(docIdToEntry[docID], ref);
      }
    }

    /** Returns this source for sorted types, {@code null} otherwise. */
    @Override
    public SortedSource asSortedSource() {
      if (isSorted) {
        return this;
      }
      return null;
    }

    @Override
    public int ord(int docID) {
      assert isSorted;
      try {
        return addresses[docIdToEntry[docID]];
      } catch (Exception ignored) {
        // NOTE(review): any lookup failure (e.g. an out-of-range docID, or
        // tables not yet built) is silently reported as ord 0. Behavior kept
        // as-is because callers may depend on it; consider replacing with an
        // explicit bounds check.
        return 0;
      }
    }

    @Override
    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
      assert isSorted;
      return hash.get(sortedEntries[ord], bytesRef);
    }

    /** Always returns {@code null}; no doc-to-ord reader is provided. */
    @Override
    public Reader getDocToOrd() {
      return null;
    }

    @Override
    public int getValueCount() {
      return hash.size();
    }
  }

  /** Source for {@code VAR_INTS} values backed by a plain {@code long[]}. */
  private static class VarIntsArraySource extends Source {

    private final long[] array;

    protected VarIntsArraySource(Type type, long[] array) {
      super(type);
      this.array = array;
    }

    @Override
    public long getInt(int docID) {
      return array[docID];
    }

    /** Copies the document's long value into {@code ref} and returns it. */
    @Override
    public BytesRef getBytes(int docID, BytesRef ref) {
      DocValuesArraySource.copyLong(ref, getInt(docID));
      return ref;
    }
  }
}