package org.apache.lucene.codecs.simpletext;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.IOUtils;
/**
 * Writes plain-text DocValues.
 * <p>
 * Values passed to {@link #add(int, IndexableField)} are buffered in memory:
 * each distinct value is stored once in a {@link BytesRefHash} and every
 * document remembers the ordinal of its value. Nothing is written until
 * {@link #finish(int)} is called, which emits one file with this layout:
 * <pre>
 * SimpleTextDocValues
 * valuesize &lt;size&gt;
 *  doc &lt;docID&gt;
 *  value &lt;value&gt;
 * ...
 * END
 * </pre>
 * Documents that never received a value are written with a type-specific
 * default (zero bytes, an empty value, <code>0</code> or <code>0.0</code>).
 * <p>
 * <b><font color="red">FOR RECREATIONAL USE ONLY</font></b>
 *
 * @lucene.experimental
 */
public class SimpleTextDocValuesConsumer extends DocValuesConsumer {

  /** Textual default written for floating point documents without a value. */
  static final BytesRef ZERO_DOUBLE = new BytesRef(Double.toString(0d));
  /** Textual default written for integer documents without a value. */
  static final BytesRef ZERO_INT = new BytesRef(Integer.toString(0));
  /** First line of every file produced by this consumer. */
  static final BytesRef HEADER = new BytesRef("SimpleTextDocValues");
  /** Last line of every file produced by this consumer. */
  static final BytesRef END = new BytesRef("END");
  /** Prefix of the line holding the value size. */
  static final BytesRef VALUE_SIZE = new BytesRef("valuesize ");
  /** Prefix of the line holding a document id. */
  static final BytesRef DOC = new BytesRef("  doc ");
  /** Prefix of the line holding a document's value. */
  static final BytesRef VALUE = new BytesRef("  value ");

  /** Reusable buffer for encoding numeric values and formatting text. */
  protected BytesRef scratch = new BytesRef();
  /** Largest document id seen by {@link #add(int, IndexableField)}, or -1 if none. */
  protected int maxDocId = -1;
  protected final String segment;
  protected final Directory dir;
  protected final IOContext ctx;
  protected final Type type;
  /** Holds every distinct value exactly once; closed by {@link #finish(int)}. */
  protected final BytesRefHash hash;

  /**
   * Per-document ordinal into {@link #hash}, stored as <code>ord + 1</code> so
   * that the array default of 0 means "no value for this document".
   */
  private int[] ords;
  /**
   * Size in bytes shared by all values, -1 for variable-length types, or
   * {@link Integer#MIN_VALUE} while no value has been added yet.
   */
  private int valueSize = Integer.MIN_VALUE;
  /** Lazily created all-zero value for fixed-length byte types. */
  private BytesRef zeroBytes;
  private final String segmentSuffix;

  /**
   * Creates a consumer that buffers values of the given type.
   *
   * @param segment name of the segment the values belong to
   * @param dir directory the file is created in on {@link #finish(int)}
   * @param ctx IO context used when creating the output
   * @param type DocValues type of every value added to this consumer
   * @param segmentSuffix suffix used to build the file name
   */
  public SimpleTextDocValuesConsumer(String segment, Directory dir,
      IOContext ctx, Type type, String segmentSuffix) {
    this.ctx = ctx;
    this.dir = dir;
    this.segment = segment;
    this.type = type;
    hash = new BytesRefHash();
    ords = new int[0];
    this.segmentSuffix = segmentSuffix;
  }

  /**
   * Buffers the value of one document. Adding a value for a document id that
   * already has one replaces the earlier value.
   *
   * @param docID document the value belongs to, must be &gt;= 0
   * @param value field providing the binary or numeric value, depending on
   *        the type of this consumer
   * @throws IllegalArgumentException if the size of the value differs from
   *         the size of previously added values, or if the type of this
   *         consumer is not supported
   */
  @Override
  public void add(int docID, IndexableField value) throws IOException {
    assert docID >= 0;
    final int ord, vSize;
    switch (type) {
    case BYTES_FIXED_DEREF:
    case BYTES_FIXED_SORTED:
    case BYTES_FIXED_STRAIGHT: {
      // Fetch the value once; it is needed for both the size and the hash.
      final BytesRef bytes = value.binaryValue();
      vSize = bytes.length;
      ord = hash.add(bytes);
      break;
    }
    case BYTES_VAR_DEREF:
    case BYTES_VAR_SORTED:
    case BYTES_VAR_STRAIGHT: {
      final BytesRef bytes = value.binaryValue();
      vSize = -1;
      ord = hash.add(bytes);
      break;
    }
    case FIXED_INTS_16:
      vSize = 2;
      scratch.grow(2);
      DocValuesArraySource.copyShort(scratch, value.numericValue().shortValue());
      ord = hash.add(scratch);
      break;
    case FIXED_INTS_32:
      vSize = 4;
      scratch.grow(4);
      DocValuesArraySource.copyInt(scratch, value.numericValue().intValue());
      ord = hash.add(scratch);
      break;
    case FIXED_INTS_8:
      vSize = 1;
      scratch.grow(1);
      scratch.bytes[scratch.offset] = value.numericValue().byteValue();
      scratch.length = 1;
      ord = hash.add(scratch);
      break;
    case FIXED_INTS_64:
      vSize = 8;
      scratch.grow(8);
      DocValuesArraySource.copyLong(scratch, value.numericValue().longValue());
      ord = hash.add(scratch);
      break;
    case VAR_INTS:
      // Variable ints are buffered as full longs but flagged as variable size.
      vSize = -1;
      scratch.grow(8);
      DocValuesArraySource.copyLong(scratch, value.numericValue().longValue());
      ord = hash.add(scratch);
      break;
    case FLOAT_32:
      vSize = 4;
      scratch.grow(4);
      DocValuesArraySource.copyInt(scratch,
          Float.floatToRawIntBits(value.numericValue().floatValue()));
      ord = hash.add(scratch);
      break;
    case FLOAT_64:
      vSize = 8;
      scratch.grow(8);
      DocValuesArraySource.copyLong(scratch,
          Double.doubleToRawLongBits(value.numericValue().doubleValue()));
      ord = hash.add(scratch);
      break;
    default:
      // Same failure mode as writeDoc() so callers see one consistent error.
      throw new IllegalArgumentException("unsupported type: " + type);
    }

    if (valueSize == Integer.MIN_VALUE) {
      // First value: it defines the size every later value must match.
      assert maxDocId == -1;
      valueSize = vSize;
    } else if (valueSize != vSize) {
      throw new IllegalArgumentException("value size must be " + valueSize + " but was: " + vSize);
    }

    maxDocId = Math.max(docID, maxDocId);
    ords = grow(ords, docID);
    // A negative ord is decoded as (-ord)-1; store ord+1 so 0 stays "missing".
    ords[docID] = (ord < 0 ? (-ord) - 1 : ord) + 1;
  }

  /** Returns the header line written at the top of the file. */
  protected BytesRef getHeader() {
    return HEADER;
  }

  /** Returns an array that is valid at index <code>upto</code>, growing the given one if needed. */
  private int[] grow(int[] array, int upto) {
    if (array.length <= upto) {
      return ArrayUtil.grow(array, 1 + upto);
    }
    return array;
  }

  /** Makes sure {@link #ords} can be read for every document below <code>docCount</code>. */
  private void prepareFlush(int docCount) {
    assert ords != null;
    ords = grow(ords, docCount);
  }

  /**
   * Writes all buffered values to a new file in the directory and releases
   * the value hash. If writing fails the output is closed and the partially
   * written file is removed on a best-effort basis, so that the exception
   * describing the original failure is the one that reaches the caller.
   *
   * @param docCount number of documents to write; documents without a value
   *        get a type-specific default
   */
  @Override
  public void finish(int docCount) throws IOException {
    final String fileName = IndexFileNames.segmentFileName(segment, "",
        segmentSuffix);
    IndexOutput output = dir.createOutput(fileName, ctx);
    boolean success = false;
    BytesRef spare = new BytesRef();
    try {
      SimpleTextUtil.write(output, getHeader());
      SimpleTextUtil.writeNewline(output);
      SimpleTextUtil.write(output, VALUE_SIZE);
      SimpleTextUtil.write(output, Integer.toString(this.valueSize), scratch);
      SimpleTextUtil.writeNewline(output);
      prepareFlush(docCount);
      for (int i = 0; i < docCount; i++) {
        SimpleTextUtil.write(output, DOC);
        SimpleTextUtil.write(output, Integer.toString(i), scratch);
        SimpleTextUtil.writeNewline(output);
        SimpleTextUtil.write(output, VALUE);
        writeDoc(output, i, spare);
        SimpleTextUtil.writeNewline(output);
      }
      SimpleTextUtil.write(output, END);
      SimpleTextUtil.writeNewline(output);
      success = true;
    } finally {
      hash.close();
      if (success) {
        IOUtils.close(output);
      } else {
        IOUtils.closeWhileHandlingException(output);
        try {
          dir.deleteFile(fileName);
        } catch (Throwable ignored) {
          // Best effort only: a failure to delete the partial file must not
          // replace the exception that made the write fail in the first place.
        }
      }
    }
  }

  /**
   * Writes the textual form of one document's value, or the type-specific
   * default when the document has no value.
   *
   * @param output output to write to
   * @param docId document whose value is written
   * @param spare reusable buffer that receives the stored value
   * @throws IllegalArgumentException if the type of this consumer is not supported
   */
  protected void writeDoc(IndexOutput output, int docId, BytesRef spare) throws IOException {
    // ords holds ord+1, so -1 here means the document never got a value.
    int ord = ords[docId] - 1;
    if (ord != -1) {
      assert ord >= 0;
      hash.get(ord, spare);

      switch (type) {
      case BYTES_FIXED_DEREF:
      case BYTES_FIXED_SORTED:
      case BYTES_FIXED_STRAIGHT:
      case BYTES_VAR_DEREF:
      case BYTES_VAR_SORTED:
      case BYTES_VAR_STRAIGHT:
        SimpleTextUtil.write(output, spare);
        break;
      case FIXED_INTS_16:
        SimpleTextUtil.write(output,
            Short.toString(DocValuesArraySource.asShort(spare)), scratch);
        break;
      case FIXED_INTS_32:
        SimpleTextUtil.write(output,
            Integer.toString(DocValuesArraySource.asInt(spare)), scratch);
        break;
      case VAR_INTS:
      case FIXED_INTS_64:
        SimpleTextUtil.write(output,
            Long.toString(DocValuesArraySource.asLong(spare)), scratch);
        break;
      case FIXED_INTS_8:
        assert spare.length == 1 : spare.length;
        SimpleTextUtil.write(output,
            Integer.toString(spare.bytes[spare.offset]), scratch);
        break;
      case FLOAT_32:
        float valueFloat = Float.intBitsToFloat(DocValuesArraySource.asInt(spare));
        SimpleTextUtil.write(output, Float.toString(valueFloat), scratch);
        break;
      case FLOAT_64:
        double valueDouble = Double.longBitsToDouble(DocValuesArraySource
            .asLong(spare));
        SimpleTextUtil.write(output, Double.toString(valueDouble), scratch);
        break;
      default:
        throw new IllegalArgumentException("unsupported type: " + type);
      }
    } else {
      switch (type) {
      case BYTES_FIXED_DEREF:
      case BYTES_FIXED_SORTED:
      case BYTES_FIXED_STRAIGHT:
        if (zeroBytes == null) {
          assert valueSize > 0;
          // Fixed-length default: as many zero bytes as every real value has.
          zeroBytes = new BytesRef(new byte[valueSize]);
        }
        SimpleTextUtil.write(output, zeroBytes);
        break;
      case BYTES_VAR_DEREF:
      case BYTES_VAR_SORTED:
      case BYTES_VAR_STRAIGHT:
        // Variable-length default: the empty value.
        scratch.length = 0;
        SimpleTextUtil.write(output, scratch);
        break;
      case FIXED_INTS_16:
      case FIXED_INTS_32:
      case FIXED_INTS_64:
      case FIXED_INTS_8:
      case VAR_INTS:
        SimpleTextUtil.write(output, ZERO_INT);
        break;
      case FLOAT_32:
      case FLOAT_64:
        SimpleTextUtil.write(output, ZERO_DOUBLE);
        break;
      default:
        throw new IllegalArgumentException("unsupported type: " + type);
      }
    }
  }

  /** Returns the DocValues type this consumer was created with. */
  @Override
  protected Type getType() {
    return type;
  }

  /**
   * Returns the size in bytes shared by all added values, -1 for
   * variable-length types, or {@link Integer#MIN_VALUE} if no value has been
   * added yet.
   */
  @Override
  public int getValueSize() {
    return valueSize;
  }
}