package com.pearson.entech.elasticsearch.plugin.approx; import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.Arrays; import java.util.List; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import com.pearson.entech.elasticsearch.search.facet.approx.termlist.Constants; /** * Utilities for handling BytesRef objects and data structures containing them. */ public class BytesRefUtils { /** The compact() method on BytesRefHash objects. */ private static volatile Method __compact; /** An empty array of objects: the params required for reflective-invocation of __compact. */ private static final Object[] __emptyParams = {}; /** * A procedure for processing entries in a BytesRefHash. */ public static interface Procedure { /** * Called once for each BytesRef. * * @param ref the BytesRef * @throws Exception */ void consume(BytesRef ref) throws Exception; } /** * Helper method to get the package-scoped compact() method of BytesRefHash. * * @return the method reference */ private static Method getCompactMethod() { if(__compact != null) return __compact; Method compact; try { compact = BytesRefHash.class.getDeclaredMethod("compact", new Class[0]); } catch(final SecurityException e) { throw new RuntimeException(e); } catch(final NoSuchMethodException e) { throw new RuntimeException(e); } compact.setAccessible(true); return __compact = compact; } /** * Destructively process the entries in a BytesRefHash, calling a procedure once for * each distinct entry's value. After processing, the hash will be emptied. * * @param hash the hash to process * @param proc the procedure to call on each entry * @throws IllegalStateException if the procedure threw any exception */ public static void process(final BytesRefHash hash, final Procedure proc) { int[] ids; try { ids = (int[]) getCompactMethod().invoke(hash, __emptyParams); } catch(final IllegalAccessException e) { throw new RuntimeException(e); } catch(final InvocationTargetException e) { throw new RuntimeException(e); } final BytesRef scratch = new BytesRef(); for(int i = 0; i < ids.length; i++) { final int id = ids[i]; if(id < 0) break; hash.get(id, scratch); try { proc.consume(scratch); } catch(final Exception e) { throw new IllegalStateException(e); } } hash.clear(); } /** * Merge multiple BytesRefHash objects into the first one provided. All the others * will be emptied during this process. * * @param hashes the hashes to merge */ public static void merge(final BytesRefHash... hashes) { if(hashes.length < 1) throw new IllegalArgumentException("Cannot merge empty array of BytesRefHash objects"); if(hashes.length == 1) return; final AddToHash proc = new AddToHash(hashes[0]); for(int i = 1; i < hashes.length; i++) { process(hashes[i], proc); } } /** * Serialize a hash to an ElasticSearch StreamOutput. * * @param hash a BytesRefHash * @param out the StreamOutput * @throws IOException */ public static void serialize(final BytesRefHash hash, final StreamOutput out) throws IOException { final ElasticSearchSerializer proc = new ElasticSearchSerializer(out, hash.size()); try { process(hash, proc); } catch(final IllegalStateException e) { throw new IOException(e.getCause()); } } /** * Deserialize a hash from an ElasticSearch StreamInput. * * @param in the StreamInput * @return a new BytesRefHash * @throws IOException */ public static BytesRefHash deserialize(final StreamInput in) throws IOException { final BytesRefHash output = new BytesRefHash(); final int entries = in.readVInt(); byte[] scratch = null; for(int i = 0; i < entries; i++) { final int length = in.readVInt(); // Reuse previous byte array if long enough, otherwise create new one if(scratch == null || scratch.length < length) { scratch = new byte[length]; } in.readBytes(scratch, 0, length); output.add(new BytesRef(scratch, 0, length)); } return output; } /** * Procedure for adding BytesRefs to a BytesRefHash. */ public static class AddToHash implements Procedure { private final BytesRefHash _target; /** * Create a new procedure. * * @param target the BytesRefHash to merge into */ public AddToHash(final BytesRefHash target) { _target = target; } @Override public void consume(final BytesRef ref) throws Exception { _target.add(ref); } } /** * Procedure for interpreting a BytesRefHash as a set of UTF8 strings. */ public static class AsStrings implements Procedure { private final String[] _strings; private int _ptr; private final Constants.FIELD_DATA_TYPE _dataType; /** * Create a new procedure to extract strings into an array of the given size. * * @param size the number of elements to accommodate */ public AsStrings(final int size, final Constants.FIELD_DATA_TYPE dataType) { _strings = new String[size]; _ptr = 0; _dataType = dataType; } @Override public void consume(final BytesRef ref) throws Exception { //check both ref length and data type before consuming the ref value if(ref.length == NumericUtils.BUF_SIZE_LONG && _dataType == Constants.FIELD_DATA_TYPE.LONG) { _strings[_ptr++] = Long.toString(NumericUtils.prefixCodedToLong(ref)); } else if(ref.length == NumericUtils.BUF_SIZE_INT && _dataType == Constants.FIELD_DATA_TYPE.INT) { _strings[_ptr++] = Integer.toString(NumericUtils.prefixCodedToInt(ref)); } else _strings[_ptr++] = ref.utf8ToString(); } /** * Get an array containing all of the entries converted to strings. * * @return the array */ public String[] getArray() { return _strings; } /** * Get a list view of the array returned by getArray(). * * @return the list */ public List<String> getList() { return Arrays.asList(_strings); } } /** * Procedure for serializing a BytesRefHash to an ElasticSearch StreamOutput. */ public static class ElasticSearchSerializer implements Procedure { private final StreamOutput _out; /** * Create a new ElasticSearchSerializer. * * @param out the StreamOutput to write to * @param entries the number of entries (BytesRef objects) to write * @throws IOException */ public ElasticSearchSerializer(final StreamOutput out, final int entries) throws IOException { _out = out; _out.writeVInt(entries); } @Override public void consume(final BytesRef ref) throws Exception { _out.writeBytesRef(ref); } } }