/* * Copyright (C) 2014 Indeed Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing permissions and * limitations under the License. */ package com.indeed.flamdex; import com.google.common.base.Charsets; import com.google.common.base.Throwables; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.io.Files; import com.indeed.flamdex.api.DocIdStream; import com.indeed.flamdex.api.FlamdexOutOfMemoryException; import com.indeed.flamdex.api.FlamdexReader; import com.indeed.flamdex.api.GenericIntTermDocIterator; import com.indeed.flamdex.api.GenericStringTermDocIterator; import com.indeed.flamdex.api.IntTermDocIterator; import com.indeed.flamdex.api.IntTermIterator; import com.indeed.flamdex.api.IntValueLookup; import com.indeed.flamdex.api.StringTermDocIterator; import com.indeed.flamdex.api.StringTermIterator; import com.indeed.flamdex.api.StringValueLookup; import com.indeed.flamdex.api.TermIterator; import com.indeed.flamdex.fieldcache.IntArrayIntValueLookup; import com.indeed.flamdex.utils.FlamdexUtils; import com.indeed.flamdex.writer.FlamdexDocWriter; import com.indeed.flamdex.writer.FlamdexDocument; import com.indeed.flamdex.writer.FlamdexWriter; import com.indeed.flamdex.writer.IntFieldWriter; import com.indeed.flamdex.writer.StringFieldWriter; import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.longs.Long2ObjectMap; import it.unimi.dsi.fastutil.longs.Long2ObjectRBTreeMap; import it.unimi.dsi.fastutil.longs.Long2ObjectSortedMap; import it.unimi.dsi.fastutil.longs.LongList; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import it.unimi.dsi.fastutil.longs.LongSet; import java.io.Closeable; import java.io.DataInput; import java.io.DataOutput; import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CodingErrorAction; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; /** * @author jsgroth */ public final class MemoryFlamdex implements FlamdexReader, FlamdexWriter, FlamdexDocWriter { private static final long TREE_MAP_USAGE = 8 + 4 + 4 + 4 + 4 + 12 + 16 + 4 + 4 + 12; private static final long TREE_MAP_ENTRY_USAGE = 8 + 4 + 4 + 4 + 4 + 4 + 1; //todo jeffp fix this for longs (or don't) private static final long INT_2_OBJECT_RB_TREE_MAP_USAGE = 8 + 4 + 4 + 4 + 4 + 12 + 12 + 12 + 1 + 4 + 12 + 4 + 12 + 64 + 4 + 12 + 256 + 4; private static final long INT_2_OBJECT_RB_TREE_MAP_ENTRY_USAGE = 8 + 4 + 4 + 4 + 4 + 4; private static final long STRING_USAGE = 8 + 4 + 12 + 4 + 4 + 4; private static final long INT_ARRAY_LIST_USAGE = 8 + 12 + 4; private final SortedMap<String, Long2ObjectSortedMap<IntArrayList>> intFields = Maps.newTreeMap(); private final SortedMap<String, SortedMap<String, IntArrayList>> stringFields = Maps.newTreeMap(); private int numDocs; private long memoryUsageEstimate = initialMemoryUsageEstimate(); @Override public Collection<String> getIntFields() { return intFields.keySet(); } @Override public Collection<String> getStringFields() { return stringFields.keySet(); } @Override public int getNumDocs() { return numDocs; } /* * Does nothing */ @Override public String getDirectory() { String dir = null; File tempDir; tempDir = Files.createTempDir(); try { dir = tempDir.getCanonicalPath(); } catch (IOException e) { } return dir; } /* * Does nothing */ @Override public String getOutputDirectory() { return null; } @Override public void resetMaxDocs(long numDocs) { /* does nothing */ } public MemoryFlamdex setNumDocs(int numDocs) { this.numDocs = numDocs; return this; } @Override public DocIdStream getDocIdStream() { return new MemoryDocIdStream(); } @Override public IntTermIterator getIntTermIterator(String field) { return new MemoryIntTermIterator(field); } @Override public StringTermIterator getStringTermIterator(String field) { return new MemoryStringTermIterator(field); } @Override public IntTermDocIterator getIntTermDocIterator(final String field) { return new GenericIntTermDocIterator(getIntTermIterator(field), getDocIdStream()); } @Override public StringTermDocIterator getStringTermDocIterator(final String field) { return new GenericStringTermDocIterator(getStringTermIterator(field), getDocIdStream()); } @Override public long getIntTotalDocFreq(String field) { return FlamdexUtils.getIntTotalDocFreq(this, field); } @Override public long getStringTotalDocFreq(String field) { return FlamdexUtils.getStringTotalDocFreq(this, field); } @Override public Collection<String> getAvailableMetrics() { return Collections.emptyList(); } @Override public IntValueLookup getMetric(String metric) throws FlamdexOutOfMemoryException { return new IntArrayIntValueLookup(FlamdexUtils.cacheIntField(metric, this)); } public StringValueLookup getStringLookup(final String field) throws FlamdexOutOfMemoryException { throw new UnsupportedOperationException(); } @Override public long memoryRequired(String metric) { return 0L; } @Override @SuppressWarnings("MismatchedQueryAndUpdateOfCollection") public IntFieldWriter getIntFieldWriter(final String field) throws IOException { return new IntFieldWriter() { private final Long2ObjectSortedMap<IntArrayList> terms = new Long2ObjectRBTreeMap<IntArrayList>(); private IntArrayList currentDocList; private long term; @Override public void nextTerm(long term) throws IOException { if (currentDocList != null && currentDocList.size() > 0) terms.put(this.term, currentDocList); this.term = term; currentDocList = new IntArrayList(); } @Override public void nextDoc(int doc) throws IOException { if (doc >= numDocs) { throw new IllegalArgumentException("invalid doc: doc="+doc+", numDocs="+numDocs); } currentDocList.add(doc); } @Override public void close() throws IOException { if (currentDocList != null && currentDocList.size() > 0) terms.put(term, currentDocList); intFields.put(field, terms); memoryUsageEstimate += usage(field, terms); } }; } @Override @SuppressWarnings("MismatchedQueryAndUpdateOfCollection") public StringFieldWriter getStringFieldWriter(final String field) throws IOException { return new StringFieldWriter() { private final SortedMap<String, IntArrayList> terms = Maps.newTreeMap(); private IntArrayList currentDocList; private String term; @Override public void nextTerm(String term) throws IOException { if (currentDocList != null && currentDocList.size() > 0) terms.put(this.term, currentDocList); this.term = term; currentDocList = new IntArrayList(); } @Override public void nextDoc(int doc) throws IOException { if (doc >= numDocs) { throw new IllegalArgumentException("invalid doc: doc="+doc+", numDocs="+numDocs); } currentDocList.add(doc); } @Override public void close() throws IOException { if (currentDocList != null && currentDocList.size() > 0) terms.put(term, currentDocList); stringFields.put(field, terms); memoryUsageEstimate += usage(field, terms); } }; } @Override public void addDocument(FlamdexDocument doc) { final Map<String, LongList> docIntFields = doc.getIntFields(); for (final String intField : docIntFields.keySet()) { Long2ObjectSortedMap<IntArrayList> myIntTerms = intFields.get(intField); if (myIntTerms == null) { intFields.put(intField, myIntTerms = new Long2ObjectRBTreeMap<IntArrayList>()); memoryUsageEstimate += TREE_MAP_ENTRY_USAGE + usage(intField) + INT_2_OBJECT_RB_TREE_MAP_USAGE; } final LongSet seenIntTerms = new LongOpenHashSet(); final LongList terms = docIntFields.get(intField); for (int i = 0; i < terms.size(); ++i) { final long term = terms.getLong(i); if (seenIntTerms.contains(term)) continue; seenIntTerms.add(term); IntArrayList docList = myIntTerms.get(term); if (docList == null) { myIntTerms.put(term, docList = new IntArrayList()); memoryUsageEstimate += INT_2_OBJECT_RB_TREE_MAP_ENTRY_USAGE + INT_ARRAY_LIST_USAGE + (4 * docList.elements().length); } memoryUsageEstimate -= 4 * docList.elements().length; docList.add(numDocs); memoryUsageEstimate += 4 * docList.elements().length; } } final Map<String, List<String>> docStringFields = doc.getStringFields(); for (final String stringField : docStringFields.keySet()) { SortedMap<String, IntArrayList> myStringTerms = stringFields.get(stringField); if (myStringTerms == null) { stringFields.put(stringField, myStringTerms = new TreeMap<String, IntArrayList>()); memoryUsageEstimate += TREE_MAP_ENTRY_USAGE + usage(stringField) + TREE_MAP_USAGE; } final Set<String> seenStringTerms = new HashSet<String>(); final List<String> terms = docStringFields.get(stringField); for (final String term : terms) { if (seenStringTerms.contains(term)) continue; seenStringTerms.add(term); IntArrayList docList = myStringTerms.get(term); if (docList == null) { myStringTerms.put(term, docList = new IntArrayList()); memoryUsageEstimate += TREE_MAP_ENTRY_USAGE + usage(term) + INT_ARRAY_LIST_USAGE + (4 * docList.elements().length); } memoryUsageEstimate -= 4 * docList.elements().length; docList.add(numDocs); memoryUsageEstimate += 4 * docList.elements().length; } } ++numDocs; } @Override public void close() throws IOException { } public MemoryFlamdex() { this(false); } public MemoryFlamdex(final boolean replaceMalformedInput) { if (replaceMalformedInput) { encoder = Charsets.UTF_8.newEncoder().onMalformedInput(CodingErrorAction.REPLACE).onUnmappableCharacter(CodingErrorAction.REPLACE); } else { encoder = Charsets.UTF_8.newEncoder(); } } public long getMemoryUsageEstimate() { return memoryUsageEstimate; } public void write(DataOutput out) throws IOException { out.writeInt(numDocs); out.writeInt(intFields.size()); for (final String intField : intFields.keySet()) { writeString(out, intField); } out.writeInt(stringFields.size()); for (final String stringField : stringFields.keySet()) { writeString(out, stringField); } for (final String intField : intFields.keySet()) { final Long2ObjectSortedMap<IntArrayList> terms = intFields.get(intField); writeVLong(terms.size(), out); long lastTerm = 0; for (final Long2ObjectMap.Entry<IntArrayList> e : terms.long2ObjectEntrySet()) { final long term = e.getLongKey(); final IntList docList = e.getValue(); writeVLong(term - lastTerm, out); lastTerm = term; writeVLong(docList.size(), out); int lastDoc = 0; for (int i = 0; i < docList.size(); ++i) { final int doc = docList.getInt(i); writeVLong(doc - lastDoc, out); lastDoc = doc; } } } for (final String stringField : stringFields.keySet()) { final Map<String, IntArrayList> terms = stringFields.get(stringField); writeVLong(terms.size(), out); byte[] lastTermBytes = new byte[0]; int lastTermLength = 0; for (final Map.Entry<String, IntArrayList> e : terms.entrySet()) { final String term = e.getKey(); final IntList docList = e.getValue(); final ByteBuffer encoded = encoder.encode(CharBuffer.wrap(term)); final byte[] termBytes = encoded.array(); final int termLength = encoded.limit(); final int prefixLen = getPrefixLen(lastTermBytes, termBytes, lastTermLength); final int newLen = termLength - prefixLen; writeVLong(prefixLen, out); writeVLong(newLen, out); out.write(termBytes, prefixLen, newLen); lastTermBytes = termBytes; lastTermLength = termLength; writeVLong(docList.size(), out); int lastDoc = 0; for (int i = 0; i < docList.size(); ++i) { final int doc = docList.getInt(i); writeVLong(doc - lastDoc, out); lastDoc = doc; } } } } public void readFields(DataInput in) throws IOException { numDocs = in.readInt(); final int numIntFields = in.readInt(); intFields.clear(); for (int z = 0; z < numIntFields; ++z) { final String intField = readString(in); intFields.put(intField, new Long2ObjectRBTreeMap<IntArrayList>()); } final int numStringFields = in.readInt(); stringFields.clear(); for (int z = 0; z < numStringFields; ++z) { final String stringField = readString(in); stringFields.put(stringField, new TreeMap<String, IntArrayList>()); } for (final String intField : intFields.keySet()) { final Long2ObjectMap<IntArrayList> terms = intFields.get(intField); final long numTerms = readVLong(in); long term = 0; for (long l = 0; l < numTerms; l++) { final long termDelta = readVLong(in); term += termDelta; final int docFreq = (int)readVLong(in); final IntArrayList docList = new IntArrayList(docFreq); terms.put(term, docList); int doc = 0; for (int x = 0; x < docFreq; ++x) { final int docDelta = (int)readVLong(in); doc += docDelta; docList.add(doc); } } } final CharsetDecoder decoder = Charsets.UTF_8.newDecoder(); for (final String stringField : stringFields.keySet()) { final Map<String, IntArrayList> terms = stringFields.get(stringField); final long numTerms = readVLong(in); byte[] term = new byte[10]; ByteBuffer termBuf = ByteBuffer.wrap(term); for (long l = 0; l < numTerms; l++) { final long prefixLen = readVLong(in); final int newLen = (int)readVLong(in); final int termLen = (int)prefixLen + newLen; if (termLen > term.length) { term = Arrays.copyOf(term, Math.max(termLen, 2 * term.length)); termBuf = ByteBuffer.wrap(term); } if (newLen > 0) { in.readFully(term, (int)prefixLen, newLen); } final String termStr = decoder.decode((ByteBuffer)termBuf.position(0).limit(termLen)).toString(); final int docFreq = (int)readVLong(in); final IntArrayList docList = new IntArrayList(docFreq); terms.put(termStr, docList); int doc = 0; for (int i = 0; i < docFreq; ++i) { final int docDelta = (int)readVLong(in); doc += docDelta; docList.add(doc); } } } memoryUsageEstimate = initialMemoryUsageEstimate(); for (final Map.Entry<String, Long2ObjectSortedMap<IntArrayList>> e : intFields.entrySet()) { memoryUsageEstimate += usage(e.getKey(), e.getValue()); } for (final Map.Entry<String, SortedMap<String, IntArrayList>> e : stringFields.entrySet()) { memoryUsageEstimate += usage(e.getKey(), e.getValue()); } } private static long initialMemoryUsageEstimate() { long size = 20; size += 2 * TREE_MAP_USAGE; return size; } private static long usage(String intField, Long2ObjectMap<IntArrayList> terms) { long size = TREE_MAP_ENTRY_USAGE; size += usage(intField); size += usage(terms); return size; } private static long usage(String stringField, SortedMap<String, IntArrayList> terms) { long size = TREE_MAP_ENTRY_USAGE; size += usage(stringField); size += usage(terms); return size; } private static long usage(String s) { return STRING_USAGE + 2 * s.length(); } private static long usage(Long2ObjectMap<IntArrayList> map) { long size = INT_2_OBJECT_RB_TREE_MAP_USAGE; size += map.size() * INT_2_OBJECT_RB_TREE_MAP_ENTRY_USAGE; for (final IntArrayList list : map.values()) { size += INT_ARRAY_LIST_USAGE + 4 * list.elements().length; } return size; } private static long usage(SortedMap<String, IntArrayList> map) { long size = TREE_MAP_USAGE; for (final Map.Entry<String, IntArrayList> e : map.entrySet()) { size += TREE_MAP_ENTRY_USAGE; size += usage(e.getKey()); size += INT_ARRAY_LIST_USAGE + 4 * e.getValue().elements().length; } return size; } // DO NOT USE THIS METHOD UNLESS YOU KNOW WHAT YOU ARE DOING public static FlamdexReader streamer(final DataInput in) throws IOException { final int numDocs = in.readInt(); final int numIntFields = in.readInt(); final SortedSet<String> intFields = Sets.newTreeSet(); for (int i = 0; i < numIntFields; ++i) { intFields.add(readString(in)); } final int numStringFields = in.readInt(); final SortedSet<String> stringFields = Sets.newTreeSet(); for (int i = 0; i < numStringFields; ++i) { stringFields.add(readString(in)); } final CharsetDecoder decoder = Charsets.UTF_8.newDecoder(); return new FlamdexReader() { long intTerm; byte[] stringTermBytes = new byte[100]; ByteBuffer stringTermBuf = ByteBuffer.wrap(stringTermBytes); String stringTerm; int stringTermLen; int docFreq; int[] docList = new int[1000]; @Override public Collection<String> getIntFields() { return intFields; } @Override public Collection<String> getStringFields() { return stringFields; } @Override public int getNumDocs() { return numDocs; } /* * Does nothing */ @Override public String getDirectory() { return "."; } @Override public DocIdStream getDocIdStream() { return new DocIdStream() { int[] myDocList = new int[1000]; int pos; int myDocListSize; @Override public void reset(TermIterator term) { if (docFreq > myDocList.length) { myDocList = new int[Math.max(docFreq, 2 * myDocList.length)]; } System.arraycopy(docList, 0, myDocList, 0, docFreq); pos = 0; myDocListSize = docFreq; } @Override public int fillDocIdBuffer(int[] docIdBuffer) { final int n = Math.min(myDocListSize - pos, docIdBuffer.length); System.arraycopy(myDocList, pos, docIdBuffer, 0, n); pos += n; return n; } @Override public void close() { } }; } @Override public IntTermIterator getIntTermIterator(String field) { final long numTerms; try { numTerms = readVLong(in); } catch (IOException e) { throw Throwables.propagate(e); } intTerm = 0; return new IntTermIterator() { long termIndex = 0; @Override public void reset(long term) { throw new UnsupportedOperationException(); } @Override public long term() { return intTerm; } @Override public boolean next() { if (termIndex >= numTerms) return false; termIndex++; try { final long termDelta = readVLong(in); intTerm += termDelta; docFreq = (int)readVLong(in); if (docFreq > docList.length) { docList = new int[Math.max(docFreq, 2 * docList.length)]; } int doc = 0; for (int i = 0; i < docFreq; ++i) { doc += (int)readVLong(in); docList[i] = doc; } return true; } catch (IOException e) { throw new RuntimeException(e); } } @Override public int docFreq() { return docFreq; } @Override public void close() { } }; } @Override public StringTermIterator getStringTermIterator(String field) { final long numTerms; try { numTerms = readVLong(in); } catch (IOException e) { throw Throwables.propagate(e); } stringTermLen = 0; return new StringTermIterator() { long termIndex = 0; @Override public void reset(String term) { throw new UnsupportedOperationException(); } @Override public String term() { return stringTerm; } @Override public boolean next() { if (termIndex >= numTerms) return false; termIndex++; try { final long prefixLen = readVLong(in); final int newLen = (int)readVLong(in); stringTermLen = (int)prefixLen + newLen; if (stringTermLen > stringTermBytes.length) { stringTermBytes = Arrays.copyOf(stringTermBytes, Math.max(stringTermLen, 2 * stringTermBytes.length)); stringTermBuf = ByteBuffer.wrap(stringTermBytes); } if (newLen > 0) { in.readFully(stringTermBytes, (int)prefixLen, newLen); } stringTerm = decoder.decode((ByteBuffer)stringTermBuf.position(0).limit(stringTermLen)).toString(); docFreq = (int)readVLong(in); if (docFreq > docList.length) { docList = new int[Math.max(docFreq, 2 * docList.length)]; } int doc = 0; for (int i = 0; i < docFreq; ++i) { doc += (int)readVLong(in); docList[i] = doc; } return true; } catch (IOException e) { throw new RuntimeException(e); } } @Override public int docFreq() { return docFreq; } @Override public void close() { } }; } @Override public IntTermDocIterator getIntTermDocIterator(final String field) { return new GenericIntTermDocIterator(getIntTermIterator(field), getDocIdStream()); } @Override public StringTermDocIterator getStringTermDocIterator(final String field) { return new GenericStringTermDocIterator(getStringTermIterator(field), getDocIdStream()); } @Override public long getIntTotalDocFreq(String field) { throw new UnsupportedOperationException(); } @Override public long getStringTotalDocFreq(String field) { throw new UnsupportedOperationException(); } @Override public Collection<String> getAvailableMetrics() { throw new UnsupportedOperationException(); } @Override public IntValueLookup getMetric(String metric) throws FlamdexOutOfMemoryException { throw new UnsupportedOperationException(); } public StringValueLookup getStringLookup(final String field) throws FlamdexOutOfMemoryException { throw new UnsupportedOperationException(); } @Override public long memoryRequired(String metric) { throw new UnsupportedOperationException(); } @Override public void close() throws IOException { if (in instanceof Closeable) { ((Closeable)in).close(); } } }; } public MemoryFlamdex shallowCopy() { final MemoryFlamdex ret = new MemoryFlamdex(); ret.numDocs = numDocs; ret.intFields.putAll(intFields); ret.stringFields.putAll(stringFields); return ret; } private static int getPrefixLen(byte[] a, byte[] b, int n) { for (int i = 0; i < n; ++i) { if (a[i] != b[i]) return i; } return n; } private interface MemoryTermIterator extends TermIterator { IntList getDocList(); } private class MemoryIntTermIterator implements MemoryTermIterator, IntTermIterator { private final Long2ObjectSortedMap<IntArrayList> map; private Iterator<Long2ObjectMap.Entry<IntArrayList>> keys; private long term; private IntList docList; private MemoryIntTermIterator(final String field) { map = intFields.get(field); keys = map.long2ObjectEntrySet().iterator(); } @Override public void reset(long term) { keys = map.tailMap(term).long2ObjectEntrySet().iterator(); } @Override public long term() { return term; } @Override public IntList getDocList() { return docList; } @Override public boolean next() { if (!keys.hasNext()) { return false; } final Long2ObjectMap.Entry<IntArrayList> e = keys.next(); term = e.getLongKey(); docList = e.getValue(); // noinspection SimplifiableIfStatement if (docList.isEmpty()) return next(); return true; } @Override public int docFreq() { return docList.size(); } @Override public void close() { } } private class MemoryStringTermIterator implements MemoryTermIterator, StringTermIterator { private final SortedMap<String, IntArrayList> map; private Iterator<Map.Entry<String, IntArrayList>> keys; private String term; private IntList docList; private MemoryStringTermIterator(final String field) { map = stringFields.get(field); keys = map.entrySet().iterator(); } @Override public void reset(String term) { keys = map.tailMap(term).entrySet().iterator(); } @Override public String term() { return term; } @Override public IntList getDocList() { return docList; } @Override public boolean next() { if (!keys.hasNext()) { return false; } final Map.Entry<String, IntArrayList> e = keys.next(); term = e.getKey(); docList = e.getValue(); // noinspection SimplifiableIfStatement if (docList.isEmpty()) return next(); return true; } @Override public int docFreq() { return docList.size(); } @Override public void close() { } } private class MemoryDocIdStream implements DocIdStream { private IntList docList; private int index; @Override public void reset(TermIterator term) { if (!(term instanceof MemoryTermIterator)) { throw new IllegalArgumentException("invalid term iterator"); } internalReset((MemoryTermIterator)term); } private void internalReset(final MemoryTermIterator term) { docList = term.getDocList(); index = 0; } @Override public int fillDocIdBuffer(int[] docIdBuffer) { final int n = Math.min(docIdBuffer.length, docList.size() - index); for (int i = 0; i < n; ++i) { docIdBuffer[i] = docList.getInt(index++); } return n; } @Override public void close() { } } private final CharsetEncoder encoder; private static final ThreadLocal<CharsetEncoder> ENCODER = new ThreadLocal<CharsetEncoder>() { @Override protected CharsetEncoder initialValue() { return Charsets.UTF_8.newEncoder(); } }; private static final ThreadLocal<CharsetDecoder> DECODER = new ThreadLocal<CharsetDecoder>() { @Override protected CharsetDecoder initialValue() { return Charsets.UTF_8.newDecoder(); } }; private static String readString(DataInput in) throws IOException { final int len = (int) readVLong(in); final byte[] bytes = new byte[len]; in.readFully(bytes); return DECODER.get().decode(ByteBuffer.wrap(bytes)).toString(); } private static void writeString(DataOutput out, String s) throws IOException { final ByteBuffer encoded = ENCODER.get().encode(CharBuffer.wrap(s)); final int len = encoded.limit(); writeVLong(len, out); out.write(encoded.array(), 0, len); } /* * the following methods were forked from org.apache.hadoop.io.WritableUtils */ private static void writeVLong(long i, DataOutput out) throws IOException { if (i >= -112 && i <= 127) { out.write((int)(i & 0xFF)); return; } int len = -112; if (i < 0) { i = ~i; len = -120; } long tmp = i; while (tmp != 0) { tmp = tmp >> 8; len--; } out.write(len & 0xFF); len = (len < -120) ? -(len + 120) : -(len + 112); for (int idx = len; idx != 0; idx--) { int shiftbits = (idx - 1) * 8; long mask = 0xFFL << shiftbits; out.write((int)((i & mask) >>> shiftbits)); } } private static long readVLong(DataInput in) throws IOException { byte firstByte = in.readByte(); int len = decodeVIntSize(firstByte); if (len == 1) { return firstByte; } long i = 0; for (int idx = 0; idx < len - 1; idx++) { int b = in.readUnsignedByte(); i = i << 8; i = i | b; } return (isNegativeVInt(firstByte) ? (~i) : i); } private static boolean isNegativeVInt(byte value) { return value < -120 || (value >= -112 && value < 0); } private static int decodeVIntSize(byte value) { if (value >= -112) { return 1; } else if (value < -120) { return -119 - value; } return -111 - value; } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; MemoryFlamdex that = (MemoryFlamdex) o; if (numDocs != that.numDocs) return false; if (intFields != null ? !intFields.equals(that.intFields) : that.intFields != null) return false; if (stringFields != null ? !stringFields.equals(that.stringFields) : that.stringFields != null) return false; return true; } @Override public int hashCode() { int result = intFields != null ? intFields.hashCode() : 0; result = 31 * result + (stringFields != null ? stringFields.hashCode() : 0); result = 31 * result + numDocs; return result; } }