package com.yahoo.glimmer.indexing.generator; /* * Copyright (c) 2012 Yahoo! Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software distributed under the License is * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and limitations under the License. * See accompanying LICENSE file. */ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import org.apache.hadoop.io.WritableComparable; /** * The value that is passed between the mapper and reducer. * * Can represent different thing depending the type * * @author tep */ public class TermValue implements WritableComparable<TermValue>, Cloneable { /** * TermValue Type. The order of this enum is important as it determines the * order in which the values are give in the Reducers values Iterable. */ public enum Type { /** * For every doc a DOC_STATS is written. v1 = term occurrence count in * doc. v2 = position of last term occurrence. */ TERM_STATS, /** * For each unique term in a doc, a PREDICATE_ID is written. v1 = the id * of the index for the term. */ INDEX_ID, /** * For every term in every doc an OCCURRENCE is written. v1 = doc id, v2 * = terms position. */ OCCURRENCE, /** * To generate the doc sizes for each index we need to know the number of terms per doc per index. * Unlike the other types the term here is irrelevant. And will be set to TermKey.DOC_SIZE_TERM. * v1 = the doc id & v2 = term count for this field/index. * Note that all DOC_SIZE values will only go to mapper 0 and need special handling * during index merging. */ DOC_SIZE; } private Type type; private long v1; private int v2; public TermValue(Type type, long v1) { if (type != Type.INDEX_ID) { throw new IllegalArgumentException("Type " + type + " is not value with 1 arg"); } this.type = type; this.v1 = v1; } public TermValue(Type type, long v1, int v2) { if (type != Type.TERM_STATS && type != Type.OCCURRENCE && type != Type.DOC_SIZE) { throw new IllegalArgumentException("Type " + type + " is not value with 2 args"); } this.type = type; this.v1 = v1; this.v2 = v2; } public TermValue() { } public TermValue(TermValue that) { set(that); } public void set(TermValue that) { type = that.type; v1 = that.v1; v2 = that.v2; } public Type getType() { return type; } public long getV1() { return v1; } public int getV2() { return v2; } public void readFields(DataInput in) throws IOException { type = Type.values()[in.readInt()]; v1 = in.readLong(); v2 = in.readInt(); } public void write(DataOutput out) throws IOException { out.writeInt(type.ordinal()); out.writeLong(v1); out.writeInt(v2); } @Override public boolean equals(Object o) { if (o instanceof TermValue) { TermValue that = (TermValue) o; return type == that.type && v1 == that.v1 && v2 == that.v2; } return false; } @Override public int hashCode() { int hash = 7; hash = 31 * hash + type.hashCode(); hash = 31 * hash + (int)(v1 ^ (v1 >>> 32)); hash = 31 * hash + v2; return hash; } public String toString() { return type.name() + "(" + v1 + "," + v2 + ")"; } public int compareTo(TermValue that) { long i = type.compareTo(that.type); if (i != 0) { return (int)i; } i = v1 - that.v1; if (i != 0) { return i > 0 ? 1 : -1; } i = v2 - that.v2; return (int)i; } }