/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.bigdata.collocations;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.charset.CharacterCodingException;
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.mahout.math.Varint;
import org.dkpro.bigdata.collocations.Gram.Type;
/**
 * A Hadoop key for a {@link Gram}: the gram's identity bytes (a type byte followed by the encoded
 * string) plus a trailing {@code byte[]} that is used for secondary ordering.
 *
 * <p>Both parts share one backing buffer. The first {@link #getPrimaryLength()} bytes are the
 * gram's bytes; the bytes from there up to {@link #getLength()} are the order bytes. The buffer is
 * reused across {@link #set(Gram, byte[])} / {@link #readFields(DataInput)} calls and may be longer
 * than {@link #getLength()}; only the first {@code getLength()} bytes are valid.
 *
 * <p>Serialized form: {@code length} and {@code primaryLength} as unsigned varints, followed by
 * {@code length} raw bytes.
 */
public final class GramKey extends BinaryComparable implements WritableComparable<BinaryComparable> {

    /** Shared buffer for keys that have not been populated yet, so the buffer is never null. */
    private static final byte[] EMPTY_BYTES = new byte[0];

    /** Number of leading bytes in {@link #bytes} that belong to the gram (type byte + string). */
    private int primaryLength;

    /** Number of valid bytes in {@link #bytes} (gram bytes plus order bytes). */
    private int length;

    /** Backing buffer; never null, possibly longer than {@link #length}. */
    private byte[] bytes = EMPTY_BYTES;

    /**
     * Creates an empty key (zero length), to be populated by {@link #set(Gram, byte[])} or
     * {@link #readFields(DataInput)}.
     */
    public GramKey() {
    }

    /**
     * Creates a key from the specified gram and secondary-order bytes.
     *
     * @param gram the gram whose bytes form the primary part of the key
     * @param order the bytes appended after the gram bytes for secondary ordering; may be empty
     * @throws NullPointerException if {@code gram} or {@code order} is null
     */
    public GramKey(Gram gram, byte[] order) {
        set(gram, order);
    }

    /**
     * Replaces the contents of this key with the given gram and secondary-order bytes.
     *
     * @param gram the gram whose bytes form the primary part of the key
     * @param order the bytes appended after the gram bytes for secondary ordering; may be empty
     * @throws NullPointerException if {@code gram} or {@code order} is null; the key is left
     *         unchanged in that case
     */
    public void set(Gram gram, byte[] order) {
        if (order == null) {
            throw new NullPointerException("order must not be null");
        }
        // Read everything from the arguments before touching any field, so that a null
        // argument cannot leave this key with a new primaryLength but an old length.
        int newPrimaryLength = gram.getLength();
        byte[] gramBytes = gram.getBytes();
        int newLength = newPrimaryLength + order.length;

        // Grow the buffer first; the length fields are committed only after the copy so that
        // 'length' never exceeds the buffer it describes.
        setCapacity(newLength, false);
        System.arraycopy(gramBytes, 0, bytes, 0, newPrimaryLength);
        System.arraycopy(order, 0, bytes, newPrimaryLength, order.length);
        primaryLength = newPrimaryLength;
        length = newLength;
    }

    /**
     * Returns the backing buffer. Only the first {@link #getLength()} bytes are valid; the array
     * itself may be longer and is reused by later calls.
     */
    @Override
    public byte[] getBytes() {
        return bytes;
    }

    /** Returns the number of valid bytes in the buffer (gram bytes plus order bytes). */
    @Override
    public int getLength() {
        return length;
    }

    /** Returns the number of leading bytes in the buffer that belong to the gram. */
    public int getPrimaryLength() {
        return primaryLength;
    }

    /**
     * Reads a key written by {@link #write(DataOutput)}.
     *
     * @throws IOException if the stream cannot be read, or if the encoded lengths are negative or
     *         the primary length exceeds the total length
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        int newLength = Varint.readUnsignedVarInt(in);
        int newPrimaryLength = Varint.readUnsignedVarInt(in);
        // The buffer is reused and may hold stale bytes past 'length', so a primary length
        // larger than the total length must be rejected rather than silently accepted.
        if (newLength < 0 || newPrimaryLength < 0 || newPrimaryLength > newLength) {
            throw new IOException("Corrupt GramKey: length=" + newLength
                    + ", primaryLength=" + newPrimaryLength);
        }
        setCapacity(newLength, false);
        in.readFully(bytes, 0, newLength);
        length = newLength;
        primaryLength = newPrimaryLength;
    }

    /**
     * Writes the total length and the primary length as unsigned varints, followed by the valid
     * bytes of the buffer.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        Varint.writeUnsignedVarInt(length, out);
        Varint.writeUnsignedVarInt(primaryLength, out);
        out.write(bytes, 0, length);
    }

    /*
     * Cribbed from o.a.hadoop.io.Text:
     * Sets the capacity of this object to <em>at least</em> {@code len} bytes. If the current
     * buffer is already that long, the buffer and its content are unchanged. Otherwise a new
     * buffer of exactly {@code len} bytes is allocated.
     *
     * @param len the number of bytes we need
     * @param keepData whether the first {@code length} bytes of the old buffer are copied into
     *        the new one when the buffer is reallocated
     */
    private void setCapacity(int len, boolean keepData) {
        if (bytes.length < len) {
            byte[] newBytes = new byte[len];
            if (keepData) {
                System.arraycopy(bytes, 0, newBytes, 0, length);
            }
            bytes = newBytes;
        }
    }

    /**
     * Returns the gram type as decoded by {@link Gram#decodeType(byte[], int)} from offset 0 of
     * the buffer (per the original documentation: whether the gram is at the head or tail of its
     * text unit, or a unigram).
     */
    public Type getType() {
        return Gram.decodeType(bytes, 0);
    }

    /**
     * Returns the gram's string, decoded from the primary bytes that follow the leading type byte.
     *
     * @throws IllegalStateException if the bytes cannot be decoded
     */
    public String getPrimaryString() {
        try {
            return Text.decode(bytes, 1, primaryLength - 1);
        } catch (CharacterCodingException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Returns {@code '<string>'[<type>]} for a populated key, or {@code ''[]} for a key that holds
     * no gram bytes yet (there is no type byte to decode in that case).
     */
    @Override
    public String toString() {
        if (primaryLength == 0) {
            return "''[]";
        }
        return '\'' + getPrimaryString() + "'[" + getType() + ']';
    }
}