package org.commoncrawl.mapred.pipelineV3.crawllistgen; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import junit.framework.Assert; import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableComparator; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Partitioner; import org.junit.Test; public class CrawlListKey implements WritableComparable<CrawlListKey> { public static final int KEY_TYPE_HOMEPAGE_URL = 1; public static final int KEY_TYPE_WIKIPEDIA_URL = 2; public static final int KEY_TYPE_URL = 10; public long partitionDomainKey; public long comparisonDomainKey; public int type; public double rank0; public long rank1; @Override public void write(DataOutput out) throws IOException { out.writeLong(partitionDomainKey); out.writeLong(comparisonDomainKey); out.writeShort(type); out.writeDouble(rank0); out.writeLong(rank1); } @Override public void readFields(DataInput in) throws IOException { partitionDomainKey = in.readLong(); comparisonDomainKey = in.readLong(); type = in.readShort(); rank0 = in.readDouble(); rank1 = in.readLong(); } @Override public int compareTo(CrawlListKey arg0) { int result = (comparisonDomainKey < arg0.comparisonDomainKey) ? -1 : (comparisonDomainKey > arg0.comparisonDomainKey) ? 1: 0; if (result == 0) result = (type < arg0.type) ? -1 : (type > arg0.type) ? 1: 0; if (result == 0) result = (rank0 < arg0.rank0) ? 1 : (rank0 > arg0.rank0) ? -1: 0; if (result == 0) result = (rank1 < arg0.rank1) ? -1 : (rank1 > arg0.rank1) ? 1: 0; return result; } @Override public String toString() { return "PD:" + partitionDomainKey + " DH:" + comparisonDomainKey + " T:" + type + " Rank0:" + rank0; } public static final class CrawListKeyComparator implements RawComparator<CrawlListKey> { @Override public int compare(CrawlListKey arg0, CrawlListKey arg1) { return arg0.compareTo(arg1); } @Override public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { s1 += 8; s2 += 8; long lkey = WritableComparator.readLong(b1, s1); long rkey = WritableComparator.readLong(b2, s2); int result = (lkey < rkey) ? -1 : (lkey > rkey) ? 1: 0; if (result == 0) { s1 += 8; s2 += 8; lkey = WritableComparator.readUnsignedShort(b1, s1); rkey = WritableComparator.readUnsignedShort(b2, s2); result = (lkey < rkey) ? -1 : (lkey > rkey) ? 1: 0; if (result == 0) { s1 += 2; s2 += 2; double ldkey = WritableComparator.readDouble(b1, s1); double rdkey = WritableComparator.readDouble(b2, s2); result = (ldkey < rdkey) ? 1 : (ldkey > rdkey) ? -1: 0; if (result == 0) { s1 += 8; s2 += 8; lkey = WritableComparator.readLong(b1, s1); rkey = WritableComparator.readLong(b2, s2); result = (lkey < rkey) ? -1 : (lkey > rkey) ? 1: 0; } } } return result; } } public static class CrawlListKeyPartitioner implements Partitioner<CrawlListKey,Writable> { @Override public void configure(JobConf job) { } @Override public int getPartition(CrawlListKey key, Writable value, int numPartitions) { return (((int)key.partitionDomainKey) & Integer.MAX_VALUE) % numPartitions; } } public static CrawlListKey generateKey(long partitionDomain,long comparisonDomain,int type,double rank0,long rank1) { CrawlListKey keyOut = new CrawlListKey(); return generateKey(keyOut,partitionDomain, comparisonDomain, type, rank0, rank1); } public static CrawlListKey generateKey(CrawlListKey keyOut,long partitionDomain,long comparisonDomain,int type,double rank0,long rank1) { keyOut.partitionDomainKey = partitionDomain; keyOut.comparisonDomainKey = comparisonDomain; keyOut.type = type; keyOut.rank0 = rank0; keyOut.rank1 = rank1; return keyOut; } private static final DataOutputBuffer writeTestKey(CrawlListKey key)throws IOException { DataOutputBuffer temp = new DataOutputBuffer(); key.write(temp); DataInputBuffer input = new DataInputBuffer(); // validate serialization while we are at it ... input.reset(temp.getData(), temp.getLength()); CrawlListKey tempKey = new CrawlListKey(); tempKey.readFields(input); Assert.assertEquals(0, key.compareTo(tempKey)); temp.reset(); return temp; } private static final int compareRaw(CrawListKeyComparator comparator,CrawlListKey key1,CrawlListKey key2) throws IOException { DataOutputBuffer buffer1 = writeTestKey(key1); DataOutputBuffer buffer2= writeTestKey(key2); return comparator.compare(buffer1.getData(), 0, buffer1.getLength(), buffer2.getData(), 0, buffer2.getLength()); } @Test public void testComparator() throws Exception { CrawlListKey key1 = generateKey(1L, 1L, 0, 0, 0); CrawlListKey key2 = generateKey(1L, 1L, 1, 0, 0); CrawlListKey key3 = generateKey(1L, 1L, 1, 1, 0); CrawlListKey key4 = generateKey(1L, 1L, 1, 1, 1); CrawListKeyComparator comparator = new CrawlListKey.CrawListKeyComparator(); Assert.assertEquals(0, compareRaw(comparator,key1,key1)); Assert.assertEquals(0, comparator.compare(key1,key1)); Assert.assertEquals(-1, compareRaw(comparator,key1,key2)); Assert.assertEquals(-1, comparator.compare(key1,key2)); Assert.assertEquals(-1, compareRaw(comparator,key1,key3)); Assert.assertEquals(-1, comparator.compare(key1,key3)); Assert.assertEquals(-1, compareRaw(comparator,key1,key4)); Assert.assertEquals(-1, comparator.compare(key1,key4)); Assert.assertEquals(1, compareRaw(comparator,key2,key1)); Assert.assertEquals(1, comparator.compare(key2,key1)); Assert.assertEquals(0, compareRaw(comparator,key2,key2)); Assert.assertEquals(0, comparator.compare(key2,key2)); Assert.assertEquals(-1, compareRaw(comparator,key2,key3)); Assert.assertEquals(-1, comparator.compare(key2,key3)); Assert.assertEquals(-1, compareRaw(comparator,key2,key4)); Assert.assertEquals(-1, comparator.compare(key2,key4)); Assert.assertEquals(1, compareRaw(comparator,key3,key1)); Assert.assertEquals(1, comparator.compare(key3,key1)); Assert.assertEquals(1, compareRaw(comparator,key3,key2)); Assert.assertEquals(1, comparator.compare(key3,key2)); Assert.assertEquals(0, compareRaw(comparator,key3,key3)); Assert.assertEquals(0, comparator.compare(key3,key3)); Assert.assertEquals(-1, compareRaw(comparator,key3,key4)); Assert.assertEquals(-1, comparator.compare(key3,key4)); Assert.assertEquals(1, compareRaw(comparator,key4,key1)); Assert.assertEquals(1, comparator.compare(key4,key1)); Assert.assertEquals(1, compareRaw(comparator,key4,key2)); Assert.assertEquals(1, comparator.compare(key4,key2)); Assert.assertEquals(1, compareRaw(comparator,key4,key3)); Assert.assertEquals(1, comparator.compare(key4,key3)); Assert.assertEquals(0, compareRaw(comparator,key4,key4)); Assert.assertEquals(0, comparator.compare(key4,key4)); } }