/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
import org.apache.hadoop.io.WritableComparator;
import org.junit.Assert;
import org.junit.Test;
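/**
* Unit test verifying that the Floyd variant of the priority queue
* (PriorityQueueFloyd) yields the same string representation as the
* standard PriorityQueue when both are fed identical input.
*/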
public class TestPriorityQueue {
class IntPriorityQueue extends PriorityQueue<Integer> {
protected boolean lessThan(Object a, Object b) {
Integer i = (Integer) a;
Integer j = (Integer) b;
return i.intValue() < j.intValue();
}
}
class IntPriorityQueueFloyd extends PriorityQueueFloyd<Integer> {
protected boolean lessThan(Object a, Object b) {
Integer i = (Integer) a;
Integer j = (Integer) b;
return i.intValue() < j.intValue();
}
}
/**
 * Feeds the same sequence of integers to a standard queue and a Floyd
 * queue (both initialized with size 8) and asserts after each phase that
 * their {@code toString()} output is identical.
 *
 * Phases: the even numbers 2..16, then the odd numbers 3..17, then
 * 99999 pseudo-random values drawn from a fixed seed so that the run is
 * reproducible.
 */
@Test
public void testCorrectness() throws Exception {
  IntPriorityQueue pQ = new IntPriorityQueue();
  IntPriorityQueueFloyd fQ = new IntPriorityQueueFloyd();
  pQ.initialize(8);
  fQ.initialize(8);

  /* Expecting
   *          2
   *      4       6
   *    8  10   12  14
   *  16
   */
  for (int i = 1; i <= 8; i++) {
    Integer elem = Integer.valueOf(i * 2);
    pQ.insert(elem);
    fQ.insert(elem);
  }
  Assert.assertEquals("After inserting even",
                      pQ.toString(),
                      fQ.toString());
  System.out.println("new = " + fQ.toString());

  /*
   * Expecting
   *
   *          10
   *      11      12
   *    16  13  15  14
   *  17
   */
  for (int i = 1; i <= 8; i++) {
    Integer elem = Integer.valueOf(i * 2 + 1);
    pQ.insert(elem);
    fQ.insert(elem);
  }
  Assert.assertEquals("After inserting odd",
                      pQ.toString(),
                      fQ.toString());
  System.out.println("new = " + fQ.toString());

  // random sampling test (fixed seed keeps the sequence reproducible)
  Random rand = new Random(395788);
  for (int i = 1; i < 100000; i++) {
    Integer elem = Integer.valueOf(rand.nextInt(100000));
    pQ.insert(elem);
    fQ.insert(elem);
  }
  // Distinct message so a failure in this phase is not mistaken for the
  // odd-number phase above.
  Assert.assertEquals("After inserting random",
                      pQ.toString(),
                      fQ.toString());
  System.out.println("new = " + fQ.toString());
}
/**
* Performance tests to compare performance of the Typical PriorityQueue
* and Floyd version of PriorityQueue.
*
* To compile this class:
*
* ant test-core -Dtestcase=TestPriorityQueue
*
* This can be run from the command line with:
*
* java -cp build/hadoop-0.20-test.jar:build/hadoop-0.20-core.jar \
* -Xms10g -Xmx10g \
* 'org.apache.hadoop.util.TestPriorityQueue$PerformanceTest'
*/
public static class PerformanceTest {
/**
 * Ordered input stream -- simulating map output Segment.
 *
 * Records are appended with {@link #push(byte[])} and consumed through a
 * read cursor ({@code index}). {@code maxIndex} caps how many records
 * may be consumed, so the same backing list can be replayed at different
 * input sizes via {@link #setMaxLen(int)} and {@link #reset()}.
 */
class Segment {
  /** Backing records, in the order they were pushed. */
  ArrayList<byte[]> inputs;
  /** Position of the next record to be returned by {@link #top()}. */
  int index;
  /** Upper bound (exclusive) on the read cursor. */
  int maxIndex;

  /**
   * @param max_len initial consumption limit; also used as the initial
   *                capacity of the backing list
   */
  Segment(int max_len) {
    // Set the cursor directly rather than calling the overridable reset()
    // from the constructor.
    index = 0;
    maxIndex = max_len;
    inputs = new ArrayList<byte[]>(max_len);
  }

  /** @return the record under the cursor, without advancing */
  byte[] top() {
    return inputs.get(index);
  }

  /**
   * @return true while the cursor is below both the configured limit and
   *         the number of records actually pushed, i.e. while
   *         {@link #top()} is safe to call
   */
  boolean hasNext() {
    // The size check guards against a limit larger than the pushed data,
    // which would otherwise make top() throw IndexOutOfBoundsException.
    return index < maxIndex && index < inputs.size();
  }

  /** @return the record under the cursor, then advances the cursor */
  byte[] next() {
    byte[] elem = top();
    pop();
    return elem;
  }

  /** Advances the cursor by one record. */
  void pop() {
    index++;
  }

  /** Rewinds the cursor to the first record. */
  void reset() {
    index = 0;
  }

  /** Appends a record to the end of the stream. */
  void push(byte[] elem) {
    inputs.add(elem);
  }

  /** @param maxIndex new consumption limit (exclusive cursor bound) */
  void setMaxLen(int maxIndex) {
    this.maxIndex = maxIndex;
  }
}
class SegmentPriorityQueue extends PriorityQueue<Segment> {
protected boolean lessThan(Object a, Object b) {
Segment s1 = (Segment) a;
Segment s2 = (Segment) b;
byte[] b1 = (byte[]) s1.top();
byte[] b2 = (byte[]) s2.top();
int r = WritableComparator.compareBytes(b1, 0, b1.length,
b2, 0, b2.length);
return r < 0;
}
}
class SegmentPriorityQueueFloyd extends PriorityQueueFloyd<Segment> {
protected boolean lessThan(Object a, Object b) {
Segment s1 = (Segment) a;
Segment s2 = (Segment) b;
byte[] b1 = (byte[]) s1.top();
byte[] b2 = (byte[]) s2.top();
int r = WritableComparator.compareBytes(b1, 0, b1.length,
b2, 0, b2.length);
return r < 0;
}
}
/** Pre-generated input streams that every benchmark run merges. */
Segment[] segments;

/**
 * Builds the benchmark fixture by generating all input streams up front.
 *
 * @param queueSize number of input streams to generate
 * @param maxLength number of records requested per stream
 */
PerformanceTest(int queueSize, int maxLength) {
  this.segments = this.genInputSegments(queueSize, maxLength);
}
/**
 * Command-line entry point: prints the table header, then benchmarks
 * 24 and 100 input streams at input sizes 1000, 10000, ... up to one
 * million records per stream.
 */
public static void main(String[] args) {
  final int maxInputSize = 1000000;

  // The first fixture is built before the header is printed.
  final PerformanceTest smallQueue = new PerformanceTest(24, maxInputSize);
  printHeader();
  sweepInputSizes(smallQueue, maxInputSize);

  final PerformanceTest largeQueue = new PerformanceTest(100, maxInputSize);
  sweepInputSizes(largeQueue, maxInputSize);
}

/** Runs one benchmark row per power-of-ten input size from 1000 up to the limit. */
private static void sweepInputSizes(PerformanceTest bench, int maxInputSize) {
  int inputLen = 1000;
  while (inputLen <= maxInputSize) {
    bench.doBenchmark(inputLen, System.out);
    inputLen *= 10;
  }
}
/** Writes the table title and the row of column headings to standard output. */
private static void printHeader() {
  final PrintStream console = System.out;
  console.printf("\nPerformance Table (msec)\n");
  final String[] columnTitles = {
    "Queue Size",
    "Input Size",
    "PriorityQueue",
    "PriorityQueueFloyd",
    "Improvements",
  };
  for (String title : columnTitles) {
    // Width 0 lets each heading take exactly its own length.
    printCell(title, 0, console);
  }
  console.printf("\n");
}
/**
 * Benchmarks the elapsed time of the original vs. the Floyd variation of
 * the priority queue and writes one table row to {@code out}.
 *
 * The number of input streams is fixed by the segments generated for
 * this instance. Every segment is rewound before each of the two
 * measurements so that both queues merge exactly the same records, no
 * matter where an earlier benchmark run left the read cursors.
 *
 * @param inputLen the number of records consumed from each input stream
 * @param out      destination of the table row
 */
void doBenchmark(int inputLen, PrintStream out) {
  double pq_ms, fq_ms;

  // limit max input size
  for (Segment s : segments) {
    s.setMaxLen(inputLen);
  }

  int queueSize = segments.length;
  printCell(String.valueOf(queueSize), "Queue Size".length(), out);
  printCell(String.valueOf(inputLen), "Input Size".length(), out);

  // Rewind first: a previous doBenchmark call leaves every cursor at the
  // end of its (smaller) input, which would shorten this measurement.
  rewindSegments();
  SegmentPriorityQueue pQ = new SegmentPriorityQueue();
  pq_ms = measureSort(pQ, queueSize);
  printCell(String.format("%9.2f", pq_ms),
            "PriorityQueue".length(), out);

  rewindSegments();
  SegmentPriorityQueueFloyd fQ = new SegmentPriorityQueueFloyd();
  fq_ms = measureSort(fQ, queueSize);
  printCell(String.format("%9.2f", fq_ms),
            "PriorityQueueFloyd".length(), out);

  // Relative saving of the Floyd variant, in percent of the baseline time.
  printCell(String.format("%5.2f%%", (100.0d*(pq_ms-fq_ms)/pq_ms)),
            "Improvements".length(), out);
  // Terminate the row on the same stream the cells were written to.
  out.printf("\n");
}

/** Moves the read cursor of every segment back to its first record. */
private void rewindSegments() {
  for (Segment s : segments) {
    s.reset();
  }
}
/**
 * Initializes the given queue and times one full merge of all segments.
 *
 * @param pQ        the queue implementation under measurement
 * @param queueSize size passed to the queue's {@code initialize}
 * @return elapsed time of the merge in milliseconds
 */
private final double measureSort(PriorityQueue<Segment> pQ, int queueSize) {
  pQ.initialize(queueSize);
  // Only the merge itself is timed; initialization happens beforehand.
  final long startNanos = System.nanoTime();
  mergeSort(pQ, segments);
  final long elapsedNanos = System.nanoTime() - startNanos;
  return elapsedNanos / 1000000.0d;
}
/**
 * Merges the segments through the queue: every segment is inserted, then
 * the top segment is repeatedly advanced by one record. A segment with
 * records left is re-positioned via {@code adjustTop()}; an exhausted one
 * is removed via {@code pop()}. The loop ends once {@code top()} returns
 * null. The merged records themselves are discarded.
 *
 * @param queue    the queue driving the merge
 * @param segments the input streams to merge
 */
void mergeSort(PriorityQueue<Segment> queue, Segment[] segments) {
  for (Segment stream : segments) {
    queue.insert(stream);
  }

  Segment smallest;
  while ((smallest = queue.top()) != null) {
    smallest.next();
    if (!smallest.hasNext()) {
      queue.pop();
    } else {
      queue.adjustTop();
    }
  }
}
/**
 * Builds all input streams in main memory ahead of time so that the
 * benchmark measures only the priority queue work.
 *
 * Each stream starts at a pseudo-random value below 1000, drawn from a
 * fixed-seed generator so that the streams are reproducible.
 *
 * @param queueSize is the number of input streams
 * @param inputLen is the size of each input stream.
 * @return list of ordered input streams
 */
Segment[] genInputSegments(int queueSize, int inputLen) {
  final Segment[] streams = new Segment[queueSize];
  final Random headPicker = new Random(385902);
  int slot = 0;
  while (slot < queueSize) {
    streams[slot++] = genSortedList(inputLen, headPicker.nextInt(1000));
  }
  return streams;
}
/**
 * Generate an ordered list of byte arrays.
 *
 * Starting at {@code head}, each following value adds a pseudo-random
 * non-negative step below 10000. The step generator uses a fixed seed,
 * so every call produces the same sequence of steps. Generation stops
 * early if the running value would overflow.
 *
 * @param len  the number of records requested
 * @param head the smallest number
 * @return list of ordered byte arrays representing 0-padded ASCII number
 *         representation.
 */
public Segment genSortedList(int len, long head) {
  final Segment result = new Segment(len);
  final Random stepPicker = new Random(93854);
  long current = head;
  int produced = 0;
  while (produced < len) {
    result.push(zeroPaddedArray(current));
    produced++;
    final long candidate = current + stepPicker.nextInt(10000);
    if (candidate < current) { // overflow
      break;
    }
    current = candidate;
  }
  return result;
}
/**
 * Renders a long as a fixed-width, 20-byte array: the decimal text of
 * the value right-aligned, with every leading position filled with the
 * character '0'.
 *
 * @param l the value to render
 * @return a new 20-byte array holding the padded text
 */
private byte[] zeroPaddedArray(long l) {
  final byte[] digits = Long.toString(l).getBytes();
  final byte[] padded = new byte[20]; // Long.MAX = 9,223,372,036,854,775,807
  final int offset = padded.length - digits.length;
  for (int i = 0; i < offset; i++) {
    padded[i] = (byte) '0';
  }
  System.arraycopy(digits, 0, padded, offset, digits.length);
  return padded;
}
/**
 * Writes one right-aligned table cell followed by a column separator.
 *
 * @param s     the cell text
 * @param width minimum cell width; the cell widens to fit longer text
 * @param out   destination stream
 */
private static void printCell(String s, int width, PrintStream out) {
  final int cellWidth = Math.max(s.length(), width);
  final String cellFormat = " %" + cellWidth + "s |";
  out.printf(cellFormat, s);
}
}
}