/**
* AnalyzerBeans
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.eobjects.analyzer.util.sort;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.util.Comparator;
import junit.framework.TestCase;
import org.apache.metamodel.util.FileHelper;
import org.apache.metamodel.util.ToStringComparator;
public class SortMergeWriterTest extends TestCase {
public void testSimpleSort() throws Exception {
doSortTests(2);
doSortTests(5);
doSortTests(10);
doSortTests(10000);
}
public void testSimpleDedup() throws Exception {
doDedupTests(2);
doDedupTests(5);
doDedupTests(10);
doDedupTests(10000);
}
// test that the comparator is being used. Here we dedup string arrays but
// only based on the string at index 0.
public void testDedupArray() throws Exception {
Comparator<String[]> comparator = new Comparator<String[]>() {
@Override
public int compare(String[] o1, String[] o2) {
return o1[0].compareTo(o2[0]);
}
};
SortMergeWriter<String[], Writer> sorter = new SortMergeWriter<String[], Writer>(2, comparator) {
@Override
protected Writer createWriter(File file) {
return FileHelper.getBufferedWriter(file);
}
@Override
protected void writeRow(Writer writer, String[] row, int count) throws IOException {
if (row == null) {
writer.write("<null>," + count + "\n");
} else {
writer.write(row[0] + "," + count + "\n");
}
}
protected void writeHeader(Writer writer) throws IOException {
writer.write("text,count\n");
};
};
sorter.append(new String[] { "foo", "foo" });
sorter.append(new String[] { "bar", "foobar" });
sorter.append(new String[] { "foobar", "bar" });
sorter.append(new String[] { "barfoo", "foobar" });
sorter.append(new String[] { "foo", "foo" });
sorter.append(new String[] { "foobar", "bar" });
sorter.append(new String[] { "barfoo", "foobar" });
sorter.append(new String[] { "bar", "foo" });
sorter.append(new String[] { "foobar", "bar" });
sorter.append(new String[] { "barfoo", "foobar" });
sorter.append(new String[] { "bar", "foo" });
sorter.append(new String[] { "foobar", "bar" });
File file = sorter.write("target/sort_merge_arrays-deduped.csv");
assertTrue(file.exists());
try (BufferedReader br = FileHelper.getBufferedReader(file);) {
assertEquals("text,count", br.readLine());
assertEquals("bar,3", br.readLine());
assertEquals("barfoo,3", br.readLine());
assertEquals("foo,2", br.readLine());
assertEquals("foobar,4", br.readLine());
assertNull(br.readLine());
br.close();
}
}
public void testUseAsUniquenessChecker() throws Exception {
SortMergeWriter<String, Writer> sorter = new SortMergeWriter<String, Writer>(2,
ToStringComparator.getComparator()) {
@Override
protected Writer createWriter(File file) {
return FileHelper.getBufferedWriter(file);
}
@Override
protected void writeRow(Writer writer, String row, int count) throws IOException {
if (count > 1) {
writer.write(row + "," + count + "\n");
}
}
protected void writeHeader(Writer writer) throws IOException {
writer.write("text,count\n");
};
@Override
protected void writeNull(Writer writer, int nullCount) throws IOException {
if (nullCount > 1) {
writeRow(writer, "<null>", nullCount);
}
}
};
sorter.append("foo");
sorter.append("bar");
sorter.append("baz");
sorter.append("hello");
sorter.append("world");
for (int i = 0; i < 100; i++) {
sorter.append("unique" + i);
}
sorter.append("bar");
sorter.append("foo");
File file = sorter.write("target/sort_merge_uniqueness.txt");
String str = FileHelper.readFileAsString(file);
assertEquals("text,count\n" + "bar,2\n" + "foo,2", str);
}
public void testNullSafety() throws Exception {
SortMergeWriter<String, Writer> sorter = new SortMergeWriter<String, Writer>(2,
ToStringComparator.getComparator()) {
@Override
protected Writer createWriter(File file) {
return FileHelper.getBufferedWriter(file);
}
@Override
protected void writeRow(Writer writer, String row, int count) throws IOException {
writer.write(row + "," + count + "\n");
}
protected void writeHeader(Writer writer) throws IOException {
writer.write("text,count\n");
};
@Override
protected void writeNull(Writer writer, int nullCount) throws IOException {
writeRow(writer, "<null>", nullCount);
}
};
sorter.append("1234");
sorter.append("acb");
sorter.append(null);
sorter.append("5678");
sorter.append("1234");
sorter.append("acb", 3);
sorter.append("acb");
sorter.append("5678");
sorter.append("1234");
File file = sorter.write("target/sort_merge_null_safety.txt");
assertTrue(file.exists());
BufferedReader br = FileHelper.getBufferedReader(file);
assertEquals("text,count", br.readLine());
assertEquals("<null>,1", br.readLine());
assertEquals("1234,3", br.readLine());
assertEquals("5678,2", br.readLine());
assertEquals("acb,5", br.readLine());
assertNull(br.readLine());
}
public void testNoUnnescesaryTempFiles() throws Exception {
SortMergeWriter<String, Writer> sorter = new SortMergeWriter<String, Writer>(10,
ToStringComparator.getComparator()) {
@Override
protected Writer createWriter(File file) {
return FileHelper.getBufferedWriter(file);
}
@Override
protected void writeRow(Writer writer, String row, int count) throws IOException {
writer.write(row + "," + count + "\n");
}
protected void writeHeader(Writer writer) throws IOException {
writer.write("text,count\n");
};
@Override
protected File createTempFile() throws IOException {
throw new IllegalStateException("This test is not supposed to require temp files!");
}
};
sorter.append("1234");
sorter.append("acb");
sorter.append("abc");
sorter.append("acb");
sorter.append("5678");
File file = sorter.write("target/sort_merge_no_temp_file.txt");
assertTrue(file.exists());
try (BufferedReader br = FileHelper.getBufferedReader(file)) {
assertEquals("text,count", br.readLine());
assertEquals("1234,1", br.readLine());
assertEquals("5678,1", br.readLine());
assertEquals("abc,1", br.readLine());
assertEquals("acb,2", br.readLine());
assertNull(br.readLine());
}
}
private void doSortTests(int threshold) throws Exception {
SortMergeWriter<String, Writer> sorter = new SortMergeWriter<String, Writer>(threshold,
ToStringComparator.getComparator()) {
@Override
protected Writer createWriter(File file) {
return FileHelper.getBufferedWriter(file);
}
@Override
protected void writeRow(Writer writer, String row, int count) throws IOException {
writer.write(row + "," + count + "\n");
}
protected void writeHeader(Writer writer) throws IOException {
writer.write("number,count\n");
};
};
sorter.append("02");
sorter.append("01");
sorter.append("04");
sorter.append("03");
sorter.append("06");
sorter.append("07");
sorter.append("08");
sorter.append("05");
sorter.append("09");
sorter.append("10");
sorter.append("13");
sorter.append("12");
sorter.append("11");
sorter.append("14");
File file = sorter.write("target/sort_merge_sort_" + threshold + ".txt");
assertTrue(file.exists());
try (BufferedReader br = FileHelper.getBufferedReader(file)) {
assertEquals("number,count", br.readLine());
assertEquals("01,1", br.readLine());
assertEquals("02,1", br.readLine());
assertEquals("03,1", br.readLine());
assertEquals("04,1", br.readLine());
assertEquals("05,1", br.readLine());
assertEquals("06,1", br.readLine());
assertEquals("07,1", br.readLine());
assertEquals("08,1", br.readLine());
assertEquals("09,1", br.readLine());
assertEquals("10,1", br.readLine());
assertEquals("11,1", br.readLine());
assertEquals("12,1", br.readLine());
assertEquals("13,1", br.readLine());
assertEquals("14,1", br.readLine());
assertNull(br.readLine());
}
}
private void doDedupTests(int threshold) throws Exception {
SortMergeWriter<String, Writer> sorter = new SortMergeWriter<String, Writer>(threshold,
ToStringComparator.getComparator()) {
@Override
protected Writer createWriter(File file) {
return FileHelper.getBufferedWriter(file);
}
@Override
protected void writeRow(Writer writer, String row, int count) throws IOException {
writer.write(row + "," + count + "\n");
}
@Override
protected void writeHeader(Writer writer) throws IOException {
// do nothing
}
};
sorter.append("02");
sorter.append("01");
sorter.append("04");
sorter.append("03");
sorter.append("06");
sorter.append("07");
sorter.append("08");
sorter.append("05");
sorter.append("09");
sorter.append("10");
sorter.append("13");
sorter.append("12");
sorter.append("11");
sorter.append("14");
sorter.append("02");
sorter.append("01");
sorter.append("01");
sorter.append("14");
sorter.append("10");
sorter.append("10");
sorter.append("10");
File file = sorter.write("target/sort_merge_dedup_" + threshold + ".txt");
assertTrue(file.exists());
try (BufferedReader br = FileHelper.getBufferedReader(file)) {
assertEquals("01,3", br.readLine());
assertEquals("02,2", br.readLine());
assertEquals("03,1", br.readLine());
assertEquals("04,1", br.readLine());
assertEquals("05,1", br.readLine());
assertEquals("06,1", br.readLine());
assertEquals("07,1", br.readLine());
assertEquals("08,1", br.readLine());
assertEquals("09,1", br.readLine());
assertEquals("10,4", br.readLine());
assertEquals("11,1", br.readLine());
assertEquals("12,1", br.readLine());
assertEquals("13,1", br.readLine());
assertEquals("14,2", br.readLine());
assertNull(br.readLine());
}
}
}