package water.parser;
import org.junit.Assume;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.util.Set;
import java.util.TreeSet;
import water.TestUtil;
import water.util.FileUtils;
import water.util.Log;
import static org.junit.Assert.assertEquals;
import static water.parser.OrcTestUtils.compareOrcAndH2OFrame;
import static water.util.FileUtils.*;
/**
* Test suite for orc parser.
*
* This test builds an H2O frame for each orc file listed in allOrcFiles (files taken from the
* smalldata/parser/orc directory; unsupported files are commented out of that list) and compares
* the H2O frame content with the orc file content read with core Java commands.
* The test is declared a success if the content of every H2O frame is the same as the content read
* by using core Java commands off the Orc file itself. No multi-threading is used in reading
* off the Orc file using core Java commands.
*/
public class ParseTestOrc extends TestUtil {

  // Tallies updated by testParseAllOrcs(): files actually compared, and failures found.
  int totalFilesTested = 0;
  int numberWrong = 0;

  // Orc files (relative paths under smalldata/parser/orc) exercised by the test.
  // Commented-out entries are excluded; the reason, where known, is noted next to each one.
  private String[] allOrcFiles = {
      "smalldata/parser/orc/TestOrcFile.columnProjection.orc",
      "smalldata/parser/orc/bigint_single_col.orc",
      "smalldata/parser/orc/TestOrcFile.emptyFile.orc",
      "smalldata/parser/orc/bool_single_col.orc",
      // "smalldata/parser/orc/TestOrcFile.metaData.orc", // do not support metadata from user
      // "smalldata/parser/orc/decimal.orc",
      // "smalldata/parser/orc/TestOrcFile.test1.orc", // do not support metadata from user
      "smalldata/parser/orc/demo-11-zlib.orc",
      "smalldata/parser/orc/TestOrcFile.testDate1900.orc",
      "smalldata/parser/orc/demo-12-zlib.orc",
      "smalldata/parser/orc/TestOrcFile.testDate2038.orc",
      "smalldata/parser/orc/double_single_col.orc",
      "smalldata/parser/orc/TestOrcFile.testMemoryManagementV11.orc",
      "smalldata/parser/orc/float_single_col.orc",
      "smalldata/parser/orc/TestOrcFile.testMemoryManagementV12.orc",
      "smalldata/parser/orc/int_single_col.orc",
      "smalldata/parser/orc/TestOrcFile.testPredicatePushdown.orc",
      "smalldata/parser/orc/nulls-at-end-snappy.orc",
      // "smalldata/parser/orc/TestOrcFile.testSeek.orc", // do not support metadata from user
      // "smalldata/parser/orc/orc-file-11-format.orc", // different column names are used between stripes
      "smalldata/parser/orc/TestOrcFile.testSnappy.orc",
      "smalldata/parser/orc/orc_split_elim.orc",
      "smalldata/parser/orc/TestOrcFile.testStringAndBinaryStatistics.orc",
      // "smalldata/parser/orc/over1k_bloom.orc", // do not support metadata from user
      "smalldata/parser/orc/TestOrcFile.testStripeLevelStats.orc",
      "smalldata/parser/orc/smallint_single_col.orc",
      // "smalldata/parser/orc/TestOrcFile.testTimestamp.orc", // abnormal orc file, no inspector structure available
      "smalldata/parser/orc/string_single_col.orc",
      // "smalldata/parser/orc/TestOrcFile.testUnionAndTimestamp.orc", // do not support metadata from user
      "smalldata/parser/orc/tinyint_single_col.orc",
      "smalldata/parser/orc/TestOrcFile.testWithoutIndex.orc",
      // "smalldata/parser/orc/version1999.orc" // contain only orc header, no column and no row, total file size is 0.
  };

  /**
   * Class-level setup: stalls until the test cloud is available (requested size 5) and then
   * applies the Java version precondition.
   *
   * The precondition is invoked explicitly from here, rather than being a second
   * {@code @BeforeClass} method, because JUnit does not document any ordering between several
   * {@code @BeforeClass} methods declared in the same class.
   */
  @BeforeClass
  static public void setup() {
    TestUtil.stall_till_cloudsize(5);
    _preconditionJavaVersion();
  }

  /**
   * Skips the tests of this class (via a JUnit assumption) when the {@code java.version} system
   * property starts with "1.6".
   *
   * Called from {@link #setup()} so that it always runs after the cloud stall.
   */
  static public void _preconditionJavaVersion() {
    // Does not run test on Java6 since we are running on Hadoop lib
    Assume.assumeTrue("Java6 is not supported", !System.getProperty("java.version", "NA").startsWith("1.6"));
  }

  /**
   * Compares every file in {@code allOrcFiles} against its parsed H2O frame.
   *
   * A file counts as failed when it cannot be located, when the comparison throws an
   * {@link IOException}, or for each mismatch reported by {@code compareOrcAndH2OFrame}.
   * The test fails at the end if any failure was counted, listing the failed files (sorted).
   */
  @Test
  public void testParseAllOrcs() {
    Set<String> failedFiles = new TreeSet<>();

    for (String fileName : allOrcFiles) {
      Log.info("Orc Parser parsing " + fileName);
      File f = locateFile(fileName);

      if (f == null || !f.exists()) {
        Log.warn("The following file was not found: " + fileName);
        failedFiles.add(fileName);
        numberWrong++;
        continue;
      }

      try {
        numberWrong += compareOrcAndH2OFrame(fileName, f, failedFiles);
        totalFilesTested++;
      } catch (IOException e) {
        // Report through the project logger (with the offending file) instead of raw stderr,
        // then keep going so the remaining files are still checked.
        Log.err("IOException while comparing orc file " + fileName);
        Log.err(e);
        failedFiles.add(fileName);
        numberWrong++;
      }
    }

    if (numberWrong > 0) {
      Log.warn("There are errors in your test.");
      assertEquals("Number of orc files failed to parse is: " + numberWrong + ", failed files = " +
          failedFiles.toString(), 0, numberWrong);
    } else {
      Log.info("Parser test passed! Number of files parsed is " + totalFilesTested);
    }
  }
}