/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.cli;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.lang.SerializationUtils;
import org.apache.log4j.PropertyConfigurator;
import org.apache.metamodel.util.FileHelper;
import org.datacleaner.result.AnalysisResult;
import org.xml.sax.Attributes;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
import com.google.common.base.Splitter;
import junit.framework.TestCase;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.sax.HtmlParser;
/**
 * End-to-end tests for the command line entry point {@link Main}.
 *
 * <p>
 * Every test invokes {@code Main.main(...)} with a space-separated argument
 * string. While a test runs, {@code System.out} is redirected into an in-memory
 * {@link StringWriter} so that the printed output can be asserted on; the
 * original stream is restored in {@link #tearDown()}.
 */
public class MainTest extends TestCase {

    /** Arguments selecting the example configuration used by all tests. */
    private static final String CONF_ARGS = "-conf src/test/resources/cli-examples/conf.xml";

    /** Arguments running the example "employees" job on the example configuration. */
    private static final String EMPLOYEES_JOB_ARGS =
            CONF_ARGS + " -job src/test/resources/cli-examples/employees_job.xml";

    /** Buffer that receives everything written to the redirected {@code System.out}. */
    private StringWriter _stringWriter;

    /** The {@code System.out} that was active before the test started. */
    private PrintStream _originalSysOut;

    @Override
    protected void setUp() throws Exception {
        super.setUp();
        _stringWriter = new StringWriter();
        _originalSysOut = System.out;
        useAsSystemOut(_stringWriter);
        // NOTE(review): PropertyConfigurator is handed a ".xml" path here; confirm
        // that the file really is in properties format (otherwise DOMConfigurator
        // may have been intended).
        PropertyConfigurator.configure("src/test/resources/log4j.xml");
    }

    @Override
    protected void tearDown() throws Exception {
        try {
            super.tearDown();
        } finally {
            // always hand the real stdout back, even if super.tearDown() fails
            System.setOut(_originalSysOut);
        }
    }

    /**
     * Replaces {@code System.out} with a stream that forwards every byte to the
     * given writer.
     *
     * @param stringWriter
     *            the writer that will receive the captured output
     */
    private void useAsSystemOut(final StringWriter stringWriter) {
        final OutputStream out = new OutputStream() {
            @Override
            public void write(final int b) throws IOException {
                // write to the writer that was passed in, not to whatever the
                // field happens to reference at the time of the write
                stringWriter.write(b);
            }
        };
        System.setOut(new PrintStream(out));
    }

    /**
     * Runs {@link Main} with the given command line, split on single spaces.
     *
     * @param commandLine
     *            the space-separated arguments
     * @throws Throwable
     *             whatever {@code Main.main} throws
     */
    private static void runMain(final String commandLine) throws Throwable {
        Main.main(commandLine.split(" "));
    }

    /**
     * @return everything captured from {@code System.out} so far, with Windows
     *         line endings normalized to {@code \n}
     */
    private String capturedOutput() {
        return _stringWriter.toString().replaceAll("\r\n", "\n");
    }

    /**
     * Creates a {@link File} handle for an output file and removes any copy left
     * behind by a previous run, so that a later {@code exists()} check proves
     * the file was written by the current test.
     *
     * @param filename
     *            the path of the output file
     * @return the file handle
     */
    private static File freshOutputFile(final String filename) {
        final File file = new File(filename);
        if (file.exists()) {
            // best effort only: a failed delete must not fail the test by itself
            file.delete();
        }
        return file;
    }

    /**
     * @param message
     *            a parser warning/error message, possibly {@code null}
     * @return {@code true} if the message is one of the two accepted complaints
     *         about a missing character encoding declaration
     */
    private static boolean isIgnorableEncodingMessage(final String message) {
        if (message == null) {
            return false;
        }
        return message.startsWith("No explicit character encoding declaration has been seen yet")
                || message.startsWith("The character encoding of the document was not declared.");
    }

    /**
     * Verifies the usage text line by line, and that running without any
     * arguments prints exactly the same text as {@code -usage}.
     */
    public void testUsage() throws Throwable {
        final String[] expectedLines = {
                "-conf (-configuration, --configuration-file) PATH :"
                        + " Path to an XML file describing the configuration of",
                "DataCleaner",
                "-ds (-datastore, --datastore-name) VAL :"
                        + " Name of datastore when printing a list of schemas, tables",
                "or columns. Overrides datastore used when used with -job",
                "-job (--job-file) PATH :"
                        + " Path to an analysis job XML file to execute",
                "-list [ANALYZERS | TRANSFORMERS | FILTERS | DATASTORES | :"
                        + " Used to print a list of various elements available in the",
                "SCHEMAS | TABLES | COLUMNS] : configuration",
                "-of (--output-file) PATH :"
                        + " Path to file in which to save the result of the job",
                "-ot (--output-type) [TEXT | HTML | SERIALIZED] :"
                        + " How to represent the result of the job",
                "-properties (--properties-file) PATH : Path to a custom properties file",
                "-runtype (--runtype) [LOCAL | SPARK] : How/where to run the job",
                "-s (-schema, --schema-name) VAL :"
                        + " Name of schema when printing a list of tables or columns",
                "-t (-table, --table-name) VAL :"
                        + " Name of table when printing a list of columns" };

        runMain("-usage");

        final String out1 = _stringWriter.toString();
        final String[] lines = out1.split("\n");
        assertEquals(13, lines.length);
        for (int i = 0; i < expectedLines.length; i++) {
            assertEquals(expectedLines[i], lines[i].trim());
        }

        // again without the -usage flag
        _stringWriter = new StringWriter();
        useAsSystemOut(_stringWriter);
        Main.main(new String[0]);

        final String out2 = _stringWriter.toString();
        assertEquals(out1, out2);
    }

    /** Lists the datastores of the example configuration. */
    public void testListDatastores() throws Throwable {
        runMain(CONF_ARGS + " -list DATASTORES");

        assertEquals("Datastores:\n-----------\nall_datastores\nemployees_csv\norderdb\n", capturedOutput());
    }

    /** Lists the schemas of the "orderdb" datastore. */
    public void testListSchemas() throws Throwable {
        runMain(CONF_ARGS + " -ds orderdb -list SCHEMAS");

        assertEquals("Schemas:\n" + "--------\n" + "INFORMATION_SCHEMA\n" + "PUBLIC\n", capturedOutput());
    }

    /** Lists the tables of the PUBLIC schema in the "orderdb" datastore. */
    public void testListTables() throws Throwable {
        runMain(CONF_ARGS + " -ds orderdb -schema PUBLIC -list TABLES");

        assertEquals(
                "Tables:\n-------\nCUSTOMERS\nEMPLOYEES\nOFFICES\nORDERDETAILS\nORDERFACT\nORDERS\nPAYMENTS\nPRODUCTS\n",
                capturedOutput());
    }

    /** Lists the columns of the EMPLOYEES table in the "orderdb" datastore. */
    public void testListColumns() throws Throwable {
        runMain(CONF_ARGS + " -ds orderdb -schema PUBLIC -table EMPLOYEES -list COLUMNS");

        assertEquals(
                "Columns:\n--------\nEMPLOYEENUMBER\nLASTNAME\nFIRSTNAME\nEXTENSION\nEMAIL\nOFFICECODE\nREPORTSTO\nJOBTITLE\n",
                capturedOutput());
    }

    /** Checks the header and a couple of known entries of the transformer listing. */
    public void testListTransformers() throws Throwable {
        runMain(CONF_ARGS + " -list TRANSFORMERS");

        final String out = capturedOutput();
        assertEquals("Transformers:", out.split("\n")[0]);
        assertTrue(out, out.contains("name: Email standardizer"));
        assertTrue(out, out.contains(" - Consumes a single input column (type: String)"));
    }

    /** Checks the header and a couple of known entries of the filter listing. */
    public void testListFilters() throws Throwable {
        runMain(CONF_ARGS + " -list FILTERS");

        final String out = capturedOutput();
        assertEquals("Filters:", out.split("\n")[0]);
        assertTrue(out, out.contains("name: Null check"));
        assertTrue(out, out.contains("- Outcome: NOT_NULL"));
        assertTrue(out, out.contains("- Outcome: NULL"));
    }

    /** Checks the header and a couple of known entries of the analyzer listing. */
    public void testListAnalyzers() throws Throwable {
        runMain(CONF_ARGS + " -list ANALYZERS");

        final String out = capturedOutput();
        assertEquals("Analyzers:", out.split("\n")[0]);
        assertTrue(out, out.contains("name: Pattern finder"));
        assertTrue(out, out.contains("name: String analyzer"));
    }

    /** Runs the example job and checks the textual result printed to stdout. */
    public void testExampleEmployeesJob() throws Throwable {
        runMain(EMPLOYEES_JOB_ARGS);

        final String out = capturedOutput();
        final List<String> lines = Splitter.on('\n').splitToList(out);

        assertTrue(out, out.contains("- Value count (company.com): 4"));
        assertTrue(out, out.contains("- Value count (eobjects.org): 2"));

        final String sizeMessage = "lines length was: " + lines.size();
        assertTrue(sizeMessage, lines.size() > 60);
        assertTrue(sizeMessage, lines.size() < 90);

        assertTrue(lines.contains("SUCCESS!"));
    }

    /** Runs the example job with {@code -of} and checks the first line of the written file. */
    public void testWriteToFile() throws Throwable {
        final File file = freshOutputFile("target/test_write_to_file.txt");

        runMain(EMPLOYEES_JOB_ARGS + " -of " + "target/test_write_to_file.txt");

        assertTrue(file.exists());
        final String result = FileHelper.readFileAsString(file);
        assertEquals("SUCCESS!", result.split("\n")[0].trim());
    }

    /**
     * Runs the example job with HTML output and validates the written file with
     * the validator.nu HTML parser. Only the two known "character encoding not
     * declared" messages are tolerated; any other warning or error fails the
     * test.
     */
    public void testWriteHtmlToFile() throws Throwable {
        final String filename = "target/test_write_html_to_file.html";
        final File file = freshOutputFile(filename);

        runMain(EMPLOYEES_JOB_ARGS + " -of " + filename + " -ot HTML");

        assertTrue(file.exists());

        final String[] htmlLines = FileHelper.readFileAsString(file).split("\n");
        assertEquals("<html>", htmlLines[1]);

        final AtomicInteger elementCounter = new AtomicInteger();
        final List<Exception> warningsAndErrors = new ArrayList<>();

        try (InputStream in = FileHelper.getInputStream(file)) {
            // parse it with validator.nu for HTML correctness
            final HtmlParser htmlParser = new HtmlParser(XmlViolationPolicy.FATAL);

            htmlParser.setContentHandler(new DefaultHandler() {
                @Override
                public void startElement(final String uri, final String localName, final String qName,
                        final Attributes attributes) throws SAXException {
                    elementCounter.incrementAndGet();
                }
            });

            htmlParser.setErrorHandler(new ErrorHandler() {
                @Override
                public void warning(final SAXParseException exception) throws SAXException {
                    System.err.println("Warning: " + exception.getMessage());
                    warningsAndErrors.add(exception);
                }

                @Override
                public void fatalError(final SAXParseException exception) throws SAXException {
                    // System.out is redirected into the capture buffer while the
                    // test runs, so report on System.err like the other callbacks
                    System.err.println("Fatal error: " + exception.getMessage());
                    throw exception;
                }

                @Override
                public void error(final SAXParseException exception) throws SAXException {
                    System.err.println("Error: " + exception.getMessage());
                    warningsAndErrors.add(exception);
                }
            });

            htmlParser.parse(new InputSource(in));
        }

        // the output has approx 3600 XML elements
        final int elementCount = elementCounter.get();
        assertTrue("Element count: " + elementCount, elementCount > 3000);
        assertTrue("Element count: " + elementCount, elementCount < 5000);

        for (final Exception problem : warningsAndErrors) {
            if (isIgnorableEncodingMessage(problem.getMessage())) {
                // ignore/accept this one
                continue;
            }
            problem.printStackTrace();
            fail("Got " + warningsAndErrors.size() + " warnings and errors, see log for details");
        }
    }

    /**
     * Runs the example job with serialized output and checks that the written
     * file deserializes into an {@link AnalysisResult} holding six results.
     */
    public void testWriteSerializedToFile() throws Throwable {
        final String filename = "target/test_write_serialized_to_file.analysis.result.dat";
        final File file = freshOutputFile(filename);

        runMain(EMPLOYEES_JOB_ARGS + " -of " + filename + " -ot SERIALIZED");

        assertTrue(file.exists());

        final AnalysisResult result;
        // make sure the file handle is released even if deserialization fails early
        try (InputStream in = new FileInputStream(file)) {
            result = (AnalysisResult) SerializationUtils.deserialize(in);
        }

        assertNotNull(result);
        assertEquals(6, result.getResults().size());
    }
}