/*
* Copyright 2014, Stratio.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.deep.core.extractor;
import static junit.framework.TestCase.assertNotNull;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNull;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.rdd.RDD;
import org.json.simple.JSONObject;
import org.json.simple.JSONValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import com.stratio.deep.commons.config.BaseConfig;
import com.stratio.deep.commons.config.ExtractorConfig;
import com.stratio.deep.commons.entity.Cells;
import com.stratio.deep.commons.extractor.utils.ExtractorConstants;
import com.stratio.deep.commons.filter.Filter;
import com.stratio.deep.commons.filter.FilterType;
import com.stratio.deep.commons.rdd.IExtractor;
import com.stratio.deep.core.context.DeepSparkContext;
import com.stratio.deep.core.entity.BookEntity;
import com.stratio.deep.core.entity.MessageTestEntity;
import com.stratio.deep.core.entity.PlayerEntity;
import com.stratio.deep.core.entity.TeamEntity;
/**
 * Common functional test that validates each extractor implementation.
 *
 * <p>Before the tests of a concrete subclass run, {@link #initDataSet()} reads JSON fixtures from the
 * classpath, converts every line with {@link #transform(JSONObject, String, Class)} and saves the result
 * through the extractor under test. The test methods then read, write, project and filter that data.
 *
 * <p>The class is {@link Serializable} because the anonymous Spark functions created in
 * {@link #transformRDD(JavaRDD, Class)} call the instance method {@code transform} and therefore hold a
 * reference to the enclosing test instance.
 *
 * <p>Created by rcrespo on 9/09/14.
 *
 * @param <T> the entity type handled by the extractor under test
 * @param <S> the configuration type accepted by the extractor under test
 */
public abstract class ExtractorTest<T, S extends BaseConfig> implements Serializable {

    /**
     * Logger used to report fixture loading problems.
     */
    private static final Logger LOG = LoggerFactory.getLogger(ExtractorTest.class);

    private static final long serialVersionUID = -4496047807269893090L;

    /**
     * Entity class used to load and read the message data set ({@code Cells} or {@code MessageTestEntity}).
     */
    private Class inputEntity;

    /**
     * Output entity class; assigned by the constructor and not read anywhere in this class.
     */
    private Class outputEntity;

    /**
     * Entity class used for the book data set and for the input-column and filter configurations.
     */
    private Class configEntity;

    /**
     * Host of the data store, sent as {@code ExtractorConstants.HOST}.
     */
    private final String host;

    /**
     * Port of the data store, sent as {@code ExtractorConstants.PORT}.
     */
    private Integer port;

    /**
     * Secondary port, sent as {@code ExtractorConstants.PORT2}.
     */
    private Integer port2 = 9160;

    /**
     * Database name used by the no-argument {@link #getReadExtractorConfig()}.
     */
    protected String database = "test";

    /**
     * Name of the table (collection) that holds the message data set.
     */
    protected final String tableRead = "input";

    /**
     * Number of rows expected in {@link #tableRead}.
     */
    protected static final long READ_COUNT_EXPECTED = 1L;

    /**
     * Expected value of the {@code message} field of the single row in {@link #tableRead}.
     */
    protected static final String READ_FIELD_EXPECTED = "new message test";

    /**
     * Expected value of the {@code id} field of the single row in {@link #tableRead}.
     */
    protected static final String ID_MESSAGE_EXPECTED = "messageTest";

    /**
     * Extractor implementation class under test.
     */
    protected Class<IExtractor<T, S>> extractor;

    /**
     * First element of the book data set, captured while the fixtures are loaded.
     */
    protected T originBook;

    /**
     * Name of the table that holds the book data set.
     */
    protected static final String BOOK_INPUT = "bookinput";

    /**
     * Table name for book output; not referenced in this class.
     */
    protected static final String BOOK_OUTPUT = "bookoutput";

    /**
     * Name of the table that holds the football teams data set.
     */
    public static final String FOOTBALL_TEAM_INPUT = "footballteam";

    /**
     * Name of the table that holds the football players data set.
     */
    public static final String FOOTBALL_PLAYER_INPUT = "footballplayer";

    /**
     * Table name for football output; not referenced in this class.
     */
    protected static final String FOOTBALL_OUTPUT = "footballoutput";

    /**
     * Expected word count; not referenced in this class (presumably used by subclasses - confirm).
     */
    protected Long WORD_COUNT_SPECTED = 3833L;

    private static final String DATA_TEST_DIVINE_COMEDY = "/divineComedy.json";
    private static final String DATA_TEST_MESSAGE = "/message.json";
    private static final String DATA_TEST_FOOTBALL_TEAMS = "/football_teams.json";
    private static final String DATA_TEST_FOOTBALL_PLAYERS = "/football_players.json";

    /**
     * Not referenced anywhere in this class.
     */
    private String customDataSet;

    /**
     * Lower-cased simple name of the extractor class; used as the database name by the configurations
     * built in this class.
     */
    protected String databaseExtractorName;

    /**
     * Instantiates a new extractor test that uses {@code BookEntity} as book entity when not in cells mode.
     *
     * @param extractor the extractor implementation class under test
     * @param host      the data store host
     * @param port      the data store port
     * @param isCells   {@code true} to work with {@code Cells}, {@code false} to work with entity classes
     */
    public ExtractorTest(Class<IExtractor<T, S>> extractor, String host, Integer port, boolean isCells) {
        this(extractor, host, port, isCells, null);
    }

    /**
     * Instantiates a new extractor test.
     *
     * @param extractor    the extractor implementation class under test
     * @param host         the data store host
     * @param port         the data store port
     * @param isCells      {@code true} to work with {@code Cells}, {@code false} to work with entity classes
     * @param dataSetClass entity class for the book data set when {@code isCells} is {@code false};
     *                     {@code null} selects {@code BookEntity}. Ignored when {@code isCells} is
     *                     {@code true}.
     */
    public ExtractorTest(Class<IExtractor<T, S>> extractor, String host, Integer port, boolean isCells,
            Class dataSetClass) {
        if (isCells) {
            this.inputEntity = Cells.class;
            this.outputEntity = Cells.class;
            this.configEntity = Cells.class;
        } else {
            this.inputEntity = MessageTestEntity.class;
            this.outputEntity = MessageTestEntity.class;
            this.configEntity = dataSetClass != null ? dataSetClass : BookEntity.class;
        }
        this.host = host;
        this.port = port;
        this.extractor = extractor;
        // Locale.ROOT keeps the derived database name independent of the JVM default locale.
        this.databaseExtractorName = extractor.getSimpleName().toLowerCase(java.util.Locale.ROOT);
    }

    /**
     * Reads a classpath resource line by line.
     *
     * <p>This method never throws: a missing resource or a read failure is logged and whatever was read
     * so far (possibly nothing) is returned.
     *
     * @param path the resource path, resolved with {@code getClass().getResource(...)}
     * @return the lines read, in file order; empty when the resource is missing or unreadable
     */
    protected List<String> readFile(String path) {
        List<String> readLines = new ArrayList<>();
        try {
            if (getClass().getResource(path) == null) {
                LOG.error("Test resource " + path + " was not found on the classpath");
                return readLines;
            }
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(getClass().getResourceAsStream(path)))) {
                String currentLine;
                while ((currentLine = reader.readLine()) != null) {
                    readLines.add(currentLine);
                }
            }
        } catch (Exception e) {
            // Deliberately broad: callers rely on getting a list back instead of an exception.
            LOG.error("Unable to read test resource " + path, e);
        }
        return readLines;
    }

    /**
     * Parses every string of the RDD as JSON and converts it with
     * {@link #transform(JSONObject, String, Class)}.
     *
     * <p>NOTE(review): {@link #BOOK_INPUT} is passed as name space for every data set, including the
     * message and football ones - confirm this is intended.
     *
     * @param <E>           the entity type to produce
     * @param stringJavaRDD the RDD with one JSON document per element
     * @param entityClass   the entity class handed to {@code transform}
     * @return the RDD of converted entities
     */
    protected <E> JavaRDD<E> transformRDD(JavaRDD<String> stringJavaRDD, final Class<E> entityClass) {
        JavaRDD<JSONObject> jsonObjectJavaRDD = stringJavaRDD.map(new Function<String, JSONObject>() {
            @Override
            public JSONObject call(String line) throws Exception {
                return (JSONObject) JSONValue.parse(line);
            }
        });
        return jsonObjectJavaRDD.map(new Function<JSONObject, E>() {
            @Override
            public E call(JSONObject jsonObject) throws Exception {
                return transform(jsonObject, BOOK_INPUT, entityClass);
            }
        });
    }

    /**
     * Loads the book, message and football data sets into the data store.
     *
     * <p>The Spark context is stopped in a {@code finally} block so that a failing load does not leave a
     * running context behind for the test methods, each of which creates its own.
     *
     * @throws IOException declared for overriding subclasses; not thrown by this implementation
     */
    @BeforeClass(alwaysRun = true)
    public void initDataSet() throws IOException {
        DeepSparkContext context = getDeepSparkContext();
        try {
            initDataSetDivineComedy(context);
            initDataSetMessage(context);
            initDataSetFootball(context);
        } finally {
            context.stop();
        }
    }

    /**
     * Loads the Divine Comedy fixture into {@link #BOOK_INPUT} and stores its first element in
     * {@link #originBook}.
     *
     * @param context the Spark context used to parallelize the fixture
     */
    protected void initDataSetDivineComedy(DeepSparkContext context) {
        List<String> lines = readFile(DATA_TEST_DIVINE_COMEDY);
        JavaRDD<String> stringJavaRDD = context.parallelize(lines);
        JavaRDD<T> javaRDD = transformRDD(stringJavaRDD, configEntity);
        originBook = javaRDD.first();
        DeepSparkContext.saveRDD(javaRDD.rdd(), (ExtractorConfig<T>) getWriteExtractorConfig(BOOK_INPUT,
                configEntity));
    }

    /**
     * Loads the message fixture into {@link #tableRead}.
     *
     * @param context the Spark context used to parallelize the fixture
     */
    protected void initDataSetMessage(DeepSparkContext context) {
        List<String> lines = readFile(DATA_TEST_MESSAGE);
        JavaRDD<String> stringJavaRDD = context.parallelize(lines);
        JavaRDD<T> javaRDD = transformRDD(stringJavaRDD, inputEntity);
        DeepSparkContext.saveRDD(javaRDD.rdd(), (ExtractorConfig<T>) getWriteExtractorConfig(tableRead,
                inputEntity));
    }

    /**
     * Loads the football teams and players fixtures into {@link #FOOTBALL_TEAM_INPUT} and
     * {@link #FOOTBALL_PLAYER_INPUT}; both are always stored as entities, never as {@code Cells}.
     *
     * @param context the Spark context used to parallelize the fixtures
     */
    protected void initDataSetFootball(DeepSparkContext context) {
        // Football teams data set
        List<String> teams = readFile(DATA_TEST_FOOTBALL_TEAMS);
        JavaRDD<TeamEntity> teamsRDD = transformRDD(context.parallelize(teams), TeamEntity.class);
        DeepSparkContext.saveRDD(teamsRDD.rdd(), getWriteExtractorConfig(FOOTBALL_TEAM_INPUT,
                TeamEntity.class));

        // Football players data set
        List<String> players = readFile(DATA_TEST_FOOTBALL_PLAYERS);
        JavaRDD<PlayerEntity> playersRDD = transformRDD(context.parallelize(players), PlayerEntity.class);
        DeepSparkContext.saveRDD(playersRDD.rdd(), getWriteExtractorConfig(FOOTBALL_PLAYER_INPUT,
                PlayerEntity.class));
    }

    /**
     * Converts a parsed JSON document into an instance of the requested class.
     *
     * @param <W>         the type to produce
     * @param jsonObject  the parsed JSON document
     * @param nameSpace   the name space handed over by {@link #transformRDD(JavaRDD, Class)}
     * @param entityClass the class to produce
     * @return the converted object
     */
    protected abstract <W> W transform(JSONObject jsonObject, String nameSpace, Class<W> entityClass);

    /**
     * Tests that the extractor can read the message data set from the data store.
     *
     * @param <W> the type of the elements read
     */
    @Test(alwaysRun = true, groups = { "FunctionalTests" })
    public <W> void testRead() {
        DeepSparkContext context = getDeepSparkContext();
        try {
            ExtractorConfig<W> inputConfigEntity = getReadExtractorConfig(databaseExtractorName, tableRead,
                    inputEntity);
            RDD<W> inputRDDEntity = context.createRDD(inputConfigEntity);
            // TestNG's Assert takes (actual, expected).
            Assert.assertEquals(inputRDDEntity.count(), READ_COUNT_EXPECTED);

            // Fetch the first element once; every call to first() triggers a job.
            W firstRow = inputRDDEntity.first();
            if (isEntityClassCells(inputConfigEntity)) {
                Cells cells = (Cells) firstRow;
                Assert.assertEquals(cells.getCellByName("message").getCellValue(), READ_FIELD_EXPECTED);
                Assert.assertEquals(cells.getCellByName("id").getCellValue(), ID_MESSAGE_EXPECTED);
            } else {
                MessageTestEntity message = (MessageTestEntity) firstRow;
                Assert.assertEquals(message.getMessage(), READ_FIELD_EXPECTED);
                Assert.assertEquals(message.getId(), ID_MESSAGE_EXPECTED);
            }
        } finally {
            context.stop();
        }
    }

    /**
     * Tests that the extractor can write to the data store: the message data set is read, saved to an
     * output table and read back from it.
     *
     * @param <W> the type of the elements read and written
     */
    @Test(alwaysRun = true)
    public <W> void testWrite() {
        DeepSparkContext context = getDeepSparkContext();
        try {
            ExtractorConfig<W> inputConfigEntity = getReadExtractorConfig(databaseExtractorName, tableRead,
                    inputEntity);
            RDD<W> inputRDDEntity = context.createRDD(inputConfigEntity);
            boolean cellsMode = isEntityClassCells(inputConfigEntity);

            ExtractorConfig<W> outputConfigEntity;
            if (cellsMode) {
                outputConfigEntity = (ExtractorConfig<W>) getWriteExtractorConfig("outputCells", Cells.class);
            } else {
                outputConfigEntity = (ExtractorConfig<W>) getWriteExtractorConfig("outputEntity",
                        MessageTestEntity.class);
            }

            // Save RDD in DataSource
            context.saveRDD(inputRDDEntity, outputConfigEntity);

            RDD<W> outputRDDEntity = context.createRDD(outputConfigEntity);
            W firstRow = outputRDDEntity.first();
            if (cellsMode) {
                Assert.assertEquals(((Cells) firstRow).getCellByName("message").getCellValue(),
                        READ_FIELD_EXPECTED);
            } else {
                Assert.assertEquals(((MessageTestEntity) firstRow).getMessage(), READ_FIELD_EXPECTED);
            }
        } finally {
            context.stop();
        }
    }

    /**
     * Tests column projection on the book data set: only the requested columns among {@code id},
     * {@code metadata} and {@code cantos} may be present in the first element read.
     *
     * @param <W> the type of the elements read
     */
    @Test(alwaysRun = true)
    public <W> void testInputColumns() {
        DeepSparkContext context = getDeepSparkContext();
        try {
            assertProjectedColumns(context, "id", "metadata");
            //TODO check this
            assertProjectedColumns(context, "cantos");
            assertProjectedColumns(context, "cantos", "metadata");
        } finally {
            context.stop();
        }
    }

    /**
     * Reads the book data set restricted to the given columns and asserts that exactly those columns
     * are populated on the first element.
     */
    private <W> void assertProjectedColumns(DeepSparkContext context, String... inputColumns) {
        ExtractorConfig<W> inputConfigEntity = getInputColumnConfig(inputColumns);
        RDD<W> inputRDDEntity = context.createRDD(inputConfigEntity);
        W firstRow = inputRDDEntity.first();

        boolean idRequested = isRequested(inputColumns, "id");
        boolean metadataRequested = isRequested(inputColumns, "metadata");
        boolean cantosRequested = isRequested(inputColumns, "cantos");

        if (isEntityClassCells(inputConfigEntity)) {
            Cells bookCells = (Cells) firstRow;
            assertCellPresence(bookCells, "id", idRequested);
            assertCellPresence(bookCells, "metadata", metadataRequested);
            assertCellPresence(bookCells, "cantos", cantosRequested);
        } else {
            BookEntity bookEntity = (BookEntity) firstRow;
            assertValuePresence(bookEntity.getId(), idRequested);
            assertValuePresence(bookEntity.getMetadataEntity(), metadataRequested);
            assertValuePresence(bookEntity.getCantoEntities(), cantosRequested);
        }
    }

    /**
     * Tells whether {@code column} is one of the requested columns.
     */
    private static boolean isRequested(String[] inputColumns, String column) {
        for (String requested : inputColumns) {
            if (column.equals(requested)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Asserts that a requested cell has a value and that a non-requested cell is absent.
     */
    private static void assertCellPresence(Cells cells, String column, boolean requested) {
        if (requested) {
            assertNotNull(cells.getCellByName(column).getCellValue());
        } else {
            assertNull(cells.getCellByName(column));
        }
    }

    /**
     * Asserts that a requested entity field is set and that a non-requested one is {@code null}.
     */
    private static void assertValuePresence(Object value, boolean requested) {
        if (requested) {
            assertNotNull(value);
        } else {
            assertNull(value);
        }
    }

    /**
     * Creates an empty extractor configuration for the given entity class.
     *
     * @param <W>   the entity type
     * @param clazz the entity class
     * @return the new extractor configuration
     */
    protected <W> ExtractorConfig<W> getExtractorConfig(Class<W> clazz) {
        return new ExtractorConfig<>(clazz);
    }

    /**
     * Tests the EQ filter: filtering the book data set by {@code id == "TestDataSet"} must return one row.
     *
     * @param <W> the type of the elements read
     */
    @Test(alwaysRun = true, dependsOnGroups = { "FunctionalTests" })
    protected <W> void testFilterEQ() {
        DeepSparkContext context = getDeepSparkContext();
        try {
            Filter[] filters = { new Filter("id", FilterType.EQ, "TestDataSet") };
            ExtractorConfig<W> filterConfig = getFilterConfig(filters);
            RDD<W> filteredRDD = context.createRDD(filterConfig);
            // AssertJUnit takes (expected, actual).
            assertEquals(1L, filteredRDD.count());
        } finally {
            context.stop();
        }
    }

    /**
     * Tests the NEQ filter: filtering the book data set by {@code id != "TestDataSet"} must return no rows.
     *
     * @param <W> the type of the elements read
     */
    @Test
    protected <W> void testFilterNEQ() {
        DeepSparkContext context = getDeepSparkContext();
        try {
            Filter[] filters = { new Filter("id", FilterType.NEQ, "TestDataSet") };
            ExtractorConfig<W> filterConfig = getFilterConfig(filters);
            RDD<W> filteredRDD = context.createRDD(filterConfig);
            // AssertJUnit takes (expected, actual).
            assertEquals(0L, filteredRDD.count());
        } finally {
            context.stop();
        }
    }

    /**
     * Builds a configuration with the connection values shared by every configuration of this class:
     * host, database, both ports and collection. The configuration itself is obtained from
     * {@link #getExtractorConfig(Class)} so that subclass overrides are honoured.
     */
    private <W> ExtractorConfig<W> newConnectionConfig(Class<W> entityClass, String databaseName,
            String collection) {
        ExtractorConfig<W> extractorConfig = getExtractorConfig(entityClass);
        extractorConfig.putValue(ExtractorConstants.HOST, host)
                .putValue(ExtractorConstants.DATABASE, databaseName)
                .putValue(ExtractorConstants.PORT, port)
                .putValue(ExtractorConstants.PORT2, port2)
                .putValue(ExtractorConstants.COLLECTION, collection);
        return extractorConfig;
    }

    /**
     * Gets a write configuration for the given table in the {@link #databaseExtractorName} database,
     * with {@code CREATE_ON_WRITE} enabled.
     *
     * @param tableOutput the table (collection) to write to
     * @param entityClass the entity class
     * @return the write extractor config
     */
    public ExtractorConfig getWriteExtractorConfig(String tableOutput, Class entityClass) {
        ExtractorConfig extractorConfig = newConnectionConfig(entityClass, databaseExtractorName, tableOutput);
        extractorConfig.putValue(ExtractorConstants.CREATE_ON_WRITE, true);
        extractorConfig.setExtractorImplClass(extractor);
        return extractorConfig;
    }

    /**
     * Gets a read configuration for {@link #tableRead} in the {@link #database} database.
     *
     * <p>NOTE(review): this uses {@link #database}, whereas the tests in this class read from
     * {@link #databaseExtractorName} - confirm the difference is intended.
     *
     * @param <W> the entity type
     * @return the read extractor config
     */
    public <W> ExtractorConfig<W> getReadExtractorConfig() {
        return getReadExtractorConfig(database, tableRead, inputEntity);
    }

    /**
     * Gets a read configuration for the given database and collection.
     *
     * @param <W>         the entity type
     * @param database    the database
     * @param collection  the collection
     * @param entityClass the entity class
     * @return the read extractor config
     */
    public <W> ExtractorConfig<W> getReadExtractorConfig(String database, String collection, Class<W> entityClass) {
        ExtractorConfig<W> extractorConfig = newConnectionConfig(entityClass, database, collection);
        extractorConfig.setExtractorImplClass(extractor);
        return extractorConfig;
    }

    /**
     * Gets a read configuration for the book data set restricted to the given columns.
     *
     * @param <W>          the entity type
     * @param inputColumns the columns to read
     * @return the input column config
     */
    public <W> ExtractorConfig<W> getInputColumnConfig(String... inputColumns) {
        ExtractorConfig<W> extractorConfig = newConnectionConfig(configEntity, databaseExtractorName,
                BOOK_INPUT);
        extractorConfig.putValue(ExtractorConstants.INPUT_COLUMNS, inputColumns);
        extractorConfig.setExtractorImplClass(extractor);
        return extractorConfig;
    }

    /**
     * Gets a read configuration for the book data set with the given filters applied.
     *
     * @param <W>     the entity type
     * @param filters the filters, sent as {@code ExtractorConstants.FILTER_QUERY}
     * @return the filter config
     */
    public <W> ExtractorConfig<W> getFilterConfig(Filter[] filters) {
        ExtractorConfig<W> extractorConfig = newConnectionConfig(configEntity, databaseExtractorName,
                BOOK_INPUT);
        extractorConfig.putValue(ExtractorConstants.FILTER_QUERY, filters);
        extractorConfig.setExtractorImplClass(extractor);
        return extractorConfig;
    }

    /**
     * Tells whether the configuration's entity class is assignable from {@code Cells}.
     *
     * @param extractorConfig the extractor config
     * @return {@code true} when the configuration works with {@code Cells}
     */
    private boolean isEntityClassCells(ExtractorConfig extractorConfig) {
        return extractorConfig.getEntityClass().isAssignableFrom(Cells.class);
    }

    /**
     * Creates a new local Spark context named {@code deepSparkContextTest}; the caller must stop it.
     *
     * @return the deep spark context
     */
    protected static DeepSparkContext getDeepSparkContext() {
        return new DeepSparkContext("local", "deepSparkContextTest");
    }
}