/*
 * Copyright 2014, Stratio.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.stratio.deep.core.extractor;

import static junit.framework.TestCase.assertNotNull;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNull;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.rdd.RDD;
import org.json.simple.JSONObject;
import org.json.simple.JSONValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import com.stratio.deep.commons.config.BaseConfig;
import com.stratio.deep.commons.config.ExtractorConfig;
import com.stratio.deep.commons.entity.Cells;
import com.stratio.deep.commons.extractor.utils.ExtractorConstants;
import com.stratio.deep.commons.filter.Filter;
import com.stratio.deep.commons.filter.FilterType;
import com.stratio.deep.commons.rdd.IExtractor;
import com.stratio.deep.core.context.DeepSparkContext;
import com.stratio.deep.core.entity.BookEntity;
import com.stratio.deep.core.entity.MessageTestEntity;
import com.stratio.deep.core.entity.PlayerEntity;
import com.stratio.deep.core.entity.TeamEntity;

/**
 * Common test suite that validates each {@link IExtractor} implementation: it loads JSON
 * fixture data sets into the data store under test, then exercises read, write,
 * input-column projection and filter (EQ/NEQ) behavior through a {@link DeepSparkContext}.
 * <p>
 * Created by rcrespo on 9/09/14.
 *
 * @param <T> the entity type handled by the extractor under test
 * @param <S> the extractor configuration type
 */
public abstract class ExtractorTest<T, S extends BaseConfig> implements Serializable {

    /** The constant LOG. */
    private static final Logger LOG = LoggerFactory.getLogger(ExtractorTest.class);

    private static final long serialVersionUID = -4496047807269893090L;

    // Raw Class fields are kept raw (not Class<?>) so they can be passed where a
    // Class<T>/Class<W> is expected, matching the original unchecked-generics design.
    /** The Input entity. */
    private Class inputEntity;

    /** The Output entity. */
    private Class outputEntity;

    /** The Config entity. */
    private Class configEntity;

    /** The Host. */
    private final String host;

    /** The Port. */
    private Integer port;

    /** Secondary port (e.g. Cassandra thrift); defaults to 9160. */
    private Integer port2 = 9160;

    /** The Database. */
    protected String database = "test";

    /** The Table read. */
    protected final String tableRead = "input";

    /** Expected row count of the message data set. */
    protected static final long READ_COUNT_EXPECTED = 1L;

    /** Expected "message" field value of the message data set. */
    protected static final String READ_FIELD_EXPECTED = "new message test";

    /** Expected "id" field value of the message data set. */
    protected static final String ID_MESSAGE_EXPECTED = "messageTest";

    /** The Extractor implementation class under test. */
    protected Class<IExtractor<T, S>> extractor;

    /** The first book entity saved by {@link #initDataSetDivineComedy}. */
    protected T originBook;

    /** The constant BOOK_INPUT. */
    protected static final String BOOK_INPUT = "bookinput";

    /** The constant BOOK_OUTPUT. */
    protected static final String BOOK_OUTPUT = "bookoutput";

    public static final String FOOTBALL_TEAM_INPUT = "footballteam";

    public static final String FOOTBALL_PLAYER_INPUT = "footballplayer";

    protected static final String FOOTBALL_OUTPUT = "footballoutput";

    // Name kept as-is (misspelling of "EXPECTED") because it is protected and may be
    // referenced by extractor-specific subclasses.
    /** Expected word count of the Divine Comedy data set. */
    protected Long WORD_COUNT_SPECTED = 3833L;

    private static final String DATA_TEST_DIVINE_COMEDY = "/divineComedy.json";

    private static final String DATA_TEST_MESSAGE = "/message.json";

    private static final String DATA_TEST_FOOTBALL_TEAMS = "/football_teams.json";

    private static final String DATA_TEST_FOOTBALL_PLAYERS = "/football_players.json";

    // NOTE(review): never read nor written here; kept for subclass/serialization
    // compatibility — candidate for removal once confirmed unused.
    private String customDataSet;

    /** The Database extractor name (derived from the extractor's simple class name). */
    protected String databaseExtractorName;

    /**
     * Instantiates a new Extractor test.
     *
     * @param extractor the extractor implementation class under test
     * @param host      the data store host
     * @param port      the data store port
     * @param isCells   true to run in {@link Cells} mode, false to run in entity mode
     */
    public ExtractorTest(Class<IExtractor<T, S>> extractor, String host, Integer port, boolean isCells) {
        this(extractor, host, port, isCells, null);
    }

    /**
     * Instantiates a new Extractor test.
     *
     * @param extractor    the extractor implementation class under test
     * @param host         the data store host
     * @param port         the data store port
     * @param isCells      true to run in {@link Cells} mode, false to run in entity mode
     * @param dataSetClass optional entity class for the book data set (entity mode only);
     *                     defaults to {@link BookEntity} when null
     */
    public ExtractorTest(Class<IExtractor<T, S>> extractor, String host, Integer port, boolean isCells,
            Class dataSetClass) {
        if (isCells) {
            this.inputEntity = Cells.class;
            this.outputEntity = Cells.class;
            this.configEntity = Cells.class;
        } else {
            this.inputEntity = MessageTestEntity.class;
            this.outputEntity = MessageTestEntity.class;
            this.configEntity = dataSetClass != null ? dataSetClass : BookEntity.class;
        }
        this.host = host;
        this.port = port;
        this.extractor = extractor;
        this.databaseExtractorName = extractor.getSimpleName().toLowerCase();
    }

    /**
     * Reads a classpath resource line by line.
     *
     * @param path the resource path
     * @return the list of lines (empty on failure; the error is logged)
     */
    protected List<String> readFile(String path) {
        List<String> readLines = new ArrayList<>();
        // Explicit UTF-8: the JSON fixtures must decode identically on every platform.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(getClass().getResourceAsStream(path), StandardCharsets.UTF_8))) {
            String currentLine;
            while ((currentLine = reader.readLine()) != null) {
                readLines.add(currentLine);
            }
        } catch (Exception e) {
            // Best-effort by design: a missing fixture surfaces later as an empty RDD.
            LOG.error("Unable to read data set resource " + path, e);
        }
        return readLines;
    }

    /**
     * Transforms an RDD of raw JSON strings into an RDD of entities via
     * {@link #transform(JSONObject, String, Class)}.
     *
     * @param <T>           the type parameter
     * @param stringJavaRDD the RDD of JSON lines
     * @param entityClass   the target entity class
     * @return the transformed RDD
     */
    protected <T> JavaRDD<T> transformRDD(JavaRDD<String> stringJavaRDD, final Class<T> entityClass) {
        JavaRDD<JSONObject> jsonObjectJavaRDD = stringJavaRDD.map(new Function<String, JSONObject>() {
            @Override
            public JSONObject call(String v1) throws Exception {
                return (JSONObject) JSONValue.parse(v1);
            }
        });

        JavaRDD<T> javaRDD = jsonObjectJavaRDD.map(new Function<JSONObject, T>() {
            @Override
            public T call(JSONObject v1) throws Exception {
                return transform(v1, BOOK_INPUT, entityClass);
            }
        });
        return javaRDD;
    }

    /**
     * Loads every fixture data set into the data store before any test runs.
     *
     * @throws IOException the iO exception
     */
    @BeforeClass(alwaysRun = true)
    public void initDataSet() throws IOException {
        DeepSparkContext context = getDeepSparkContext();
        initDataSetDivineComedy(context);
        initDataSetMessage(context);
        initDataSetFootball(context);
        context.stop();
    }

    /** Loads the Divine Comedy book data set and remembers its first entity. */
    protected void initDataSetDivineComedy(DeepSparkContext context) {
        List<String> lines = readFile(DATA_TEST_DIVINE_COMEDY);
        JavaRDD<String> stringJavaRDD = context.parallelize(lines);
        JavaRDD<T> javaRDD = transformRDD(stringJavaRDD, configEntity);
        originBook = javaRDD.first();
        DeepSparkContext.saveRDD(javaRDD.rdd(),
                (ExtractorConfig<T>) getWriteExtractorConfig(BOOK_INPUT, configEntity));
    }

    /** Loads the single-row message data set used by the read/write tests. */
    protected void initDataSetMessage(DeepSparkContext context) {
        List<String> lines = readFile(DATA_TEST_MESSAGE);
        JavaRDD<String> stringJavaRDD = context.parallelize(lines);
        JavaRDD<T> javaRDD = transformRDD(stringJavaRDD, inputEntity);
        DeepSparkContext.saveRDD(javaRDD.rdd(),
                (ExtractorConfig<T>) getWriteExtractorConfig(tableRead, inputEntity));
    }

    /** Loads the football teams and players data sets. */
    protected void initDataSetFootball(DeepSparkContext context) {
        // Football teams data set
        List<String> teams = readFile(DATA_TEST_FOOTBALL_TEAMS);
        JavaRDD<TeamEntity> teamsRDD = transformRDD(context.parallelize(teams), TeamEntity.class);
        DeepSparkContext.saveRDD(teamsRDD.rdd(), getWriteExtractorConfig(FOOTBALL_TEAM_INPUT, TeamEntity.class));

        // Football players data set
        List<String> players = readFile(DATA_TEST_FOOTBALL_PLAYERS);
        JavaRDD<PlayerEntity> playersRDD = transformRDD(context.parallelize(players), PlayerEntity.class);
        DeepSparkContext.saveRDD(playersRDD.rdd(), getWriteExtractorConfig(FOOTBALL_PLAYER_INPUT, PlayerEntity.class));
    }

    /**
     * Transforms a parsed JSON object into the target entity type; implemented per extractor.
     *
     * @param <W>         the type parameter
     * @param jsonObject  the json object
     * @param nameSpace   the name space
     * @param entityClass the entity class
     * @return the transformed entity
     */
    protected abstract <W> W transform(JSONObject jsonObject, String nameSpace, Class<W> entityClass);

    /**
     * It tests if the extractor can read from the data store.
     *
     * @param <W> the type parameter
     */
    @Test(alwaysRun = true, groups = { "FunctionalTests" })
    public <W> void testRead() {
        DeepSparkContext context = getDeepSparkContext();
        try {
            ExtractorConfig<W> inputConfigEntity =
                    getReadExtractorConfig(databaseExtractorName, tableRead, inputEntity);
            RDD<W> inputRDDEntity = context.createRDD(inputConfigEntity);

            // TestNG Assert takes (actual, expected).
            Assert.assertEquals(inputRDDEntity.count(), READ_COUNT_EXPECTED);

            // first() is a Spark action; run it once and reuse the result.
            W first = inputRDDEntity.first();
            if (isEntityClassCells(inputConfigEntity)) {
                Assert.assertEquals(((Cells) first).getCellByName("message").getCellValue(), READ_FIELD_EXPECTED);
                Assert.assertEquals(((Cells) first).getCellByName("id").getCellValue(), ID_MESSAGE_EXPECTED);
            } else {
                Assert.assertEquals(((MessageTestEntity) first).getMessage(), READ_FIELD_EXPECTED);
                Assert.assertEquals(((MessageTestEntity) first).getId(), ID_MESSAGE_EXPECTED);
            }
        } finally {
            context.stop();
        }
    }

    /**
     * It tests if the extractor can write to the data store.
     *
     * @param <W> the type parameter
     */
    @Test(alwaysRun = true)
    public <W> void testWrite() {
        DeepSparkContext context = getDeepSparkContext();
        try {
            ExtractorConfig<W> inputConfigEntity =
                    getReadExtractorConfig(databaseExtractorName, tableRead, inputEntity);
            RDD<W> inputRDDEntity = context.createRDD(inputConfigEntity);

            ExtractorConfig<W> outputConfigEntity;
            if (isEntityClassCells(inputConfigEntity)) {
                outputConfigEntity = (ExtractorConfig<W>) getWriteExtractorConfig("outputCells", Cells.class);
            } else {
                outputConfigEntity = (ExtractorConfig<W>) getWriteExtractorConfig("outputEntity",
                        MessageTestEntity.class);
            }

            // Save RDD in DataSource
            context.saveRDD(inputRDDEntity, outputConfigEntity);

            // Read it back and verify the round trip preserved the message field.
            RDD<W> outputRDDEntity = context.createRDD(outputConfigEntity);
            if (isEntityClassCells(inputConfigEntity)) {
                Assert.assertEquals(((Cells) outputRDDEntity.first()).getCellByName("message").getCellValue(),
                        READ_FIELD_EXPECTED);
            } else {
                Assert.assertEquals(((MessageTestEntity) outputRDDEntity.first()).getMessage(), READ_FIELD_EXPECTED);
            }
        } finally {
            context.stop();
        }
    }

    /**
     * Tests that input-column projection returns only the requested columns.
     *
     * @param <W> the type parameter
     */
    @Test(alwaysRun = true)
    public <W> void testInputColumns() {
        DeepSparkContext context = getDeepSparkContext();
        try {
            // Projection 1: id + metadata, cantos must be absent.
            ExtractorConfig<W> inputConfigEntity = getInputColumnConfig(new String[] { "id", "metadata" });
            RDD<W> inputRDDEntity = context.createRDD(inputConfigEntity);
            if (isEntityClassCells(inputConfigEntity)) {
                Cells bookCells = (Cells) inputRDDEntity.first();
                assertNotNull(bookCells.getCellByName("id").getCellValue());
                assertNotNull(bookCells.getCellByName("metadata").getCellValue());
                assertNull(bookCells.getCellByName("cantos"));
            } else {
                BookEntity bookEntity = (BookEntity) inputRDDEntity.first();
                assertNotNull(bookEntity.getId());
                assertNotNull(bookEntity.getMetadataEntity());
                assertNull(bookEntity.getCantoEntities());
            }

            // Projection 2: cantos only.
            ExtractorConfig<W> inputConfigEntity2 = getInputColumnConfig("cantos");
            RDD<W> inputRDDEntity2 = context.createRDD(inputConfigEntity2);
            //TODO check this
            if (isEntityClassCells(inputConfigEntity2)) {
                Cells bookCells = (Cells) inputRDDEntity2.first();
                assertNull(bookCells.getCellByName("id"));
                assertNull(bookCells.getCellByName("metadata"));
                assertNotNull(bookCells.getCellByName("cantos").getCellValue());
            } else {
                BookEntity bookEntity2 = (BookEntity) inputRDDEntity2.first();
                assertNull(bookEntity2.getId());
                assertNull(bookEntity2.getMetadataEntity());
                assertNotNull(bookEntity2.getCantoEntities());
            }

            // Projection 3: cantos + metadata, id must be absent.
            ExtractorConfig<W> inputConfigEntity3 = getInputColumnConfig("cantos", "metadata");
            RDD<W> inputRDDEntity3 = context.createRDD(inputConfigEntity3);
            if (isEntityClassCells(inputConfigEntity3)) {
                Cells bookCells = (Cells) inputRDDEntity3.first();
                assertNull(bookCells.getCellByName("id"));
                assertNotNull(bookCells.getCellByName("metadata").getCellValue());
                assertNotNull(bookCells.getCellByName("cantos").getCellValue());
            } else {
                BookEntity bookEntity = (BookEntity) inputRDDEntity3.first();
                assertNull(bookEntity.getId());
                assertNotNull(bookEntity.getMetadataEntity());
                assertNotNull(bookEntity.getCantoEntities());
            }
        } finally {
            context.stop();
        }
    }

    /**
     * Gets extractor config.
     *
     * @param clazz the clazz
     * @return the extractor config
     */
    protected <W> ExtractorConfig<W> getExtractorConfig(Class<W> clazz) {
        return new ExtractorConfig<>(clazz);
    }

    /**
     * Test filter EQ: exactly one book matches id == "TestDataSet".
     *
     * @param <W> the type parameter
     */
    @Test(alwaysRun = true, dependsOnGroups = { "FunctionalTests" })
    protected <W> void testFilterEQ() {
        DeepSparkContext context = getDeepSparkContext();
        try {
            Filter[] filters = new Filter[] { new Filter("id", FilterType.EQ, "TestDataSet") };
            ExtractorConfig<W> inputConfigEntity2 = getFilterConfig(filters);
            RDD<W> inputRDDEntity2 = context.createRDD(inputConfigEntity2);
            // AssertJUnit takes (expected, actual).
            assertEquals(1, inputRDDEntity2.count());
        } finally {
            context.stop();
        }
    }

    /**
     * Test filter NEQ: no book matches id != "TestDataSet".
     *
     * @param <W> the type parameter
     */
    @Test
    protected <W> void testFilterNEQ() {
        DeepSparkContext context = getDeepSparkContext();
        try {
            Filter[] filters = new Filter[] { new Filter("id", FilterType.NEQ, "TestDataSet") };
            ExtractorConfig<W> inputConfigEntity = getFilterConfig(filters);
            RDD<W> inputRDDEntity = context.createRDD(inputConfigEntity);
            // AssertJUnit takes (expected, actual).
            assertEquals(0, inputRDDEntity.count());
        } finally {
            context.stop();
        }
    }

    /**
     * Gets write extractor config.
     *
     * @param tableOutput the table output
     * @param entityClass the entity class
     * @return the write extractor config
     */
    public ExtractorConfig getWriteExtractorConfig(String tableOutput, Class entityClass) {
        ExtractorConfig extractorConfig = getExtractorConfig(entityClass);
        extractorConfig.putValue(ExtractorConstants.HOST, host)
                .putValue(ExtractorConstants.DATABASE, databaseExtractorName)
                .putValue(ExtractorConstants.PORT, port)
                .putValue(ExtractorConstants.PORT2, port2)
                .putValue(ExtractorConstants.COLLECTION, tableOutput)
                .putValue(ExtractorConstants.CREATE_ON_WRITE, true);
        extractorConfig.setExtractorImplClass(extractor);
        return extractorConfig;
    }

    /**
     * Gets read extractor config for the default database and input table.
     *
     * @return the read extractor config
     */
    public <W> ExtractorConfig<W> getReadExtractorConfig() {
        return getReadExtractorConfig(database, tableRead, inputEntity);
    }

    /**
     * Gets read extractor config.
     *
     * @param database    the database
     * @param collection  the collection
     * @param entityClass the entity class
     * @return the read extractor config
     */
    public <W> ExtractorConfig<W> getReadExtractorConfig(String database, String collection, Class<W> entityClass) {
        ExtractorConfig<W> extractorConfig = getExtractorConfig(entityClass);
        extractorConfig.putValue(ExtractorConstants.HOST, host)
                .putValue(ExtractorConstants.DATABASE, database)
                .putValue(ExtractorConstants.PORT, port)
                .putValue(ExtractorConstants.PORT2, port2)
                .putValue(ExtractorConstants.COLLECTION, collection);
        extractorConfig.setExtractorImplClass(extractor);
        return extractorConfig;
    }

    /**
     * Gets input column config over the book input table.
     *
     * @param inputColumns the input columns
     * @return the input column config
     */
    public <W> ExtractorConfig<W> getInputColumnConfig(String... inputColumns) {
        ExtractorConfig<W> extractorConfig = getExtractorConfig(configEntity);
        extractorConfig.putValue(ExtractorConstants.HOST, host)
                .putValue(ExtractorConstants.DATABASE, databaseExtractorName)
                .putValue(ExtractorConstants.PORT, port)
                .putValue(ExtractorConstants.PORT2, port2)
                .putValue(ExtractorConstants.COLLECTION, BOOK_INPUT)
                .putValue(ExtractorConstants.INPUT_COLUMNS, inputColumns);
        extractorConfig.setExtractorImplClass(extractor);
        return extractorConfig;
    }

    /**
     * Gets filter config over the book input table.
     *
     * @param filters the filters
     * @return the filter config
     */
    public <W> ExtractorConfig<W> getFilterConfig(Filter[] filters) {
        ExtractorConfig<W> extractorConfig = getExtractorConfig(configEntity);
        extractorConfig.putValue(ExtractorConstants.HOST, host)
                .putValue(ExtractorConstants.DATABASE, databaseExtractorName)
                .putValue(ExtractorConstants.COLLECTION, BOOK_INPUT)
                .putValue(ExtractorConstants.PORT, port)
                .putValue(ExtractorConstants.PORT2, port2)
                .putValue(ExtractorConstants.FILTER_QUERY, filters);
        extractorConfig.setExtractorImplClass(extractor);
        return extractorConfig;
    }

    /**
     * Is entity class cells.
     *
     * @param extractorConfig the extractor config
     * @return true when the config's entity class is {@link Cells}
     */
    private boolean isEntityClassCells(ExtractorConfig extractorConfig) {
        return extractorConfig.getEntityClass().isAssignableFrom(Cells.class);
    }

    /**
     * Get deep spark context.
     *
     * @return a fresh local deep spark context
     */
    protected static DeepSparkContext getDeepSparkContext() {
        return new DeepSparkContext("local", "deepSparkContextTest");
    }
}