/*
* Copyright 2014, Stratio.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.deep.core.extractor;

import static com.stratio.deep.commons.utils.CellsUtils.getObjectFromJson;
import static org.testng.Assert.assertEquals;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.rdd.RDD;
import org.json.simple.JSONObject;
import org.testng.Assert;
import org.testng.annotations.Test;
import com.google.common.base.Optional;
import com.stratio.deep.commons.config.BaseConfig;
import com.stratio.deep.commons.config.ExtractorConfig;
import com.stratio.deep.commons.exception.DeepTransformException;
import com.stratio.deep.core.context.DeepSparkContext;
import com.stratio.deep.core.entity.BookEntity;
import com.stratio.deep.core.entity.CantoEntity;
import com.stratio.deep.core.entity.PlayerEntity;
import com.stratio.deep.core.entity.TeamEntity;
import com.stratio.deep.core.entity.WordCount;
import scala.Tuple2;

/**
 * Base class for entity-mapped extractor tests. It checks that an extractor can read,
 * filter, group by key, join and write entity data sets through a {@link DeepSparkContext}.
 * <p>
 * Created by rcrespo on 17/11/14.
 */
public abstract class ExtractorEntityTest<T, S extends BaseConfig> extends ExtractorTest<T, S> {
private static final long serialVersionUID = 6367238996895716537L;

    /**
     * @param extractor    the extractor implementation class under test
     * @param host         the host where the data store is listening
     * @param port         the port where the data store is listening
     * @param isCells      whether the extractor test works with cells instead of entities
     * @param dataSetClass the class of the data set fixture to load
     */
public ExtractorEntityTest(Class extractor, String host, Integer port, boolean isCells, Class<T> dataSetClass) {
super(extractor, host, port, isCells, dataSetClass);
}
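
    /**
     * @param extractor the extractor implementation class under test
     * @param host      the host where the data store is listening
     * @param port      the port where the data store is listening
     * @param isCells   whether the extractor test works with cells instead of entities
     */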
public ExtractorEntityTest(Class extractor, String host, Integer port, boolean isCells) {
super(extractor, host, port, isCells);
}
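
    /**
     * Builds an entity of {@code entityClass} from the given JSON object by reflection,
     * wrapping any reflection failure in a {@link DeepTransformException}.
     */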
@Override
public Object transform(JSONObject jsonObject, String nameSpace, Class entityClass) {
try {
return getObjectFromJson(entityClass, jsonObject);
} catch (IllegalAccessException | InstantiationException | InvocationTargetException e) {
throw new DeepTransformException(e.getMessage());
}
}

    /**
     * Prepares the team data used in the join tests, mapping each team to a pair keyed by its id.
     *
     * @param context the deep context
     * @return a pair RDD of (team id, team)
     */
protected JavaPairRDD<Long, TeamEntity> prepareTeamRDD(DeepSparkContext context) {
ExtractorConfig<TeamEntity> teamsConfigEntity = getReadExtractorConfig(this.databaseExtractorName,
FOOTBALL_TEAM_INPUT, TeamEntity.class);
        return context.createJavaRDD(teamsConfigEntity)
                .mapToPair(new PairFunction<TeamEntity, Long, TeamEntity>() {
                    @Override
                    public Tuple2<Long, TeamEntity> call(TeamEntity teamEntity) throws Exception {
                        return new Tuple2<>(teamEntity.getId(), teamEntity);
                    }
                });
}

    /**
     * Prepares the player data used in the join tests, mapping each player to a pair keyed by its teamId.
     *
     * @param context the deep context
     * @return a pair RDD of (team id, player)
     */
protected JavaPairRDD<Long, PlayerEntity> preparePlayerRDD(DeepSparkContext context) {
ExtractorConfig<PlayerEntity> playersConfigEntity = getReadExtractorConfig(this.databaseExtractorName,
FOOTBALL_PLAYER_INPUT, PlayerEntity.class);
        return context.createJavaRDD(playersConfigEntity)
                .mapToPair(new PairFunction<PlayerEntity, Long, PlayerEntity>() {
                    @Override
                    public Tuple2<Long, PlayerEntity> call(PlayerEntity playerEntity) throws Exception {
                        return new Tuple2<>(playerEntity.getTeamId(), playerEntity);
                    }
                });
}

    /**
     * Tests that the extractor can group the players data set by key (teamId).
     */
@Test
protected void testGroupByKey() {
DeepSparkContext context = getDeepSparkContext();
try {
JavaPairRDD<Long, PlayerEntity> playersRDD = preparePlayerRDD(context);
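            // The fixture holds 5 players spread over 4 teams, so grouping by teamId yields 4 keys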
assertEquals(playersRDD.count(), 5);
assertEquals(playersRDD.groupByKey().count(), 4);
} finally {
context.stop();
}
}

    /**
     * Tests that the extractor can filter the players data set with equality and inequality predicates.
     */
@Test
protected void testFilterSpark() {
DeepSparkContext context = getDeepSparkContext();
try {
JavaPairRDD<Long, PlayerEntity> playersRDD = preparePlayerRDD(context);
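            // Keep only the player whose id equals 3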
JavaPairRDD<Long, PlayerEntity> playersFilteredEqRDD = playersRDD.filter(
new Function<Tuple2<Long, PlayerEntity>, Boolean>() {
@Override
public Boolean call(Tuple2<Long, PlayerEntity> player) throws Exception {
return player._2().getId() == 3;
}
});
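            // Keep every player except the one whose id equals 1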
JavaPairRDD<Long, PlayerEntity> playersFilteredNEqRDD = playersRDD.filter(
new Function<Tuple2<Long, PlayerEntity>, Boolean>() {
@Override
public Boolean call(Tuple2<Long, PlayerEntity> player) throws Exception {
return player._2().getId() != 1;
}
});
assertEquals(playersRDD.count(), 5);
assertEquals(playersFilteredEqRDD.count(), 1);
assertEquals(playersFilteredEqRDD.collect().size(), 1);
assertEquals(playersFilteredEqRDD.first()._2().getId().longValue(), 3L);
assertEquals(playersFilteredNEqRDD.count(), 4);
assertEquals(playersFilteredNEqRDD.collect().size(), 4);
} finally {
context.stop();
}
}

    /**
     * Tests that the extractor can inner join two data sets.
     */
@Test
protected void testInnerJoin() {
DeepSparkContext context = getDeepSparkContext();
try {
JavaPairRDD<Long, TeamEntity> teamsRDD = prepareTeamRDD(context);
JavaPairRDD<Long, Iterable<PlayerEntity>> playersRDD = preparePlayerRDD(context).groupByKey();
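            // An inner join keeps only the keys present in both RDDs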
JavaPairRDD<Long, Tuple2<TeamEntity, Iterable<PlayerEntity>>> joinRDD = teamsRDD.join(playersRDD);
assertEquals(joinRDD.count(), 4);
} finally {
context.stop();
}
}

    /**
     * Tests that the extractor can left outer join two data sets.
     */
@Test
protected void testLeftOuterJoin() {
DeepSparkContext context = getDeepSparkContext();
try {
JavaPairRDD<Long, TeamEntity> teamsRDD = prepareTeamRDD(context);
JavaPairRDD<Long, Iterable<PlayerEntity>> playersRDD = preparePlayerRDD(context).groupByKey();
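            // A left outer join keeps every team, wrapping the possibly absent players in an Optional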
JavaPairRDD<Long, Tuple2<TeamEntity, Optional<Iterable<PlayerEntity>>>> joinRDD =
teamsRDD.leftOuterJoin(playersRDD);
assertEquals(joinRDD.count(), teamsRDD.count());
} finally {
context.stop();
}
}

    /**
     * Tests that the extractor can right outer join two data sets.
     */
@Test
protected void testRightOuterJoin() {
DeepSparkContext context = getDeepSparkContext();
try {
JavaPairRDD<Long, TeamEntity> teamsRDD = prepareTeamRDD(context);
JavaPairRDD<Long, PlayerEntity> playersRDD = preparePlayerRDD(context);
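            // A right outer join keeps every player, wrapping the possibly absent team in an Optional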
JavaPairRDD<Long, Tuple2<Optional<TeamEntity>, PlayerEntity>> joinRDD =
teamsRDD.rightOuterJoin(playersRDD);
assertEquals(joinRDD.count(), playersRDD.count());
} finally {
context.stop();
}
}

    @Test(alwaysRun = true)
public void testDataSet() {
DeepSparkContext context = getDeepSparkContext();
try {
ExtractorConfig<BookEntity> inputConfigEntity = getReadExtractorConfig(databaseExtractorName, BOOK_INPUT,
BookEntity.class);
RDD<BookEntity> inputRDDEntity = context.createRDD(inputConfigEntity);
            // The data set import succeeded and the single book can be read back
            assertEquals(inputRDDEntity.count(), 1);
List<BookEntity> books = inputRDDEntity.toJavaRDD().collect();
BookEntity book = books.get(0);
            // Tests a nested sub-document (the book metadata)
assertEquals(((BookEntity) originBook).getMetadataEntity().getAuthor(),
book.getMetadataEntity().getAuthor());
            // Tests a list of nested sub-documents (the cantos)
List<CantoEntity> listCantos = ((BookEntity) originBook).getCantoEntities();
for (int i = 0; i < listCantos.size(); i++) {
assertEquals(listCantos.get(i).getNumber(), book.getCantoEntities().get(i).getNumber());
assertEquals(listCantos.get(i).getText(), book.getCantoEntities().get(i).getText());
}
RDD<BookEntity> inputRDDEntity2 = context.createRDD(inputConfigEntity);
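            // Split the text of every canto into individual words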
JavaRDD<String> words = inputRDDEntity2.toJavaRDD().flatMap(new FlatMapFunction<BookEntity, String>() {
@Override
public Iterable<String> call(BookEntity bookEntity) throws Exception {
List<String> words = new ArrayList<>();
for (CantoEntity canto : bookEntity.getCantoEntities()) {
words.addAll(Arrays.asList(canto.getText().split(" ")));
}
return words;
}
});
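            // Pair each word with an initial count of 1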
JavaPairRDD<String, Long> wordCount = words.mapToPair(new PairFunction<String, String, Long>() {
@Override
public Tuple2<String, Long> call(String s) throws Exception {
                    return new Tuple2<>(s, 1L);
}
});
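            // Sum the partial counts of each distinct word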
JavaPairRDD<String, Long> wordCountReduced = wordCount.reduceByKey(new Function2<Long, Long, Long>() {
@Override
                public Long call(Long count1, Long count2) throws Exception {
                    return count1 + count2;
}
});
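            // Wrap each (word, count) pair in a WordCount entity so it can be saved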
JavaRDD<WordCount> outputRDD = wordCountReduced.map(new Function<Tuple2<String, Long>, WordCount>() {
@Override
                public WordCount call(Tuple2<String, Long> wordCountTuple) throws Exception {
                    return new WordCount(wordCountTuple._1(), wordCountTuple._2());
}
});
ExtractorConfig<WordCount> outputConfigEntity = getWriteExtractorConfig(BOOK_OUTPUT, WordCount.class);
context.saveRDD(outputRDD.rdd(), outputConfigEntity);
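            // Read back what was just written and verify the number of distinct words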
RDD<WordCount> outputRDDEntity = context.createRDD(outputConfigEntity);
            Assert.assertEquals(outputRDDEntity.cache().count(), WORD_COUNT_SPECTED.longValue());
} finally {
context.stop();
}
}
}