/*
 * Copyright 2014, Stratio.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.stratio.deep.core.extractor;

import static com.stratio.deep.commons.utils.CellsUtils.getCellFromJson;
import static org.testng.Assert.assertEquals;

import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.rdd.RDD;
import org.json.simple.JSONObject;
import org.testng.Assert;
import org.testng.annotations.Test;

import com.stratio.deep.commons.config.BaseConfig;
import com.stratio.deep.commons.config.ExtractorConfig;
import com.stratio.deep.commons.entity.Cell;
import com.stratio.deep.commons.entity.Cells;
import com.stratio.deep.commons.exception.DeepTransformException;
import com.stratio.deep.commons.rdd.IExtractor;
import com.stratio.deep.core.context.DeepSparkContext;

import scala.Tuple2;

/**
 * Extractor test specialized for the generic {@link Cells} entity.
 * <p>
 * Created by rcrespo on 17/11/14.
 */
public abstract class ExtractorCellTest<S extends BaseConfig> extends ExtractorTest<Cells, S> {

    private static final long serialVersionUID = -7147600574221227223L;

    /**
     * @param extractor extractor implementation under test
     * @param host      host of the datastore used by the test
     * @param port      port of the datastore used by the test
     * @param isCells   whether the test works with the generic {@link Cells} type
     */
    public ExtractorCellTest(Class<IExtractor<Cells, S>> extractor, String host, Integer port,
            boolean isCells) {
        super(extractor, host, port, isCells);
    }

    @Override
    protected <T> T transform(JSONObject jsonObject, String nameSpace, Class<T> entityClass) {
        try {
            return (T) getCellFromJson(jsonObject, "book.input");
        } catch (IllegalAccessException | InstantiationException | InvocationTargetException e) {
            throw new DeepTransformException(e.getMessage());
        }
    }

    @Test
    public void testDataSet() {

        DeepSparkContext context = getDeepSparkContext();
        try {
            ExtractorConfig<Cells> inputConfigEntity =
                    getReadExtractorConfig(databaseExtractorName, BOOK_INPUT, Cells.class);

            RDD<Cells> inputRDDEntity = context.createRDD(inputConfigEntity);

            // The dataset was imported correctly and we can read it
            assertEquals(1, inputRDDEntity.count());

            List<Cells> books = inputRDDEntity.toJavaRDD().collect();

            Cells book = books.get(0);

            // tests subDocuments
            assertEquals(book.getCellByName("metadata").getValue(),
                    originBook.getCellByName("metadata").getValue());

            // tests List<subDocuments>
            List<Cells> listCantos = (List<Cells>) originBook.getCellByName("cantos").getValue();

            for (int i = 0; i < listCantos.size(); i++) {
                Cells cells = listCantos.get(i);
                assertEquals(cells.getCellByName("canto").getValue(),
                        ((List<Cells>) book.getCellByName("cantos").getCellValue()).get(i)
                                .getCellByName("canto").getCellValue());
                assertEquals(cells.getCellByName("text").getValue(),
                        ((List<Cells>) book.getCellByName("cantos").getCellValue()).get(i)
                                .getCellByName("text").getCellValue());
            }

            RDD<Cells> inputRDDEntity2 = context.createRDD(inputConfigEntity);

            // Split the text of every canto into words
            JavaRDD<String> words = inputRDDEntity2.toJavaRDD()
                    .flatMap(new FlatMapFunction<Cells, String>() {
                        @Override
                        public Iterable<String> call(Cells bookEntity) throws Exception {
                            List<String> words = new ArrayList<>();
                            for (Cells canto : ((List<Cells>) bookEntity.getCellByName("cantos")
                                    .getCellValue())) {
                                words.addAll(Arrays.asList(
                                        ((String) canto.getCellByName("text").getCellValue()).split(" ")));
                            }
                            return words;
                        }
                    });

            // Map each word to a (word, 1) pair
            JavaPairRDD<String, Integer> wordCount =
                    words.mapToPair(new PairFunction<String, String, Integer>() {
                        @Override
                        public Tuple2<String, Integer> call(String s) throws Exception {
                            return new Tuple2<>(s, 1);
                        }
                    });

            // Sum the occurrences of each word
            JavaPairRDD<String, Integer> wordCountReduced =
                    wordCount.reduceByKey(new Function2<Integer, Integer, Integer>() {
                        @Override
                        public Integer call(Integer integer, Integer integer2) throws Exception {
                            return integer + integer2;
                        }
                    });

            // Wrap each (word, count) pair into a Cells entity so it can be saved
            JavaRDD<Cells> outputRDD = wordCountReduced.map(new Function<Tuple2<String, Integer>, Cells>() {
                @Override
                public Cells call(Tuple2<String, Integer> stringIntegerTuple2) throws Exception {
                    return new Cells(Cell.create("word", stringIntegerTuple2._1()),
                            Cell.create("count", stringIntegerTuple2._2()));
                }
            });

            ExtractorConfig<Cells> outputConfigEntity = getWriteExtractorConfig(BOOK_OUTPUT, Cells.class);

            context.saveRDD(outputRDD.rdd(), outputConfigEntity);

            // Read the result back and check the number of distinct words
            RDD<Cells> outputRDDEntity = context.createRDD(outputConfigEntity);

            Assert.assertEquals(((Long) outputRDDEntity.cache().count()).longValue(),
                    WORD_COUNT_SPECTED.longValue());

        } finally {
            context.stop();
        }
    }
}