/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.blur.spark.example;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.blur.spark.BlurMRBulkLoadSparkProcessor;
import org.apache.blur.thrift.BlurClient;
import org.apache.blur.thrift.generated.Blur.Iface;
import org.apache.blur.thrift.generated.Column;
import org.apache.blur.thrift.generated.Record;
import org.apache.blur.thrift.generated.RecordMutation;
import org.apache.blur.thrift.generated.RecordMutationType;
import org.apache.blur.thrift.generated.RowMutation;
import org.apache.blur.thrift.generated.RowMutationType;
import org.apache.spark.SparkConf;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
/**
 * Minimal example of a {@link BlurMRBulkLoadSparkProcessor} that reads lines of
 * text from a monitored directory and turns every line into a single-record
 * {@link RowMutation} for a Blur table.
 *
 * <p>All settings (table name, Blur connection string, monitored directory,
 * Spark master and output path) are plain mutable properties that must be set
 * before the processor is started; {@link #main(String[])} shows one such
 * configuration with hard-coded example values.
 */
@SuppressWarnings("serial")
public class SimpleSparkLoaderExample extends BlurMRBulkLoadSparkProcessor<String> {

  // ---------------------------------------------------------------------
  // Configuration (populated through the setters below)
  // ---------------------------------------------------------------------

  /** Name of the Blur table the generated mutations target. */
  private String _tableName;

  /** Connection string handed to {@link BlurClient#getClient(String)}. */
  private String _connectionStr;

  /** Directory URI passed to {@code textFileStream}. */
  private String _hdfsDirToMonitor;

  /** Value stored under the {@code spark.master} configuration key. */
  private String _sparkMaster;

  /** Path returned from {@link #getOutputPath()}. */
  private String _outputPath;

  // ---------------------------------------------------------------------
  // Entry point
  // ---------------------------------------------------------------------

  /**
   * Configures an example loader with fixed local settings and starts it.
   *
   * @param args ignored
   * @throws IOException declared for the inherited {@code run()} call
   */
  public static void main(String[] args) throws IOException {
    SimpleSparkLoaderExample example = new SimpleSparkLoaderExample();

    example.setTableName("test_hdfs");
    example.setConnectionStr("127.0.0.1:40010");
    example.setSparkMaster("spark://amccurry:7077");

    // An HDFS location such as "hdfs://localhost:9000/tmp/spark/input/" can be
    // monitored instead of the local directory used here.
    example.setHdfsDirToMonitor("file:///tmp/spark-input/");

    // The timestamp suffix gives each launch its own output location.
    long launchedAt = System.currentTimeMillis();
    example.setOutputPath("hdfs://localhost:9000/tmp/spark/output-" + launchedAt);

    example.run();
  }

  // ---------------------------------------------------------------------
  // BlurMRBulkLoadSparkProcessor hooks
  // ---------------------------------------------------------------------

  /**
   * Stores the configured master URL under {@code spark.master}.
   *
   * @param conf the Spark configuration to update
   */
  @Override
  protected void setupSparkConf(SparkConf conf) {
    conf.set("spark.master", _sparkMaster);
  }

  /** @return the fixed application name of this example */
  @Override
  protected String getAppName() {
    return "Sample Blur Loader";
  }

  /** @return the configured Blur table name */
  @Override
  protected String getBlurTableName() {
    return _tableName;
  }

  /** @return a Blur client obtained for the configured connection string */
  @Override
  protected Iface getBlurClient() {
    return BlurClient.getClient(_connectionStr);
  }

  /** @return the configured output path */
  @Override
  protected String getOutputPath() {
    return _outputPath;
  }

  /**
   * Builds the list of input streams: exactly one text-file stream over the
   * monitored directory.
   *
   * @param ssc the streaming context used to create the stream
   * @return a one-element list holding the text-file stream
   */
  @SuppressWarnings("unchecked")
  @Override
  protected List<JavaDStream<String>> getStreamsList(JavaStreamingContext ssc) {
    JavaDStream<String> lines = ssc.textFileStream(_hdfsDirToMonitor);
    return Arrays.asList(lines);
  }

  /**
   * Converts one input line into a row mutation. The trimmed line is used as
   * the row id, the record id, and the value of the single column {@code col}
   * in family {@code spark-test}.
   *
   * @param s the raw input line
   * @return a {@code REPLACE_ROW} mutation carrying one
   *         {@code REPLACE_ENTIRE_RECORD} record mutation
   */
  @Override
  protected RowMutation convert(String s) {
    final String line = s.trim();

    List<Column> cols = new ArrayList<Column>();
    cols.add(new Column("col", line));
    Record rec = new Record(line, "spark-test", cols);
    RecordMutation recMutation = new RecordMutation(RecordMutationType.REPLACE_ENTIRE_RECORD, rec);

    RowMutation mutation = new RowMutation();
    mutation.setRowId(line);
    mutation.setTable(getTableName());
    mutation.setRowMutationType(RowMutationType.REPLACE_ROW);
    mutation.addToRecordMutations(recMutation);
    return mutation;
  }

  // ---------------------------------------------------------------------
  // Property accessors
  // ---------------------------------------------------------------------

  /** @return the configured Blur table name */
  public String getTableName() {
    return _tableName;
  }

  /** @param tableName the Blur table name to use */
  public void setTableName(String tableName) {
    _tableName = tableName;
  }

  /** @return the configured Blur connection string */
  public String getConnectionStr() {
    return _connectionStr;
  }

  /** @param connectionStr the Blur connection string to use */
  public void setConnectionStr(String connectionStr) {
    _connectionStr = connectionStr;
  }

  /** @return the directory URI that is monitored for new text files */
  public String getHdfsDirToMonitor() {
    return _hdfsDirToMonitor;
  }

  /** @param hdfsDirToMonitor the directory URI to monitor */
  public void setHdfsDirToMonitor(String hdfsDirToMonitor) {
    _hdfsDirToMonitor = hdfsDirToMonitor;
  }

  /** @return the configured Spark master URL */
  public String getSparkMaster() {
    return _sparkMaster;
  }

  /** @param sparkMaster the Spark master URL to use */
  public void setSparkMaster(String sparkMaster) {
    _sparkMaster = sparkMaster;
  }

  /** @param outputPath the output path to report from {@link #getOutputPath()} */
  public void setOutputPath(String outputPath) {
    _outputPath = outputPath;
  }
}