/*
* Copyright © 2014-2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package co.cask.cdap.internal.app.runtime.batch;

import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.common.RuntimeArguments;
import co.cask.cdap.api.common.Scope;
import co.cask.cdap.api.dataset.lib.FileSet;
import co.cask.cdap.api.dataset.lib.FileSetArguments;
import co.cask.cdap.api.dataset.lib.FileSetProperties;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.api.dataset.lib.ObjectStore;
import co.cask.cdap.api.dataset.lib.TimeseriesTable;
import co.cask.cdap.api.dataset.lib.cube.AggregationFunction;
import co.cask.cdap.api.dataset.table.Get;
import co.cask.cdap.api.dataset.table.Table;
import co.cask.cdap.api.mapreduce.MapReduceSpecification;
import co.cask.cdap.api.metrics.MetricDataQuery;
import co.cask.cdap.api.metrics.MetricTimeSeries;
import co.cask.cdap.app.runtime.Arguments;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.common.io.Locations;
import co.cask.cdap.data2.transaction.Transactions;
import co.cask.cdap.internal.DefaultId;
import co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms;
import co.cask.cdap.internal.app.runtime.BasicArguments;
import co.cask.cdap.proto.Id;
import co.cask.cdap.test.XSlowTests;
import co.cask.tephra.TransactionAware;
import co.cask.tephra.TransactionExecutor;
import co.cask.tephra.TransactionExecutorFactory;
import co.cask.tephra.TransactionFailureException;
import co.cask.tephra.TxConstants;
import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import com.google.common.io.CharStreams;
import com.google.common.io.Files;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.twill.filesystem.Location;
import org.junit.Assert;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.ExternalResource;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.URI;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import javax.annotation.Nullable;

/**
 * Tests running MapReduce programs, covering dataset access, transaction handling,
 * metrics, and success and failure scenarios.
 */
@Category(XSlowTests.class)
public class MapReduceProgramRunnerTest extends MapReduceRunnerTestBase {

@ClassRule
public static final ExternalResource RESOURCE = new ExternalResource() {
@Override
protected void before() throws Throwable {
      // Set the tx timeout to a ridiculously low value to verify that long-running
      // transactions actually bypass it.
System.setProperty(TxConstants.Manager.CFG_TX_TIMEOUT, "1");
System.setProperty(TxConstants.Manager.CFG_TX_CLEANUP_INTERVAL, "2");
}
  };

/**
* Tests that beforeSubmit() and getSplits() are called in the same transaction,
* and with the same instance of the input dataset.
*/
@Test
public void testTransactionHandling() throws Exception {
final ApplicationWithPrograms app = deployApp(AppWithTxAware.class);
runProgram(app, AppWithTxAware.PedanticMapReduce.class,
new BasicArguments(ImmutableMap.of("outputPath", TEMP_FOLDER_SUPPLIER.get().getPath() + "/output")));
}
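
  /**
   * Tests a MapReduce using file sets, both with distinct input and output datasets
   * and with the same dataset used for input and output.
   */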
@Test
public void testMapreduceWithFileSet() throws Exception {
// test reading and writing distinct datasets, reading more than one path
// hack to use different datasets at each invocation of this test
System.setProperty("INPUT_DATASET_NAME", "numbers");
System.setProperty("OUTPUT_DATASET_NAME", "sums");
Map<String, String> runtimeArguments = Maps.newHashMap();
Map<String, String> inputArgs = Maps.newHashMap();
FileSetArguments.setInputPaths(inputArgs, "abc, xyz");
Map<String, String> outputArgs = Maps.newHashMap();
FileSetArguments.setOutputPath(outputArgs, "a001");
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, "numbers", inputArgs));
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, "sums", outputArgs));
testMapreduceWithFile("numbers", "abc, xyz", "sums", "a001",
AppWithMapReduceUsingFileSet.class,
AppWithMapReduceUsingFileSet.ComputeSum.class,
new BasicArguments(runtimeArguments), null);
// test reading and writing same dataset
// hack to use different datasets at each invocation of this test
System.setProperty("INPUT_DATASET_NAME", "boogie");
System.setProperty("OUTPUT_DATASET_NAME", "boogie");
runtimeArguments = Maps.newHashMap();
inputArgs = Maps.newHashMap();
FileSetArguments.setInputPaths(inputArgs, "zzz");
outputArgs = Maps.newHashMap();
FileSetArguments.setOutputPath(outputArgs, "f123");
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, "boogie", inputArgs));
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, "boogie", outputArgs));
testMapreduceWithFile("boogie", "zzz", "boogie", "f123",
AppWithMapReduceUsingFileSet.class,
AppWithMapReduceUsingFileSet.ComputeSum.class,
new BasicArguments(runtimeArguments), null);
}
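
  /**
   * Tests a MapReduce whose file set datasets are created and specified at runtime,
   * and verifies that the datasets emit metrics.
   */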
@Test
public void testMapreduceWithDynamicDatasets() throws Exception {
Id.DatasetInstance rtInput1 = Id.DatasetInstance.from(DefaultId.NAMESPACE, "rtInput1");
Id.DatasetInstance rtInput2 = Id.DatasetInstance.from(DefaultId.NAMESPACE, "rtInput2");
Id.DatasetInstance rtOutput1 = Id.DatasetInstance.from(DefaultId.NAMESPACE, "rtOutput1");
// create the datasets here because they are not created by the app
dsFramework.addInstance("fileSet", rtInput1, FileSetProperties.builder()
.setBasePath("rtInput1")
.setInputFormat(TextInputFormat.class)
.setOutputFormat(TextOutputFormat.class)
.setOutputProperty(TextOutputFormat.SEPERATOR, ":")
.build());
dsFramework.addInstance("fileSet", rtOutput1, FileSetProperties.builder()
.setBasePath("rtOutput1")
.setInputFormat(TextInputFormat.class)
.setOutputFormat(TextOutputFormat.class)
.setOutputProperty(TextOutputFormat.SEPERATOR, ":")
.build());
// build runtime args for app
Map<String, String> runtimeArguments = Maps.newHashMap();
runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.INPUT_NAME, "rtInput1");
runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.INPUT_PATHS, "abc, xyz");
runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.OUTPUT_NAME, "rtOutput1");
runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.OUTPUT_PATH, "a001");
// test reading and writing distinct datasets, reading more than one path
testMapreduceWithFile("rtInput1", "abc, xyz", "rtOutput1", "a001",
AppWithMapReduceUsingRuntimeDatasets.class,
AppWithMapReduceUsingRuntimeDatasets.ComputeSum.class,
new BasicArguments(runtimeArguments),
AppWithMapReduceUsingRuntimeDatasets.COUNTERS);
// validate that the table emitted metrics
Collection<MetricTimeSeries> metrics =
metricStore.query(new MetricDataQuery(
0,
System.currentTimeMillis() / 1000L,
Integer.MAX_VALUE,
"system." + Constants.Metrics.Name.Dataset.OP_COUNT,
AggregationFunction.SUM,
ImmutableMap.of(Constants.Metrics.Tag.NAMESPACE, DefaultId.NAMESPACE.getId(),
Constants.Metrics.Tag.APP, AppWithMapReduceUsingRuntimeDatasets.APP_NAME,
Constants.Metrics.Tag.MAPREDUCE, AppWithMapReduceUsingRuntimeDatasets.MR_NAME,
Constants.Metrics.Tag.DATASET, "rtt"),
Collections.<String>emptyList()));
Assert.assertEquals(1, metrics.size());
MetricTimeSeries ts = metrics.iterator().next();
Assert.assertEquals(1, ts.getTimeValues().size());
Assert.assertEquals(1, ts.getTimeValues().get(0).getValue());
// test reading and writing same dataset
dsFramework.addInstance("fileSet", rtInput2, FileSetProperties.builder()
.setBasePath("rtInput2")
.setInputFormat(TextInputFormat.class)
.setOutputFormat(TextOutputFormat.class)
.setOutputProperty(TextOutputFormat.SEPERATOR, ":")
.build());
runtimeArguments = Maps.newHashMap();
runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.INPUT_NAME, "rtInput2");
runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.INPUT_PATHS, "zzz");
runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.OUTPUT_NAME, "rtInput2");
runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.OUTPUT_PATH, "f123");
testMapreduceWithFile("rtInput2", "zzz", "rtInput2", "f123",
AppWithMapReduceUsingRuntimeDatasets.class,
AppWithMapReduceUsingRuntimeDatasets.ComputeSum.class,
new BasicArguments(runtimeArguments),
AppWithMapReduceUsingRuntimeDatasets.COUNTERS);
}
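
  /**
   * Writes a sequence of numbers to each input file, runs the given MapReduce to sum them,
   * and verifies the sum in the job's output. If a counter table name is given, also verifies
   * the record counters maintained by the job.
   */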
  private void testMapreduceWithFile(String inputDatasetName, String inputPaths,
                                     String outputDatasetName, String outputPath,
                                     Class<?> appClass, Class<?> mrClass,
                                     Arguments runtimeArgs,
                                     @Nullable final String counterTableName) throws Exception {
final ApplicationWithPrograms app = deployApp(appClass);
Map<String, String> inputArgs = Maps.newHashMap();
Map<String, String> outputArgs = Maps.newHashMap();
FileSetArguments.setInputPaths(inputArgs, inputPaths);
FileSetArguments.setOutputPath(outputArgs, outputPath);
// clear the counters in case a previous test case left behind some values
if (counterTableName != null) {
Transactions.execute(datasetCache.newTransactionContext(), "countersVerify", new Runnable() {
@Override
public void run() {
KeyValueTable counters = datasetCache.getDataset(counterTableName);
counters.delete(AppWithMapReduceUsingRuntimeDatasets.INPUT_RECORDS);
counters.delete(AppWithMapReduceUsingRuntimeDatasets.REDUCE_KEYS);
}
});
}
    // write a handful of numbers to each input file; compute their sum, too
final long[] values = { 15L, 17L, 7L, 3L };
final FileSet input = datasetCache.getDataset(inputDatasetName, inputArgs);
long sum = 0L, count = 1;
long inputRecords = 0;
    for (Location inputLocation : input.getInputLocations()) {
      try (PrintWriter writer = new PrintWriter(inputLocation.getOutputStream())) {
        for (long value : values) {
          value *= count;
          writer.println(value);
          sum += value;
          inputRecords++;
        }
      }
      count++;
    }
runProgram(app, mrClass, runtimeArgs);
    // The output location in the file system is a directory that contains a part file, a _SUCCESS file,
    // and checksums (.<filename>.crc) for these files. Find the actual part file; its name begins with
    // "part". With this small amount of data we have a single reducer, so there should be only one part file.
final FileSet results = datasetCache.getDataset(outputDatasetName, outputArgs);
Location resultLocation = results.getOutputLocation();
if (resultLocation.isDirectory()) {
for (Location child : resultLocation.list()) {
if (!child.isDirectory() && child.getName().startsWith("part")) {
resultLocation = child;
break;
}
}
}
Assert.assertFalse(resultLocation.isDirectory());
    // read the output and verify the result
String line = CharStreams.readFirstLine(
CharStreams.newReaderSupplier(
Locations.newInputSupplier(resultLocation), Charsets.UTF_8));
Assert.assertNotNull(line);
String[] fields = line.split(":");
Assert.assertEquals(2, fields.length);
Assert.assertEquals(AppWithMapReduceUsingFileSet.FileMapper.ONLY_KEY, fields[0]);
Assert.assertEquals(sum, Long.parseLong(fields[1]));
if (counterTableName != null) {
final long totalInputRecords = inputRecords;
Transactions.execute(datasetCache.newTransactionContext(), "countersVerify", new Runnable() {
@Override
public void run() {
KeyValueTable counters = datasetCache.getDataset(counterTableName);
Assert.assertEquals(totalInputRecords,
counters.incrementAndGet(AppWithMapReduceUsingRuntimeDatasets.INPUT_RECORDS, 0L));
Assert.assertEquals(1L, counters.incrementAndGet(AppWithMapReduceUsingRuntimeDatasets.REDUCE_KEYS, 0L));
}
});
}
}
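
  /**
   * Tests that the driver resources configured by the application appear in the MapReduce specification.
   */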
@Test
public void testMapReduceDriverResources() throws Exception {
final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
MapReduceSpecification mrSpec =
app.getSpecification().getMapReduce().get(AppWithMapReduce.ClassicWordCount.class.getSimpleName());
Assert.assertEquals(AppWithMapReduce.ClassicWordCount.MEMORY_MB, mrSpec.getDriverResources().getMemoryMB());
}
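
  /**
   * Tests a MapReduce that reads strings from an object store and writes each string's length
   * to a key value table.
   */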
@Test
public void testMapreduceWithObjectStore() throws Exception {
final ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingObjectStore.class);
final ObjectStore<String> input = datasetCache.getDataset("keys");
final String testString = "persisted data";
    // populate some input data
Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) input).execute(
new TransactionExecutor.Subroutine() {
@Override
public void apply() {
input.write(Bytes.toBytes(testString), testString);
input.write(Bytes.toBytes("distributed systems"), "distributed systems");
}
});
runProgram(app, AppWithMapReduceUsingObjectStore.ComputeCounts.class, false);
final KeyValueTable output = datasetCache.getDataset("count");
    // read the output and verify the result
Transactions.createTransactionExecutor(txExecutorFactory, output).execute(
new TransactionExecutor.Subroutine() {
@Override
public void apply() {
          byte[] val = output.read(Bytes.toBytes(testString));
          Assert.assertNotNull(val);
          Assert.assertEquals(Integer.toString(testString.length()), Bytes.toString(val));
          val = output.read(Bytes.toBytes("distributed systems"));
          Assert.assertNotNull(val);
          Assert.assertEquals("19", Bytes.toString(val));
}
});
}
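
  /**
   * Tests the classic word count example, passing the input and output paths through a dataset.
   */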
@Test
public void testWordCount() throws Exception {
final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
final String inputPath = createInput();
    final File outputDir = new File(tmpFolder.newFolder(), "output");
final KeyValueTable jobConfigTable = datasetCache.getDataset("jobConfig");
// write config into dataset
Transactions.createTransactionExecutor(txExecutorFactory, jobConfigTable).execute(
new TransactionExecutor.Subroutine() {
@Override
public void apply() {
jobConfigTable.write(Bytes.toBytes("inputPath"), Bytes.toBytes(inputPath));
jobConfigTable.write(Bytes.toBytes("outputPath"), Bytes.toBytes(outputDir.getPath()));
}
});
runProgram(app, AppWithMapReduce.ClassicWordCount.class, false);
File[] outputFiles = outputDir.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return name.startsWith("part-r-") && !name.endsWith(".crc");
}
});
Assert.assertNotNull("no output files found", outputFiles);
int lines = 0;
for (File file : outputFiles) {
lines += Files.readLines(file, Charsets.UTF_8).size();
}
    // sanity check that the output is not empty
Assert.assertTrue(lines > 0);
  }

@Test
public void testJobSuccess() throws Exception {
testSuccess(false);
  }

@Test
public void testJobSuccessWithFrequentFlushing() throws Exception {
    // simplest test for periodic flushing
    // NOTE: auto-flush will be changed to take the size of buffered data into account,
    // so there is no need to test the current approach extensively
testSuccess(true);
}
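
  /**
   * Runs the time series aggregation job and verifies the aggregated values as well as the
   * data written by beforeSubmit() and onFinish().
   */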
private void testSuccess(boolean frequentFlushing) throws Exception {
final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
    // start a tx context and do a "get" on all datasets so that they are added to the datasetCache
datasetCache.newTransactionContext();
final TimeseriesTable table = datasetCache.getDataset("timeSeries");
final KeyValueTable beforeSubmitTable = datasetCache.getDataset("beforeSubmit");
final KeyValueTable onFinishTable = datasetCache.getDataset("onFinish");
final Table counters = datasetCache.getDataset("counters");
final Table countersFromContext = datasetCache.getDataset("countersFromContext");
// 1) fill test data
fillTestInputData(txExecutorFactory, table, false);
// 2) run job
final long start = System.currentTimeMillis();
runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing);
final long stop = System.currentTimeMillis();
// 3) verify results
Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(
new TransactionExecutor.Subroutine() {
@Override
public void apply() {
Map<String, Long> expected = Maps.newHashMap();
          // note: not all records contribute to the sums, since the job filters by tag="tag1" and ts={1..3}
expected.put("tag1", 18L);
expected.put("tag2", 3L);
expected.put("tag3", 18L);
Iterator<TimeseriesTable.Entry> agg = table.read(AggregateMetricsByTag.BY_TAGS, start, stop);
int count = 0;
while (agg.hasNext()) {
TimeseriesTable.Entry entry = agg.next();
String tag = Bytes.toString(entry.getTags()[0]);
Assert.assertEquals((long) expected.get(tag), Bytes.toLong(entry.getValue()));
count++;
}
Assert.assertEquals(expected.size(), count);
Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"),
beforeSubmitTable.read(Bytes.toBytes("beforeSubmit")));
Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"),
onFinishTable.read(Bytes.toBytes("onFinish")));
Assert.assertTrue(counters.get(new Get("mapper")).getLong("records", 0) > 0);
Assert.assertTrue(counters.get(new Get("reducer")).getLong("records", 0) > 0);
Assert.assertTrue(countersFromContext.get(new Get("mapper")).getLong("records", 0) > 0);
Assert.assertTrue(countersFromContext.get(new Get("reducer")).getLong("records", 0) > 0);
}
});
datasetCache.dismissTransactionContext();
// todo: verify metrics. Will be possible after refactor for CDAP-765
  }

@Test
public void testJobFailure() throws Exception {
testFailure(false);
  }

@Test
public void testJobFailureWithFrequentFlushing() throws Exception {
testFailure(true);
}
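
  /**
   * Tests a MapReduce that uses a local file to filter stop words from its input.
   */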
@Test
public void testMapReduceWithLocalFiles() throws Exception {
ApplicationWithPrograms appWithPrograms = deployApp(AppWithLocalFiles.class);
URI stopWordsFile = createStopWordsFile();
final KeyValueTable kvTable = datasetCache.getDataset(AppWithLocalFiles.MR_INPUT_DATASET);
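    // write input records; some of the words are stop words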
Transactions.createTransactionExecutor(txExecutorFactory, kvTable).execute(
new TransactionExecutor.Subroutine() {
@Override
public void apply() {
kvTable.write("2324", "a test record");
kvTable.write("43353", "the test table");
kvTable.write("34335", "an end record");
}
}
);
runProgram(appWithPrograms, AppWithLocalFiles.MapReduceWithLocalFiles.class,
new BasicArguments(ImmutableMap.of(
AppWithLocalFiles.MR_INPUT_DATASET, "input",
AppWithLocalFiles.MR_OUTPUT_DATASET, "output",
AppWithLocalFiles.STOPWORDS_FILE_ARG, stopWordsFile.toString()
)));
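    // verify that the stop words were filtered out and the remaining words were counted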
final KeyValueTable outputKvTable = datasetCache.getDataset(AppWithLocalFiles.MR_OUTPUT_DATASET);
Transactions.createTransactionExecutor(txExecutorFactory, outputKvTable).execute(
new TransactionExecutor.Subroutine() {
@Override
public void apply() {
Assert.assertNull(outputKvTable.read("a"));
Assert.assertNull(outputKvTable.read("the"));
Assert.assertNull(outputKvTable.read("an"));
Assert.assertEquals(2, Bytes.toInt(outputKvTable.read("test")));
Assert.assertEquals(2, Bytes.toInt(outputKvTable.read("record")));
Assert.assertEquals(1, Bytes.toInt(outputKvTable.read("table")));
Assert.assertEquals(1, Bytes.toInt(outputKvTable.read("end")));
}
}
);
}
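
  /**
   * Creates a file containing the stop words "the", "a" and "an", and returns its URI.
   */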
private URI createStopWordsFile() throws IOException {
File file = tmpFolder.newFile("stopWords.txt");
try (OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(file))) {
out.write("the\n");
out.write("a\n");
out.write("an");
}
return file.toURI();
}

  // TODO: this tests failure in map tasks. We also need to test failure in reduce tasks,
  // and a job killed by the user.
  private void testFailure(boolean frequentFlushing) throws Exception {
    // We want to verify that when a mapreduce job fails:
    // * things written in beforeSubmit() remain and are visible to others
    // * things written in tasks are not visible to others TODO AAA: do invalidate
    // * things written in onFinish() remain and are visible to others
    // NOTE: the code of this test is similar to the testTimeSeriesRecordsCount() test. We intentionally
    // write some "bad data" that the map tasks recognize as a signal to emulate failure.
final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
    // start a tx context and do a "get" on all datasets so that they are added to the datasetCache
datasetCache.newTransactionContext();
final TimeseriesTable table = datasetCache.getDataset("timeSeries");
final KeyValueTable beforeSubmitTable = datasetCache.getDataset("beforeSubmit");
final KeyValueTable onFinishTable = datasetCache.getDataset("onFinish");
final Table counters = datasetCache.getDataset("counters");
final Table countersFromContext = datasetCache.getDataset("countersFromContext");
// 1) fill test data
fillTestInputData(txExecutorFactory, table, true);
// 2) run job
final long start = System.currentTimeMillis();
runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing);
final long stop = System.currentTimeMillis();
// 3) verify results
Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(
new TransactionExecutor.Subroutine() {
@Override
public void apply() {
          // data should be rolled back. todo: test that partially written data is rolled back too
Assert.assertFalse(table.read(AggregateMetricsByTag.BY_TAGS, start, stop).hasNext());
          // but data written in beforeSubmit() and onFinish() is available to others
Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"),
beforeSubmitTable.read(Bytes.toBytes("beforeSubmit")));
Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"),
onFinishTable.read(Bytes.toBytes("onFinish")));
Assert.assertEquals(0, counters.get(new Get("mapper")).getLong("records", 0));
Assert.assertEquals(0, counters.get(new Get("reducer")).getLong("records", 0));
Assert.assertEquals(0, countersFromContext.get(new Get("mapper")).getLong("records", 0));
Assert.assertEquals(0, countersFromContext.get(new Get("reducer")).getLong("records", 0));
}
});
datasetCache.dismissTransactionContext();
}
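
  /**
   * Fills the time series table with test data inside a transaction.
   */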
private void fillTestInputData(TransactionExecutorFactory txExecutorFactory,
final TimeseriesTable table,
final boolean withBadData) throws TransactionFailureException, InterruptedException {
TransactionExecutor executor = Transactions.createTransactionExecutor(txExecutorFactory, table);
executor.execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() {
fillTestInputData(table, withBadData);
}
});
}
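
  /**
   * Writes entries for two metrics. With the job's filter of metric="metric", ts={1..3} and tag="tag1",
   * the matching entries are those at ts=1 (value 3) and ts=3 (value 15), which aggregate to tag1=18,
   * tag2=3 and tag3=18. If withBadData is true, the entry at ts=3 is written with the value 55L,
   * which the map tasks recognize as a signal to fail.
   */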
private void fillTestInputData(TimeseriesTable table, boolean withBadData) {
byte[] metric1 = Bytes.toBytes("metric");
byte[] metric2 = Bytes.toBytes("metric2");
byte[] tag1 = Bytes.toBytes("tag1");
byte[] tag2 = Bytes.toBytes("tag2");
byte[] tag3 = Bytes.toBytes("tag3");
    // write entries for metric1 with various tag combinations
table.write(new TimeseriesTable.Entry(metric1, Bytes.toBytes(3L), 1, tag3, tag2, tag1));
table.write(new TimeseriesTable.Entry(metric1, Bytes.toBytes(10L), 2, tag2, tag3));
    // a value of 55L makes the map tasks fail
table.write(new TimeseriesTable.Entry(metric1, Bytes.toBytes(withBadData ? 55L : 15L), 3, tag1, tag3));
table.write(new TimeseriesTable.Entry(metric1, Bytes.toBytes(23L), 4, tag2));
table.write(new TimeseriesTable.Entry(metric2, Bytes.toBytes(4L), 3, tag1, tag3));
}
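
  /**
   * Runs the given program with the runtime arguments expected by the time series aggregation job
   * (metric name, time range, tag, and optionally frequent flushing).
   */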
private void runProgram(ApplicationWithPrograms app, Class<?> programClass, boolean frequentFlushing)
throws Exception {
HashMap<String, String> userArgs = Maps.newHashMap();
userArgs.put("metric", "metric");
userArgs.put("startTs", "1");
userArgs.put("stopTs", "3");
userArgs.put("tag", "tag1");
if (frequentFlushing) {
userArgs.put("frequentFlushing", "true");
}
runProgram(app, programClass, new BasicArguments(userArgs));
}
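
  /**
   * Creates a directory containing a small text file to serve as the word count input, and returns its path.
   */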
private String createInput() throws IOException {
File inputDir = tmpFolder.newFolder();
    File inputFile = new File(inputDir, "words.txt");
inputFile.deleteOnExit();
try (BufferedWriter writer = new BufferedWriter(new FileWriter(inputFile))) {
writer.write("this text has");
writer.newLine();
writer.write("two words text inside");
}
return inputDir.getPath();
}
}