/*
* Copyright © 2015-2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.etl.batch;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.dataset.lib.TimePartitionedFileSet;
import co.cask.cdap.etl.mock.test.HydratorTestBase;
import co.cask.cdap.proto.artifact.ArtifactSummary;
import co.cask.cdap.proto.id.ArtifactId;
import co.cask.cdap.proto.id.NamespaceId;
import co.cask.cdap.test.TestConfiguration;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.twill.filesystem.Location;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import java.io.IOException;
import java.util.List;
/**
 * Base test class that sets up plugins and the batch template.
 *
 * <p>Also provides {@link #readOutput(TimePartitionedFileSet, Schema)}, a helper for collecting the Avro
 * records written beneath a {@link TimePartitionedFileSet}.
 */
public class ETLBatchTestBase extends HydratorTestBase {

  /** Id of the application artifact ("app", version "1.0.0") in the default namespace. */
  protected static final ArtifactId APP_ARTIFACT_ID =
    new ArtifactId(NamespaceId.DEFAULT.getNamespace(), "app", "1.0.0");

  /** Summary form of the same application artifact name and version. */
  protected static final ArtifactSummary APP_ARTIFACT = new ArtifactSummary("app", "1.0.0");

  /** Number of times {@link #setupTest()} has been entered; used to run the artifact setup only once. */
  private static int startCount = 0;

  /** Test configuration that sets {@code explore.enabled} to {@code false}. */
  @ClassRule
  public static final TestConfiguration CONFIG = new TestConfiguration("explore.enabled", false);

  /**
   * Sets up the batch artifacts for {@link ETLBatchApplication} under {@link #APP_ARTIFACT_ID}.
   *
   * <p>Only the first invocation performs the setup; because the counter is static, every later invocation
   * returns immediately.
   *
   * @throws Exception if setting up the batch artifacts fails
   */
  @BeforeClass
  public static void setupTest() throws Exception {
    if (startCount++ > 0) {
      return;
    }
    setupBatchArtifacts(APP_ARTIFACT_ID, ETLBatchApplication.class);
  }

  /**
   * Reads every Avro record found under the base location of the given file set's embedded file set.
   *
   * <p>The directory tree is walked exactly three levels deep and only files whose name ends with
   * {@code .avro} are read; everything else is ignored.
   *
   * @param fileSet the time partitioned file set whose output should be read
   * @param schema the schema of the records; it is converted to an Avro schema by parsing its string form
   * @return all records read from the matching files, in the order the locations were listed
   * @throws IOException if listing a location or reading a file fails
   */
  protected List<GenericRecord> readOutput(TimePartitionedFileSet fileSet, Schema schema) throws IOException {
    org.apache.avro.Schema avroSchema = new org.apache.avro.Schema.Parser().parse(schema.toString());
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(avroSchema);
    List<GenericRecord> records = Lists.newArrayList();
    for (Location dayLoc : fileSet.getEmbeddedFileSet().getBaseLocation().list()) {
      // this level should be the day (ex: 2015-01-19)
      for (Location timeLoc : dayLoc.list()) {
        // this level should be the time (ex: 21-23.1234567890000)
        for (Location file : timeLoc.list()) {
          // this level should be the actual mapred output
          if (!file.getName().endsWith(".avro")) {
            continue;
          }
          // try-with-resources so the stream is closed even if reading the records throws
          try (DataFileStream<GenericRecord> fileStream =
                 new DataFileStream<>(file.getInputStream(), datumReader)) {
            Iterables.addAll(records, fileStream);
          }
        }
      }
    }
    return records;
  }
}