/**
* (c) Copyright 2013 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.mapreduce;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.kiji.mapreduce.avro.generated.JobHistoryEntry;
import org.kiji.mapreduce.framework.JobHistoryKijiTable;
import org.kiji.mapreduce.framework.KijiConfKeys;
import org.kiji.mapreduce.output.MapReduceJobOutputs;
import org.kiji.mapreduce.produce.KijiProduceJobBuilder;
import org.kiji.mapreduce.produce.KijiProducer;
import org.kiji.mapreduce.produce.ProducerContext;
import org.kiji.mapreduce.tools.KijiJobHistory;
import org.kiji.schema.Kiji;
import org.kiji.schema.KijiDataRequest;
import org.kiji.schema.KijiRowData;
import org.kiji.schema.KijiTable;
import org.kiji.schema.KijiURI;
import org.kiji.schema.layout.KijiTableLayout;
import org.kiji.schema.testutil.AbstractKijiIntegrationTest;
/**
* Integration test for the job history table.
*/
public class IntegrationTestJobHistoryKijiTable extends AbstractKijiIntegrationTest {
/** Logger used by the tests below to report the names and ids of the jobs they run. */
private static final Logger LOG = LoggerFactory.getLogger(
IntegrationTestJobHistoryKijiTable.class);
/**
 * Checks that the job history table can be opened (and closed again) on the test
 * Kiji instance.
 *
 * @throws Exception if the Kiji instance or the job history table cannot be opened.
 */
@Test
public void testInstallAndOpen() throws Exception {
  final Kiji instance = Kiji.Factory.open(getKijiURI());
  try {
    // Opening the table is the whole test: any difficulty surfaces as an exception here.
    JobHistoryKijiTable.open(instance).close();
  } finally {
    instance.release();
  }
}
/**
 * Sets up an older version of the job history table and ensures that its layout
 * is updated once the job history table is opened.
 *
 * @throws Exception Upon failure to install or upgrade the job history table.
 */
@Test
public void testUpgradeJobHistoryTable() throws Exception {
  final String tableName = "job_history";
  // Classpath resource holding the old table layout.
  final String tableLayoutResource = "/org/kiji/mapreduce/job-history-layout.json";
  // All job history tables have at least this version.
  final String prevTableLayoutVersion = "1";
  // Latest job history layout version.
  final String jhTableLayoutVersion = "2";

  final Kiji kiji = Kiji.Factory.open(getKijiURI());
  try {
    // If the job history table exists, delete it so the old layout can be created.
    if (kiji.getTableNames().contains(tableName)) {
      kiji.deleteTable(tableName);
    }

    // Create a job history table with the older layout.
    kiji.createTable(
        KijiTableLayout.createFromEffectiveJsonResource(tableLayoutResource).getDesc());
    // assertEquals takes (expected, actual); keeping that order makes failure
    // messages report the right value as "expected".
    assertEquals("Unexpected layout id for the job history table created from the old layout.",
        prevTableLayoutVersion,
        kiji.getMetaTable().getTableLayout(tableName).getDesc().getLayoutId());

    // Now install the job history table by opening it.
    final JobHistoryKijiTable jobHistory = JobHistoryKijiTable.open(kiji);
    try {
      assertEquals("Job history table layout was not updated to the latest version.",
          jhTableLayoutVersion,
          kiji.getMetaTable().getTableLayout(tableName).getDesc().getLayoutId());
    } finally {
      jobHistory.close();
    }
  } finally {
    kiji.release();
  }
}
/**
 * Producer used to test job recording: reads the "info:email" column of a row and
 * writes the part after the '@' to "derived:domain".
 */
public static class EmailDomainProducer extends KijiProducer {
  /** {@inheritDoc} */
  @Override
  public KijiDataRequest getDataRequest() {
    // The email address column is the only input this producer reads.
    return KijiDataRequest.create("info", "email");
  }

  /** {@inheritDoc} */
  @Override
  public String getOutputColumn() {
    return "derived:domain";
  }

  /** {@inheritDoc} */
  @Override
  public void produce(KijiRowData input, ProducerContext context) throws IOException {
    // Rows without an email address produce nothing.
    if (input.containsColumn("info", "email")) {
      final String address = input.getMostRecentValue("info", "email").toString();
      final int separator = address.indexOf('@');
      // Addresses without an '@' have no domain to extract, so nothing is written.
      if (separator >= 0) {
        context.put(address.substring(separator + 1));
      }
    }
  }
}
/**
 * Producer used to test the recording of failed jobs: it declares the same input and
 * output columns as a working email-domain producer, but throws on every row.
 */
public static class BrokenEmailDomainProducer extends KijiProducer {
  /** {@inheritDoc} */
  @Override
  public KijiDataRequest getDataRequest() {
    // The email address column is the only input this producer requests.
    return KijiDataRequest.create("info", "email");
  }

  /** {@inheritDoc} */
  @Override
  public String getOutputColumn() {
    return "derived:domain";
  }

  /** {@inheritDoc} */
  @Override
  public void produce(KijiRowData input, ProducerContext context) throws IOException {
    // Fail unconditionally, whatever the row contains.
    final String failure = "This producer always fails.";
    throw new RuntimeException(failure);
  }
}
/**
 * Runs a producer job that is expected to succeed and checks the basic information
 * recorded for it in the job history table: name, id, start/end times, end status,
 * counters and the serialized job configuration.
 *
 * @throws Exception if the job cannot be built or run, or its history cannot be read.
 */
@Test
public void testMappers() throws Exception {
  createAndPopulateFooTable();
  final Configuration jobConf = getConf();
  // Set a value in the configuration. We'll check to be sure we can retrieve it later.
  jobConf.set("conf.test.animal.string", "squirrel");
  final Kiji kiji = Kiji.Factory.open(getKijiURI());
  try {
    final KijiURI fooTableURI = KijiURI.newBuilder(getKijiURI()).withTableName("foo").build();
    final JobHistoryKijiTable jobHistory = JobHistoryKijiTable.open(kiji);
    try {
      // Construct a Producer for this table.
      final KijiProduceJobBuilder builder = KijiProduceJobBuilder.create()
          .withConf(jobConf)
          .withInputTable(fooTableURI)
          .withProducer(EmailDomainProducer.class)
          .withOutput(MapReduceJobOutputs.newDirectKijiTableMapReduceJobOutput(fooTableURI));
      final KijiMapReduceJob mrJob = builder.build();

      // Capture the job name before running the job, and the job id afterwards.
      final String jobName = mrJob.getHadoopJob().getJobName();
      LOG.info("About to run job: {}", jobName);
      assertTrue(mrJob.run());
      final String jobId = mrJob.getHadoopJob().getJobID().toString();
      LOG.info("Job was run with id: {}", jobId);

      // Retrieve the recorded values and sanity test them.
      // assertEquals takes (expected, actual): the locally known value goes first.
      final JobHistoryEntry jobEntry = jobHistory.getJobDetails(jobId);
      assertEquals(jobName, jobEntry.getJobName());
      assertEquals(jobId, jobEntry.getJobId());
      assertTrue(jobEntry.getJobStartTime() < jobEntry.getJobEndTime());
      assertEquals("SUCCEEDED", jobEntry.getJobEndStatus());

      // Check counters. We don't know the exact number of rows in the foo table, so just
      // check that it's greater than 0.
      final String countersString = jobEntry.getJobCounters();
      final Pattern countersPattern = Pattern.compile("PRODUCER_ROWS_PROCESSED=(\\d+)");
      final Matcher countersMatcher = countersPattern.matcher(countersString);
      assertTrue("PRODUCER_ROWS_PROCESSED counter missing from recorded counters.",
          countersMatcher.find());
      // Parse as long: a counter value is not guaranteed to fit in an int.
      assertTrue("PRODUCER_ROWS_PROCESSED counter should be positive.",
          Long.parseLong(countersMatcher.group(1)) > 0L);

      // Test to make sure the Configuration has the correct producer class, and records
      // the value we set previously.
      final String configString = jobEntry.getJobConfiguration();
      final Configuration config = new Configuration();
      // Encode explicitly instead of relying on the platform default charset.
      // NOTE(review): UTF-8 is assumed to be the encoding of the recorded XML
      // configuration; confirm against the code that writes the history entry.
      config.addResource(new ByteArrayInputStream(configString.getBytes("UTF-8")));
      assertEquals("Recorded configuration does not name the expected producer class.",
          EmailDomainProducer.class, config.getClass(KijiConfKeys.KIJI_PRODUCER_CLASS, null));
      assertEquals("Couldn't retrieve configuration field from deserialized configuration.",
          "squirrel", config.get("conf.test.animal.string"));
    } finally {
      jobHistory.close();
    }
  } finally {
    kiji.release();
  }
}
/**
 * Runs a producer that throws on every row and verifies that the job is reported as
 * complete but unsuccessful, and that the history table records it as "FAILED".
 *
 * @throws Exception if the job cannot be built or run, or its history cannot be read.
 */
@Test
public void testFailingJob() throws Exception {
  createAndPopulateFooTable();
  final Configuration conf = getConf();
  final Kiji instance = Kiji.Factory.open(getKijiURI());
  try {
    final KijiURI tableURI = KijiURI.newBuilder(getKijiURI()).withTableName("foo").build();
    final JobHistoryKijiTable history = JobHistoryKijiTable.open(instance);
    try {
      // Build a produce job around the producer that always throws.
      final KijiMapReduceJob failingJob = KijiProduceJobBuilder.create()
          .withConf(conf)
          .withInputTable(tableURI)
          .withProducer(BrokenEmailDomainProducer.class)
          .withOutput(MapReduceJobOutputs.newDirectKijiTableMapReduceJobOutput(tableURI))
          .build();

      // Run the job; it must finish, and it must not succeed.
      final String name = failingJob.getHadoopJob().getJobName();
      LOG.info("About to run failing job: " + name);
      final boolean succeeded = failingJob.run();
      assertFalse(succeeded);
      final String id = failingJob.getHadoopJob().getJobID().toString();
      LOG.info("Job was run with id: " + id);
      assertTrue(failingJob.getHadoopJob().isComplete());
      assertFalse(failingJob.getHadoopJob().isSuccessful());

      // The history table must have recorded the failure as well.
      final JobHistoryEntry recorded = history.getJobDetails(id);
      assertEquals("FAILED", recorded.getJobEndStatus());
    } finally {
      history.close();
    }
  } finally {
    instance.release();
  }
}
/**
 * Makes sure information is recorded correctly for a job run with .submit() instead
 * of .run(). Only checks timing info.
 *
 * <p>The wait for job completion is bounded: if the submitted job has not completed
 * before the deadline, the test fails instead of polling forever.</p>
 *
 * @throws Exception if the job cannot be built or submitted, or its history cannot be read.
 */
@Test
public void testSubmit() throws Exception {
  // Upper bound on how long the submitted job may take before the test gives up.
  final long jobTimeoutMillis = 10L * 60L * 1000L;
  // Delay between two polls of the job status.
  final long pollIntervalMillis = 1000L;

  createAndPopulateFooTable();
  final Kiji kiji = Kiji.Factory.open(getKijiURI());
  try {
    final KijiURI fooTableURI = KijiURI.newBuilder(getKijiURI()).withTableName("foo").build();
    final JobHistoryKijiTable jobHistory = JobHistoryKijiTable.open(kiji);
    try {
      // Construct a Producer for this table.
      final KijiProduceJobBuilder builder = KijiProduceJobBuilder.create()
          .withConf(getConf())
          .withInputTable(fooTableURI)
          .withProducer(EmailDomainProducer.class)
          .withOutput(MapReduceJobOutputs.newDirectKijiTableMapReduceJobOutput(fooTableURI));
      final KijiMapReduceJob mrJob = builder.build();

      LOG.info("About to submit job: {}", mrJob.getHadoopJob().getJobName());
      final KijiMapReduceJob.Status status = mrJob.submit();

      // Poll until the job completes, but never past the deadline: a job that hangs
      // should fail this test rather than block the whole test run.
      final long deadline = System.currentTimeMillis() + jobTimeoutMillis;
      while (!status.isComplete()) {
        assertTrue("Submitted job did not complete within " + jobTimeoutMillis + " ms.",
            System.currentTimeMillis() < deadline);
        Thread.sleep(pollIntervalMillis);
      }
      assertTrue(status.isSuccessful());
      final String jobId = mrJob.getHadoopJob().getJobID().toString();
      LOG.info("Job successfully submitted and run. Id: {}", jobId);

      // The job recording takes place in a separate thread, so sleep a bit to give it
      // time to write out.
      Thread.sleep(5000L);
      final JobHistoryEntry jobEntry = jobHistory.getJobDetails(jobId);
      assertTrue(jobEntry.getJobStartTime() < jobEntry.getJobEndTime());
    } finally {
      jobHistory.close();
    }
  } finally {
    kiji.release();
  }
}
/**
 * Runs a produce job without opening a job history table first and checks that the
 * job still runs to successful completion.
 *
 * @throws Exception if the job cannot be built or run.
 */
@Test
public void testMissingHistoryTableNonfatal() throws Exception {
  createAndPopulateFooTable();
  // Deliberately no JobHistoryKijiTable.open() here: no job history table is created.
  final Kiji instance = Kiji.Factory.open(getKijiURI());
  try {
    final KijiTable table = instance.openTable("foo");
    try {
      final KijiMapReduceJob job = KijiProduceJobBuilder.create()
          .withConf(getConf())
          .withInputTable(table.getURI())
          .withProducer(EmailDomainProducer.class)
          .withOutput(
              MapReduceJobOutputs.newDirectKijiTableMapReduceJobOutput(table.getURI()))
          .build();
      final boolean succeeded = job.run();
      assertTrue(succeeded);
    } finally {
      table.release();
    }
  } finally {
    instance.release();
  }
}
/**
 * Tests the output of the job-history tool.
 *
 * <p>Runs a first produce job and checks the tool's verbose output for that job id,
 * then runs a second produce job and checks that the tool's output for the whole
 * instance describes both jobs.</p>
 *
 * @throws Exception if a job cannot be built or run, or the tool cannot be run.
 */
@Test
public void testJobHistoryTool() throws Exception {
  createAndPopulateFooTable();
  final Configuration jobConf = getConf();
  final Kiji kiji = Kiji.Factory.open(getKijiURI());
  try {
    final KijiURI fooTableURI = KijiURI.newBuilder(getKijiURI()).withTableName("foo").build();
    // Construct two producer jobs for this table from the same builder.
    final KijiProduceJobBuilder builderEmailDomain = KijiProduceJobBuilder.create()
        .withConf(jobConf)
        .withInputTable(fooTableURI)
        .withProducer(EmailDomainProducer.class)
        .withOutput(MapReduceJobOutputs.newDirectKijiTableMapReduceJobOutput(fooTableURI));
    final KijiMapReduceJob mrJobOne = builderEmailDomain.build();
    final KijiMapReduceJob mrJobTwo = builderEmailDomain.build();

    // Run the first produce job.
    final String jobOneName = mrJobOne.getHadoopJob().getJobName();
    LOG.info("About to run job: {}", jobOneName);
    assertTrue(mrJobOne.run());
    final String jobOneId = mrJobOne.getHadoopJob().getJobID().toString();
    LOG.info("Job was run with id: {}", jobOneId);

    // Get the StdOut from the job-history tool, restricted to the first job.
    String jobHistoryStdOut = runTool(new KijiJobHistory(),
        new String[]{"--kiji=" + kiji.getURI(), "--job-id=" + jobOneId,
            "--verbose", "--counter-names"}).getStdout("Utf-8");

    final JobHistoryKijiTable jobHistory = JobHistoryKijiTable.open(kiji);
    try {
      // Check if the StdOut contains the job history for the first job.
      final JobHistoryEntry jobOneEntry = jobHistory.getJobDetails(jobOneId);
      assertToolOutputDescribesJob(jobHistoryStdOut, jobOneEntry);
      // Counters, configuration and counter names are only checked for this first,
      // verbose invocation of the tool.
      assertTrue("Job counters missing from tool output.",
          jobHistoryStdOut.contains(jobOneEntry.getJobCounters()));
      assertTrue("Job configuration missing from tool output.",
          jobHistoryStdOut.contains(jobOneEntry.getJobConfiguration()));
      assertTrue("Counter names missing from tool output.",
          jobHistoryStdOut.contains(
              Arrays.toString(jobOneEntry.getCountersFamily().keySet().toArray())));
      // Check for a specific counter to guard against an empty array in the check above.
      assertTrue("PRODUCER_ROWS_PROCESSED counter name missing from tool output.",
          jobHistoryStdOut.contains(
              "org.kiji.mapreduce.framework.JobHistoryCounters:PRODUCER_ROWS_PROCESSED"));

      // Run the second produce job.
      final String jobTwoName = mrJobTwo.getHadoopJob().getJobName();
      LOG.info("About to run job: {}", jobTwoName);
      assertTrue(mrJobTwo.run());
      final String jobTwoId = mrJobTwo.getHadoopJob().getJobID().toString();
      LOG.info("Job was run with id: {}", jobTwoId);

      // Get the StdOut from the job-history tool again, this time for the whole instance.
      jobHistoryStdOut = runTool(new KijiJobHistory(),
          new String[]{"--kiji=" + kiji.getURI()}).getStdout("Utf-8");

      // The output must describe the first job again, and the second job as well.
      assertToolOutputDescribesJob(jobHistoryStdOut, jobOneEntry);
      final JobHistoryEntry jobTwoEntry = jobHistory.getJobDetails(jobTwoId);
      assertToolOutputDescribesJob(jobHistoryStdOut, jobTwoEntry);
    } finally {
      jobHistory.close();
    }
  } finally {
    kiji.release();
  }
}

/**
 * Asserts that the job-history tool output contains the name, id, start time, end time
 * and end status recorded in a job history entry.
 *
 * @param toolOutput Standard output captured from the job-history tool.
 * @param entry Job history entry whose summary fields must appear in the output.
 */
private static void assertToolOutputDescribesJob(String toolOutput, JobHistoryEntry entry) {
  assertTrue("Job name missing from tool output.",
      toolOutput.contains(entry.getJobName()));
  assertTrue("Job id missing from tool output.",
      toolOutput.contains(entry.getJobId()));
  // Times are matched through Date.toString() of the recorded timestamps.
  assertTrue("Job start time missing from tool output.",
      toolOutput.contains(new Date(entry.getJobStartTime()).toString()));
  assertTrue("Job end time missing from tool output.",
      toolOutput.contains(new Date(entry.getJobEndTime()).toString()));
  assertTrue("Job end status missing from tool output.",
      toolOutput.contains(entry.getJobEndStatus()));
}
}