/*
* Copyright 2012 NGDATA nv
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lilyproject.mapreduce;
import java.util.List;
import com.google.common.base.Joiner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.codehaus.jackson.JsonNode;
import org.lilyproject.client.LilyClient;
import org.lilyproject.repository.api.LRepository;
import org.lilyproject.repository.api.RecordScan;
import org.lilyproject.tools.import_.json.RecordScanWriter;
import org.lilyproject.tools.import_.json.WriteOptions;
import org.lilyproject.util.exception.ExceptionUtil;
import org.lilyproject.util.json.JsonFormat;
public class LilyMapReduceUtil {
public static final String ZK_CONNECT_STRING = "lily.mapreduce.zookeeper";
/**
* Config key for storing the list of repository tables to be run through the mapper.
*/
public static final String REPOSITORY_TABLES = "lily.mapreduce.tables";
/**
* Config key for storing the repository on which the MR job should be executed.
*/
public static final String REPOSITORY_NAME = "lily.mapreduce.repository";
private LilyMapReduceUtil() {
}
/**
* Set the necessary parameters inside the job configuration for using Lily as input.
*/
public static void initMapperJob(RecordScan scan, String zooKeeperConnectString, LRepository repository, Job job) {
initMapperJob(scan, false, zooKeeperConnectString, repository, job, null);
}
/**
* Initialize a mapper job to run on a specific set of repository tables.
*/
public static void initMapperJob(RecordScan scan, String zooKeeperConnectString, LRepository repository,
Job job, List<String> repositoryTables) {
initMapperJob(scan, false, zooKeeperConnectString, repository, job, repositoryTables);
}
public static void initMapperJob(RecordScan scan, boolean returnIdRecords, String zooKeeperConnectString,
LRepository repository, Job job) {
initMapperJob(scan, returnIdRecords, zooKeeperConnectString, repository, job, null);
}
/**
* Set the necessary parameters inside the job configuration for using Lily as input.
*/
public static void initMapperJob(RecordScan scan, boolean returnIdRecords, String zooKeeperConnectString,
LRepository repository, Job job, List<String> repositoryTables) {
if (returnIdRecords) {
job.setInputFormatClass(LilyIdScanInputFormat.class);
} else {
job.setInputFormatClass(LilyScanInputFormat.class);
}
job.getConfiguration().set(ZK_CONNECT_STRING, zooKeeperConnectString);
job.getConfiguration().set(REPOSITORY_NAME, repository.getRepositoryName());
if (repositoryTables != null && !repositoryTables.isEmpty()) {
job.getConfiguration().set(REPOSITORY_TABLES, Joiner.on(',').join(repositoryTables));
}
if (scan != null) {
try {
JsonNode node = RecordScanWriter.INSTANCE.toJson(scan, new WriteOptions(), repository);
String scanData = JsonFormat.serializeAsString(node);
job.getConfiguration().set(AbstractLilyScanInputFormat.SCAN, scanData);
} catch (Exception e) {
ExceptionUtil.handleInterrupt(e);
throw new RuntimeException(e);
}
}
}
/**
* Creates a LilyClient based on the information found in the Configuration object.
*/
public static LilyClient getLilyClient(Configuration conf) throws InterruptedException {
String zkConnectString = conf.get(ZK_CONNECT_STRING);
try {
return new LilyClient(zkConnectString, 30000);
} catch (InterruptedException e) {
throw e;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}