/**
*
*/
package uk.bl.wa.apache.solr.hadoop;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.HdfsDirectoryFactory;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author Andrew Jackson <Andrew.Jackson@bl.uk>
*
*/
/**
 * Helper for routing documents to SolrCloud shards from a Hadoop job, and for
 * creating {@link EmbeddedSolrServer} instances whose index lives on HDFS.
 *
 * <p>The shard mapping is derived from the collection's cluster state in
 * ZooKeeper (via {@code ZooKeeperInspector}), so partitions computed here by
 * {@link #getPartition(String, SolrInputDocument)} agree with where SolrCloud
 * itself would route each document.
 */
public class Solate {
    private static final Logger LOG = LoggerFactory.getLogger(Solate.class);

    /** Expected number of shards; validated against the live collection. */
    private final int shards;
    /** Slice name -> shard number, in ZooKeeperInspector's sorted slice order. */
    private final HashMap<String, Integer> shardNumbers;
    /** Cluster-state view of the target collection, read once at construction. */
    private final DocCollection docCollection;
    /** Reusable empty params object passed to the DocRouter on every lookup. */
    private final SolrParams emptySolrParams = new MapSolrParams(
            Collections.<String, String> emptyMap());

    /**
     * Reads the collection's cluster state from ZooKeeper and builds the
     * slice-name-to-shard-number map used by {@link #getPartition}.
     *
     * @param zkHost     ZooKeeper connection string; must not be null
     * @param collection target SolrCloud collection name; must not be null
     * @param numShards  expected shard count; must match the collection's
     *                   actual slice count
     * @throws IllegalArgumentException if any argument is invalid, the
     *                                  collection cannot be read, or the slice
     *                                  count does not match {@code numShards}
     * @throws IllegalStateException    if the sorted slice list disagrees with
     *                                  the expected shard count
     */
    public Solate(String zkHost, String collection, int numShards) {
        this.shards = numShards;
        if (shards <= 0) {
            throw new IllegalArgumentException("Illegal shards: " + shards);
        }
        if (zkHost == null) {
            throw new IllegalArgumentException("zkHost must not be null");
        }
        if (collection == null) {
            throw new IllegalArgumentException("collection must not be null");
        }
        LOG.info("Using SolrCloud zkHost: {}, collection: {}", zkHost,
                collection);
        docCollection = new ZooKeeperInspector().extractDocCollection(zkHost,
                collection);
        if (docCollection == null) {
            throw new IllegalArgumentException("docCollection must not be null");
        }
        // FIX: message previously contained a stray '+' ("Incompatible shards: + 4").
        if (docCollection.getSlicesMap().size() != shards) {
            throw new IllegalArgumentException("Incompatible shards: "
                    + shards + " for docCollection: " + docCollection);
        }
        LOG.info("Got slices: {}", docCollection.getSlices().size());
        for (Slice s : docCollection.getSlices()) {
            LOG.info("Slice: {}", s.getName());
        }
        List<Slice> slices = new ZooKeeperInspector()
                .getSortedSlices(docCollection.getSlices());
        if (slices.size() != shards) {
            throw new IllegalStateException("Incompatible sorted shards: "
                    + shards + " for docCollection: " + docCollection);
        }
        shardNumbers = new HashMap<String, Integer>(slices.size());
        for (int i = 0; i < slices.size(); i++) {
            shardNumbers.put(slices.get(i).getName(), i);
        }
        LOG.debug("Using SolrCloud docCollection: {}", docCollection);
        DocRouter docRouter = docCollection.getRouter();
        if (docRouter == null) {
            throw new IllegalArgumentException("docRouter must not be null");
        }
        LOG.info("Using SolrCloud docRouterClass: {}", docRouter.getClass());
    }

    /**
     * Computes the shard (partition) number for a document, using the same
     * {@link DocRouter} SolrCloud uses for this collection.
     *
     * @param keyStr the document's routing key (typically its unique id)
     * @param doc    the document being routed
     * @return the zero-based shard number in {@code [0, shards)}
     * @throws IllegalStateException if the router yields no slice, an unknown
     *                               slice, or an out-of-range shard number
     */
    public int getPartition(String keyStr, SolrInputDocument doc) {
        DocRouter docRouter = docCollection.getRouter();
        Slice slice = docRouter.getTargetSlice(keyStr, doc, emptySolrParams,
                docCollection);
        if (slice == null) {
            throw new IllegalStateException(
                    "No matching slice found! The slice seems unavailable. docRouterClass: "
                            + docRouter.getClass().getName());
        }
        // FIX: an unknown slice name used to auto-unbox null and throw a bare
        // NullPointerException; fail with a diagnosable message instead.
        Integer shardNumber = shardNumbers.get(slice.getName());
        if (shardNumber == null) {
            throw new IllegalStateException("Unknown slice: " + slice.getName()
                    + " for docCollection: " + docCollection);
        }
        int rootShard = shardNumber.intValue();
        if (rootShard < 0 || rootShard >= shards) {
            throw new IllegalStateException("Illegal shard number " + rootShard
                    + " for slice: " + slice + ", docCollection: "
                    + docCollection);
        }
        return rootShard;
    }

    /**
     * Locates the unpacked Solr home directory among the job's local
     * distributed-cache archives.
     *
     * @param conf    the job configuration holding the cache archive list
     * @param zipName the archive name to look for (the fragment name the
     *                archive was cached under)
     * @return the unpacked directory whose name equals {@code zipName}, or
     *         {@code null} if no archive matches
     * @throws IOException if there are no local cache archives at all
     */
    public static Path findSolrConfig(JobConf conf, String zipName)
            throws IOException {
        Path solrHome = null;
        Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
        if (localArchives == null || localArchives.length == 0) {
            LOG.error("No local cache archives.");
            // FIX: dropped a pointless String.format() with no format specifiers.
            throw new IOException("No local cache archives.");
        }
        for (Path unpackedDir : localArchives) {
            LOG.info("Looking at: " + unpackedDir + " for " + zipName);
            if (unpackedDir.getName().equals(zipName)) {
                LOG.info("Using this unpacked directory as solr home: {}",
                        unpackedDir);
                solrHome = unpackedDir;
                break;
            }
        }
        return solrHome;
    }

    /**
     * Creates an {@link EmbeddedSolrServer} configured (via system properties)
     * to keep its index in HDFS under {@code outputShardDir/data}, relying on
     * the container's own core discovery rather than creating a core here.
     *
     * <p>Modelled on Cloudera Search's SolrRecordWriter:
     * https://github.com/cloudera/search/blob/master/search-mr/src/main/java/
     * org/apache/solr/hadoop/SolrRecordWriter.java#L154
     * and the equivalent in lucene-solr's map-reduce contrib.
     *
     * <p>NOTE(review): configuration is passed through global
     * {@code System.setProperty} calls, so this is not safe to run
     * concurrently with other Solr instances in the same JVM.
     *
     * @param solrHomeDir    unpacked Solr home directory (must not be null)
     * @param fs             HDFS filesystem for creating the data directory
     * @param outputDir      job output directory, used as {@code solr.hdfs.home}
     * @param outputShardDir shard output directory; index data goes in
     *                       {@code outputShardDir/data}
     * @return a running EmbeddedSolrServer bound to the first discovered core
     *         (or to the empty core name if none is discovered)
     * @throws IOException if the Solr home is missing or the data dir cannot
     *                     be created
     */
    public static EmbeddedSolrServer createEmbeddedSolrServer(Path solrHomeDir,
            FileSystem fs, Path outputDir, Path outputShardDir)
            throws IOException {
        if (solrHomeDir == null) {
            throw new IOException("Unable to find solr home setting");
        }
        LOG.info("Creating embedded Solr server with solrHomeDir: "
                + solrHomeDir + ", fs: " + fs + ", outputShardDir: "
                + outputShardDir);
        // FIXME note this is odd (no scheme) given Solr doesn't currently
        // support uris (just abs/relative path)
        Path solrDataDir = new Path(outputShardDir, "data");
        if (!fs.exists(solrDataDir) && !fs.mkdirs(solrDataDir)) {
            throw new IOException("Unable to create " + solrDataDir);
        }
        String dataDirStr = solrDataDir.toUri().toString();
        LOG.info("Attempting to set data dir to: " + dataDirStr);
        System.setProperty("solr.data.dir", dataDirStr);
        System.setProperty("solr.home", solrHomeDir.toString());
        System.setProperty("solr.solr.home", solrHomeDir.toString());
        System.setProperty("solr.hdfs.home", outputDir.toString());
        System.setProperty("solr.directoryFactory",
                HdfsDirectoryFactory.class.getName());
        System.setProperty("solr.lock.type", "hdfs");
        System.setProperty("solr.hdfs.nrtcachingdirectory", "false");
        System.setProperty("solr.hdfs.blockcache.enabled", "true");
        System.setProperty("solr.autoCommit.maxTime", "600000");
        System.setProperty("solr.autoSoftCommit.maxTime", "-1");
        LOG.info("Loading the container...");
        CoreContainer container = new CoreContainer();
        container.load();
        for (String s : container.getAllCoreNames()) {
            LOG.warn("Got core name: " + s);
        }
        // Bind the server to the first core the container discovered, if any.
        String coreName = "";
        if (container.getCoreNames().size() > 0) {
            coreName = container.getCoreNames().iterator().next();
        }
        // FIX: these were logged at ERROR level although they signal normal flow.
        LOG.info("Now firing up the server...");
        EmbeddedSolrServer solr = new EmbeddedSolrServer(container, coreName);
        LOG.info("Server was fired up.");
        return solr;
    }

    /**
     * Creates an {@link EmbeddedSolrServer} by explicitly constructing a core
     * ("core1" from the {@code collection1} instance dir) that writes its
     * index to HDFS under {@code outputShardDir/data}.
     *
     * @param solrHomeDir    unpacked Solr home directory (must not be null)
     * @param fs             HDFS filesystem (only logged here; directories are
     *                       created lazily by the HdfsDirectoryFactory)
     * @param outputShardDir shard output directory; its parent becomes
     *                       {@code solr.hdfs.home}
     * @return a running EmbeddedSolrServer bound to "core1"
     * @throws IOException                   if the Solr home is missing
     * @throws UnsupportedOperationException if the configured DirectoryFactory
     *                                       is not HdfsDirectoryFactory
     */
    public static EmbeddedSolrServer createEmbeddedSolrServer(Path solrHomeDir,
            FileSystem fs, Path outputShardDir) throws IOException {
        if (solrHomeDir == null) {
            throw new IOException("Unable to find solr home setting");
        }
        LOG.info("Creating embedded Solr server with solrHomeDir: "
                + solrHomeDir + ", fs: " + fs + ", outputShardDir: "
                + outputShardDir);
        Path solrDataDir = new Path(outputShardDir, "data");
        String dataDirStr = solrDataDir.toUri().toString();
        Properties props = new Properties();
        props.setProperty(CoreDescriptor.CORE_DATADIR, dataDirStr);
        SolrResourceLoader loader = new SolrResourceLoader(
                solrHomeDir.toString(), null, props);
        LOG.info(String
                .format(Locale.ENGLISH,
                        "Constructed instance information solr.home %s (%s), instance dir %s, conf dir %s, writing index to solr.data.dir %s, with permdir %s",
                        solrHomeDir, solrHomeDir.toUri(),
                        loader.getInstanceDir(), loader.getConfigDir(),
                        dataDirStr, outputShardDir));
        // TODO: This is fragile and should be well documented: configuration
        // travels via global system properties, so concurrent instances in
        // one JVM would interfere with each other.
        System.setProperty("solr.directoryFactory",
                HdfsDirectoryFactory.class.getName());
        System.setProperty("solr.lock.type", "hdfs");
        System.setProperty("solr.hdfs.home", outputShardDir.getParent().toString());
        System.setProperty("solr.hdfs.nrtcachingdirectory", "false");
        System.setProperty("solr.hdfs.blockcache.enabled", "false");
        System.setProperty("solr.autoCommit.maxTime", "600000");
        System.setProperty("solr.autoSoftCommit.maxTime", "-1");
        LOG.info("Instanciating container...");
        CoreContainer container = new CoreContainer(loader);
        LOG.info("Loading container...");
        container.load();
        LOG.info("Creating core descriptor...");
        CoreDescriptor descr = new CoreDescriptor(container, "core1", new Path(
                solrHomeDir, "collection1").toString(), props);
        LOG.info("Creating core...");
        SolrCore core = container.create(descr);
        // Only the HDFS-backed directory factory is supported by this writer.
        if (!(core.getDirectoryFactory() instanceof HdfsDirectoryFactory)) {
            throw new UnsupportedOperationException(
                    "Invalid configuration. Currently, the only DirectoryFactory supported is "
                            + HdfsDirectoryFactory.class.getSimpleName());
        }
        LOG.info("Registering core...");
        container.register(core, false);
        LOG.info("Returning EmbeddedSolrServer...");
        EmbeddedSolrServer solr = new EmbeddedSolrServer(container, "core1");
        return solr;
    }

    /**
     * Zips up a Solr home directory, copies the zip into HDFS, and registers
     * it as a distributed-cache archive named {@code solrHomeZipName} so that
     * tasks can later locate it via {@link #findSolrConfig(JobConf, String)}.
     *
     * @param conf            job configuration to attach the cache archive to
     * @param zkHost          ZooKeeper connection string, or null to skip the
     *                        config download
     * @param collection      collection whose config would be downloaded
     * @param solrHomeZipName archive/fragment name to cache the zip under
     * @throws KeeperException      on ZooKeeper protocol errors
     * @throws InterruptedException if interrupted talking to ZooKeeper
     * @throws IOException          on local or HDFS filesystem errors
     */
    public static void cacheSolrHome(JobConf conf, String zkHost,
            String collection, String solrHomeZipName) throws KeeperException,
            InterruptedException, IOException {
        // use the config that this collection uses for the SolrHomeCache.
        File tmpSolrHomeDir;
        if (zkHost != null) {
            ZooKeeperInspector zki = new ZooKeeperInspector();
            SolrZkClient zkClient = zki.getZkClient(zkHost);
            // FIX: close the ZooKeeper client when done (was leaked).
            try {
                String configName = zki.readConfigName(zkClient, collection);
                tmpSolrHomeDir = zki.downloadConfigDir(zkClient, configName);
            } finally {
                zkClient.close();
            }
        }
        // Override with local config:
        // NOTE(review): this unconditionally discards the config downloaded
        // from ZooKeeper above — presumably a deliberate local-dev override,
        // but confirm before relying on the zkHost branch.
        tmpSolrHomeDir = new File("../warc-indexer/src/main/solr/solr/")
                .getAbsoluteFile();
        // Create a ZIP file:
        File solrHomeLocalZip = File.createTempFile("tmp-", solrHomeZipName);
        solrHomeLocalZip.deleteOnExit();
        Zipper.zipDir(tmpSolrHomeDir, solrHomeLocalZip);
        // Add to HDFS:
        FileSystem fs = FileSystem.get(conf);
        String hdfsSolrHomeDir = fs.getHomeDirectory() + "/solr/tempHome/"
                + solrHomeZipName;
        fs.copyFromLocalFile(new Path(solrHomeLocalZip.toString()), new Path(
                hdfsSolrHomeDir));
        // The '#fragment' names the unpacked directory in the task's cache.
        final URI baseZipUrl = fs.getUri().resolve(
                hdfsSolrHomeDir + '#' + solrHomeZipName);
        // Cache it:
        DistributedCache.addCacheArchive(baseZipUrl, conf);
    }
}