package org.opencb.opencga.storage.core.search;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.BinaryRequestWriter;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.CoreAdminRequest;
import org.apache.solr.client.solrj.request.CoreStatus;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.opencb.biodata.formats.variant.io.VariantReader;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.VariantSource;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.utils.FileUtils;
import org.opencb.opencga.core.results.VariantQueryResult;
import org.opencb.opencga.storage.core.config.StorageConfiguration;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.exceptions.VariantSearchException;
import org.opencb.opencga.storage.core.search.solr.SolrQueryParser;
import org.opencb.opencga.storage.core.search.solr.SolrVariantIterator;
import org.opencb.opencga.storage.core.search.solr.SolrVariantSearchIterator;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptorUtils;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBIterator;
import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
/**
 * Manages the Solr side of the variant search: creation and lookup of cores/collections, bulk loading of
 * variants (from JSON/Avro files or from a {@link VariantDBIterator}) and querying.
 *
 * The manager keeps one {@link HttpSolrClient} bound to the collection used by the last load/query call;
 * asking for a different collection replaces (and closes) that client.
 * NOTE(review): the client/collection swap is not synchronised, so instances look unsafe to share between
 * threads that work on different collections - confirm against callers.
 *
 * Created by wasim on 09/11/16.
 */
public class VariantSearchManager {

    /** Number of variants sent to Solr per insert batch. */
    private static final int DEFAULT_INSERT_SIZE = 10000;

    // Initialised inline so that it is available whichever constructor is used.
    private final Logger logger = LoggerFactory.getLogger(VariantSearchManager.class);
    private final VariantSearchToVariantConverter variantSearchToVariantConverter;

    private String collection;
    private StorageConfiguration storageConfiguration;
    private SolrQueryParser solrQueryParser;
    private VariantDBAdaptorUtils variantDBAdaptorUtils;
    private HttpSolrClient solrClient;

    /**
     * Create a manager bound to the Solr URL obtained by concatenating host and collection.
     *
     * This constructor sets neither the storage configuration nor the query parser, and it does not record
     * the collection name, so the collection-based methods of this class cannot be used on such an instance.
     *
     * @param host       Solr host URL; the collection name is appended to it as is
     * @param collection Collection name
     * @deprecated use {@link #VariantSearchManager(VariantDBAdaptorUtils, StorageConfiguration)}
     */
    @Deprecated
    public VariantSearchManager(String host, String collection) {
        this.solrClient = new HttpSolrClient.Builder(host + collection).build();
        this.solrClient.setRequestWriter(new BinaryRequestWriter());
        this.variantSearchToVariantConverter = new VariantSearchToVariantConverter();
    }

    /**
     * Create a manager that reads the Solr host from the storage configuration. The Solr client is created
     * lazily, the first time a collection is loaded or queried.
     *
     * @param variantDBAdaptorUtils Utils handed to the Solr query parser
     * @param storageConfiguration  Storage configuration providing the search host
     */
    public VariantSearchManager(VariantDBAdaptorUtils variantDBAdaptorUtils, StorageConfiguration storageConfiguration) {
        this.variantDBAdaptorUtils = variantDBAdaptorUtils;
        this.storageConfiguration = storageConfiguration;
        this.variantSearchToVariantConverter = new VariantSearchToVariantConverter();
        this.solrQueryParser = new SolrQueryParser(this.variantDBAdaptorUtils);
    }

    /**
     * Make sure the Solr client points to the given collection, creating the collection if it does not exist.
     * Nothing is done when the client is already bound to that collection.
     *
     * @param collection Collection name
     * @throws VariantSearchException if the collection has to be created and the creation fails
     */
    private void init(String collection) throws VariantSearchException {
        boolean alreadyBound = this.solrClient != null
                && StringUtils.isNotEmpty(this.collection)
                && this.collection.equals(collection);
        if (alreadyBound) {
            return;
        }

        // check if collection exist
        if (!existCollection(collection)) {
            createCollection(collection);
        }

        HttpSolrClient previousClient = this.solrClient;
        HttpSolrClient newClient = new HttpSolrClient.Builder(storageConfiguration.getSearch().getHost() + collection).build();
        newClient.setRequestWriter(new BinaryRequestWriter());
        this.solrClient = newClient;
        this.collection = collection;

        // Release the client of the previous collection only once the new one is in place.
        closeQuietly(previousClient);
    }

    /**
     * Close a Solr client, logging (instead of propagating) any failure.
     *
     * @param client Client to close, may be null
     */
    private void closeQuietly(HttpSolrClient client) {
        if (client == null) {
            return;
        }
        try {
            client.close();
        } catch (Exception e) {
            // Best-effort clean-up: a failure to release the old client must not break the caller.
            logger.warn("Error closing Solr client", e);
        }
    }

    /**
     * Build a short-lived client pointing to the Solr host itself (no collection), used for admin requests.
     * Callers are responsible for closing it.
     *
     * @return New Solr client
     */
    private HttpSolrClient newAdminClient() {
        return new HttpSolrClient.Builder(storageConfiguration.getSearch().getHost()).build();
    }

    /**
     * Check that Solr answers a ping for the given collection. The collection is initialised (and created
     * if missing) first.
     *
     * @param collection Collection name
     * @return True if the ping status is "OK"; false if it is not, or if the initialisation or the ping fail
     */
    public boolean isAlive(String collection) {
        try {
            // init collection
            init(collection);
            Object status = solrClient.ping().getResponse().get("status");
            // Constant first: a response without status must mean "not alive", not a NullPointerException.
            return "OK".equals(status);
        } catch (VariantSearchException | SolrServerException | IOException e) {
            logger.debug("Solr collection '{}' is not alive", collection, e);
            return false;
        }
    }

    /**
     * Create a Solr core from a configuration set directory. By default, the configuration set directory is located
     * inside the folder server/solr/configsets.
     *
     * Failures are logged, not propagated.
     *
     * @param coreName  Core name
     * @param configSet Configuration set name
     */
    public void createCore(String coreName, String configSet) {
        try (HttpSolrClient adminClient = newAdminClient()) {
            logger.debug("Creating core: {}, core={}, configSet={}", storageConfiguration.getSearch().getHost(), coreName,
                    configSet);
            CoreAdminRequest.Create request = new CoreAdminRequest.Create();
            request.setCoreName(coreName);
            request.setConfigSet(configSet);
            request.process(adminClient);
        } catch (Exception e) {
            logger.error("Error creating core '{}' from configSet '{}'", coreName, configSet, e);
        }
    }

    /**
     * Check if a given core exists.
     *
     * @param coreName Core name
     * @return True or false; any error while asking Solr is reported as false
     */
    public boolean existCore(String coreName) {
        try (HttpSolrClient adminClient = newAdminClient()) {
            CoreStatus status = CoreAdminRequest.getCoreStatus(coreName, adminClient);
            // if the status.response is null, catch the exception
            status.getInstanceDirectory();
            return true;
        } catch (Exception e) {
            logger.debug("Core '{}' not found", coreName, e);
            return false;
        }
    }

    /**
     * Create a Solr collection with default parameters: configuration "OpenCGAConfSet", one shard and one replica.
     *
     * @param collectionName Collection name
     * @throws VariantSearchException if the collection cannot be created
     */
    public void createCollection(String collectionName) throws VariantSearchException {
        createCollection(collectionName, "OpenCGAConfSet", 1, 1);
    }

    /**
     * Create a Solr collection from a configuration directory. The configuration has to be uploaded to the zookeeper,
     * {@code $ ./bin/solr zk upconfig -n <config name> -d <path to the config dir> -z <host:port zookeeper>}.
     * For Solr, collection name, configuration name and number of shards are mandatory in order to create a collection.
     * Number of replicas is optional.
     *
     * @param collectionName Collection name
     * @param config         Configuration name
     * @param numShards      Number of shards
     * @param numReplicas    Number of replicas
     * @throws VariantSearchException wrapping any error raised while creating the collection
     */
    public void createCollection(String collectionName, String config, int numShards, int numReplicas) throws VariantSearchException {
        logger.debug("Creating collection: {}, collection={}, config={}, numShards={}, numReplicas={}",
                storageConfiguration.getSearch().getHost(), collectionName, config, numShards, numReplicas);
        try (HttpSolrClient adminClient = newAdminClient()) {
            CollectionAdminRequest.createCollection(collectionName, config, numShards, numReplicas).process(adminClient);
        } catch (Exception e) {
            throw new VariantSearchException(e.getMessage(), e);
        }
    }

    /**
     * Check if a given collection exists.
     *
     * @param collectionName Collection name
     * @return True or false; any error while listing the collections is reported as false
     */
    public boolean existCollection(String collectionName) {
        try (HttpSolrClient adminClient = newAdminClient()) {
            List<String> collections = CollectionAdminRequest.listCollections(adminClient);
            return collections.contains(collectionName);
        } catch (Exception e) {
            logger.debug("Could not find out whether collection '{}' exists", collectionName, e);
            return false;
        }
    }

    /**
     * Load a Solr core/collection from a Avro or JSON file. The format is chosen from the file name ending:
     * "json"/"json.gz" or "avro"/"avro.gz".
     *
     * @param collection Collection name
     * @param path       Path to the file to load
     * @throws IOException            if the file cannot be read or its format is not supported
     * @throws VariantSearchException if Solr rejects the data or the collection cannot be initialised
     * @throws StorageEngineException if the Avro reader raises it
     */
    public void load(String collection, Path path) throws IOException, VariantSearchException, StorageEngineException {
        // TODO: can we use VariantReaderUtils as implemented in the function load00 below ?
        // TODO: VarriantReaderUtils supports JSON, AVRO and VCF file formats.

        // Check path is not null and exists.
        FileUtils.checkFile(path);

        // Init collection if needed
        init(collection);

        File file = path.toFile();
        String fileName = file.getName();
        if (fileName.endsWith("json") || fileName.endsWith("json.gz")) {
            loadJson(path);
        } else if (fileName.endsWith("avro") || fileName.endsWith("avro.gz")) {
            loadAvro(path);
        } else {
            throw new IOException("File format " + path + " not supported. Please, use Avro or JSON file formats.");
        }
    }

    /**
     * Load a Solr core/collection from a variant DB iterator. Variants are sent to Solr in batches of
     * {@link #DEFAULT_INSERT_SIZE}. A null iterator is ignored.
     *
     * @param collection        Collection name
     * @param variantDBIterator Iterator to retrieve the variants to load
     * @throws IOException            IOException
     * @throws VariantSearchException VariantSearchException
     */
    public void load(String collection, VariantDBIterator variantDBIterator) throws IOException, VariantSearchException {
        if (variantDBIterator == null) {
            return;
        }

        // init collection if needed
        init(collection);

        int count = 0;
        List<Variant> batch = new ArrayList<>(DEFAULT_INSERT_SIZE);
        while (variantDBIterator.hasNext()) {
            batch.add(variantDBIterator.next());
            count++;
            if (batch.size() == DEFAULT_INSERT_SIZE) {
                insert(batch);
                batch.clear();
            }
        }

        // insert the remaining variants
        if (!batch.isEmpty()) {
            insert(batch);
        }

        logger.info("Loading done: {} variants.", count);
    }

    /**
     * Return the list of Variant objects from a Solr core/collection
     * according a given query.
     *
     * @param collection   Collection name
     * @param query        Query
     * @param queryOptions Query options
     * @return Query result with the matching variants, the elapsed time and the number of matches found by Solr
     * @throws IOException            IOException
     * @throws VariantSearchException VariantSearchException
     */
    public VariantQueryResult<Variant> query(String collection, Query query, QueryOptions queryOptions)
            throws IOException, VariantSearchException {
        // we don't initialize here the collection, the iterator does
        StopWatch stopWatch = StopWatch.createStarted();

        List<Variant> results = new ArrayList<>();
        SolrVariantIterator iterator = iterator(collection, query, queryOptions);
        while (iterator.hasNext()) {
            results.add(iterator.next());
        }

        return new VariantQueryResult<>("", (int) stopWatch.getTime(TimeUnit.MILLISECONDS),
                results.size(), iterator.getNumFound(), "Data from Solr", "", results, null);
    }

    /**
     * Return the list of VariantSearchModel objects from a Solr core/collection
     * according a given query.
     *
     * @param collection   Collection name
     * @param query        Query
     * @param queryOptions Query options
     * @return List of VariantSearchModel objects
     * @throws IOException            IOException
     * @throws VariantSearchException VariantSearchException
     */
    public List<VariantSearchModel> nativeQuery(String collection, Query query, QueryOptions queryOptions) throws IOException,
            VariantSearchException {
        // we don't initialize here the collection, the iterator does
        List<VariantSearchModel> results = new ArrayList<>();
        SolrVariantSearchIterator iterator = nativeIterator(collection, query, queryOptions);
        while (iterator.hasNext()) {
            results.add(iterator.next());
        }
        return results;
    }

    /**
     * Return a Solr variant iterator to retrieve Variant objects from a Solr core/collection
     * according a given query.
     *
     * @param collection   Collection name
     * @param query        Query
     * @param queryOptions Query options
     * @return Solr Variant iterator, with the number of matches found by Solr already set
     * @throws IOException            IOException
     * @throws VariantSearchException VariantSearchException
     */
    public SolrVariantIterator iterator(String collection, Query query, QueryOptions queryOptions) throws VariantSearchException,
            IOException {
        QueryResponse response = executeQuery(collection, query, queryOptions);
        SolrVariantIterator iterator = new SolrVariantIterator(response.getBeans(VariantSearchModel.class).iterator());
        iterator.setNumFound(response.getResults().getNumFound());
        return iterator;
    }

    /**
     * Return a Solr variant iterator to retrieve VariantSearchModel objects from a Solr core/collection
     * according a given query.
     *
     * @param collection   Collection name
     * @param query        Query
     * @param queryOptions Query options
     * @return Solr VariantSearch iterator
     * @throws IOException            IOException
     * @throws VariantSearchException VariantSearchException
     */
    public SolrVariantSearchIterator nativeIterator(String collection, Query query, QueryOptions queryOptions)
            throws VariantSearchException, IOException {
        QueryResponse response = executeQuery(collection, query, queryOptions);
        return new SolrVariantSearchIterator(response.getBeans(VariantSearchModel.class).iterator());
    }

    /**
     * Run a facet query against the collection the manager is currently bound to. Unlike the other query
     * methods this one takes no collection name, so a collection must have been loaded or queried before.
     *
     * @param query        Query
     * @param queryOptions Query options
     * @return Facets of the Solr response; an empty facet object if the Solr query fails (the error is logged)
     * @throws IllegalStateException if no Solr client has been initialised yet
     */
    public VariantSearchFacet getFacet(Query query, QueryOptions queryOptions) {
        if (solrClient == null) {
            throw new IllegalStateException("Solr client not initialised: load or query a collection before asking for facets");
        }

        SolrQuery solrQuery = solrQueryParser.parse(query, queryOptions);
        QueryResponse response = null;
        try {
            response = solrClient.query(solrQuery);
        } catch (SolrServerException | IOException e) {
            logger.error("Error executing facet query in Solr", e);
        }
        return getFacets(response);
    }

    /*-------------------------------------
     *  P R I V A T E    M E T H O D S
     -------------------------------------*/

    /**
     * Initialise the collection if needed, translate the query into a Solr query and execute it.
     *
     * @param collection   Collection name
     * @param query        Query
     * @param queryOptions Query options
     * @return Solr response
     * @throws IOException            IOException
     * @throws VariantSearchException if the collection cannot be initialised or Solr fails to run the query
     */
    private QueryResponse executeQuery(String collection, Query query, QueryOptions queryOptions)
            throws VariantSearchException, IOException {
        // init collection if needed
        init(collection);

        try {
            SolrQuery solrQuery = solrQueryParser.parse(query, queryOptions);
            return solrClient.query(solrQuery);
        } catch (SolrServerException e) {
            throw new VariantSearchException(e.getMessage(), e);
        }
    }

    /**
     * Insert a variant into Solr. Variants that the converter turns into null, or into a model without ID,
     * are skipped.
     *
     * @param variant Variant to insert
     * @throws IOException            IOException
     * @throws VariantSearchException VariantSearchException
     */
    private void insert(Variant variant) throws IOException, VariantSearchException {
        VariantSearchModel variantSearchModel = variantSearchToVariantConverter.convertToStorageType(variant);
        if (variantSearchModel == null || variantSearchModel.getId() == null) {
            return;
        }

        try {
            commitIfAccepted(solrClient.addBean(variantSearchModel));
        } catch (SolrServerException e) {
            throw new VariantSearchException(e.getMessage(), e);
        }
    }

    /**
     * Insert a list of variants into Solr. Null or empty lists are ignored.
     *
     * @param variants List of variants to insert
     * @throws IOException            IOException
     * @throws VariantSearchException VariantSearchException
     */
    private void insert(List<Variant> variants) throws IOException, VariantSearchException {
        if (variants == null || variants.isEmpty()) {
            return;
        }

        List<VariantSearchModel> variantSearchModels = variantSearchToVariantConverter.convertListToStorageType(variants);
        if (variantSearchModels == null || variantSearchModels.isEmpty()) {
            return;
        }

        try {
            commitIfAccepted(solrClient.addBeans(variantSearchModels));
        } catch (SolrServerException e) {
            throw new VariantSearchException(e.getMessage(), e);
        }
    }

    /**
     * Commit the pending documents when the update response reports status 0; otherwise only log a warning.
     *
     * @param updateResponse Response of the add request
     * @throws IOException         IOException
     * @throws SolrServerException SolrServerException
     */
    private void commitIfAccepted(UpdateResponse updateResponse) throws IOException, SolrServerException {
        if (updateResponse.getStatus() == 0) {
            solrClient.commit();
        } else {
            logger.warn("Solr update returned status {}, commit skipped", updateResponse.getStatus());
        }
    }

    /**
     * Load a JSON file (one variant per line) into the Solr core/collection, in batches of
     * {@link #DEFAULT_INSERT_SIZE} variants.
     *
     * @param path Path to the JSON file
     * @throws IOException            IOException
     * @throws VariantSearchException VariantSearchException
     */
    private void loadJson(Path path) throws IOException, VariantSearchException {
        ObjectReader objectReader = new ObjectMapper().readerFor(Variant.class);

        // TODO: get the buffer size from configuration file
        List<Variant> batch = new ArrayList<>(DEFAULT_INSERT_SIZE);
        int count = 0;

        // This opens json and json.gz files automatically; try-with-resources closes the reader on failures too
        try (BufferedReader bufferedReader = FileUtils.newBufferedReader(path)) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                Variant variant = objectReader.readValue(line);
                batch.add(variant);
                count++;
                if (batch.size() == DEFAULT_INSERT_SIZE) {
                    insert(batch);
                    batch.clear();
                }
            }

            // Insert the remaining variants
            if (!batch.isEmpty()) {
                insert(batch);
            }
        }

        logger.info("Loading done: {} variants.", count);
    }

    /**
     * Load an Avro file into the Solr core/collection, in batches of {@link #DEFAULT_INSERT_SIZE} variants.
     *
     * @param path Path to the Avro file
     * @throws IOException            IOException
     * @throws VariantSearchException VariantSearchException
     * @throws StorageEngineException StorageEngineException
     */
    private void loadAvro(Path path) throws IOException, VariantSearchException, StorageEngineException {
        // reader
        VariantSource source = null;
        VariantReader reader = VariantReaderUtils.getVariantReader(path, source);

        // TODO: get the buffer size from configuration file
        try {
            List<Variant> variants;
            do {
                variants = reader.read(DEFAULT_INSERT_SIZE);
                insert(variants);
                // Stop only when the reader has nothing left: a batch shorter than requested is not taken
                // as end of input. Assumes the reader returns an empty (or null) batch once exhausted.
            } while (variants != null && !variants.isEmpty());
        } finally {
            reader.close();
        }
    }

    /**
     * Copy the facet information present in a Solr response (fields, queries, ranges and intervals)
     * into a VariantSearchFacet.
     *
     * @param response Solr response, may be null
     * @return Facet object; empty if the response is null or carries no facets
     */
    private VariantSearchFacet getFacets(QueryResponse response) {
        VariantSearchFacet variantSearchFacet = new VariantSearchFacet();
        if (response == null) {
            return variantSearchFacet;
        }

        if (response.getFacetFields() != null) {
            variantSearchFacet.setFacetFields(response.getFacetFields());
        }
        if (response.getFacetQuery() != null) {
            variantSearchFacet.setFacetQueries(response.getFacetQuery());
        }
        if (response.getFacetRanges() != null) {
            variantSearchFacet.setFacetRanges(response.getFacetRanges());
        }
        if (response.getIntervalFacets() != null) {
            variantSearchFacet.setFacetIntervales(response.getIntervalFacets());
        }
        return variantSearchFacet;
    }
}