package org.opencb.opencga.storage.core.search;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.ObjectCodec;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;
import org.apache.solr.client.solrj.response.RangeFacet;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.VariantAnnotation;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.test.GenericTest;
import org.opencb.commons.utils.FileUtils;
import org.opencb.opencga.storage.core.search.solr.SolrVariantSearchIterator;
import org.opencb.opencga.storage.core.variant.VariantStorageBaseTest;
import java.io.*;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;
/**
 * Manual integration tests for {@link VariantSearchManager}.
 *
 * <p>The whole class is {@code @Ignore}d and the individual {@code @Test} annotations are
 * commented out: the tests talk to a Solr server at {@code http://localhost:8983/solr/} and read
 * variant files from hard-coded, developer-specific paths, so they can only be run by hand after
 * adapting those values.
 *
 * <p>Test methods declare {@code throws Exception} instead of catching and printing, so that any
 * failure is reported by the test runner rather than being silently swallowed.
 *
 * Created by wasim on 22/11/16.
 */
@Ignore
public class VariantSearchManagerTest extends GenericTest {

    /** Only referenced by the (currently disabled) facet tests. */
    private static final int TOTAL_VARIANTS = 97;

    private String collection;
    private String filename;
    private List<Variant> variantList;
    private JsonFactory factory;
    private InputStream variantsStream;
    private JsonParser variantsParser;
    private ObjectMapper jsonObjectMapper;
    private VariantSearchManager variantSearchManager;

    /**
     * Opens the gzipped JSON variant file, reads up to 100 variants into {@link #variantList}
     * and creates the {@link VariantSearchManager} under test.
     *
     * @throws Exception if the variant file cannot be opened or the manager cannot be created
     */
    @Before
    public void setUp() throws Exception {
        // NOTE(review): developer-specific absolute path; must be adapted before running.
        filename = "/home/imedina/Downloads/variation_chr1.full.json.gz";
        factory = new JsonFactory();
        jsonObjectMapper = new ObjectMapper();
        initJSONParser(new File(VariantStorageBaseTest.getResourceUri(filename)));
        variantList = readNextVariantFromJSON(100);

        // collection = "biotest_core2";
        collection = "biotest_collection_4";
        variantSearchManager = new VariantSearchManager("http://localhost:8983/solr/", collection);
    }

    /**
     * Releases the JSON parser and the underlying input stream opened in {@link #setUp()}.
     *
     * @throws Exception if closing either resource fails
     */
    @After
    public void tearDown() throws Exception {
        try {
            if (variantsParser != null) {
                variantsParser.close();
            }
        } finally {
            // Close the stream explicitly as well, in case the parser was never created.
            if (variantsStream != null) {
                variantsStream.close();
            }
        }
    }

    /** Creates a Solr core named {@code core555} from the {@code myConfSet} config set. */
    // @Test
    public void createCore() throws Exception {
        String coreName = "core555";
        String configSet = "myConfSet";
        variantSearchManager.createCore(coreName, configSet);
    }

    /** Prints the result of {@code existCore} for two core names. */
    // @Test
    public void existCore() throws Exception {
        String name;

        name = "core999";
        System.out.println("exist " + name + "? " + variantSearchManager.existCore(name));

        name = "core99999";
        System.out.println("exist " + name + "? " + variantSearchManager.existCore(name));
    }

    /** Creates a Solr collection named {@code collection888} with 2 shards and 2 replicas. */
    // @Test
    public void createCollection() throws Exception {
        String collectionName = "collection888";
        String configName = "myConfSet";
        int numShards = 2;
        int numReplicas = 2;
        variantSearchManager.createCollection(collectionName, configName, numShards, numReplicas);
    }

    /** Prints the result of {@code existCollection} for two collection names. */
    // @Test
    public void existCollection() throws Exception {
        String name;

        name = "collection888";
        System.out.println("exist " + name + "? " + variantSearchManager.existCollection(name));

        name = "collection888888";
        System.out.println("exist " + name + "? " + variantSearchManager.existCollection(name));
    }

    /**
     * Reads one JSON variant per line, converts it to a {@link VariantSearchModel} and back, and
     * prints all three representations. Nothing is asserted; the output is inspected by hand.
     */
    // @Test
    public void conversionTest() throws Exception {
        // filename = "/home/imedina/Downloads/variation_chr1.full.json.gz";
        filename = "/home/jtarraga/data150/vcf/variation_chr22.3.json";

        VariantSearchToVariantConverter variantSearchToVariantConverter = new VariantSearchToVariantConverter();
        ObjectReader objectReader = jsonObjectMapper.readerFor(Variant.class);

        int count = 0;
        // try-with-resources so the reader is closed even when a line fails to parse.
        try (BufferedReader bufferedReader = FileUtils.newBufferedReader(Paths.get(filename))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                Variant variant = objectReader.readValue(line);
                VariantSearchModel variantSearchModel = variantSearchToVariantConverter.convertToStorageType(variant);

                System.out.println("--------------- variant:");
                System.out.println(variant.toJson());
                System.out.println("--------------- variant search model:");
                System.out.println(variantSearchModel.toString());

                Variant variant2 = variantSearchToVariantConverter.convertToDataModelType(variantSearchModel);
                System.out.println("--------------- variant2:");
                System.out.println(variant2.toJson());

                count++;
            }
        }
        System.out.println("Number of processed variants: " + count);
    }

    /** Loads the variant file configured in {@link #setUp()} into the Solr collection. */
    //@Test
    public void loadVariantFileIntoSolrTest() throws Exception {
        variantSearchManager.load(collection, Paths.get(filename));
    }

    /**
     * Loads the variant file, prefixes the IDs of the in-memory variants and queries Solr for
     * that prefix. The result assertions are currently commented out.
     */
    //@Test
    public void verifyInsertedVariantTest() throws Exception {
        String test = "Test_Variant_Verification_";

        variantSearchManager.load(collection, Paths.get(filename));
        // Only referenced by the disabled assertions below; the call also renames variantList in place.
        List<Variant> variants = modifyVariantsID(test);

        Query query = new Query();
        query.append("dbSNP", test + "*");
        QueryOptions queryOptions = new QueryOptions();
        queryOptions.append(QueryOptions.LIMIT, 500);

        SolrVariantSearchIterator iterator = variantSearchManager.nativeIterator(collection, query, queryOptions);
        List<VariantSearchModel> results = new ArrayList<>();
        iterator.forEachRemaining(results::add);

        // Assert.assertEquals(1, results.size());
        // Assert.assertTrue(variants.get(0).getStart() == results.get(0).getStart());
    }

    /** Queries for an ID prefix that was never inserted and expects no results. */
    //@Test
    public void queryNonInsertedVariantTest() throws Exception {
        String test = "Test_Variant_Non_Inserted_";

        Query query = new Query();
        query.append("dbSNP", test + "*");
        QueryOptions queryOptions = new QueryOptions();
        queryOptions.append(QueryOptions.LIMIT, 500);

        SolrVariantSearchIterator iterator = variantSearchManager.nativeIterator(collection, query, queryOptions);
        List<VariantSearchModel> results = new ArrayList<>();
        iterator.forEachRemaining(results::add);

        Assert.assertEquals(0, results.size());
    }

    /**
     * Converts the first loaded variant to a {@link VariantSearchModel} and checks that ID,
     * variant ID, chromosome and type match the source variant.
     */
    //@Test
    public void variantToVariantSearchConversionTest() {
        Variant variant = variantList.get(0);
        VariantSearchToVariantConverter converter = new VariantSearchToVariantConverter();
        VariantSearchModel variantSearchModel = converter.convertToStorageType(variant);

        // JUnit convention: expected value first, actual value second.
        Assert.assertEquals(getVariantSolrID(variant), variantSearchModel.getId());
        Assert.assertEquals(variant.getId(), variantSearchModel.getVariantId());
        Assert.assertEquals(variant.getChromosome(), variantSearchModel.getChromosome());
        Assert.assertEquals(variant.getType().toString(), variantSearchModel.getType().toString());
    }

    /*
    // Facet not supported yet !!
    @Test
    public void variantFacetFiledCountTest() {
    try {
    String facetFieldName = "chromosome";
    Query query = new Query();
    QueryOptions queryOptions = new QueryOptions();
    query.append("ids", facetFieldName);
    query.append("facet.field", facetFieldName);
    Variant variant = variantList.get(0);
    variant.setId(facetFieldName);
    variantSearchManager.insert(variant);
    VariantSearchFacet variantSearchFacet = variantSearchManager.getFacet(query, queryOptions);
    Assert.assertEquals(variantSearchFacet.getFacetFields().get(0).getName(), facetFieldName);
    Assert.assertEquals(1, variantSearchFacet.getFacetFields().get(0).getValueCount());
    } catch (Exception e) {
    e.printStackTrace();
    }
    }
    @Test
    public void variantFacetFiledsCountTest() {
    try {
    Query query = new Query();
    QueryOptions queryOptions = new QueryOptions();
    query.append("ids", "*");
    query.append("facet.fields", "type,sift");
    variantSearchManager.insert(variantList);
    VariantSearchFacet variantSearchFacet = variantSearchManager.getFacet(query, queryOptions);
    Assert.assertEquals(variantSearchFacet.getFacetFields().get(0).getName(), "type");
    Assert.assertEquals(variantSearchFacet.getFacetFields().get(1).getName(), "sift");
    Assert.assertEquals(TOTAL_VARIANTS, variantSearchFacet.getFacetFields().get(0).getValues().get(0).getCount());
    Assert.assertEquals(TOTAL_VARIANTS, variantSearchFacet.getFacetFields().get(1).getValues().get(0).getCount());
    } catch (Exception e) {
    e.printStackTrace();
    }
    }
    @Test
    public void variantFacetQueryTest() {
    try {
    Query query = new Query();
    QueryOptions queryOptions = new QueryOptions();
    query.append("ids", "*");
    query.append("facet.query", "type:SNV");
    variantSearchManager.insert(variantList);
    VariantSearchFacet variantSearchFacet = variantSearchManager.getFacet(query, queryOptions);
    Assert.assertTrue(TOTAL_VARIANTS == variantSearchFacet.getFacetQueries().entrySet().iterator().next().getValue());
    } catch (Exception e) {
    e.printStackTrace();
    }
    }
    @Test
    public void variantFacetRangeTest() {
    try {
    Query query = new Query();
    QueryOptions queryOptions = new QueryOptions();
    query.append("ids", "*");
    Map<String, Map<String, Double>> rangeFields = new HashMap<>();
    Map<String, Double> sift = new HashMap<>();
    sift.put("facet.range.start", 0.0);
    sift.put("facet.range.end", 11.0);
    sift.put("facet.range.gap", 2.0);
    rangeFields.put("sift", sift);
    query.append("facet.range", rangeFields);
    variantSearchManager.insert(variantList);
    VariantSearchFacet variantSearchFacet = variantSearchManager.getFacet(query, queryOptions);
    List<RangeFacet.Count> rangeEntries = variantSearchFacet.getFacetRanges().get(0).getCounts();
    Assert.assertNotNull(rangeEntries);
    Assert.assertEquals(0, rangeEntries.get(0).getCount());
    Assert.assertEquals(0, rangeEntries.get(1).getCount());
    Assert.assertEquals(0, rangeEntries.get(2).getCount());
    Assert.assertEquals(0, rangeEntries.get(3).getCount());
    Assert.assertEquals(0, rangeEntries.get(4).getCount());
    Assert.assertEquals(TOTAL_VARIANTS, rangeEntries.get(5).getCount());
    } catch (Exception e) {
    e.printStackTrace();
    }
    }
    */

    /** Loads the variant file and checks that a query limited to 15 returns exactly 15 results. */
    // @Test
    public void variantSolrQueryLimitTest() throws Exception {
        variantSearchManager.load(collection, Paths.get(filename));

        Query query = new Query();
        QueryOptions queryOptions = new QueryOptions();
        query.append("ids", "*");
        queryOptions.append(QueryOptions.LIMIT, 15);

        SolrVariantSearchIterator iterator = variantSearchManager.nativeIterator(collection, query, queryOptions);
        List<VariantSearchModel> results = new ArrayList<>();
        iterator.forEachRemaining(results::add);

        Assert.assertEquals(15, results.size());
    }

    /**
     * Loads the variant file, queries 15 results sorted by {@code start} in descending order and
     * checks that the first result starts after the fifteenth.
     */
    //@Test
    public void queryOptionSortTest() throws Exception {
        variantSearchManager.load(collection, Paths.get(filename));

        Query query = new Query();
        QueryOptions queryOptions = new QueryOptions();
        query.append("ids", "*");
        queryOptions.append(QueryOptions.LIMIT, 15);
        queryOptions.add(QueryOptions.SORT, "start");
        queryOptions.add(QueryOptions.ORDER, QueryOptions.DESCENDING);

        SolrVariantSearchIterator iterator = variantSearchManager.nativeIterator(collection, query, queryOptions);
        List<VariantSearchModel> results = new ArrayList<>();
        iterator.forEachRemaining(results::add);

        Assert.assertTrue(results.get(0).getStart() > results.get(14).getStart());
    }

    /**
     * Builds the ID this test expects for a variant:
     * {@code chromosome_start_reference_alternate}, all taken from the variant's annotation.
     *
     * @param variant variant whose annotation supplies the ID components
     * @return the underscore-joined ID string
     */
    private String getVariantSolrID(Variant variant) {
        VariantAnnotation variantAnnotation = variant.getAnnotation();
        return variantAnnotation.getChromosome() + "_" + variantAnnotation.getStart() + "_"
                + variantAnnotation.getReference() + "_" + variantAnnotation.getAlternate();
    }

    /**
     * Prefixes the ID of every variant in {@link #variantList}.
     *
     * <p>The variants are modified in place: the returned list is a new list, but it holds the
     * same {@link Variant} instances as {@link #variantList}.
     *
     * @param prefix text prepended to each variant ID
     * @return a new list containing the (mutated) variants
     */
    private List<Variant> modifyVariantsID(String prefix) {
        List<Variant> modifiedVariants = new ArrayList<>();
        for (Variant variant : variantList) {
            variant.setId(prefix + variant.getId());
            modifiedVariants.add(variant);
        }
        return modifiedVariants;
    }

    /**
     * Opens {@code file} as a gzip stream and creates a JSON parser over it, storing both in
     * {@link #variantsStream} and {@link #variantsParser}.
     *
     * @param file gzip-compressed JSON file to read
     * @throws IOException if the file cannot be opened or the parser cannot be created
     */
    private void initJSONParser(File file) throws IOException {
        InputStream fileStream = new FileInputStream(file);
        try {
            this.variantsStream = new GZIPInputStream(fileStream);
        } catch (IOException e) {
            // The gzip wrapper was never created, so nothing else will close the file stream.
            try {
                fileStream.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        this.variantsParser = this.factory.createParser(this.variantsStream);
        this.variantsParser.setCodec(this.jsonObjectMapper);
    }

    /**
     * Reads up to {@code bucket} variants from {@link #variantsParser}.
     *
     * @param bucket maximum number of variants to read
     * @return the variants read; fewer than {@code bucket} when the input is exhausted
     * @throws UncheckedIOException if the input cannot be read or parsed
     */
    private List<Variant> readNextVariantFromJSON(int bucket) {
        List<Variant> variants = new ArrayList<>();
        try {
            // Check the bound first: advancing the parser once the batch is full would consume
            // the first token of the following variant.
            while (variants.size() < bucket && this.variantsParser.nextToken() != null) {
                variants.add(this.variantsParser.readValueAs(Variant.class));
            }
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to read variants from " + filename, e);
        }
        return variants;
    }
}