package org.nextprot.api.tasks.solr.indexer.entry.diff;
import org.junit.Test;
import org.nextprot.api.core.domain.Entry;
import org.nextprot.api.solr.index.EntryIndex.Fields;
import org.nextprot.api.tasks.solr.indexer.entry.SolrDiffTest;
import org.nextprot.api.tasks.solr.indexer.entry.impl.ChromosomeFieldBuilder;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
/**
 * Diff tests checking that {@link ChromosomeFieldBuilder} produces, for a selection of entries,
 * the same chromosome-related field values ({@code CHR_LOC}, {@code CHR_LOC_S} and
 * {@code GENE_BAND}) as the ones returned by
 * {@code getValueForFieldInCurrentSolrImplementation}.
 *
 * <p>The location comments next to each accession describe the situation the entry was picked
 * for; they are informational only and are not asserted by the tests.
 */
public class ChromosomeFieldBuilderDiffTest extends SolrDiffTest {

    /** Number of entries fetched by index in {@link #testChromsomalLocation()}. */
    private static final int INDEXED_ENTRY_COUNT = 10;

    @Test
    public void testEntryWith3LocationsOn3differentChromosomes() {
        testEntryGivesSameIndexAsSolrIndex("NX_P62158"); // locations: 2p21, 19q13.32, 14q32.11
    }

    @Test
    public void testEntryWith2LocationsOnSameChromosomeAndDifferentBand() {
        testEntryGivesSameIndexAsSolrIndex("NX_E9PJI5"); // locations: 16p12.3 (CHR_HSCHR16_1_CTG1), 16p13.11
        testEntryGivesSameIndexAsSolrIndex("NX_O75144"); // locations: 21q22.3, 21p12
        testEntryGivesSameIndexAsSolrIndex("NX_O95255"); // locations: 16p13.11, 16p12.3 (CHR_HSCHR16_1_CTG1)
        testEntryGivesSameIndexAsSolrIndex("NX_P03989"); // locations: 6p22.1 (CHR_HSCHR6_MHC_MANN_CTG1), 6p21.33
    }

    @Test
    public void testEntryWith2LocationsOnTheSameChromosomeAndBand() {
        // Other entries in the same situation: NX_A8MYA2, NX_O75900, NX_P01562
        testEntryGivesSameIndexAsSolrIndex("NX_A6NE21"); // locations: 8p23.1
        testEntryGivesSameIndexAsSolrIndex("NX_P43686"); // locations: 19q13.2
    }

    @Test
    public void testEntryWith1Location() {
        testEntryGivesSameIndexAsSolrIndex("NX_A0PK05"); // locations: 10q11.21
        testEntryGivesSameIndexAsSolrIndex("NX_A0MZ66"); // locations: 10q25.3
        testEntryGivesSameIndexAsSolrIndex("NX_A1L3X4"); // locations: 16q13
        testEntryGivesSameIndexAsSolrIndex("NX_A0PJX8"); // locations: 1p26.31
        testEntryGivesSameIndexAsSolrIndex("NX_A1L170"); // locations: 1q23.3
        testEntryGivesSameIndexAsSolrIndex("NX_A1L020"); // locations: 1q22
        testEntryGivesSameIndexAsSolrIndex("NX_A2A368"); // locations: Xp21.1
        testEntryGivesSameIndexAsSolrIndex("NX_A6NDE4"); // locations: Yq11.223
    }

    @Test
    public void testEntryWithChromosomeAndUnknownBand() {
        testEntryGivesSameIndexAsSolrIndex("NX_Q13072"); // locations: 13 (unknown band)
        testEntryGivesSameIndexAsSolrIndex("NX_Q86Y27"); // locations: 13 (unknown band)
    }

    @Test
    public void testEntryWithChromosomeAndBlankBand() {
        testEntryGivesSameIndexAsSolrIndex("NX_Q8WZ33"); // locations: 4 (blank band)
        testEntryGivesSameIndexAsSolrIndex("NX_P00156"); // locations: MT (blank band)
        testEntryGivesSameIndexAsSolrIndex("NX_P03905"); // locations: MT (blank band)
    }

    @Test
    public void testEntryWithUnknownChromosomeLocation() {
        // Entries with unknown locations: NX_O00370 NX_P0CW71 NX_Q96PT4 NX_Q96PT3 NX_Q9UN81
        testEntryGivesSameIndexAsSolrIndex("NX_P0CW71"); // locations: (unknown chromosome and band)
    }

    /**
     * Runs the three field comparisons on the entries obtained from the inherited
     * {@code getEntry(int)} for indexes 0 to {@code INDEXED_ENTRY_COUNT - 1}.
     */
    @Test
    public void testChromsomalLocation() {
        for (int i = 0; i < INDEXED_ENTRY_COUNT; i++) {
            assertChromosomeFieldsMatchSolr(getEntry(i));
        }
    }

    /**
     * Loads the entry with the given unique name and runs the three field comparisons on it.
     *
     * @param uniqueName entry accession passed to the inherited {@code getEntry(String)}
     */
    private void testEntryGivesSameIndexAsSolrIndex(String uniqueName) {
        assertChromosomeFieldsMatchSolr(getEntry(uniqueName));
    }

    /**
     * Prints the entry name, then compares {@code CHR_LOC}, {@code CHR_LOC_S} and
     * {@code GENE_BAND} (in that order) against the current Solr implementation.
     */
    private void assertChromosomeFieldsMatchSolr(Entry entry) {
        System.out.println(entry.getUniqueName());
        testChrLoc(entry);
        testChrLocS(entry);
        testGeneBand(entry);
    }

    /**
     * Compares the {@code CHR_LOC} field as sets of space-separated tokens.
     *
     * <p>The current Solr implementation may repeat values and order them differently, and its
     * value can contain commas (removed before splitting); only the set of unique tokens is
     * expected to be identical.
     */
    private void testChrLoc(Entry entry) {
        Fields field = Fields.CHR_LOC;
        String entryName = entry.getUniqueName();

        String actualValue = newInitializedBuilder(entry).getFieldValue(field, String.class);
        // Fail with a readable message instead of a NullPointerException on split().
        assertNotNull(missingValueMessage("actual", field, entryName), actualValue);
        Set<String> actualSet = new TreeSet<String>(Arrays.asList(actualValue.split(" ")));

        String expectedValue = (String) getValueForFieldInCurrentSolrImplementation(entryName, field);
        assertNotNull(missingValueMessage("expected", field, entryName), expectedValue);
        Set<String> expectedSet =
                new TreeSet<String>(Arrays.asList(expectedValue.replace(",", "").split(" ")));

        assertEquals(mismatchMessage(field, entryName), expectedSet, actualSet);
    }

    /**
     * Compares the {@code CHR_LOC_S} field (sort order value for the chromosome location).
     *
     * <p>Per the original author's notes: both the old and the new implementation sort the
     * multiple locations retrieved from the db alphabetically before indexing them in
     * {@code chr_loc}, and both compute {@code chr_loc_s} from the first location found in
     * {@code chr_loc}, hence the values are expected to be strictly equal.
     */
    private void testChrLocS(Entry entry) {
        Fields field = Fields.CHR_LOC_S;
        String entryName = entry.getUniqueName();

        ChromosomeFieldBuilder builder = newInitializedBuilder(entry);
        Integer expectedValue = (Integer) getValueForFieldInCurrentSolrImplementation(entryName, field);
        Integer actualValue = builder.getFieldValue(field, Integer.class);

        assertEquals(mismatchMessage(field, entryName), expectedValue, actualValue);
    }

    /**
     * Compares the {@code GENE_BAND} field as sets of space-separated tokens.
     *
     * <p>The current Solr implementation may repeat values and order them differently; only the
     * set of unique tokens is expected to be identical.
     */
    private void testGeneBand(Entry entry) {
        Fields field = Fields.GENE_BAND;
        String entryName = entry.getUniqueName();

        // getFieldValue(field, List.class) yields a raw List; the field holds strings.
        @SuppressWarnings("unchecked")
        List<String> actualValues = newInitializedBuilder(entry).getFieldValue(field, List.class);
        // Fail with a readable message instead of a NullPointerException while iterating.
        assertNotNull(missingValueMessage("actual", field, entryName), actualValues);
        Set<String> actualSet = toTokenSet(actualValues);

        @SuppressWarnings("unchecked")
        List<String> expectedValues =
                (List<String>) getValueForFieldInCurrentSolrImplementation(entryName, field);
        assertNotNull(missingValueMessage("expected", field, entryName), expectedValues);
        Set<String> expectedSet = toTokenSet(expectedValues);

        assertEquals(mismatchMessage(field, entryName), expectedSet, actualSet);
    }

    /** Creates a {@link ChromosomeFieldBuilder} and initializes it with the given entry. */
    private static ChromosomeFieldBuilder newInitializedBuilder(Entry entry) {
        ChromosomeFieldBuilder builder = new ChromosomeFieldBuilder();
        builder.initializeBuilder(entry);
        return builder;
    }

    /** Splits every value on single spaces and gathers the tokens into a sorted set. */
    private static Set<String> toTokenSet(List<String> values) {
        Set<String> tokens = new TreeSet<String>();
        for (String value : values) {
            tokens.addAll(Arrays.asList(value.split(" ")));
        }
        return tokens;
    }

    /** Builds the assertion message used when expected and actual values differ. */
    private static String mismatchMessage(Fields field, String entryName) {
        return field + " mismatch for entry " + entryName;
    }

    /** Builds the assertion message used when one side of the comparison is {@code null}. */
    private static String missingValueMessage(String side, Fields field, String entryName) {
        return "No " + side + " " + field + " value for entry " + entryName;
    }
}