package org.nextprot.api.tasks.solr.indexer.entry.diff;

import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import org.nextprot.api.core.domain.Entry;
import org.nextprot.api.core.service.EntryBuilderService;
import org.nextprot.api.core.service.TerminologyService;
import org.nextprot.api.solr.index.EntryIndex.Fields;
import org.nextprot.api.tasks.solr.indexer.entry.SolrDiffTest;
import org.nextprot.api.tasks.solr.indexer.entry.impl.InteractionFieldBuilder;
import org.nextprot.api.tasks.solr.indexer.entry.impl.XrefFieldBuilder;
import org.springframework.beans.factory.annotation.Autowired;

import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

/**
 * Diff test comparing the field values produced by {@link XrefFieldBuilder} and
 * {@link InteractionFieldBuilder} with the values obtained from
 * {@code getValueForFieldInCurrentSolrImplementation} for the same entry.
 *
 * <p>Antibodies, Ensembl identifiers and interactions are asserted; xref
 * discrepancies are only reported on {@code System.err} (see
 * {@link #reportXrefDifferences(String, XrefFieldBuilder)} for the known causes).
 */
public class XRefFieldBuilderDiffTest extends SolrDiffTest {

    /** Separator after which the part of an xref value used for comparison starts. */
    private static final String XREF_SEPARATOR = ", ";

    @Autowired
    EntryBuilderService entryBuilderService;

    /** Runs the comparison on a fixed list of entries. */
    @Test
    public void testXrefs() {
        String[] entryNames = {
            "NX_Q8N7I0", "NX_O00115", "NX_O00116", "NX_E5RQL4", "NX_P32418", "NX_Q7Z6P3",
            "NX_Q7Z713", "NX_P22102", "NX_P10415", "NX_Q6PI97", "NX_Q8NDZ0", "NX_O15056"
        };

        for (String entryName : entryNames) {
            testXrefs(getEntry(entryName));
        }
        // To sample 'random' entries by index instead:
        // for (int i = 4500; i < 5000; i++) { testXrefs(getEntry(i)); }
    }

    /**
     * Compares the antibody, Ensembl, xref and interaction fields built for
     * {@code entry} with the values of the current Solr implementation.
     *
     * @param entry the entry whose fields are built and compared
     */
    public void testXrefs(Entry entry) {
        String entryName = entry.getUniqueName();
        System.out.println("Testing: " + entryName);

        XrefFieldBuilder xfb = new XrefFieldBuilder();
        xfb.setEntryBuilderService(entryBuilderService);
        xfb.initializeBuilder(entry);

        checkAntibodies(entryName, xfb);
        checkEnsembl(entryName, xfb);
        reportXrefDifferences(entryName, xfb);
        checkInteractions(entryName, entry);
    }

    /** Asserts that the sorted antibody lists are equal; skipped when the current Solr has none. */
    @SuppressWarnings("unchecked")
    private void checkAntibodies(String entryName, XrefFieldBuilder xfb) {
        List<String> expectedABs = expectedValues(entryName, Fields.ANTIBODY);
        if (expectedABs == null) {
            return;
        }
        Collections.sort(expectedABs);

        List<String> currentABs = xfb.getFieldValue(Fields.ANTIBODY, List.class);
        if (currentABs != null) {
            Collections.sort(currentABs);
        }
        Assert.assertEquals(expectedABs, currentABs);
    }

    /**
     * Asserts that both sides hold the same number of Ensembl identifiers.
     *
     * <p>The comparison is skipped when the current Solr has no value, an empty
     * list (previously an {@code IndexOutOfBoundsException}), or a single value
     * that does not start with "ENS".
     */
    @SuppressWarnings("unchecked")
    private void checkEnsembl(String entryName, XrefFieldBuilder xfb) {
        List<String> expectedEnsembl = expectedValues(entryName, Fields.ENSEMBL);
        if (expectedEnsembl == null || expectedEnsembl.isEmpty()) {
            return;
        }
        // We don't want housemade ENSEMBL like NX_VG_7_129906380_2933 (NX_Q13166)
        boolean comparable = expectedEnsembl.size() > 1 || expectedEnsembl.get(0).startsWith("ENS");
        if (!comparable) {
            return;
        }

        List<String> currentEnsembl = xfb.getFieldValue(Fields.ENSEMBL, List.class);
        Assert.assertNotNull("No ENSEMBL value built for " + entryName, currentEnsembl);
        Assert.assertEquals("ENSEMBL count for " + entryName,
                expectedEnsembl.size(), currentEnsembl.size());
    }

    /**
     * Prints to {@code System.err} the xrefs found in the current Solr but not
     * in the builder output. Nothing is asserted apart from both values being
     * present, because several discrepancies are known:
     * <ol>
     *   <li>missing pubmeds and DOIs: the ones coming from additional refs
     *       (they will be added to entry publications);</li>
     *   <li>Refseq nucleotides (XM_, NM_) labeled as 'nucleotide sequence ID'
     *       are not in the api results;</li>
     *   <li>domain names are not xrefs, eg: entry name:GED, entry name:B33481 = PIR.</li>
     * </ol>
     */
    @SuppressWarnings("unchecked")
    private void reportXrefDifferences(String entryName, XrefFieldBuilder xfb) {
        List<String> expectedXrefs = expectedValues(entryName, Fields.XREFS);
        Assert.assertNotNull("No XREFS value in current solr for " + entryName, expectedXrefs);
        List<String> currentXrefs = xfb.getFieldValue(Fields.XREFS, List.class);
        Assert.assertNotNull("No XREFS value built for " + entryName, currentXrefs);

        Set<String> expectedXrefSet = new TreeSet<String>(expectedXrefs);
        Set<String> xrefSet = new TreeSet<String>(currentXrefs);

        Set<String> expectedAcOnlySet = new TreeSet<String>();
        for (String xref : expectedXrefSet) {
            // For some unknown reasons some journals appear in the xref field of kant
            // (eg: NX_P43686), this is a bug
            if (!xref.startsWith("journal:")) {
                expectedAcOnlySet.add(afterSeparator(xref));
            }
        }
        Set<String> acOnlySet = new TreeSet<String>();
        for (String xref : xrefSet) {
            acOnlySet.add(afterSeparator(xref));
        }

        // It looks that for entries that we have re-mapped, the original ENSG/T/P from UniProt
        // are not available in the API (eg: ENSG00000279911 -> ENSG00000172459 in NX_Q8NGP9)
        // see also: NX_Q9HBT8 ENSP00000408168 ENSP00000458062 ENST00000412988 ENST00000413242,
        // NX_Q8NH49/ENSP00000321506, NX_Q8NGR6/ENST00000304833 ...
        for (String accession : expectedAcOnlySet) {
            // Values starting with "PAp" are deliberately not reported.
            if (!acOnlySet.contains(accession) && !accession.startsWith("PAp")) {
                System.err.println("MISS: " + accession);
            }
        }

        if (xrefSet.size() < expectedXrefSet.size()) {
            // Work on a copy so the set difference does not alter expectedXrefSet.
            Set<String> onlyInCurrentSolr = new TreeSet<String>(expectedXrefSet);
            onlyInCurrentSolr.removeAll(xrefSet);
            System.err.println("Xrefs in current solr contains more data: " + onlyInCurrentSolr);
        }
        // When the API yields more xrefs than the current solr, nothing is reported.
    }

    /**
     * Asserts that the number of "&lt;p&gt;Interacts" items in the current Solr
     * equals the number of distinct built items that start with "AC:" or are
     * "selfInteraction". Skipped when the current Solr has no interactions.
     */
    @SuppressWarnings("unchecked")
    private void checkInteractions(String entryName, Entry entry) {
        List<String> expectedInteractions = expectedValues(entryName, Fields.INTERACTIONS);
        if (expectedInteractions == null) {
            return;
        }

        InteractionFieldBuilder ifb = new InteractionFieldBuilder();
        ifb.setEntryBuilderService(entryBuilderService);
        ifb.initializeBuilder(entry);
        List<String> currentInteractions = ifb.getFieldValue(Fields.INTERACTIONS, List.class);
        Assert.assertNotNull("No INTERACTIONS value built for " + entryName, currentInteractions);
        // A sorted set removes duplicates, so each built interaction is counted once.
        Set<String> currentInteractionSet = new TreeSet<String>(currentInteractions);

        int oldCount = 0;
        for (String interaction : expectedInteractions) {
            if (interaction.startsWith("<p>Interacts")) {
                oldCount++;
            }
        }

        // There may be one more interaction in the new index (the subunit annotation),
        // hence only these two kinds of values are counted.
        int newCount = 0;
        for (String interaction : currentInteractionSet) {
            if (interaction.startsWith("AC:") || interaction.equals("selfInteraction")) {
                newCount++;
            }
        }
        Assert.assertEquals(oldCount, newCount);
    }

    /**
     * Fetches a field of the current Solr implementation as a list of strings.
     *
     * @return the field value, or {@code null} when the lookup yields {@code null}
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private List<String> expectedValues(String entryName, Fields field) {
        // Raw cast kept on purpose: the declared return type of the lookup is not visible here.
        return (List) getValueForFieldInCurrentSolrImplementation(entryName, field);
    }

    /**
     * Returns the part of {@code xref} following the first {@code ", "}
     * (presumably the accession of a "database, accession" pair - to be confirmed),
     * or {@code xref} unchanged when it holds no separator. Without this guard
     * {@code indexOf} returns -1 and the first character would be dropped.
     */
    private static String afterSeparator(String xref) {
        int separatorIndex = xref.indexOf(XREF_SEPARATOR);
        if (separatorIndex < 0) {
            return xref;
        }
        return xref.substring(separatorIndex + XREF_SEPARATOR.length());
    }
}