package org.genedb.db.loading;
import static org.junit.Assert.*;
import org.genedb.db.loading.FeatureTable.Feature;
import org.apache.log4j.Logger;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
/**
 * Tests for {@link EmblFile}, run against the fixture file
 * <code>test/data/Smp_scaff000604.embl</code> (path relative to the working
 * directory of the test run).
 * <p>
 * A fresh {@link EmblFile} is parsed before every test, so qualifier-ignore
 * settings applied to the feature table in one test do not leak into another.
 */
public class EmblFileTest {
    private static final Logger logger = Logger.getLogger(EmblFileTest.class);

    /** The parsed fixture file; re-created before each test. */
    private EmblFile emblFile;

    /**
     * Parses the fixture file into {@link #emblFile}.
     *
     * @throws IOException if the fixture cannot be opened or read
     * @throws ParsingException if the fixture cannot be parsed
     */
    @Before
    public void loadEmblFile() throws IOException, ParsingException {
        // Logged because the fixture path is relative; helps diagnose "file not found".
        logger.debug("Current working directory is " + new File(".").getCanonicalPath());
        File file = new File("test/data/Smp_scaff000604.embl");
        FileReader reader = new FileReader(file);
        try {
            emblFile = new EmblFile(file, reader);
        } finally {
            // The constructor is the only EmblFile call used here that declares
            // IOException, so it is assumed to consume the reader completely.
            // Close it now rather than leaking one file handle per test.
            reader.close();
        }
    }

    /**
     * Checks the header-level values reported for the fixture: accession,
     * sequence version, topology, molecule type, data class, taxonomic
     * division, and the declared versus actual sequence length.
     */
    // NOTE(review): the method name looks like a typo for "metadata"; it is kept
    // as-is so that reported test names and name-based test filters do not change.
    @Test
    public void metqdata() {
        assertEquals("Smp_scaff000604", emblFile.getAccession());
        assertEquals(1, emblFile.getSequenceVersion());
        assertEquals("linear", emblFile.getTopology());
        assertEquals("genomic DNA", emblFile.getMoleculeType());
        assertEquals("ANN", emblFile.getDataClass());
        assertEquals("PRO", emblFile.getTaxonomicDivision());
        // The declared length and the length of the sequence actually read must agree.
        assertEquals(206621, emblFile.getSequenceLength());
        assertEquals(206621, emblFile.getSequence().length());
    }

    /**
     * Looks up two specific CDS features by their /temporary_systematic_id.
     * Smp_097250 must carry a /pseudo qualifier; Smp_175570 must render, via
     * <code>toString()</code>, to the exact expected location and qualifier list.
     * Both features must be present.
     */
    @Test
    public void twoPseudogenes() throws ParsingException {
        FeatureTable featureTable = emblFile.getFeatureTable();
        Iterable<Feature> features = featureTable.getFeatures();

        int foundFeatures = 0;
        for (Feature feature : features) {
            if (!"CDS".equals(feature.type)) {
                continue;
            }

            String temporarySystematicId = feature.getQualifierValue("temporary_systematic_id");
            if ("Smp_097250".equals(temporarySystematicId)) {
                foundFeatures++;
                assertTrue(feature.hasQualifier("pseudo"));
            } else if ("Smp_175570".equals(temporarySystematicId)) {
                foundFeatures++;
                assertEquals("CDS at complement(join(31435..31550,31594..31709,31743..31811," +
                        "31849..31930,31973..32027)): " +
                        "/method=\"new or changed gene model\"; " +
                        "/note=\"predicted by \"\"EVM2\"\"\"; " +
                        "/product=\"conserved hypothetical protein\"; " +
                        "/psu_db_xref=\"SMA1:Sm00.scaff00658.0040;\"; " +
                        "/psu_db_xref=\"GeneDB_Smansoni:Sm00874; Blastn score=852 evalue=0.0, Blastx score=299 evalue=1e-82\"; " +
                        "/similarity=\"blastp; GB:AAH73413.1; ; ; ; ; id=55.3%; ; E()=3.8e-39; ; ; ;\"; " +
                        "/similarity=\"blastp; RF:NP_001004925.1; ; ; ; ; id=56.1%; ; E()=4.8e-39; ; ; ;\"; " +
                        "/similarity=\"blastp; RF:NP_001025088.1; ; ; ; ; id=56.0%; ; E()=6.2e-39; ; ; ;\"; " +
                        "/similarity=\"blastp; SP:Q91VH6; ; ; ; ; id=56.0%; ; E()=6.2e-39; ; ; ;\"; " +
                        "/similarity=\"blastp; SP:Q9Y316; ; ; ; ; id=56.0%; ; E()=6.2e-39; ; ; ;\"; " +
                        "/synonym=\"29646.t000004\"; " +
                        "/synonym=\"29646.m000188\"; " +
                        "/temporary_systematic_id=\"Smp_175570\"", feature.toString());
            }
        }

        // Guards against the loop silently matching neither (or only one) feature.
        assertEquals(2, foundFeatures);
    }

    /**
     * Ignores the /synonym qualifier for every feature type, then checks that
     * no feature reports a /synonym through either accessor, and that every
     * CDS feature lists "synonym" among its unused qualifier names. The
     * fixture is expected to contain 28 features in total.
     */
    @Test
    public void ignoreQualifierGlobally() throws DataError {
        FeatureTable featureTable = emblFile.getFeatureTable();
        featureTable.ignoreQualifier("synonym");

        Iterable<Feature> features = featureTable.getFeatures();
        int numberOfFeatures = 0;
        for (Feature feature : features) {
            ++numberOfFeatures;

            assertNull(String.format("%s feature at line %d reports a /synonym from getQualifierValue",
                    feature.type, feature.lineNumber),
                feature.getQualifierValue("synonym"));

            assertTrue(String.format("%s feature at line %d reports a /synonym from getQualifierValues",
                    feature.type, feature.lineNumber),
                feature.getQualifierValues("synonym").isEmpty());

            if ("CDS".equals(feature.type)) {
                assertTrue(String.format("CDS feature at line %d has no /synonym", feature.lineNumber),
                    feature.getUnusedQualifierNames().contains("synonym"));
            }
        }

        assertEquals(28, numberOfFeatures);
    }

    /**
     * Ignores the /note qualifier for CDS features only, then checks that CDS
     * features report no notes while the notes reported across all features
     * still total 15, i.e. the notes of other feature types are not ignored.
     */
    @Test
    public void ignoreQualifierByFeature() {
        FeatureTable featureTable = emblFile.getFeatureTable();
        featureTable.ignoreQualifier("note", "CDS");

        Iterable<Feature> features = featureTable.getFeatures();
        int totalNumberOfNotes = 0;
        for (Feature feature : features) {
            int numberOfNotes = feature.getQualifierValues("note").size();
            totalNumberOfNotes += numberOfNotes;

            if ("CDS".equals(feature.type)) {
                assertEquals(0, numberOfNotes);
            }
        }

        // CDS features contribute zero above, so this total comes from the other types.
        assertEquals(15, totalNumberOfNotes);
    }
}