package eu.dnetlib.iis.wf.ingest.pmc.plaintext; import java.io.InputStream; import java.io.InputStreamReader; import junit.framework.TestCase; import org.apache.commons.io.IOUtils; import org.jdom.Document; import org.jdom.Element; import org.jdom.Namespace; import org.jdom.input.SAXBuilder; import org.junit.Test; /** * @author Dominika Tkaczyk * */ public class NlmToDocumentTextConverterTest extends TestCase { private static final String testXML = "/eu/dnetlib/iis/wf/ingest/pmc/plaintext/document.nxml"; private static final String testTXT = "/eu/dnetlib/iis/wf/ingest/pmc/plaintext/document.txt"; private static final String testXmlNestedInOAI = "/eu/dnetlib/iis/wf/ingest/pmc/plaintext/document_nested_in_oai.nxml"; private static final String testTxtNestedInOAI = "/eu/dnetlib/iis/wf/ingest/pmc/plaintext/document_nested_in_oai.txt"; @Test public void testConvertFull() throws Exception { SAXBuilder builder = new SAXBuilder(); builder.setValidation(false); builder.setFeature("http://xml.org/sax/features/validation", false); builder.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); builder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); InputStreamReader testIS = new InputStreamReader(ClassLoader.class.getResourceAsStream(testXML), "UTF-8"); Document document = builder.build(testIS); Element sourceDocument = document.getRootElement(); String testText = NlmToDocumentTextConverter.getDocumentText(sourceDocument, null); testIS.close(); InputStream expectedIS = ClassLoader.class.getResourceAsStream(testTXT); String expectedText = IOUtils.toString(expectedIS, "UTF-8").replaceAll(System.getProperty("line.separator"), "\n"); expectedIS.close(); assertEquals(expectedText, testText); } @Test public void testConvertFullNestedInOAI() throws Exception { SAXBuilder builder = new SAXBuilder(); builder.setValidation(false); builder.setFeature("http://xml.org/sax/features/validation", false); builder.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); builder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); InputStreamReader testIS = new InputStreamReader(ClassLoader.class.getResourceAsStream(testXmlNestedInOAI), "UTF-8"); Document document = builder.build(testIS); Element sourceDocument = document.getRootElement(); String testText = NlmToDocumentTextConverter.getDocumentText(sourceDocument, Namespace.getNamespace("http://www.openarchives.org/OAI/2.0/")); testIS.close(); InputStream expectedIS = ClassLoader.class.getResourceAsStream(testTxtNestedInOAI); String expectedText = IOUtils.toString(expectedIS, "UTF-8").replaceAll(System.getProperty("line.separator"), "\n"); expectedIS.close(); assertEquals(expectedText, testText); } }