package org.gbif.dwca.io; import org.gbif.dwc.terms.DcTerm; import org.gbif.dwc.terms.DwcTerm; import org.gbif.dwc.terms.GbifTerm; import org.gbif.dwca.tools.MetaValidator; import org.gbif.utils.file.FileUtils; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.net.URI; import java.nio.file.Files; import java.util.List; import java.util.Set; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.junit.Test; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.ext.DefaultHandler2; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; /** * Integration tests related to the MetaDescriptor operations. * * @author mdoering * @author cgendreau * */ public class MetaDescriptorTest { private static final String NOMENCLATURAL_CODE_VOCABULARY = "http://rs.gbif.org/vocabulary/gbif/nomenclatural_code.xml"; //for testing only, language vocabulary doesn't exist at rs.gbif.org private static final String LANGUAGE_VOCABULARY = "http://rs.gbif.org/vocabulary/gbif/language.xml"; public class SAXExtractTerms extends DefaultHandler2 { private final List<String> terms; public SAXExtractTerms(List<String> terms) { this.terms = terms; } @Override public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { for (String attName : Lists.newArrayList("rowType", "term")) { if (atts.getValue(attName) != null) { terms.add(atts.getValue(attName)); } } } } @Test public void testXml() throws Exception { // read archive Archive arch = ArchiveFactory.openArchive(FileUtils.getClasspathFile("archive-dwc")); // write meta.xml File tmpMeta = File.createTempFile("meta", ".xml"); System.out.println("Writing temporary test meta file to " + tmpMeta.getAbsolutePath()); MetaDescriptorWriter.writeMetaFile(tmpMeta, arch); // validate xml System.out.println("Validate xml"); MetaValidator.validate(new FileInputStream(tmpMeta)); // verify rowType & terms are URIs List<String> terms = Lists.newArrayList(); SAXParserFactory spf = SAXParserFactory.newInstance(); spf.setNamespaceAware(true); SAXParser saxParser = spf.newSAXParser(); XMLReader xmlReader = saxParser.getXMLReader(); xmlReader.setContentHandler(new SAXExtractTerms(terms)); xmlReader.parse(new InputSource(new FileInputStream(tmpMeta))); assertEquals(18, terms.size()); for (String term : terms) { URI uri = URI.create(term); assertNotNull(uri + " is no full URI term", uri.getScheme()); assertNotNull(uri + " is no full URI term", uri.getAuthority()); assertNotNull(uri + " is no full URI term", uri.getPath()); } } @Test public void testRoundtrip() { try { // read archive Archive arch = ArchiveFactory.openArchive(FileUtils.getClasspathFile("archive-dwc")); assertNotNull(arch); assertNotNull(arch.getCore()); assertTrue(arch.getCore().getId().getIndex() == 0); assertTrue(arch.getCore().hasTerm(DwcTerm.scientificName)); assertEquals(2, arch.getExtensions().size()); assertEquals("\t", arch.getCore().getFieldsTerminatedBy()); assertNull(arch.getCore().getField(DwcTerm.scientificName).getDelimitedBy()); assertEquals(";", arch.getCore().getField(DwcTerm.nomenclaturalStatus).getDelimitedBy()); assertEquals(NOMENCLATURAL_CODE_VOCABULARY, arch.getCore().getField(DwcTerm.nomenclaturalCode).getVocabulary()); assertEquals(LANGUAGE_VOCABULARY, arch.getExtension(GbifTerm.VernacularName).getField(DcTerm.language).getVocabulary()); // write meta.xml File tmpDwca = createTmpMeta(arch); Archive arch2 = ArchiveFactory.openArchive(tmpDwca); // core props ArchiveFile core = arch2.getCore(); assertNotNull(core); assertNotNull(core.getId()); assertTrue(core.hasTerm(DwcTerm.scientificName)); assertEquals("DarwinCore.txt", core.getLocation()); assertEquals("\t", core.getFieldsTerminatedBy()); assertNull(core.getField(DwcTerm.scientificName).getDelimitedBy()); assertEquals(";", core.getField(DwcTerm.nomenclaturalStatus).getDelimitedBy()); assertEquals(NOMENCLATURAL_CODE_VOCABULARY, core.getField(DwcTerm.nomenclaturalCode).getVocabulary()); for (ArchiveField f : arch.getCore().getFields().values()) { assertTrue(core.hasTerm(f.getTerm().qualifiedName())); assertEquals(core.getField(f.getTerm().qualifiedName()).getIndex(), f.getIndex()); } // extensions props assertEquals(2, arch2.getExtensions().size()); Set<String> filenames = Sets.newHashSet("VernacularName.txt", "media.txt"); for (ArchiveFile ext : arch2.getExtensions()) { assertTrue(filenames.contains(ext.getLocation())); filenames.remove(ext.getLocation()); } assertTrue(filenames.isEmpty()); } catch (Exception e) { e.printStackTrace(); fail(); } } private File createTmpMeta(Archive arch) throws IOException { File tmpDir = Files.createTempDirectory("dwca-io-test").toFile(); tmpDir.deleteOnExit(); File tmpMeta = new File(tmpDir, Archive.META_FN); System.out.println("Writing temporary test meta file to " + tmpMeta.getAbsolutePath()); MetaDescriptorWriter.writeMetaFile(tmpMeta, arch); return tmpDir; } @Test public void testRoundtripQuotes() { try { // read archive Archive arch = ArchiveFactory.openArchive(FileUtils.getClasspathFile("xml-entity-meta")); assertNotNull(arch); assertNotNull(arch.getCore()); assertNotNull(arch.getCore().getId()); assertTrue(arch.getCore().hasTerm(DwcTerm.scientificName)); assertEquals(1, arch.getExtensions().size()); // write meta.xml File tmpDwca = createTmpMeta(arch); Archive arch2 = ArchiveFactory.openArchive(tmpDwca); // core props ArchiveFile core = arch2.getCore(); assertNotNull(core); assertNotNull(core.getId()); assertTrue(core.hasTerm(DwcTerm.scientificName)); assertEquals("test", core.getLocation()); for (ArchiveField f : arch.getCore().getFields().values()) { assertTrue(core.hasTerm(f.getTerm().qualifiedName())); assertEquals(core.getField(f.getTerm().qualifiedName()).getIndex(), f.getIndex()); } // extensions props assertEquals(1, arch2.getExtensions().size()); ArchiveFile ext = arch2.getExtensions().iterator().next(); assertEquals("test2", ext.getLocation()); assertEquals(2, ext.getFields().size()); } catch (Exception e) { e.printStackTrace(); fail(); } } /** * Test the reading of a static meta.xml file. * * @throws Exception */ @Test public void testMetaDescriptorReading() throws Exception { // we can read only a meta.xml file as an Archive Archive arch = new Archive(); ArchiveFactory.readMetaDescriptor(arch, new FileInputStream(FileUtils.getClasspathFile("meta/meta.xml"))); //validate archive ID field ArchiveField af = arch.getCore().getId(); assertEquals(Integer.valueOf(1), af.getIndex()); //validate default af = arch.getCore().getField(DwcTerm.kingdom); assertEquals("Animalia", af.getDefaultValue()); // validate vocabulary af = arch.getCore().getField(DwcTerm.nomenclaturalCode); assertEquals(NOMENCLATURAL_CODE_VOCABULARY, af.getVocabulary()); } }