/***************************************************************************
* Copyright 2010 Global Biodiversity Information Facility Secretariat
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
***************************************************************************/
package org.gbif.ipt.task;
import org.gbif.api.model.common.DOI;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwca.io.Archive;
import org.gbif.dwca.io.ArchiveFactory;
import org.gbif.utils.file.csv.CSVReader;
import org.gbif.ipt.action.BaseAction;
import org.gbif.ipt.config.AppConfig;
import org.gbif.ipt.config.Constants;
import org.gbif.ipt.config.DataDir;
import org.gbif.ipt.config.IPTModule;
import org.gbif.ipt.config.JdbcSupport;
import org.gbif.ipt.mock.MockAppConfig;
import org.gbif.ipt.mock.MockDataDir;
import org.gbif.ipt.mock.MockRegistryManager;
import org.gbif.ipt.model.Extension;
import org.gbif.ipt.model.FileSource;
import org.gbif.ipt.model.Resource;
import org.gbif.ipt.model.User;
import org.gbif.ipt.model.converter.ConceptTermConverter;
import org.gbif.ipt.model.converter.ExtensionRowTypeConverter;
import org.gbif.ipt.model.converter.JdbcInfoConverter;
import org.gbif.ipt.model.converter.OrganisationKeyConverter;
import org.gbif.ipt.model.converter.PasswordConverter;
import org.gbif.ipt.model.converter.UserEmailConverter;
import org.gbif.ipt.model.factory.ExtensionFactory;
import org.gbif.ipt.model.factory.ThesaurusHandlingRule;
import org.gbif.ipt.model.voc.IdentifierStatus;
import org.gbif.ipt.service.AlreadyExistingException;
import org.gbif.ipt.service.ImportException;
import org.gbif.ipt.service.InvalidFilenameException;
import org.gbif.ipt.service.admin.ExtensionManager;
import org.gbif.ipt.service.admin.RegistrationManager;
import org.gbif.ipt.service.admin.UserAccountManager;
import org.gbif.ipt.service.admin.VocabulariesManager;
import org.gbif.ipt.service.admin.impl.VocabulariesManagerImpl;
import org.gbif.ipt.service.manage.SourceManager;
import org.gbif.ipt.service.manage.impl.ResourceManagerImpl;
import org.gbif.ipt.service.manage.impl.SourceManagerImpl;
import org.gbif.ipt.service.registry.RegistryManager;
import org.gbif.ipt.struts2.SimpleTextProvider;
import org.gbif.utils.file.CompressionUtil;
import org.gbif.utils.file.FileUtils;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import javax.validation.constraints.NotNull;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import com.google.common.collect.Maps;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.servlet.ServletModule;
import com.google.inject.struts2.Struts2GuicePluginModule;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.log4j.Logger;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.xml.sax.SAXException;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
public class GenerateDwcaTest {
private static final Logger LOG = Logger.getLogger(GenerateDwcaTest.class);
private static final String RESOURCE_SHORTNAME = "res1";
private static final String VERSIONED_ARCHIVE_FILENAME = "dwca-3.0.zip";
private GenerateDwca generateDwca;
private Resource resource;
private User creator;
private ReportHandler mockHandler;
private DataDir mockDataDir = MockDataDir.buildMock();
private AppConfig mockAppConfig = MockAppConfig.buildMock();
private SourceManager mockSourceManager;
private static VocabulariesManager mockVocabulariesManager = mock(VocabulariesManager.class);
private File tmpDataDir;
private File resourceDir;
@BeforeClass
public static void init() {
// populate HashMap from basisOfRecord vocabulary, with lowercase keys (used in basisOfRecord validation)
Map<String, String> basisOfRecords = Maps.newHashMap();
basisOfRecords.put("preservedspecimen", "Preserved Specimen");
basisOfRecords.put("fossilspecimen", "Fossil Specimen");
basisOfRecords.put("livingspecimen", "Living Specimen");
basisOfRecords.put("humanobservation", "Human Observation");
basisOfRecords.put("machineobservation", "Machine Observation");
basisOfRecords.put("materialsample", "Material Sample");
basisOfRecords.put("occurrence", "Occurrence");
when(
mockVocabulariesManager.getI18nVocab(Constants.VOCAB_URI_BASIS_OF_RECORDS, Locale.ENGLISH.getLanguage(), false))
.thenReturn(basisOfRecords);
}
@Before
public void setup() throws IOException {
// create resource, version 3.0
resource = new Resource();
resource.setShortname(RESOURCE_SHORTNAME);
resource.setEmlVersion(new BigDecimal("3.0"));
// create user
creator = new User();
creator.setFirstname("Leonardo");
creator.setLastname("Pisano");
creator.setEmail("fi@liberabaci.com");
creator.setLastLoginToNow();
creator.setRole(User.Role.Manager);
creator.setPassword("011235813");
mockHandler = mock(ResourceManagerImpl.class);
resourceDir = FileUtils.createTempDir();
File publicationLogFile = new File(resourceDir, DataDir.PUBLICATION_LOG_FILENAME);
// publication log file
when(mockDataDir.resourcePublicationLogFile(RESOURCE_SHORTNAME)).thenReturn(publicationLogFile);
// tmp directory
tmpDataDir = FileUtils.createTempDir();
when(mockDataDir.tmpDir()).thenReturn(tmpDataDir);
// archival mode on
when(mockAppConfig.isArchivalMode()).thenReturn(true);
}
/**
* A resource with no core is expected to throw a GeneratorException.
*/
@Test(expected = GeneratorException.class)
public void testResourceWithNoCore() throws Exception {
generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mock(SourceManager.class), mockAppConfig,
mock(VocabulariesManager.class));
generateDwca.call();
}
@Test
public void testGenerateCoreFromSingleSourceFile() throws Exception {
// retrieve sample zipped resource XML configuration file
File resourceXML = FileUtils.getClasspathFile("resources/res1/resource.xml");
// create resource from single source file (with empty line as last line)
File occurrence = FileUtils.getClasspathFile("resources/res1/occurrence.txt");
Resource resource = getResource(resourceXML, occurrence);
generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mockSourceManager, mockAppConfig,
mockVocabulariesManager);
Map<String, Integer> recordsByExtension = generateDwca.call();
// count for occurrence core only
assertEquals(1, recordsByExtension.size());
// 2 rows in core file
String coreRowType = resource.getCoreRowType();
assertEquals(Constants.DWC_ROWTYPE_OCCURRENCE, coreRowType);
int recordCount = recordsByExtension.get(resource.getCoreRowType());
assertEquals(2, recordCount);
// confirm existence of versioned (archived) DwC-A "dwca-3.0.zip"
File versionedDwca = new File(resourceDir, VERSIONED_ARCHIVE_FILENAME);
assertTrue(versionedDwca.exists());
// investigate the DwC-A
File dir = FileUtils.createTempDir();
CompressionUtil.decompressFile(dir, versionedDwca, true);
Archive archive = ArchiveFactory.openArchive(dir);
assertEquals(DwcTerm.Occurrence, archive.getCore().getRowType());
assertEquals(0, archive.getCore().getId().getIndex().intValue());
assertEquals(4, archive.getCore().getFieldsSorted().size());
// confirm order of fields appear honors order of Occurrence Core Extension
assertEquals("basisOfRecord", archive.getCore().getFieldsSorted().get(0).getTerm().simpleName());
assertEquals("occurrenceID", archive.getCore().getFieldsSorted().get(1).getTerm().simpleName());
assertEquals("scientificName", archive.getCore().getFieldsSorted().get(2).getTerm().simpleName());
assertEquals("kingdom", archive.getCore().getFieldsSorted().get(3).getTerm().simpleName());
// confirm data written to file
CSVReader reader = archive.getCore().getCSVReader();
// 1st record
String[] row = reader.next();
assertEquals("1", row[0]);
assertEquals("occurrence", row[1]);
assertEquals("1", row[2]);
assertEquals("puma concolor", row[3]);
assertEquals("occurrence", row[4]);
// 2nd record
row = reader.next();
assertEquals("2", row[0]);
assertEquals("occurrence", row[1]);
assertEquals("2", row[2]);
assertEquals("pumm:concolor", row[3]);
assertEquals("occurrence", row[4]);
reader.close();
// since basisOfRecord was occurrence, and this is ambiguous, there should be a warning message!
boolean foundWarningAboutAmbiguousBOR = false;
// since there was an empty line at bottom of file, there should be a warning message!
boolean foundWarningAboutEmptyLine = false;
for (Iterator<TaskMessage> iter = generateDwca.report().getMessages().iterator(); iter.hasNext();) {
TaskMessage msg = iter.next();
if (msg.getMessage().startsWith("2 line(s) use ambiguous basisOfRecord")) {
foundWarningAboutAmbiguousBOR = true;
} else if (msg.getMessage().startsWith("1 empty line(s) skipped")) {
foundWarningAboutEmptyLine = true;
}
}
assertTrue(foundWarningAboutAmbiguousBOR);
assertTrue(foundWarningAboutEmptyLine);
}
/**
* Confirm resource DOI used for datasetID, when setting "doi used for DatasetID" has been turned on in the extension
* mapping.
*/
@Test
public void testGenerateCoreFromSingleSourceFileDOIForDatasetID() throws Exception {
// retrieve sample zipped resource XML configuration file, where setting "doi used for datasetID" has been turned on
File resourceXML = FileUtils.getClasspathFile("resources/res1/resource_doi_dataset_id.xml");
// create resource from single source file
File occurrence = FileUtils.getClasspathFile("resources/res1/occurrence_doi_dataset_id.txt");
// set DOI
Resource resource = getResource(resourceXML, occurrence);
resource.setDoi(new DOI("10.5072/gc8gqc"));
resource.setIdentifierStatus(IdentifierStatus.PUBLIC_PENDING_PUBLICATION);
// assert DOI set properly
assertNotNull(resource.getDoi());
assertEquals("10.5072/gc8gqc", resource.getDoi().getDoiName());
assertEquals(IdentifierStatus.PUBLIC_PENDING_PUBLICATION, resource.getIdentifierStatus());
generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mockSourceManager, mockAppConfig,
mockVocabulariesManager);
Map<String, Integer> recordsByExtension = generateDwca.call();
// count for occurrence core only
assertEquals(1, recordsByExtension.size());
// 2 rows in core file
String coreRowType = resource.getCoreRowType();
assertEquals(Constants.DWC_ROWTYPE_OCCURRENCE, coreRowType);
int recordCount = recordsByExtension.get(resource.getCoreRowType());
assertEquals(2, recordCount);
// confirm existence of versioned (archived) DwC-A "dwca-3.0.zip"
File versionedDwca = new File(resourceDir, VERSIONED_ARCHIVE_FILENAME);
assertTrue(versionedDwca.exists());
// investigate the DwC-A
File dir = FileUtils.createTempDir();
CompressionUtil.decompressFile(dir, versionedDwca, true);
Archive archive = ArchiveFactory.openArchive(dir);
assertEquals(DwcTerm.Occurrence, archive.getCore().getRowType());
assertEquals(0, archive.getCore().getId().getIndex().intValue());
assertEquals(5, archive.getCore().getFieldsSorted().size());
// confirm order of fields appear honors order of Occurrence Core Extension
assertEquals("datasetID", archive.getCore().getFieldsSorted().get(0).getTerm().simpleName());
assertEquals("basisOfRecord", archive.getCore().getFieldsSorted().get(1).getTerm().simpleName());
assertEquals("occurrenceID", archive.getCore().getFieldsSorted().get(2).getTerm().simpleName());
assertEquals("scientificName", archive.getCore().getFieldsSorted().get(3).getTerm().simpleName());
assertEquals("kingdom", archive.getCore().getFieldsSorted().get(4).getTerm().simpleName());
// confirm data written to file
CSVReader reader = archive.getCore().getCSVReader();
// 1st record
String[] row = reader.next();
assertEquals("1", row[0]);
assertEquals("doi:10.5072/gc8gqc", row[1]); // confirm resource DOI used for datasetID
assertEquals("occurrence", row[2]);
assertEquals("1", row[3]);
assertEquals("puma concolor", row[4]);
assertEquals("occurrence", row[5]);
// 2nd record
row = reader.next();
assertEquals("2", row[0]);
assertEquals("doi:10.5072/gc8gqc", row[1]); // confirm resource DOI used for datasetID
assertEquals("occurrence", row[2]);
assertEquals("2", row[3]);
assertEquals("pumm:concolor", row[4]);
assertEquals("occurrence", row[5]);
reader.close();
}
@Test
public void testGenerateCoreFromSingleSourceFileNoIdMapped() throws Exception {
// retrieve sample zipped resource XML configuration file, with no id mapped
File resourceXML = FileUtils.getClasspathFile("resources/res1/resource_no_id_mapped.xml");
// create resource from single source file with an id column with non unique values (mapped to individual ID).
// since the non unique ids aren't mapped to the core record identifier (occurrenceID) validation isn't triggered.
File occurrence = FileUtils.getClasspathFile("resources/res1/occurrence_non_unique_ids.txt");
Resource resource = getResource(resourceXML, occurrence);
generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mockSourceManager, mockAppConfig,
mockVocabulariesManager);
Map<String, Integer> recordsByExtension = generateDwca.call();
// count for occurrence core only
assertEquals(1, recordsByExtension.size());
// 4 rows in core file
String coreRowType = resource.getCoreRowType();
assertEquals(Constants.DWC_ROWTYPE_OCCURRENCE, coreRowType);
int recordCount = recordsByExtension.get(resource.getCoreRowType());
assertEquals(4, recordCount);
// confirm existence of versioned DwC-A "dwca-3.0.zip"
File versionedDwca = new File(resourceDir, VERSIONED_ARCHIVE_FILENAME);
assertTrue(versionedDwca.exists());
// investigate the DwC-A
File dir = FileUtils.createTempDir();
CompressionUtil.decompressFile(dir, versionedDwca, true);
Archive archive = ArchiveFactory.openArchive(dir);
assertEquals(DwcTerm.Occurrence, archive.getCore().getRowType());
assertEquals(0, archive.getCore().getId().getIndex().intValue());
assertEquals(4, archive.getCore().getFieldsSorted().size());
// confirm order of fields appear honors order of Occurrence Core Extension
assertEquals("basisOfRecord", archive.getCore().getFieldsSorted().get(0).getTerm().simpleName());
assertEquals("organismID", archive.getCore().getFieldsSorted().get(1).getTerm().simpleName());
assertEquals("scientificName", archive.getCore().getFieldsSorted().get(2).getTerm().simpleName());
assertEquals("kingdom", archive.getCore().getFieldsSorted().get(3).getTerm().simpleName());
// confirm data written to file
CSVReader reader = archive.getCore().getCSVReader();
// 1st record
String[] row = reader.next();
// no id was mapped, so the first column (ID column, index 0) is empty
assertEquals("", row[0]);
assertEquals("HumanObservation", row[1]);
assertEquals("1", row[2]);
assertEquals("puma concolor", row[3]);
assertEquals("Animalia", row[4]);
// 2nd record
row = reader.next();
assertEquals("", row[0]);
assertEquals("HumanObservation", row[1]);
assertEquals("2", row[2]);
assertEquals("Panthera onca", row[3]);
assertEquals("Animalia", row[4]);
reader.close();
}
/**
* A generated DwC-a with occurrenceID mapped, but missing one or more occurrenceID values, is expected to
* throw a GeneratorException.
*/
@Test(expected = GeneratorException.class)
public void testValidateCoreFromSingleSourceFileMissingIds() throws Exception {
// retrieve sample zipped resource XML configuration file
File resourceXML = FileUtils.getClasspathFile("resources/res1/resource.xml");
// create resource, with single source file that is missing occurrenceIDs
File occurrence = FileUtils.getClasspathFile("resources/res1/occurrence_missing_ids.txt");
Resource resource = getResource(resourceXML, occurrence);
generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mockSourceManager, mockAppConfig,
mock(VocabulariesManager.class));
generateDwca.call();
}
/**
* A generated DwC-a with occurrenceID mapped, but having non unique occurrenceID values, is expected to
* throw a GeneratorException.
*/
@Test(expected = GeneratorException.class)
public void testValidateCoreFromSingleSourceFileNonUniqueIds() throws Exception {
// retrieve sample zipped resource XML configuration file
File resourceXML = FileUtils.getClasspathFile("resources/res1/resource.xml");
// create resource, with single source file that has non-unique occurrenceIDs, regardless of case
File occurrence = FileUtils.getClasspathFile("resources/res1/occurrence_non_unique_ids.txt");
Resource resource = getResource(resourceXML, occurrence);
generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mockSourceManager, mockAppConfig,
mock(VocabulariesManager.class));
generateDwca.call();
}
/**
* A generated DwC-a with occurrenceID mapped, but with occurrenceID values that are non unique when compared with
* case insensitivity, is expected to throw a GeneratorException. E.g. FISHES:1 and fishes:1 are considered equal.
*/
@Test(expected = GeneratorException.class)
public void testValidateCoreFromSingleSourceFileNonUniqueIdsCase() throws Exception {
// retrieve sample zipped resource XML configuration file
File resourceXML = FileUtils.getClasspathFile("resources/res1/resource.xml");
// create resource, with single source file that has unique occurrenceIDs due when compared with case sensitivity
// and non-unique occurrenceIDs when compared with case sensitivity
File occurrence = FileUtils.getClasspathFile("resources/res1/occurrence_non_unique_ids_case.txt");
Resource resource = getResource(resourceXML, occurrence);
generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mockSourceManager, mockAppConfig,
mock(VocabulariesManager.class));
generateDwca.call();
}
/**
* Generates a test Resource.
* </br>
* The test resource is built from the test occurrence resource /res1 (res1/resource.xml, res1/eml.xml) mocking all
* necessary methods executed by GenerateDwca.call().
* </br>
* For flexibility, the source file used to generate the core data file can be changed. The columns of this
* source file must match the resourceXML configuration file passed in. By changing the source file and resource
* configuration file, multiple scenarios can be created for testing.
*
* @param resourceXML resource (XML) configuration file defining column mapping of sourceFile
* @param sourceFile source file
*
* @return test Resource
*/
private Resource getResource(@NotNull File resourceXML, @NotNull File sourceFile)
throws IOException, SAXException, ParserConfigurationException, AlreadyExistingException, ImportException,
InvalidFilenameException {
UserAccountManager mockUserAccountManager = mock(UserAccountManager.class);
UserEmailConverter mockEmailConverter = new UserEmailConverter(mockUserAccountManager);
RegistrationManager mockRegistrationManager = mock(RegistrationManager.class);
OrganisationKeyConverter mockOrganisationKeyConverter = new OrganisationKeyConverter(mockRegistrationManager);
RegistryManager mockRegistryManager = MockRegistryManager.buildMock();
GenerateDwcaFactory mockDwcaFactory = mock(GenerateDwcaFactory.class);
Eml2Rtf mockEml2Rtf = mock(Eml2Rtf.class);
VocabulariesManager mockVocabulariesManager = mock(VocabulariesManager.class);
SimpleTextProvider mockSimpleTextProvider = mock(SimpleTextProvider.class);
BaseAction baseAction = new BaseAction(mockSimpleTextProvider, mockAppConfig, mockRegistrationManager);
// construct ExtensionFactory using injected parameters
Injector injector = Guice.createInjector(new ServletModule(), new Struts2GuicePluginModule(), new IPTModule());
DefaultHttpClient httpClient = injector.getInstance(DefaultHttpClient.class);
ThesaurusHandlingRule thesaurusRule = new ThesaurusHandlingRule(mock(VocabulariesManagerImpl.class));
SAXParserFactory saxf = injector.getInstance(SAXParserFactory.class);
ExtensionFactory extensionFactory = new ExtensionFactory(thesaurusRule, saxf, httpClient);
JdbcSupport support = injector.getInstance(JdbcSupport.class);
PasswordConverter passwordConverter = injector.getInstance(PasswordConverter.class);
JdbcInfoConverter jdbcConverter = new JdbcInfoConverter(support);
// construct occurrence core Extension
InputStream occurrenceCoreIs = GenerateDwcaTest.class.getResourceAsStream(
"/extensions/dwc_occurrence_2015-04-24.xml");
Extension occurrenceCore = extensionFactory.build(occurrenceCoreIs);
ExtensionManager extensionManager = mock(ExtensionManager.class);
// construct event core Extension
InputStream eventCoreIs = GenerateDwcaTest.class.getResourceAsStream("/extensions/dwc_event_2015-04-24.xml");
Extension eventCore = extensionFactory.build(eventCoreIs);
// mock ExtensionManager returning occurrence core Extension
when(extensionManager.get("http://rs.tdwg.org/dwc/terms/Occurrence")).thenReturn(occurrenceCore);
when(extensionManager.get("http://rs.tdwg.org/dwc/terms/Event")).thenReturn(eventCore);
when(extensionManager.get("http://rs.tdwg.org/dwc/xsd/simpledarwincore/SimpleDarwinRecord"))
.thenReturn(occurrenceCore);
ExtensionRowTypeConverter extensionRowTypeConverter = new ExtensionRowTypeConverter(extensionManager);
ConceptTermConverter conceptTermConverter = new ConceptTermConverter(extensionRowTypeConverter);
// mock finding resource.xml file
when(mockDataDir.resourceFile(anyString(), anyString())).thenReturn(resourceXML);
// retrieve sample zipped resource folder
File zippedResourceFolder = FileUtils.getClasspathFile("resources/res1.zip");
// retrieve sample eml.xml file
File emlXML = FileUtils.getClasspathFile("resources/res1/eml.xml");
// mock finding eml.xml file
when(mockDataDir.resourceEmlFile(anyString())).thenReturn(emlXML);
// mock finding dwca.zip file that does not exist
when(mockDataDir.resourceDwcaFile(anyString())).thenReturn(new File("dwca.zip"));
// create SourceManagerImpl
mockSourceManager = new SourceManagerImpl(mock(AppConfig.class), mockDataDir);
// create ResourceManagerImpl
ResourceManagerImpl resourceManager =
new ResourceManagerImpl(mockAppConfig, mockDataDir, mockEmailConverter, mockOrganisationKeyConverter,
extensionRowTypeConverter, jdbcConverter, mockSourceManager, extensionManager, mockRegistryManager,
conceptTermConverter, mockDwcaFactory, passwordConverter, mockEml2Rtf, mockVocabulariesManager,
mockSimpleTextProvider, mockRegistrationManager);
// create a new resource.
resource = resourceManager.create(RESOURCE_SHORTNAME, null, zippedResourceFolder, creator, baseAction);
// copy source file to tmp folder
File copied = new File(resourceDir, "occurrence.txt");
// mock file to which source file gets copied to
when(mockDataDir.sourceFile(any(Resource.class), any(FileSource.class))).thenReturn(copied);
// mock log file
when(mockDataDir.sourceLogFile(anyString(), anyString())).thenReturn(new File(resourceDir, "log.txt"));
// add SourceBase.TextFileSource fileSource to test Resource
FileSource fileSource = mockSourceManager.add(resource, sourceFile, "occurrence.txt");
resource.getMappings().get(0).setSource(fileSource);
// mock creation of zipped dwca in temp directory - this later becomes the actual archive generated
when(mockDataDir.tmpFile(anyString(), anyString())).thenReturn(new File(tmpDataDir, "dwca.zip"));
// mock creation of versioned zipped dwca in resource directory
when(mockDataDir.resourceDwcaFile(anyString(), any(BigDecimal.class)))
.thenReturn(new File(resourceDir, VERSIONED_ARCHIVE_FILENAME));
return resource;
}
@Test
public void testCreateFileName() throws Exception {
generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mockSourceManager, mockAppConfig,
mock(VocabulariesManager.class));
// DwC-A directory
File dir = FileUtils.createTempDir();
// first file
File materialsample = new File(dir, "materialsample.txt");
materialsample.createNewFile();
String fileName = generateDwca.createFileName(dir, "materialsample");
assertEquals("materialsample2.txt", fileName);
// second file
File materialsample2 = new File(dir, "materialsample2.txt");
materialsample2.createNewFile();
fileName = generateDwca.createFileName(dir, "materialsample");
assertEquals("materialsample3.txt", fileName);
// third file
File materialsample3 = new File(dir, "materialsample3.txt");
materialsample3.createNewFile();
fileName = generateDwca.createFileName(dir, "materialsample");
assertEquals("materialsample4.txt", fileName);
}
/**
* Confirm occurrence core with rows missing basisOfRecord throws GeneratorException.
*/
@Test(expected = GeneratorException.class)
public void testGenerateCoreFromSingleSourceFileMissingBasisOfRecord() throws Exception {
// retrieve sample zipped resource XML configuration file corresponding to occurrence_missing_bor.txt
File resourceXML = FileUtils.getClasspathFile("resources/res1/resource_doi_dataset_id.xml");
// create resource from single source file
File occurrence = FileUtils.getClasspathFile("resources/res1/occurrence_missing_bor.txt");
Resource resource = getResource(resourceXML, occurrence);
generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mockSourceManager, mockAppConfig,
mockVocabulariesManager);
generateDwca.call();
}
/**
* Confirm occurrence core with rows with basisOfRecord not matching Darwin Core Type Vocabulary throws
* GeneratorException.
*/
@Test(expected = GeneratorException.class)
public void testGenerateCoreFromSingleSourceFileNonMatchingBasisOfRecord() throws Exception {
// retrieve sample zipped resource XML configuration file corresponding to occurrence_missing_bor.txt
File resourceXML = FileUtils.getClasspathFile("resources/res1/resource_doi_dataset_id.xml");
// create resource from single source file
File occurrence = FileUtils.getClasspathFile("resources/res1/occurrence_non_matching_bor.txt");
Resource resource = getResource(resourceXML, occurrence);
generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mockSourceManager, mockAppConfig,
mockVocabulariesManager);
generateDwca.call();
}
/**
* Confirm occurrence core missing required basisOfRecord mapping throws GeneratorException.
*/
@Test(expected = GeneratorException.class)
public void testGenerateCoreFromSingleSourceFileMissingBasisOfRecordMapping() throws Exception {
// retrieve sample zipped resource XML configuration file corresponding to occurrence_missing_bor.txt
File resourceXML = FileUtils.getClasspathFile("resources/res1/resource_no_bor_mapped.xml");
// create resource from single source file
File occurrence = FileUtils.getClasspathFile("resources/res1/occurrence_no_bor_mapped.txt");
Resource resource = getResource(resourceXML, occurrence);
generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mockSourceManager, mockAppConfig,
mockVocabulariesManager);
generateDwca.call();
}
/**
* Test makes sure the multi-value field delimiter gets set on the appropriate term mappings in the meta.xml.
*/
@Test
public void testMultiValueFieldDelimiterSet() throws Exception {
// retrieve sample zipped resource XML configuration file
File resourceXML = FileUtils.getClasspathFile("resources/res1/resource_multivalue.xml");
// create resource from single source file
File occurrence = FileUtils.getClasspathFile("resources/res1/occurrence_multivalue.txt");
Resource resource = getResource(resourceXML, occurrence);
resource.getMappings().get(0).getSource().setMultiValueFieldsDelimitedBy("|");
generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mockSourceManager, mockAppConfig,
mockVocabulariesManager);
Map<String, Integer> recordsByExtension = generateDwca.call();
// count for occurrence core only
assertEquals(1, recordsByExtension.size());
// 2 rows in core file
String coreRowType = resource.getCoreRowType();
assertEquals(Constants.DWC_ROWTYPE_OCCURRENCE, coreRowType);
int recordCount = recordsByExtension.get(resource.getCoreRowType());
assertEquals(2, recordCount);
// confirm existence of versioned (archived) DwC-A "dwca-3.0.zip"
File versionedDwca = new File(resourceDir, VERSIONED_ARCHIVE_FILENAME);
assertTrue(versionedDwca.exists());
// investigate the DwC-A
File dir = FileUtils.createTempDir();
CompressionUtil.decompressFile(dir, versionedDwca, true);
Archive archive = ArchiveFactory.openArchive(dir);
assertEquals(DwcTerm.Occurrence, archive.getCore().getRowType());
assertEquals(0, archive.getCore().getId().getIndex().intValue());
assertEquals(4, archive.getCore().getFieldsSorted().size());
assertEquals("|", archive.getCore().getField(DwcTerm.associatedMedia).getDelimitedBy());
assertNull(archive.getCore().getField(DwcTerm.occurrenceID).getDelimitedBy());
// confirm order of fields appear honors order of Occurrence Core Extension
assertEquals("associatedMedia", archive.getCore().getFieldsSorted().get(2).getTerm().simpleName());
// confirm data written to file
CSVReader reader = archive.getCore().getCSVReader();
// 1st record
String[] row = reader.next();
assertEquals("http://dummyimage.com/1|http://dummyimage.com/2", row[3]);
// 2nd record
row = reader.next();
assertEquals("http://dummyimage.com/3|http://dummyimage.com/4", row[3]);
reader.close();
}
/**
* A generated DwC-a with event core, but not having associated occurrences, is expected to show a warning message
*/
@Test
public void testValidateEventCoreFromSingleSourceFileMissingOccurrenceExtension() throws Exception {
// retrieve sample zipped resource XML configuration file
File resourceXML = FileUtils.getClasspathFile("resources/res1/resource_event_1.xml");
// create sampling event resource, with single source file
File event = FileUtils.getClasspathFile("resources/res1/event.txt");
Resource resource = getResource(resourceXML, event);
generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mockSourceManager, mockAppConfig,
mock(VocabulariesManager.class));
generateDwca.call();
// check for warning message
boolean foundWarning = false;
for (Iterator<TaskMessage> iter = generateDwca.report().getMessages().iterator(); iter.hasNext();) {
TaskMessage msg = iter.next();
if (msg.getMessage().equals("The sampling event resource has no associated occurrences.")) {
foundWarning = true;
}
}
assertTrue(foundWarning);
}
@Test
public void testTabRow() throws IOException {
generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mockSourceManager, mockAppConfig,
mockVocabulariesManager);
String[] elements = new String[] {"1", "humanObservation", "Panthera tigris"};
String tabRow = generateDwca.tabRow(elements);
assertEquals("1\thumanObservation\tPanthera tigris\n", tabRow);
// with line breaking characters replaced with empty space
elements = new String[] {"OBS\t1", "human\rObservation", "Panthera ti\ngris"};
tabRow = generateDwca.tabRow(elements);
assertEquals("OBS 1\thuman Observation\tPanthera ti gris\n", tabRow);
// check column with null value is still represented
elements = new String[] {"1", null, "humanObservation"};
tabRow = generateDwca.tabRow(elements);
assertEquals("1\t\thumanObservation\n", tabRow);
// with null values
elements = new String[] {null, null, null};
tabRow = generateDwca.tabRow(elements);
assertNull(tabRow);
}
}