package edu.harvard.iq.dataverse.ingest; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataTable; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.mocks.MocksFactory; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import static org.junit.Assert.assertEquals; import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; import static edu.harvard.iq.dataverse.mocks.MocksFactory.makeDataset; import java.util.Set; import javax.validation.ConstraintViolation; /** * Tests against IngestServiceBean helper methods. * * @author bmckinney */ public class IngestServiceBeanHelperTest { @BeforeClass public static void setUpClass() { } @AfterClass public static void tearDownClass() { } @Before public void setUp() { } @After public void tearDown() { } @Test /** * Test adding duplicate file name labels to a dataset version with no subdirectories. */ public void testCheckForDuplicateFileNamesNoDirectories() throws Exception { SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd"); // create dataset Dataset dataset = makeDataset(); // create dataset version DatasetVersion datasetVersion = dataset.getEditVersion(); datasetVersion.setCreateTime( dateFmt.parse("20001012") ); datasetVersion.setLastUpdateTime( datasetVersion.getLastUpdateTime() ); datasetVersion.setId( MocksFactory.nextId() ); datasetVersion.setReleaseTime( dateFmt.parse("20010101") ); datasetVersion.setVersionState(DatasetVersion.VersionState.RELEASED); datasetVersion.setMinorVersionNumber(0L); datasetVersion.setVersionNumber(1L); datasetVersion.setFileMetadatas(new ArrayList<>()); // create datafiles List<DataFile> dataFileList = new ArrayList<>(); DataFile datafile1 = new DataFile("application/octet-stream"); datafile1.setStorageIdentifier("datafile1.txt"); datafile1.setFilesize(200); datafile1.setModificationTime(new Timestamp(new Date().getTime())); datafile1.setCreateDate(new Timestamp(new Date().getTime())); datafile1.setPermissionModificationTime(new Timestamp(new Date().getTime())); datafile1.setOwner(dataset); datafile1.setIngestDone(); datafile1.setChecksumType(DataFile.ChecksumType.SHA1); datafile1.setChecksumValue("Unknown"); // set metadata and add verson FileMetadata fmd1 = new FileMetadata(); fmd1.setId(1L); fmd1.setLabel("datafile1.txt"); fmd1.setDataFile(datafile1); datafile1.getFileMetadatas().add(fmd1); datasetVersion.getFileMetadatas().add(fmd1); fmd1.setDatasetVersion(datasetVersion); dataFileList.add(datafile1); DataFile datafile2 = new DataFile("application/octet-stream"); datafile2.setStorageIdentifier("datafile2.txt"); datafile2.setFilesize(200); datafile2.setModificationTime(new Timestamp(new Date().getTime())); datafile2.setCreateDate(new Timestamp(new Date().getTime())); datafile2.setPermissionModificationTime(new Timestamp(new Date().getTime())); datafile2.setOwner(dataset); datafile2.setIngestDone(); datafile2.setChecksumType(DataFile.ChecksumType.SHA1); datafile2.setChecksumValue("Unknown"); // set metadata and add version FileMetadata fmd2 = new FileMetadata(); fmd2.setId(2L); fmd2.setLabel("datafile2.txt"); fmd2.setDataFile(datafile2); datafile2.getFileMetadatas().add(fmd2); datasetVersion.getFileMetadatas().add(fmd2); fmd2.setDatasetVersion(datasetVersion); dataFileList.add(datafile2); IngestUtil.checkForDuplicateFileNamesFinal(datasetVersion, dataFileList); boolean file1NameAltered = false; boolean file2NameAltered = false; for (DataFile df : dataFileList) { if (df.getFileMetadata().getLabel().equals("datafile1-1.txt")) { file1NameAltered = true; } if (df.getFileMetadata().getLabel().equals("datafile2-1.txt")) { file2NameAltered = true; } } // check filenames are unique and unaltered assertEquals(file1NameAltered, true); assertEquals(file2NameAltered, true); // try to add data files with "-1" duplicates and see if it gets incremented to "-2" IngestUtil.checkForDuplicateFileNamesFinal(datasetVersion, dataFileList); for (DataFile df : dataFileList) { if (df.getFileMetadata().getLabel().equals("datafile1-2.txt")) { file1NameAltered = true; } if (df.getFileMetadata().getLabel().equals("datafile2-2.txt")) { file2NameAltered = true; } } // check filenames are unique and unaltered assertEquals(file1NameAltered, true); assertEquals(file2NameAltered, true); } @Test /** * Test adding duplicate file name labels to a dataset version with empty directory labels. * This should simulate what happens when uploading a file via the file upload UI. */ public void testCheckForDuplicateFileNamesWithEmptyDirectoryLabels() throws Exception { SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd"); // create dataset Dataset dataset = makeDataset(); // create dataset version DatasetVersion datasetVersion = dataset.getEditVersion(); datasetVersion.setCreateTime( dateFmt.parse("20001012") ); datasetVersion.setLastUpdateTime( datasetVersion.getLastUpdateTime() ); datasetVersion.setId( MocksFactory.nextId() ); datasetVersion.setReleaseTime( dateFmt.parse("20010101") ); datasetVersion.setVersionState(DatasetVersion.VersionState.RELEASED); datasetVersion.setMinorVersionNumber(0L); datasetVersion.setVersionNumber(1L); datasetVersion.setFileMetadatas(new ArrayList<>()); // create datafiles List<DataFile> dataFileList = new ArrayList<>(); DataFile datafile1 = new DataFile("application/octet-stream"); datafile1.setStorageIdentifier("datafile1.txt"); datafile1.setFilesize(200); datafile1.setModificationTime(new Timestamp(new Date().getTime())); datafile1.setCreateDate(new Timestamp(new Date().getTime())); datafile1.setPermissionModificationTime(new Timestamp(new Date().getTime())); datafile1.setOwner(dataset); datafile1.setIngestDone(); datafile1.setChecksumType(DataFile.ChecksumType.SHA1); datafile1.setChecksumValue("Unknown"); // set metadata and add verson FileMetadata fmd1 = new FileMetadata(); fmd1.setId(1L); fmd1.setLabel("datafile1.txt"); fmd1.setDirectoryLabel(""); fmd1.setDataFile(datafile1); datafile1.getFileMetadatas().add(fmd1); datasetVersion.getFileMetadatas().add(fmd1); fmd1.setDatasetVersion(datasetVersion); dataFileList.add(datafile1); DataFile datafile2 = new DataFile("application/octet-stream"); datafile2.setStorageIdentifier("datafile2.txt"); datafile2.setFilesize(200); datafile2.setModificationTime(new Timestamp(new Date().getTime())); datafile2.setCreateDate(new Timestamp(new Date().getTime())); datafile2.setPermissionModificationTime(new Timestamp(new Date().getTime())); datafile2.setOwner(dataset); datafile2.setIngestDone(); datafile2.setChecksumType(DataFile.ChecksumType.SHA1); datafile2.setChecksumValue("Unknown"); // set metadata and add version FileMetadata fmd2 = new FileMetadata(); fmd2.setId(2L); fmd2.setLabel("datafile2.txt"); fmd2.setDirectoryLabel(""); fmd2.setDataFile(datafile2); datafile2.getFileMetadatas().add(fmd2); datasetVersion.getFileMetadatas().add(fmd2); fmd2.setDatasetVersion(datasetVersion); dataFileList.add(datafile2); IngestUtil.checkForDuplicateFileNamesFinal(datasetVersion, dataFileList); boolean file1NameAltered = false; boolean file2NameAltered = false; for (DataFile df : dataFileList) { if (df.getFileMetadata().getLabel().equals("datafile1-1.txt")) { file1NameAltered = true; } if (df.getFileMetadata().getLabel().equals("datafile2-1.txt")) { file2NameAltered = true; } } // check filenames are unique and unaltered assertEquals(file1NameAltered, true); assertEquals(file2NameAltered, true); // try to add data files with "-1" duplicates and see if it gets incremented to "-2" IngestUtil.checkForDuplicateFileNamesFinal(datasetVersion, dataFileList); for (DataFile df : dataFileList) { if (df.getFileMetadata().getLabel().equals("datafile1-2.txt")) { file1NameAltered = true; } if (df.getFileMetadata().getLabel().equals("datafile2-2.txt")) { file2NameAltered = true; } } // check filenames are unique and unaltered assertEquals(file1NameAltered, true); assertEquals(file2NameAltered, true); } @Test /** * Test adding duplicate file name labels with directories, including a duplicate file name label in another * directory. */ public void testCheckForDuplicateFileNamesWithDirectories() throws Exception { SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd"); // create dataset Dataset dataset = makeDataset(); // create dataset version DatasetVersion datasetVersion = dataset.getEditVersion(); datasetVersion.setCreateTime( dateFmt.parse("20001012") ); datasetVersion.setLastUpdateTime( datasetVersion.getLastUpdateTime() ); datasetVersion.setId( MocksFactory.nextId() ); datasetVersion.setReleaseTime( dateFmt.parse("20010101") ); datasetVersion.setVersionState(DatasetVersion.VersionState.RELEASED); datasetVersion.setMinorVersionNumber(0L); datasetVersion.setVersionNumber(1L); datasetVersion.setFileMetadatas(new ArrayList<>()); // create datafiles List<DataFile> dataFileList = new ArrayList<>(); DataFile datafile1 = new DataFile("application/octet-stream"); datafile1.setStorageIdentifier("subdir/datafile1.txt"); datafile1.setFilesize(200); datafile1.setModificationTime(new Timestamp(new Date().getTime())); datafile1.setCreateDate(new Timestamp(new Date().getTime())); datafile1.setPermissionModificationTime(new Timestamp(new Date().getTime())); datafile1.setOwner(dataset); datafile1.setIngestDone(); datafile1.setChecksumType(DataFile.ChecksumType.SHA1); datafile1.setChecksumValue("Unknown"); // set metadata and add version FileMetadata fmd1 = new FileMetadata(); fmd1.setId(1L); fmd1.setLabel("datafile1.txt"); fmd1.setDirectoryLabel("subdir"); fmd1.setDataFile(datafile1); datafile1.getFileMetadatas().add(fmd1); datasetVersion.getFileMetadatas().add(fmd1); fmd1.setDatasetVersion(datasetVersion); dataFileList.add(datafile1); DataFile datafile2 = new DataFile("application/octet-stream"); datafile2.setStorageIdentifier("subdir/datafile2.txt"); datafile2.setFilesize(200); datafile2.setModificationTime(new Timestamp(new Date().getTime())); datafile2.setCreateDate(new Timestamp(new Date().getTime())); datafile2.setPermissionModificationTime(new Timestamp(new Date().getTime())); datafile2.setOwner(dataset); datafile2.setIngestDone(); datafile2.setChecksumType(DataFile.ChecksumType.SHA1); datafile2.setChecksumValue("Unknown"); // set metadata and add version FileMetadata fmd2 = new FileMetadata(); fmd2.setId(2L); fmd2.setLabel("datafile2.txt"); fmd2.setDirectoryLabel("subdir"); fmd2.setDataFile(datafile2); datafile2.getFileMetadatas().add(fmd2); datasetVersion.getFileMetadatas().add(fmd2); fmd2.setDatasetVersion(datasetVersion); dataFileList.add(datafile2); DataFile datafile3 = new DataFile("application/octet-stream"); datafile3.setStorageIdentifier("datafile2.txt"); datafile3.setFilesize(200); datafile3.setModificationTime(new Timestamp(new Date().getTime())); datafile3.setCreateDate(new Timestamp(new Date().getTime())); datafile3.setPermissionModificationTime(new Timestamp(new Date().getTime())); datafile3.setOwner(dataset); datafile3.setIngestDone(); datafile3.setChecksumType(DataFile.ChecksumType.SHA1); datafile3.setChecksumValue("Unknown"); // set metadata and add version FileMetadata fmd3 = new FileMetadata(); fmd3.setId(3L); fmd3.setLabel("datafile2.txt"); fmd3.setDataFile(datafile3); datafile3.getFileMetadatas().add(fmd3); dataFileList.add(datafile3); IngestUtil.checkForDuplicateFileNamesFinal(datasetVersion, dataFileList); boolean file1NameAltered = false; boolean file2NameAltered = false; boolean file3NameAltered = true; for (DataFile df : dataFileList) { if (df.getFileMetadata().getLabel().equals("datafile1-1.txt")) { file1NameAltered = true; } if (df.getFileMetadata().getLabel().equals("datafile2-1.txt")) { file2NameAltered = true; } if (df.getFileMetadata().getLabel().equals("datafile2.txt")) { file3NameAltered = false; } } // check filenames are unique assertEquals(file1NameAltered, true); assertEquals(file2NameAltered, true); assertEquals(file3NameAltered, false); // add duplicate file in root datasetVersion.getFileMetadatas().add(fmd3); fmd3.setDatasetVersion(datasetVersion); // try to add data files with "-1" duplicates and see if it gets incremented to "-2" IngestUtil.checkForDuplicateFileNamesFinal(datasetVersion, dataFileList); for (DataFile df : dataFileList) { if (df.getFileMetadata().getLabel().equals("datafile1-2.txt")) { file1NameAltered = true; } if (df.getFileMetadata().getLabel().equals("datafile2-2.txt")) { file2NameAltered = true; } if (df.getFileMetadata().getLabel().equals("datafile2-1.txt")) { file3NameAltered = true; } } // check filenames are unique assertEquals(file1NameAltered, true); assertEquals(file2NameAltered, true); assertEquals(file3NameAltered, true); } @Test /** * Test tabular files (e.g., .dta) are changed when .tab files with the same name exist. */ public void testCheckForDuplicateFileNamesTabular() throws Exception { SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd"); // create dataset Dataset dataset = makeDataset(); // create dataset version DatasetVersion datasetVersion = dataset.getEditVersion(); datasetVersion.setCreateTime( dateFmt.parse("20001012") ); datasetVersion.setLastUpdateTime( datasetVersion.getLastUpdateTime() ); datasetVersion.setId( MocksFactory.nextId() ); datasetVersion.setReleaseTime( dateFmt.parse("20010101") ); datasetVersion.setVersionState(DatasetVersion.VersionState.RELEASED); datasetVersion.setMinorVersionNumber(0L); datasetVersion.setVersionNumber(1L); datasetVersion.setFileMetadatas(new ArrayList<>()); // create datafiles List<DataFile> dataFileList = new ArrayList<>(); DataFile datafile1 = new DataFile("application/x-strata"); datafile1.setStorageIdentifier("foobar.dta"); datafile1.setFilesize(200); datafile1.setModificationTime(new Timestamp(new Date().getTime())); datafile1.setCreateDate(new Timestamp(new Date().getTime())); datafile1.setPermissionModificationTime(new Timestamp(new Date().getTime())); datafile1.setOwner(dataset); datafile1.setIngestDone(); datafile1.setChecksumType(DataFile.ChecksumType.SHA1); datafile1.setChecksumValue("Unknown"); DataTable dt1 = new DataTable(); dt1.setOriginalFileFormat("application/x-stata"); datafile1.setDataTable(dt1); // set metadata and add version FileMetadata fmd1 = new FileMetadata(); fmd1.setId(1L); fmd1.setLabel("foobar.tab"); fmd1.setDataFile(datafile1); datafile1.getFileMetadatas().add(fmd1); datasetVersion.getFileMetadatas().add(fmd1); fmd1.setDatasetVersion(datasetVersion); DataFile datafile2 = new DataFile("application/x-strata"); datafile2.setStorageIdentifier("foobar.dta"); datafile2.setFilesize(200); datafile2.setModificationTime(new Timestamp(new Date().getTime())); datafile2.setCreateDate(new Timestamp(new Date().getTime())); datafile2.setPermissionModificationTime(new Timestamp(new Date().getTime())); datafile2.setOwner(dataset); datafile2.setIngestDone(); datafile2.setChecksumType(DataFile.ChecksumType.SHA1); datafile2.setChecksumValue("Unknown"); DataTable dt2 = new DataTable(); dt2.setOriginalFileFormat("application/x-stata"); datafile2.setDataTable(dt2); // set metadata and add version FileMetadata fmd2 = new FileMetadata(); fmd2.setId(2L); fmd2.setLabel("foobar.dta"); fmd2.setDataFile(datafile2); datafile2.getFileMetadatas().add(fmd2); dataFileList.add(datafile2); IngestUtil.checkForDuplicateFileNamesFinal(datasetVersion, dataFileList); boolean file2NameAltered = false; for (DataFile df : dataFileList) { if (df.getFileMetadata().getLabel().equals("foobar-1.dta")) { file2NameAltered = true; } } // check filename is altered since tabular and will change to .tab after ingest assertEquals(file2NameAltered, true); } @Test public void testDirectoryLabels() { DatasetVersion datasetVersion = new DatasetVersion(); FileMetadata fileMetadata = new FileMetadata(); fileMetadata.setLabel("foo.png"); fileMetadata.setDirectoryLabel("/has/leading/slash"); datasetVersion.getFileMetadatas().add(fileMetadata); Set<ConstraintViolation> violations1 = datasetVersion.validate(); assertEquals(1, violations1.size()); ConstraintViolation violation1 = violations1.iterator().next(); assertEquals("Directory Name cannot contain leading or trailing file separators.", violation1.getMessage()); // reset datasetVersion.setFileMetadatas(new ArrayList<>()); Set<ConstraintViolation> violations2 = datasetVersion.validate(); assertEquals(0, violations2.size()); fileMetadata.setDirectoryLabel("has/trailing/slash/"); datasetVersion.getFileMetadatas().add(fileMetadata); Set<ConstraintViolation> violations3 = datasetVersion.validate(); assertEquals(1, violations3.size()); assertEquals("Directory Name cannot contain leading or trailing file separators.", violations3.iterator().next().getMessage()); // reset datasetVersion.setFileMetadatas(new ArrayList<>()); Set<ConstraintViolation> violations4 = datasetVersion.validate(); assertEquals(0, violations4.size()); fileMetadata.setDirectoryLabel("just/right"); datasetVersion.getFileMetadatas().add(fileMetadata); Set<ConstraintViolation> violations5 = datasetVersion.validate(); assertEquals(0, violations5.size()); // reset datasetVersion.setFileMetadatas(new ArrayList<>()); Set<ConstraintViolation> violations6 = datasetVersion.validate(); assertEquals(0, violations6.size()); fileMetadata.setDirectoryLabel(""); datasetVersion.getFileMetadatas().add(fileMetadata); Set<ConstraintViolation> violations7 = datasetVersion.validate(); assertEquals(0, violations7.size()); // reset datasetVersion.setFileMetadatas(new ArrayList<>()); Set<ConstraintViolation> violations8 = datasetVersion.validate(); assertEquals(0, violations8.size()); fileMetadata.setDirectoryLabel(null); datasetVersion.getFileMetadatas().add(fileMetadata); Set<ConstraintViolation> violations9 = datasetVersion.validate(); assertEquals(0, violations9.size()); // reset datasetVersion.setFileMetadatas(new ArrayList<>()); Set<ConstraintViolation> violations10 = datasetVersion.validate(); assertEquals(0, violations10.size()); String singleCharacter = "a"; fileMetadata.setDirectoryLabel(singleCharacter); datasetVersion.getFileMetadatas().add(fileMetadata); Set<ConstraintViolation> violations11 = datasetVersion.validate(); assertEquals(0, violations11.size()); // reset datasetVersion.setFileMetadatas(new ArrayList<>()); Set<ConstraintViolation> violations12 = datasetVersion.validate(); assertEquals(0, violations12.size()); fileMetadata.setDirectoryLabel("/leadingAndTrailing/"); datasetVersion.getFileMetadatas().add(fileMetadata); Set<ConstraintViolation> violations13 = datasetVersion.validate(); assertEquals(1, violations13.size()); } }