/** * */ package org.archive.hadoop.fs; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import java.net.URI; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.archive.hadoop.fs.PetaboxFileSystem; import org.junit.Before; import org.junit.Test; /** * NB: this test case currently requires live access to archive.org. * * @author kenji * */ public class PetaboxFileSystemTest { Configuration conf; PetaboxFileSystem pfs; @Before public void setUp() throws Exception { conf = new Configuration(); pfs = new PetaboxFileSystem(); pfs.initialize(new URI("petabox://archive.org/"), conf); } @Test public void testProperties() { assertEquals("petabox://archive.org/", pfs.getUri().toString()); } // milliseconds timestamp for 1971-01-01 00:00:00 for testing mtime final static long LIKELY_BE_SECONDS = 31536000000L; @Test public void testGetFileStatus_ForItem() throws Exception { // item known to exist final String path1 = "/wide00006"; final String path2 = "petabox://archive.org/wide00006"; FileStatus fst1 = pfs.getFileStatus(new Path(path1)); assertNotNull(fst1); assertEquals("item is a directory", true, fst1.isDir()); assertTrue("replication > 0", fst1.getReplication() > 0); assertEquals("absolute path matches", path2, fst1.getPath().toString()); FileStatus fst2 = pfs.getFileStatus(new Path(path2)); assertNotNull(fst2); assertEquals("item is a directory", true, fst2.isDir()); assertTrue("replication > 0", fst2.getReplication() > 0); assertEquals("path matches the orginal", path2, fst2.getPath().toString()); // modification time is in milliseconds. this is not perfect, but should catch mtime in // seconds. assertTrue("modification time " + fst2.getModificationTime() + " is big enough as ms", fst2.getModificationTime() > LIKELY_BE_SECONDS); } @Test public void testGetFileStatus_ForFile() throws Exception { // item known to exist final String path1 = "/WIDE-20120914205820-crawl410/WIDE-20120914205820-crawl410.cdx.gz"; final String path2 = "petabox://archive.org/WIDE-20120914205820-crawl410/WIDE-20120914205820-crawl410.cdx.gz"; FileStatus fst1 = pfs.getFileStatus(new Path(path1)); assertNotNull(fst1); assertEquals("path is not a directory", false, fst1.isDir()); assertTrue("replication > 0", fst1.getReplication() > 0); assertEquals("absolute path matches", path2, fst1.getPath().toString()); FileStatus fst2 = pfs.getFileStatus(new Path(path2)); assertNotNull(fst2); assertEquals("path is not a directory", false, fst2.isDir()); assertTrue("replication > 0", fst2.getReplication() > 0); assertEquals("path matches the orginal", path2, fst2.getPath().toString()); // modification time is in milliseconds. this is not perfect, but should catch mtime in // seconds. assertTrue("modification time " + fst2.getModificationTime() + " is big enough as ms", fst2.getModificationTime() > LIKELY_BE_SECONDS); } }