/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.tools.mapred.CopyMapper;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
public class TestDistCpSync {
// Mini HDFS cluster backing each test; built in setUp, shut down in tearDown.
private MiniDFSCluster cluster;
// Configuration handed to the cluster builder and to DistCpSync; setUp also
// writes the DistCp options and the target work/final paths into it.
private final Configuration conf = new HdfsConfiguration();
// File system handle obtained from the mini cluster in setUp.
private DistributedFileSystem dfs;
// DistCp options under test; setUp enables sync, delete-missing and the
// snapshot diff from "s1" to "s2".
private DistCpOptions options;
// Root directories used as the copy source and the copy target.
private final Path source = new Path("/source");
private final Path target = new Path("/target");
// Unit of file length used when creating and appending to test files.
private static final long BLOCK_SIZE = 1024;
// Number of datanodes in the mini cluster; also passed to
// DFSTestUtil.createFile when creating test files.
private static final short DATA_NUM = 1;
/**
 * Starts the mini cluster, creates the source and target directories and
 * prepares the DistCp options (sync + delete + diff from "s1" to "s2").
 */
@Before
public void setUp() throws Exception {
  // Bring the cluster up and wait until it is active before touching it.
  final MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
  cluster = builder.numDataNodes(DATA_NUM).build();
  cluster.waitActive();

  dfs = cluster.getFileSystem();
  dfs.mkdirs(source);
  dfs.mkdirs(target);

  // Configure the options first, then publish them into the configuration.
  options = new DistCpOptions(Arrays.asList(source), target);
  options.setSyncFolder(true);
  options.setDeleteMissing(true);
  options.setUseDiff(true, "s1", "s2");
  options.appendToConf(conf);

  // Both the work path and the final path are the target directory itself.
  final String targetPath = target.toString();
  conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetPath);
  conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetPath);
}
/**
 * Releases the file system handle and shuts the mini cluster down, if one
 * was created.
 */
@After
public void tearDown() throws Exception {
  IOUtils.cleanup(null, dfs);
  if (cluster == null) {
    // Nothing to shut down.
    return;
  }
  cluster.shutdown();
}
/**
 * Verifies that the sync returns false in the following scenarios, and that
 * the source path in the options is nevertheless switched to the "s2"
 * snapshot path:
 * 1. the source/target dirs are not snapshottable
 * 2. the source/target dirs do not have the given snapshots
 * 3. changes have been made in the target
 */
@Test
public void testFallback() throws Exception {
  final String s2Suffix =
      HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2";
  final Path snapshotPath = new Path(source, s2Suffix);

  // scenario 1: neither directory is snapshottable
  assertFallbackThenResetSource(snapshotPath);

  // scenario 2: snapshottable, but the requested snapshots do not exist
  dfs.allowSnapshot(source);
  dfs.allowSnapshot(target);
  assertFallbackThenResetSource(snapshotPath);

  // with all snapshots in place and an untouched target the sync succeeds
  dfs.createSnapshot(source, "s1");
  dfs.createSnapshot(source, "s2");
  dfs.createSnapshot(target, "s1");
  Assert.assertTrue(DistCpSync.sync(options, conf));
  options.setSourcePaths(Arrays.asList(source));

  // scenario 3: the target has been modified since its snapshot
  final Path extraDir = new Path(target, "sub");
  dfs.mkdirs(extraDir);
  assertFallbackThenResetSource(snapshotPath);

  // once the modification is removed the sync succeeds again
  dfs.delete(extraDir, true);
  Assert.assertTrue(DistCpSync.sync(options, conf));
}

/**
 * Asserts that the sync fails, that the first source path in the options now
 * equals the expected snapshot path, and then restores the original source
 * path in the options.
 */
private void assertFallbackThenResetSource(Path expectedSource)
    throws Exception {
  Assert.assertFalse(DistCpSync.sync(options, conf));
  Assert.assertEquals(expectedSource, options.getSourcePaths().get(0));
  options.setSourcePaths(Arrays.asList(source));
}
/**
 * Creates four files (and thereby their parent directories) under the given
 * directory. The resulting subtree looks like this:
 *
 *                dir/
 *        foo/            bar/
 *      d1/   f1        d2/   f2
 *      f3              f4
 */
private void initData(Path dir) throws Exception {
  final Path foo = new Path(dir, "foo");
  final Path bar = new Path(dir, "bar");
  // Listed in creation order: f1, f2, f3, f4.
  final Path[] files = {
      new Path(foo, "f1"),
      new Path(bar, "f2"),
      new Path(new Path(foo, "d1"), "f3"),
      new Path(new Path(bar, "d2"), "f4"),
  };
  for (Path file : files) {
    DFSTestUtil.createFile(dfs, file, BLOCK_SIZE, DATA_NUM, 0);
  }
}
/**
 * Applies a fixed sequence of changes under the given directory, which must
 * have been populated by initData:
 * 1. rename dir/foo/d1 to dir/bar/d1
 * 2. delete dir/bar/d1/f3
 * 3. rename dir/foo to dir/bar/d1/foo
 * 4. delete dir/bar/d1/foo/f1
 * 5. create file dir/bar/d1/foo/f1 whose size is 2*BLOCK_SIZE
 * 6. append one BLOCK to file dir/bar/f2
 * 7. rename dir/bar to dir/foo
 *
 * After all these operations the subtree looks like this
 * ((A) = appended to, (new) = re-created):
 *
 *                dir/
 *                foo/
 *      d1/      f2(A)      d2/
 *      foo/                f4
 *      f1(new)
 */
private void changeData(Path dir) throws Exception {
  final Path oldFoo = new Path(dir, "foo");
  final Path oldBar = new Path(dir, "bar");

  // step 1: move d1 from foo to bar
  final Path movedD1 = new Path(oldBar, "d1");
  dfs.rename(new Path(oldFoo, "d1"), movedD1);

  // step 2: drop f3 from the moved d1
  dfs.delete(new Path(movedD1, "f3"), true);

  // step 3: move foo underneath the moved d1
  final Path nestedFoo = new Path(movedD1, "foo");
  dfs.rename(oldFoo, nestedFoo);

  // steps 4 and 5: replace f1 with a new file of twice the size
  final Path replacedF1 = new Path(nestedFoo, "f1");
  dfs.delete(replacedF1, true);
  DFSTestUtil.createFile(dfs, replacedF1, 2 * BLOCK_SIZE, DATA_NUM, 0);

  // step 6: append to f2 while it still lives under bar
  DFSTestUtil.appendFile(dfs, new Path(oldBar, "f2"), (int) BLOCK_SIZE);

  // step 7: bar takes over the name foo
  dfs.rename(oldBar, new Path(dir, "foo"));
}
/**
 * Tests the basic functionality: sync the (s2 - s1) diff to the target, then
 * build the copy listing, run the copy mapper over it and compare the result
 * with the s2 snapshot of the source.
 */
@Test
public void testSync() throws Exception {
  initData(source);
  initData(target);
  dfs.allowSnapshot(source);
  dfs.allowSnapshot(target);
  dfs.createSnapshot(source, "s1");
  dfs.createSnapshot(target, "s1");

  // modify the source and capture the result as s2
  changeData(source);
  dfs.createSnapshot(source, "s2");

  // Further changes made after s2 must not influence the copy, because
  // only (s2 - s1) is being transferred to the target.
  dfs.delete(new Path(source, "foo/d1/foo/f1"), true);
  dfs.mkdirs(new Path(source, "foo/d1/foo/newdir"));

  // run the sync
  final boolean synced = DistCpSync.sync(options, conf);
  Assert.assertTrue(synced);

  // the options must now point at the s2 snapshot of the source
  final Path s2Root = new Path(source,
      HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2");
  Assert.assertEquals(s2Root, options.getSourcePaths().get(0));

  // build the copy listing and read it back
  final Path listingFile = new Path("/tmp/META/fileList.seq");
  final CopyListing copyListing =
      new GlobbedCopyListing(conf, new Credentials());
  copyListing.buildListing(listingFile, options);
  final Map<Text, CopyListingFileStatus> listed = getListing(listingFile);

  final CopyMapper mapper = new CopyMapper();
  final StubContext stub = new StubContext(conf, null, 0);
  final Mapper<Text, CopyListingFileStatus, Text, Text>.Context mapContext =
      stub.getContext();
  // turn on append mode for the mapper
  mapContext.getConfiguration().setBoolean(
      DistCpOptionSwitch.APPEND.getConfigLabel(), true);
  mapper.setup(mapContext);
  for (Map.Entry<Text, CopyListingFileStatus> item : listed.entrySet()) {
    mapper.map(item.getKey(), item.getValue(), mapContext);
  }

  // only the data appended to f2 and the new file f1 should have been copied
  final long copiedBytes = stub.getReporter()
      .getCounter(CopyMapper.Counter.BYTESCOPIED).getValue();
  Assert.assertEquals(BLOCK_SIZE * 3, copiedBytes);

  // the s2 snapshot and the target must now have the same structure
  final FileStatus sourceRoot = dfs.getFileStatus(s2Root);
  final FileStatus targetRoot = dfs.getFileStatus(target);
  verifyCopy(sourceRoot, targetRoot, false);
}
/**
 * Reads every record of the copy listing sequence file into a map.
 *
 * @param listingPath path of the sequence file to read
 * @return map from each record's key to its CopyListingFileStatus value
 * @throws Exception if the file cannot be opened or read
 */
private Map<Text, CopyListingFileStatus> getListing(Path listingPath)
    throws Exception {
  final Map<Text, CopyListingFileStatus> values = new HashMap<>();
  // try-with-resources closes the reader even if reading fails; the
  // original code never closed it.
  try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
      SequenceFile.Reader.file(listingPath))) {
    Text key = new Text();
    CopyListingFileStatus value = new CopyListingFileStatus();
    while (reader.next(key, value)) {
      values.put(key, value);
      // The instances just stored must not be reused for the next record,
      // so fresh ones are allocated for every read.
      key = new Text();
      value = new CopyListingFileStatus();
    }
  }
  return values;
}
/**
 * Recursively asserts that two file system entries match: same kind
 * (file/directory), optionally the same name, identical content for files
 * and pairwise matching children for directories.
 *
 * @param s status of the source entry
 * @param t status of the target entry
 * @param compareName whether the last path components must be equal
 */
private void verifyCopy(FileStatus s, FileStatus t, boolean compareName)
    throws Exception {
  final boolean sourceIsDir = s.isDirectory();
  Assert.assertEquals(sourceIsDir, t.isDirectory());
  if (compareName) {
    Assert.assertEquals(s.getPath().getName(), t.getPath().getName());
  }

  if (!sourceIsDir) {
    // plain files: the bytes must be identical
    final byte[] expected = DFSTestUtil.readFileBuffer(dfs, s.getPath());
    final byte[] actual = DFSTestUtil.readFileBuffer(dfs, t.getPath());
    Assert.assertArrayEquals(expected, actual);
    return;
  }

  // directories: same number of children, compared position by position
  final FileStatus[] sourceChildren = dfs.listStatus(s.getPath());
  final FileStatus[] targetChildren = dfs.listStatus(t.getPath());
  Assert.assertEquals(sourceChildren.length, targetChildren.length);
  int index = 0;
  for (FileStatus sourceChild : sourceChildren) {
    verifyCopy(sourceChild, targetChildren[index], true);
    index++;
  }
}
/**
 * Similar to testSync, but the "to" snapshot is specified as ".". In this
 * case the source path in the options is expected to stay unchanged.
 */
@Test
public void testSyncWithCurrent() throws Exception {
  options.setUseDiff(true, "s1", ".");

  final Path[] roots = {source, target};
  for (Path root : roots) {
    initData(root);
  }
  for (Path root : roots) {
    dfs.allowSnapshot(root);
  }
  for (Path root : roots) {
    dfs.createSnapshot(root, "s1");
  }

  // modify the source, without taking a second snapshot
  changeData(source);

  // run the sync
  final boolean synced = DistCpSync.sync(options, conf);
  Assert.assertTrue(synced);

  // the source path in the options must still be the original one
  final Path currentSource = options.getSourcePaths().get(0);
  Assert.assertEquals(source, currentSource);
}
/**
 * Creates dir/test/f1, dir/foo/f2 and dir/bar/f3, each BLOCK_SIZE long and
 * created with seeds 0, 1 and 2 respectively.
 */
private void initData2(Path dir) throws Exception {
  // {parent directory, file name}, in creation order
  final String[][] layout = {
      {"test", "f1"},
      {"foo", "f2"},
      {"bar", "f3"},
  };
  long seed = 0L;
  for (String[] entry : layout) {
    final Path file = new Path(new Path(dir, entry[0]), entry[1]);
    DFSTestUtil.createFile(dfs, file, BLOCK_SIZE, DATA_NUM, seed);
    seed++;
  }
}
/**
 * Rotates the names of the three directories created by initData2 using a
 * temporary name: test -> tmpFoo, foo -> test, bar -> foo, tmpFoo -> bar.
 */
private void changeData2(Path dir) throws Exception {
  final Path testDir = new Path(dir, "test");
  final Path fooDir = new Path(dir, "foo");
  final Path barDir = new Path(dir, "bar");
  final Path parked = new Path(dir, "tmpFoo");

  // park "test" under a temporary name to free its name up
  dfs.rename(testDir, parked);
  // shift the other two directories into the freed names
  dfs.rename(fooDir, testDir);
  dfs.rename(barDir, fooDir);
  // the parked directory takes the last free name
  dfs.rename(parked, barDir);
}
/**
 * Tests the sync when directories have exchanged names through a chain of
 * renames (see changeData2), then compares the source with the target.
 */
@Test
public void testSync2() throws Exception {
  final Path[] roots = {source, target};
  for (Path root : roots) {
    initData2(root);
  }
  for (Path root : roots) {
    dfs.allowSnapshot(root);
  }
  for (Path root : roots) {
    dfs.createSnapshot(root, "s1");
  }

  // modify the source and capture the result as s2
  changeData2(source);
  dfs.createSnapshot(source, "s2");

  // print the diff between the two source snapshots
  final SnapshotDiffReport diff =
      dfs.getSnapshotDiffReport(source, "s1", "s2");
  System.out.println(diff);

  // run the sync and compare both trees
  final boolean synced = DistCpSync.sync(options, conf);
  Assert.assertTrue(synced);
  final FileStatus sourceRoot = dfs.getFileStatus(source);
  final FileStatus targetRoot = dfs.getFileStatus(target);
  verifyCopy(sourceRoot, targetRoot, false);
}
/**
 * Creates a file named "file" in each of dir/test, dir/foo and dir/bar. The
 * files are 1, 2 and 3 times BLOCK_SIZE long and are created with seeds 0, 1
 * and 2 respectively.
 */
private void initData3(Path dir) throws Exception {
  final String[] parents = {"test", "foo", "bar"};
  for (int i = 0; i < parents.length; i++) {
    final Path file = new Path(new Path(dir, parents[i]), "file");
    final long length = BLOCK_SIZE * (i + 1);
    DFSTestUtil.createFile(dfs, file, length, DATA_NUM, (long) i);
  }
}
/**
 * Renames "file" to "newfile" in each of dir/test, dir/foo and dir/bar, in
 * that order.
 */
private void changeData3(Path dir) throws Exception {
  for (String parentName : new String[] {"test", "foo", "bar"}) {
    final Path parent = new Path(dir, parentName);
    dfs.rename(new Path(parent, "file"), new Path(parent, "newfile"));
  }
}
/**
 * Tests a case where multiple source files share the same name: each of them
 * is renamed within its own directory (see changeData3) before the sync.
 */
@Test
public void testSync3() throws Exception {
  final Path[] roots = {source, target};
  for (Path root : roots) {
    initData3(root);
  }
  for (Path root : roots) {
    dfs.allowSnapshot(root);
  }
  for (Path root : roots) {
    dfs.createSnapshot(root, "s1");
  }

  // modify the source and capture the result as s2
  changeData3(source);
  dfs.createSnapshot(source, "s2");

  // print the diff between the two source snapshots
  final SnapshotDiffReport diff =
      dfs.getSnapshotDiffReport(source, "s1", "s2");
  System.out.println(diff);

  // run the sync and compare both trees
  final boolean synced = DistCpSync.sync(options, conf);
  Assert.assertTrue(synced);
  final FileStatus sourceRoot = dfs.getFileStatus(source);
  final FileStatus targetRoot = dfs.getFileStatus(target);
  verifyCopy(sourceRoot, targetRoot, false);
}
}