/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.beam.sdk.io.hdfs;

import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.io.ByteStreams;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.WritableByteChannel;
import java.util.List;
import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.io.fs.CreateOptions.StandardCreateOptions;
import org.apache.beam.sdk.io.fs.MatchResult;
import org.apache.beam.sdk.io.fs.MatchResult.Metadata;
import org.apache.beam.sdk.io.fs.MatchResult.Status;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.util.MimeTypes;
import org.apache.beam.sdk.values.PCollection;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/**
 * Tests for {@link HadoopFileSystem}.
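 *
 * <p>Each test runs against an in-process {@link MiniDFSCluster} rooted in a JUnit
 * {@link TemporaryFolder}, so no external HDFS deployment is required; test paths are
 * resolved against the cluster's {@code fs.defaultFS} URI.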
 */
@RunWith(JUnit4.class)
public class HadoopFileSystemTest {

  @Rule public TestPipeline p = TestPipeline.create();
  @Rule public TemporaryFolder tmpFolder = new TemporaryFolder();
  @Rule public ExpectedException thrown = ExpectedException.none();

  private MiniDFSCluster hdfsCluster;
  private URI hdfsClusterBaseUri;
  private HadoopFileSystem fileSystem;

  @Before
  public void setUp() throws Exception {
    Configuration configuration = new Configuration();
    configuration.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, tmpFolder.getRoot().getAbsolutePath());
    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(configuration);
    hdfsCluster = builder.build();
    hdfsClusterBaseUri = new URI(configuration.get("fs.defaultFS") + "/");
    fileSystem = new HadoopFileSystem(configuration);
  }

  @After
  public void tearDown() throws Exception {
    hdfsCluster.shutdown();
  }

  @Test
  public void testCreateAndReadFile() throws Exception {
    create("testFile", "testData".getBytes());
    assertArrayEquals("testData".getBytes(), read("testFile"));
  }

  @Test
  public void testCopy() throws Exception {
    create("testFileA", "testDataA".getBytes());
    create("testFileB", "testDataB".getBytes());
    fileSystem.copy(
        ImmutableList.of(testPath("testFileA"), testPath("testFileB")),
        ImmutableList.of(testPath("copyTestFileA"), testPath("copyTestFileB")));

    // The sources are left intact and the copies hold the same contents.
    assertArrayEquals("testDataA".getBytes(), read("testFileA"));
    assertArrayEquals("testDataB".getBytes(), read("testFileB"));
    assertArrayEquals("testDataA".getBytes(), read("copyTestFileA"));
    assertArrayEquals("testDataB".getBytes(), read("copyTestFileB"));
  }

  @Test
  public void testDelete() throws Exception {
    create("testFileA", "testDataA".getBytes());
    create("testFileB", "testDataB".getBytes());
    create("testFileC", "testDataC".getBytes());

    // ensure files exist
    assertArrayEquals("testDataA".getBytes(), read("testFileA"));
    assertArrayEquals("testDataB".getBytes(), read("testFileB"));
    assertArrayEquals("testDataC".getBytes(), read("testFileC"));

    fileSystem.delete(ImmutableList.of(testPath("testFileA"), testPath("testFileC")));

    // Only the undeleted file should still match the glob.
    List<MatchResult> results =
        fileSystem.match(ImmutableList.of(testPath("testFile*").toString()));
    assertThat(
        results,
        contains(
            MatchResult.create(
                Status.OK,
                ImmutableList.of(
                    Metadata.builder()
                        .setResourceId(testPath("testFileB"))
                        .setIsReadSeekEfficient(true)
                        .setSizeBytes("testDataB".getBytes().length)
                        .build()))));
  }

  @Test
  public void testMatch() throws Exception {
    create("testFileAA", "testDataAA".getBytes());
    create("testFileA", "testDataA".getBytes());
    create("testFileB", "testDataB".getBytes());

    // ensure files exist
    assertArrayEquals("testDataAA".getBytes(), read("testFileAA"));
    assertArrayEquals("testDataA".getBytes(), read("testFileA"));
    assertArrayEquals("testDataB".getBytes(), read("testFileB"));

    // The glob should match both "testFileA" and "testFileAA" but not "testFileB".
    List<MatchResult> results =
        fileSystem.match(ImmutableList.of(testPath("testFileA*").toString()));
    assertEquals(Status.OK, Iterables.getOnlyElement(results).status());
    assertThat(
        Iterables.getOnlyElement(results).metadata(),
        containsInAnyOrder(
            Metadata.builder()
                .setResourceId(testPath("testFileAA"))
                .setIsReadSeekEfficient(true)
                .setSizeBytes("testDataAA".getBytes().length)
                .build(),
            Metadata.builder()
                .setResourceId(testPath("testFileA"))
                .setIsReadSeekEfficient(true)
                .setSizeBytes("testDataA".getBytes().length)
                .build()));
  }

  @Test
  public void testRename() throws Exception {
    create("testFileA", "testDataA".getBytes());
    create("testFileB", "testDataB".getBytes());

    // ensure files exist
    assertArrayEquals("testDataA".getBytes(), read("testFileA"));
assertArrayEquals("testDataB".getBytes(), read("testFileB")); fileSystem.rename( ImmutableList.of( testPath("testFileA"), testPath("testFileB")), ImmutableList.of( testPath("renameFileA"), testPath("renameFileB"))); List<MatchResult> results = fileSystem.match(ImmutableList.of(testPath("*").toString())); assertEquals(Status.OK, Iterables.getOnlyElement(results).status()); assertThat(Iterables.getOnlyElement(results).metadata(), containsInAnyOrder( Metadata.builder() .setResourceId(testPath("renameFileA")) .setIsReadSeekEfficient(true) .setSizeBytes("testDataA".getBytes().length) .build(), Metadata.builder() .setResourceId(testPath("renameFileB")) .setIsReadSeekEfficient(true) .setSizeBytes("testDataB".getBytes().length) .build())); // ensure files exist assertArrayEquals("testDataA".getBytes(), read("renameFileA")); assertArrayEquals("testDataB".getBytes(), read("renameFileB")); } @Test public void testMatchNewResource() throws Exception { // match file spec assertEquals(testPath("file"), fileSystem.matchNewResource(testPath("file").toString(), false)); // match dir spec missing '/' assertEquals(testPath("dir/"), fileSystem.matchNewResource(testPath("dir").toString(), true)); // match dir spec with '/' assertEquals(testPath("dir/"), fileSystem.matchNewResource(testPath("dir/").toString(), true)); thrown.expect(IllegalArgumentException.class); thrown.expectMessage("Expected file path but received directory path"); fileSystem.matchNewResource(testPath("dir/").toString(), false); } @Test @Ignore("TestPipeline needs a way to take in HadoopFileSystemOptions") public void testReadPipeline() throws Exception { create("testFileA", "testDataA".getBytes()); create("testFileB", "testDataB".getBytes()); create("testFileC", "testDataC".getBytes()); HadoopFileSystemOptions options = TestPipeline.testingPipelineOptions() .as(HadoopFileSystemOptions.class); options.setHdfsConfiguration(ImmutableList.of(fileSystem.fileSystem.getConf())); FileSystems.setDefaultPipelineOptions(options); PCollection<String> pc = p.apply( TextIO.read().from(testPath("testFile*").toString())); PAssert.that(pc).containsInAnyOrder("testDataA", "testDataB", "testDataC"); p.run(); } private void create(String relativePath, byte[] contents) throws Exception { try (WritableByteChannel channel = fileSystem.create( testPath(relativePath), StandardCreateOptions.builder().setMimeType(MimeTypes.BINARY).build())) { channel.write(ByteBuffer.wrap(contents)); } } private byte[] read(String relativePath) throws Exception { try (ReadableByteChannel channel = fileSystem.open(testPath(relativePath))) { return ByteStreams.toByteArray(Channels.newInputStream(channel)); } } private HadoopResourceId testPath(String relativePath) { return new HadoopResourceId(hdfsClusterBaseUri.resolve(relativePath)); } }