/**
 * Copyright 2011-2017 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.runtime.directio.hadoop;

import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Scanner;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import com.asakusafw.runtime.directio.BinaryStreamFormat;
import com.asakusafw.runtime.directio.Counter;
import com.asakusafw.runtime.directio.DataDefinition;
import com.asakusafw.runtime.directio.DataFormat;
import com.asakusafw.runtime.directio.DirectInputFragment;
import com.asakusafw.runtime.directio.FilePattern;
import com.asakusafw.runtime.directio.OutputAttemptContext;
import com.asakusafw.runtime.directio.SimpleDataDefinition;
import com.asakusafw.runtime.directio.util.CountInputStream;
import com.asakusafw.runtime.directio.util.CountOutputStream;
import com.asakusafw.runtime.io.ModelInput;
import com.asakusafw.runtime.io.ModelOutput;
import com.asakusafw.runtime.windows.WindowsSupport;

/**
 * Test for {@link HadoopDataSourceCore}.
 */
@RunWith(Parameterized.class)
public class HadoopDataSourceCoreTest {

    /**
     * Windows platform support.
     */
    @ClassRule
    public static final WindowsSupport WINDOWS_SUPPORT = new WindowsSupport();

    /**
     * Temporary folder.
     */
    @Rule
    public final TemporaryFolder temp = new TemporaryFolder();

    private final DataDefinition<StringBuilder> definition;

    private Configuration conf;

    private File mapping;

    private File temporary;

    private File localtemp;

    private HadoopDataSourceProfile profile;

    private OutputAttemptContext context;

    private final Counter counter = new Counter();

    /**
     * Returns the parameters.
     * @return the parameters
     */
    @Parameters
    public static List<Object[]> data() {
        return Arrays.asList(new Object[][] {
                { new MockStreamFormat() },
                { new MockFileFormat() },
        });
    }

    /**
     * Creates a new instance.
     * @param format the format
     */
    public HadoopDataSourceCoreTest(DataFormat<StringBuilder> format) {
        this.definition = SimpleDataDefinition.newInstance(StringBuilder.class, format);
    }
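    // Note: the Parameterized runner executes every test case twice, once with
    // MockStreamFormat (a BinaryStreamFormat) and once with MockFileFormat
    // (a HadoopFileFormat); both mocks are defined at the bottom of this file.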
    /**
     * Initializes the test.
     * @throws Exception if failed
     */
    @Before
    public void setUp() throws Exception {
        conf = new Configuration(true);
        if (definition.getDataFormat() instanceof Configurable) {
            ((Configurable) definition.getDataFormat()).setConf(conf);
        }
        mapping = new File(temp.getRoot(), "mapping").getCanonicalFile();
        temporary = new File(temp.getRoot(), "temporary").getCanonicalFile();
        localtemp = new File(temp.getRoot(), "localtemp").getCanonicalFile();
        profile = new HadoopDataSourceProfile(
                conf,
                "testing",
                "testing",
                new Path(mapping.toURI()),
                new Path(temporary.toURI()));
        context = new OutputAttemptContext("tx", "atmpt", profile.getId(), new Counter());
    }

    /**
     * simple input.
     * @throws Exception if failed
     */
    @Test
    public void input() throws Exception {
        put(new File(mapping, "input/file.txt"), "Hello, world!");

        profile.setMinimumFragmentSize(-1);
        HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
        List<DirectInputFragment> fragments = core.findInputFragments(
                definition, "input", FilePattern.compile("**"));
        assertThat(fragments.size(), is(1));

        List<String> results = consume(core, fragments);
        assertThat(counter.get(), is(greaterThan(0L)));
        assertThat(results.size(), is(1));
        assertThat(results, hasItem("Hello, world!"));
    }

    /**
     * input multiple records.
     * @throws Exception if failed
     */
    @Test
    public void input_multirecord() throws Exception {
        put(new File(mapping, "input/file.txt"), "Hello1", "Hello2", "Hello3");

        profile.setMinimumFragmentSize(-1);
        HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
        List<DirectInputFragment> fragments = core.findInputFragments(
                definition, "input", FilePattern.compile("**"));
        assertThat(fragments.size(), is(1));

        List<String> results = consume(core, fragments);
        assertThat(counter.get(), is(greaterThan(0L)));
        assertThat(results.size(), is(3));
        assertThat(results, hasItem("Hello1"));
        assertThat(results, hasItem("Hello2"));
        assertThat(results, hasItem("Hello3"));
    }

    /**
     * input a large file split into multiple fragments.
     * @throws Exception if failed
     */
    @Test
    public void input_large() throws Exception {
        long fragmentSize = 1 * 1024 * 1024;
        int fragmentCount = 20;
        put(new File(mapping, "input/file.txt"), fragmentSize * fragmentCount);

        profile.setMinimumFragmentSize(1);
        profile.setPreferredFragmentSize(fragmentSize);
        HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
        List<DirectInputFragment> fragments = core.findInputFragments(
                definition, "input", FilePattern.compile("**"));
        assertThat(fragments.size(), is(greaterThanOrEqualTo(fragmentCount / 2)));
        for (DirectInputFragment fragment : fragments) {
            assertThat(fragment.getSize(), is(greaterThanOrEqualTo(fragmentSize / 2)));
            assertThat(fragment.getSize(), is(lessThanOrEqualTo(fragmentSize * 2)));
        }
    }

    /**
     * input multiple files.
     * @throws Exception if failed
     */
    @Test
    public void input_multifile() throws Exception {
        put(new File(mapping, "input/file1.txt"), "Hello1");
        put(new File(mapping, "input/file2.txt"), "Hello2");
        put(new File(mapping, "input/file3.txt"), "Hello3");

        profile.setMinimumFragmentSize(-1);
        HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
        List<DirectInputFragment> fragments = core.findInputFragments(
                definition, "input", FilePattern.compile("**"));
        assertThat(fragments.size(), is(3));

        List<String> results = consume(core, fragments);
        assertThat(counter.get(), is(greaterThan(0L)));
        assertThat(results.size(), is(3));
        assertThat(results, hasItem("Hello1"));
        assertThat(results, hasItem("Hello2"));
        assertThat(results, hasItem("Hello3"));
    }
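    // The output tests below exercise the attempt/transaction output lifecycle:
    // by default, written files stay invisible under the mapped directory until
    // both commitAttempt() and commitTransaction() have run; disabling output
    // staging makes the file visible right after the attempt commit.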
    /**
     * simple output.
     * @throws Exception if failed
     */
    @Test
    public void output() throws Exception {
        HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
        setup(core);
        try (ModelOutput<StringBuilder> output = core.openOutput(
                context, definition, "output", "file.txt", counter)) {
            output.write(new StringBuilder("Hello, world!"));
        }
        assertThat(counter.get(), is(greaterThan(0L)));

        File target = new File(mapping, "output/file.txt");
        assertThat(target.exists(), is(false));

        commitAttempt(core);
        assertThat(target.exists(), is(false));

        commitTransaction(core);
        assertThat(target.exists(), is(true));
        assertThat(get(target), is(Arrays.asList("Hello, world!")));
    }

    /**
     * output without staging.
     * @throws Exception if failed
     */
    @Test
    public void output_nostaging() throws Exception {
        profile.setOutputStaging(false);
        HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
        setup(core);
        try (ModelOutput<StringBuilder> output = core.openOutput(
                context, definition, "output", "file.txt", counter)) {
            output.write(new StringBuilder("Hello, world!"));
        }
        assertThat(counter.get(), is(greaterThan(0L)));

        File target = new File(mapping, "output/file.txt");
        assertThat(target.exists(), is(false));

        commitAttempt(core);
        assertThat(target.exists(), is(true));

        commitTransaction(core);
        assertThat(target.exists(), is(true));
        assertThat(get(target), is(Arrays.asList("Hello, world!")));
    }
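    // The two tests below disable output streaming; the data source is then expected
    // to spool the attempt output into the local temporary directory configured via
    // HadoopDataSourceUtil.KEY_LOCAL_TEMPDIR before placing it on the target file
    // system, while the externally observable commit behavior stays the same.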
    /**
     * output without streaming.
     * @throws Exception if failed
     */
    @Test
    public void output_nostreaming() throws Exception {
        profile.setOutputStreaming(false);
        profile.getLocalFileSystem().getConf().set(
                HadoopDataSourceUtil.KEY_LOCAL_TEMPDIR, localtemp.getPath());
        HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
        setup(core);
        try (ModelOutput<StringBuilder> output = core.openOutput(
                context, definition, "output", "file.txt", counter)) {
            output.write(new StringBuilder("Hello, world!"));
        }
        assertThat(counter.get(), is(greaterThan(0L)));

        File target = new File(mapping, "output/file.txt");
        assertThat(target.exists(), is(false));

        commitAttempt(core);
        assertThat(target.exists(), is(false));

        commitTransaction(core);
        assertThat(target.exists(), is(true));
        assertThat(get(target), is(Arrays.asList("Hello, world!")));
    }

    /**
     * output without staging or streaming.
     * @throws Exception if failed
     */
    @Test
    public void output_nomove() throws Exception {
        profile.setOutputStaging(false);
        profile.setOutputStreaming(false);
        profile.getLocalFileSystem().getConf().set(
                HadoopDataSourceUtil.KEY_LOCAL_TEMPDIR, localtemp.getPath());
        HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
        setup(core);
        try (ModelOutput<StringBuilder> output = core.openOutput(
                context, definition, "output", "file.txt", counter)) {
            output.write(new StringBuilder("Hello, world!"));
        }
        assertThat(counter.get(), is(greaterThan(0L)));

        File target = new File(mapping, "output/file.txt");
        assertThat(target.exists(), is(false));

        commitAttempt(core);
        assertThat(target.exists(), is(true));

        commitTransaction(core);
        assertThat(target.exists(), is(true));
        assertThat(get(target), is(Arrays.asList("Hello, world!")));
    }

    /**
     * output multiple records.
     * @throws Exception if failed
     */
    @Test
    public void output_multirecord() throws Exception {
        HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
        setup(core);
        try (ModelOutput<StringBuilder> output = core.openOutput(
                context, definition, "output", "file.txt", counter)) {
            output.write(new StringBuilder("Hello, world!"));
        }

        File target = new File(mapping, "output/file.txt");
        assertThat(target.exists(), is(false));

        commitAttempt(core);
        assertThat(target.exists(), is(false));

        commitTransaction(core);
        assertThat(target.exists(), is(true));
        assertThat(get(target), is(Arrays.asList("Hello, world!")));
    }

    /**
     * output multiple files.
     * @throws Exception if failed
     */
    @Test
    public void output_multifile() throws Exception {
        HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
        setup(core);
        for (int i = 0; i < 3; i++) {
            try (ModelOutput<StringBuilder> output = core.openOutput(
                    context, definition, "output", "file" + i + ".txt", counter)) {
                for (int j = 0; j < i + 1; j++) {
                    output.write(new StringBuilder("Hello" + j));
                }
            }
        }
        commit(core);
        assertThat(get(new File(mapping, "output/file0.txt")), is(Arrays.asList("Hello0")));
        assertThat(get(new File(mapping, "output/file1.txt")), is(Arrays.asList("Hello0", "Hello1")));
        assertThat(get(new File(mapping, "output/file2.txt")), is(Arrays.asList("Hello0", "Hello1", "Hello2")));
    }

    /**
     * rollback output.
     * @throws Exception if failed
     */
    @Test
    public void output_rollback() throws Exception {
        HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
        setup(core);
        try (ModelOutput<StringBuilder> output = core.openOutput(
                context, definition, "output", "file.txt", counter)) {
            output.write(new StringBuilder("Hello, world!"));
        }
        cleanup(core);
        assertThat(new File(mapping, "output/file.txt").exists(), is(false));
    }

    /**
     * simple delete.
     * @throws Exception if failed
     */
    @Test
    public void delete() throws Exception {
        File file = new File(mapping, "delete/file.txt");
        put(file, "Hello, world!");

        HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
        assertThat(file.exists(), is(true));

        boolean result = core.delete("delete", FilePattern.compile("**/*"), true, counter);
        assertThat(result, is(true));
        assertThat(file.exists(), is(false));
    }

    /**
     * delete multiple files.
     * @throws Exception if failed
     */
    @Test
    public void delete_multifile() throws Exception {
        File[] files = {
                new File(mapping, "delete/file.txt"),
                new File(mapping, "delete/file2.txt"),
                new File(mapping, "delete/a/file.txt"),
                new File(mapping, "delete/a/b/file.txt"),
        };
        for (File file : files) {
            put(file, "Hello, world!");
        }

        HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
        for (File file : files) {
            assertThat(file.exists(), is(true));
        }

        boolean result = core.delete("delete", FilePattern.compile("**/*"), true, counter);
        assertThat(result, is(true));
        for (File file : files) {
            assertThat(file.exists(), is(false));
        }
    }
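    // The next test places the temporary area ("_TEMP") inside the mapped directory
    // and verifies that delete() leaves files under the temporary directory untouched.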
    /**
     * delete when the temporary directory is located inside the mapped directory.
     * @throws Exception if failed
     */
    @Test
    public void delete_sharetemp() throws Exception {
        HadoopDataSourceProfile shareTempProfile = new HadoopDataSourceProfile(
                conf,
                profile.getId(),
                profile.getContextPath(),
                profile.getFileSystemPath(),
                new Path(profile.getFileSystemPath(), "_TEMP"));
        HadoopDataSourceCore core = new HadoopDataSourceCore(shareTempProfile);

        File onProd = new File(mapping, "file.txt");
        File onTemp = new File(mapping, "_TEMP/temp.txt");
        put(onProd, "production");
        put(onTemp, "temporary");
        assertThat(onProd.exists(), is(true));
        assertThat(onTemp.exists(), is(true));

        boolean result = core.delete("", FilePattern.compile("**/*"), true, counter);
        assertThat(result, is(true));
        assertThat(onProd.exists(), is(false));
        assertThat(onTemp.exists(), is(true));
    }

    /**
     * delete all files.
     * @throws Exception if failed
     */
    @Test
    public void delete_all() throws Exception {
        File file = new File(mapping, "file.txt");
        put(file, "Hello, world!");

        HadoopDataSourceCore core = new HadoopDataSourceCore(profile);
        assertThat(file.exists(), is(true));

        boolean result = core.delete("", FilePattern.compile("**"), true, counter);
        assertThat(result, is(true));
        assertThat(file.exists(), is(false));
        assertThat("the root directory must not be deleted", mapping.exists(), is(true));
    }
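    // Helpers: consume() drains every fragment through the data source, get()/put()
    // read and write plain text files, and the setup/commit/cleanup methods drive
    // the attempt/transaction output lifecycle of HadoopDataSourceCore.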
    private List<String> consume(
            HadoopDataSourceCore core, List<DirectInputFragment> fragments) throws IOException, InterruptedException {
        List<String> results = new ArrayList<>();
        for (DirectInputFragment fragment : fragments) {
            try (ModelInput<StringBuilder> input = core.openInput(definition, fragment, counter)) {
                StringBuilder buf = new StringBuilder();
                while (input.readTo(buf)) {
                    results.add(buf.toString());
                }
            }
        }
        return results;
    }

    private List<String> get(File target) throws IOException {
        try (Scanner s = new Scanner(target, "UTF-8")) {
            List<String> results = new ArrayList<>();
            while (s.hasNextLine()) {
                results.add(s.nextLine());
            }
            return results;
        }
    }

    private void put(File target, String... contents) throws IOException {
        target.getParentFile().mkdirs();
        try (PrintWriter w = new PrintWriter(target, "UTF-8")) {
            for (String line : contents) {
                w.println(line);
            }
        }
    }

    private void put(File target, long size) throws IOException {
        byte[] buf = "Hello, world\n".getBytes();
        long rest = size;
        target.getParentFile().mkdirs();
        try (OutputStream out = new BufferedOutputStream(new FileOutputStream(target))) {
            while (rest > 0) {
                int count = (int) Math.min(buf.length, rest);
                out.write(buf, 0, count);
                rest -= count;
            }
        }
    }

    private void setup(HadoopDataSourceCore core) throws IOException, InterruptedException {
        core.setupTransactionOutput(context.getTransactionContext());
        core.setupAttemptOutput(context);
    }

    private void commit(HadoopDataSourceCore core) throws IOException, InterruptedException {
        commitAttempt(core);
        commitTransaction(core);
    }

    private void commitAttempt(HadoopDataSourceCore core) throws IOException, InterruptedException {
        core.commitAttemptOutput(context);
        core.cleanupAttemptOutput(context);
    }

    private void commitTransaction(HadoopDataSourceCore core) throws IOException, InterruptedException {
        core.commitTransactionOutput(context.getTransactionContext());
        core.cleanupTransactionOutput(context.getTransactionContext());
    }

    private void cleanup(HadoopDataSourceCore core) throws IOException, InterruptedException {
        core.cleanupAttemptOutput(context);
        core.cleanupTransactionOutput(context.getTransactionContext());
    }

    private static class MockStreamFormat extends BinaryStreamFormat<StringBuilder> {

        MockStreamFormat() {
            return;
        }

        @Override
        public Class<StringBuilder> getSupportedType() {
            return StringBuilder.class;
        }

        @Override
        public long getPreferredFragmentSize() throws IOException, InterruptedException {
            return -1;
        }

        @Override
        public long getMinimumFragmentSize() throws IOException, InterruptedException {
            return 1;
        }

        @Override
        public ModelInput<StringBuilder> createInput(Class<? extends StringBuilder> dataType, String path,
                InputStream stream, long offset, long fragmentSize) throws IOException, InterruptedException {
            Scanner s = new Scanner(stream, "UTF-8");
            return new ModelInput<StringBuilder>() {
                @Override
                public boolean readTo(StringBuilder model) throws IOException {
                    if (s.hasNextLine()) {
                        model.delete(0, model.length());
                        model.append(s.nextLine());
                        return true;
                    }
                    return false;
                }
                @Override
                public void close() throws IOException {
                    s.close();
                }
            };
        }

        @Override
        public ModelOutput<StringBuilder> createOutput(Class<? extends StringBuilder> dataType, String path,
                OutputStream stream) throws IOException, InterruptedException {
            PrintWriter w = new PrintWriter(new OutputStreamWriter(stream));
            return new ModelOutput<StringBuilder>() {
                @Override
                public void write(StringBuilder model) throws IOException {
                    w.println(model.toString());
                }
                @Override
                public void close() throws IOException {
                    w.close();
                }
            };
        }
    }
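    // MockFileFormat exposes the same line-oriented format through the HadoopFileFormat
    // API: it opens the target path via the Hadoop FileSystem and wraps the raw streams
    // in counting decorators before delegating to MockStreamFormat.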
    private static class MockFileFormat extends HadoopFileFormat<StringBuilder> {

        private final MockStreamFormat format = new MockStreamFormat();

        MockFileFormat() {
            return;
        }

        @Override
        public Class<StringBuilder> getSupportedType() {
            return format.getSupportedType();
        }

        @Override
        public long getPreferredFragmentSize() throws IOException, InterruptedException {
            return format.getPreferredFragmentSize();
        }

        @Override
        public long getMinimumFragmentSize() throws IOException, InterruptedException {
            return format.getMinimumFragmentSize();
        }

        @Override
        public ModelInput<StringBuilder> createInput(
                Class<? extends StringBuilder> dataType,
                FileSystem fileSystem, Path path,
                long offset, long fragmentSize,
                Counter counter) throws IOException, InterruptedException {
            FileSystem fs = FileSystem.get(path.toUri(), getConf());
            FSDataInputStream in = fs.open(path);
            boolean succeed = false;
            try {
                in.seek(offset);
                ModelInput<StringBuilder> result = format.createInput(
                        dataType, path.toString(), new CountInputStream(in, counter), offset, fragmentSize);
                succeed = true;
                return result;
            } finally {
                if (succeed == false) {
                    in.close();
                }
            }
        }

        @Override
        public ModelOutput<StringBuilder> createOutput(
                Class<? extends StringBuilder> dataType,
                FileSystem fileSystem, Path path,
                Counter counter) throws IOException, InterruptedException {
            FileSystem fs = FileSystem.get(path.toUri(), getConf());
            FSDataOutputStream out = fs.create(path);
            return format.createOutput(dataType, path.toString(), new CountOutputStream(out, counter));
        }
    }
}