/**
 * Copyright 2011-2017 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.runtime.io.text.directio;

import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.UnaryOperator;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.compress.utils.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.junit.Test;

import com.asakusafw.runtime.io.ModelInput;
import com.asakusafw.runtime.io.ModelOutput;
import com.asakusafw.runtime.io.text.FieldReader;
import com.asakusafw.runtime.io.text.FieldWriter;
import com.asakusafw.runtime.io.text.TextFormat;
import com.asakusafw.runtime.io.text.driver.FieldDefinition;
import com.asakusafw.runtime.io.text.driver.HeaderType;
import com.asakusafw.runtime.io.text.driver.RecordDefinition;
import com.asakusafw.runtime.io.text.mock.MockFieldAdapter;
import com.asakusafw.runtime.io.text.mock.MockFieldReader;
import com.asakusafw.runtime.io.text.mock.MockFieldWriter;

/**
 * Test for {@link AbstractTextStreamFormat}.
 *
 * <p>
 * Records are exchanged in a tiny text encoding defined by {@link #serialize(String[][])} and
 * {@link #deserialize(byte[])}: fields are joined with {@code ':'} and rows with a line feed,
 * without a trailing line feed. With the 3x3 fixture every row therefore occupies six bytes
 * (five characters plus the separator), except for the last one.
 * </p>
 */
public class AbstractTextStreamFormatTest {

    /**
     * input - simple.
     * Also checks that both fragment sizes are reported as {@code -1} when no splitter is set.
     * @throws Exception if failed
     */
    @Test
    public void input() throws Exception {
        MockFormat format = format(1);
        assertThat(format.getPreferredFragmentSize(), is(-1L));
        assertThat(format.getMinimumFragmentSize(), is(-1L));

        String[][] expected = { { "Hello, world!" } };
        try (ModelInput<String[]> in = format.createInput(String[].class, "dummy", input(expected))) {
            assertThat(collect(1, in), is(expected));
        }
    }

    /**
     * input - w/ multiple rows/cols.
     * @throws Exception if failed
     */
    @Test
    public void input_multiple() throws Exception {
        MockFormat format = format(3);
        String[][] expected = abcGrid();
        try (ModelInput<String[]> in = format.createInput(String[].class, "dummy", input(expected))) {
            assertThat(collect(3, in), is(expected));
        }
    }

    /**
     * input - w/ splitter, reading the whole stream.
     * Also checks that the minimum fragment size becomes positive once a splitter is set.
     * @throws Exception if failed
     */
    @Test
    public void input_splitter_whole() throws Exception {
        MockFormat format = format(3).withInputSplitter(InputSplitters.byLineFeed());
        assertThat(format.getPreferredFragmentSize(), is(-1L));
        assertThat(format.getMinimumFragmentSize(), is(greaterThan(0L)));

        String[][] expected = abcGrid();
        try (ModelInput<String[]> in = format.createInput(String[].class, "dummy", input(expected))) {
            assertThat(collect(3, in), is(expected));
        }
    }

    /**
     * input - w/ splitter, fragment {@code (1, Long.MAX_VALUE)}: the first row must be dropped.
     * @throws Exception if failed
     */
    @Test
    public void input_splitter_trim_lead() throws Exception {
        MockFormat format = format(3).withInputSplitter(InputSplitters.byLineFeed());
        String[][] source = abcGrid();
        try (ModelInput<String[]> in = format.createInput(
                String[].class, "dummy", input(source), 1, Long.MAX_VALUE)) {
            assertThat(collect(3, in), is(Arrays.copyOfRange(source, 1, 3)));
        }
    }

    /**
     * input - w/ splitter, fragment {@code (0, 6)}: the last row must be dropped.
     * @throws Exception if failed
     */
    @Test
    public void input_splitter_trim_trail() throws Exception {
        MockFormat format = format(3).withInputSplitter(InputSplitters.byLineFeed());
        String[][] source = abcGrid();
        try (ModelInput<String[]> in = format.createInput(
                String[].class, "dummy", input(source), 0, 6)) {
            assertThat(collect(3, in), is(Arrays.copyOfRange(source, 0, 2)));
        }
    }

    /**
     * input - w/ splitter, fragment {@code (1, 6)}: only the middle row must remain.
     * @throws Exception if failed
     */
    @Test
    public void input_splitter_trim_around() throws Exception {
        MockFormat format = format(3).withInputSplitter(InputSplitters.byLineFeed());
        String[][] source = abcGrid();
        try (ModelInput<String[]> in = format.createInput(
                String[].class, "dummy", input(source), 1, 6)) {
            assertThat(collect(3, in), is(Arrays.copyOfRange(source, 1, 2)));
        }
    }

    /**
     * input - w/ header: the first row is consumed as the header and is not returned.
     * @throws Exception if failed
     */
    @Test
    public void input_hedaer() throws Exception {
        MockFormat format = format(3, HeaderType.FORCE);
        String[][] source = abcGrid();
        try (ModelInput<String[]> in = format.createInput(String[].class, "dummy", input(source))) {
            assertThat(collect(3, in), is(Arrays.copyOfRange(source, 1, 3)));
        }
    }

    /**
     * input - w/ header, fragment {@code (0, 1)}: nothing but the header is covered,
     * so no rows are expected.
     * @throws Exception if failed
     */
    @Test
    public void input_hedaer_split_first() throws Exception {
        MockFormat format = format(3, HeaderType.FORCE)
                .withInputSplitter(InputSplitters.byLineFeed());
        String[][] source = abcGrid();
        try (ModelInput<String[]> in = format.createInput(
                String[].class, "dummy", input(source), 0, 1)) {
            assertThat(collect(3, in), is(new String[0][]));
        }
    }

    /**
     * input - w/ header, fragment {@code (1, Long.MAX_VALUE)}: every row after the first
     * is expected.
     * @throws Exception if failed
     */
    @Test
    public void input_hedaer_split_rest() throws Exception {
        MockFormat format = format(3, HeaderType.FORCE)
                .withInputSplitter(InputSplitters.byLineFeed());
        String[][] source = abcGrid();
        try (ModelInput<String[]> in = format.createInput(
                String[].class, "dummy", input(source), 1, Long.MAX_VALUE)) {
            assertThat(collect(3, in), is(Arrays.copyOfRange(source, 1, 3)));
        }
    }

    /**
     * input - w/ compression: the serialized rows are gzipped before being handed to the format.
     * @throws Exception if failed
     */
    @Test
    public void input_compression() throws Exception {
        MockFormat format = format(1).withCodecClass(GzipCodec.class);
        String[][] expected = { { "Hello, world!" } };

        ByteArrayOutputStream compressed = new ByteArrayOutputStream();
        try (InputStream plain = input(expected);
                OutputStream gzip = new GZIPOutputStream(compressed)) {
            IOUtils.copy(plain, gzip);
        }

        try (ModelInput<String[]> in = format.createInput(
                String[].class, "dummy", new ByteArrayInputStream(compressed.toByteArray()))) {
            assertThat(collect(1, in), is(expected));
        }
    }

    /**
     * output - simple.
     * @throws Exception if failed
     */
    @Test
    public void output() throws Exception {
        MockFormat format = format(1);
        String[][] expected = { { "Hello, world!" } };

        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        try (ModelOutput<String[]> out = format.createOutput(String[].class, "dummy", sink)) {
            dump(out, expected);
        }
        assertThat(deserialize(sink.toByteArray()), is(expected));
    }

    /**
     * output - w/ header: the field names must be emitted as the first row.
     * @throws Exception if failed
     */
    @Test
    public void output_header() throws Exception {
        MockFormat format = format(HeaderType.FORCE, "a", "b", "c");
        String[][] body = {
                { "A", "B", "C" },
                { "D", "E", "F" },
        };

        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        try (ModelOutput<String[]> out = format.createOutput(String[].class, "dummy", sink)) {
            dump(out, body);
        }

        String[][] expected = {
                { "a", "b", "c" },
                { "A", "B", "C" },
                { "D", "E", "F" },
        };
        assertThat(deserialize(sink.toByteArray()), is(expected));
    }

    /**
     * output - w/ compression: the produced bytes must be a gzip stream of the serialized rows.
     * @throws Exception if failed
     */
    @Test
    public void output_compression() throws Exception {
        MockFormat format = format(1).withCodecClass(GzipCodec.class);
        String[][] expected = { { "Hello, world!" } };

        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        try (ModelOutput<String[]> out = format.createOutput(String[].class, "dummy", sink)) {
            dump(out, expected);
        }

        ByteArrayOutputStream plain = new ByteArrayOutputStream();
        try (InputStream gunzip = new GZIPInputStream(new ByteArrayInputStream(sink.toByteArray()))) {
            IOUtils.copy(gunzip, plain);
        }
        assertThat(deserialize(plain.toByteArray()), is(expected));
    }

    /**
     * Returns a fresh copy of the 3x3 fixture shared by the multi-row tests.
     * @return the rows {@code A..C}, {@code D..F} and {@code G..I}
     */
    private static String[][] abcGrid() {
        return new String[][] {
                { "A", "B", "C" },
                { "D", "E", "F" },
                { "G", "H", "I" },
        };
    }

    /**
     * Drains the input, reading each record into its own array.
     * @param columns the length of the array offered for each record
     * @param input the source to drain
     * @return the records read, in order
     * @throws IOException if reading failed
     */
    private String[][] collect(int columns, ModelInput<String[]> input) throws IOException {
        List<String[]> rows = new ArrayList<>();
        String[] current = new String[columns];
        while (input.readTo(current)) {
            rows.add(current);
            // never reuse a buffer that has already been handed to the result list
            current = new String[columns];
        }
        return rows.toArray(new String[rows.size()][]);
    }

    /**
     * Writes every row to the output, in order.
     * @param output the destination
     * @param fields the rows to write
     * @throws IOException if writing failed
     */
    private void dump(ModelOutput<String[]> output, String[][] fields) throws IOException {
        for (int index = 0; index < fields.length; index++) {
            output.write(fields[index]);
        }
    }

    /**
     * Creates a format with generated field names and no explicit header type.
     * @param columns the number of fields
     * @return the format
     */
    private MockFormat format(int columns) {
        return format(columns, null);
    }

    /**
     * Creates a format whose fields are named {@code p0}, {@code p1}, and so on.
     * @param columns the number of fields
     * @param headerType the header type, or {@code null} to leave the builder default
     * @return the format
     */
    private MockFormat format(int columns, HeaderType headerType) {
        String[] names = IntStream.range(0, columns)
                .mapToObj(index -> "p" + index)
                .toArray(String[]::new);
        return format(headerType, names);
    }

    /**
     * Creates a format with one field per header name, each bound to the array index
     * at the same position.
     * @param headerType the header type, or {@code null} to leave the builder default
     * @param header the field names
     * @return the format, already configured with a fresh {@link Configuration}
     */
    private MockFormat format(HeaderType headerType, String... header) {
        RecordDefinition.Builder<String[]> builder = RecordDefinition.builder(String[].class);
        if (headerType != null) {
            builder.withHeaderType(headerType);
        }
        int position = 0;
        for (String name : header) {
            FieldDefinition<String[]> field = FieldDefinition
                    .builder(name, MockFieldAdapter.supplier(position))
                    .build();
            builder.withField(UnaryOperator.identity(), field);
            position++;
        }
        MockFormat result = new MockFormat(builder.build());
        result.setConf(new Configuration());
        return result;
    }

    /**
     * Wraps the serialized form of the rows in an in-memory stream.
     * @param input the rows
     * @return a stream over {@link #serialize(String[][])} of the rows
     */
    static ByteArrayInputStream input(String[][] input) {
        byte[] bytes = serialize(input);
        return new ByteArrayInputStream(bytes);
    }

    /**
     * Encodes rows as UTF-8 text: fields joined with {@code ':'}, rows joined with a line feed.
     * @param fields the rows
     * @return the encoded bytes
     */
    static byte[] serialize(String[][] fields) {
        String text = Arrays.stream(fields)
                .map(row -> String.join(":", row))
                .collect(Collectors.joining("\n"));
        return text.getBytes(StandardCharsets.UTF_8);
    }

    /**
     * Decodes the text produced by {@link #serialize(String[][])}.
     * @param data the UTF-8 encoded bytes
     * @return one array per line, split on {@code ':'}
     */
    static String[][] deserialize(byte[] data) {
        String[] lines = new String(data, StandardCharsets.UTF_8).split("\n");
        String[][] table = new String[lines.length][];
        for (int index = 0; index < lines.length; index++) {
            table[index] = lines[index].split(":");
        }
        return table;
    }

    /**
     * A stream format over {@code String[]} records, backed by the mock field reader/writer
     * and the text encoding of the enclosing test. The codec and the splitter fall back to
     * the super class behavior until they are set explicitly.
     */
    private static class MockFormat extends AbstractTextStreamFormat<String[]> {

        private final RecordDefinition<String[]> definition;

        private Class<? extends CompressionCodec> codecClass;

        private InputSplitter inputSplitter;

        MockFormat(RecordDefinition<String[]> definition) {
            this.definition = definition;
        }

        /**
         * Sets the compression codec class.
         * @param aClass the codec class
         * @return this
         */
        MockFormat withCodecClass(Class<? extends CompressionCodec> aClass) {
            this.codecClass = aClass;
            return this;
        }

        /**
         * Sets the input splitter.
         * @param splitter the splitter
         * @return this
         */
        MockFormat withInputSplitter(InputSplitter splitter) {
            this.inputSplitter = splitter;
            return this;
        }

        @Override
        public Class<String[]> getSupportedType() {
            return String[].class;
        }

        @Override
        protected TextFormat createTextFormat() {
            return new TextFormat() {

                @Override
                public FieldReader open(InputStream input) throws IOException {
                    // slurp everything up front; the stream itself is closed with the reader
                    ByteArrayOutputStream contents = new ByteArrayOutputStream();
                    IOUtils.copy(input, contents);
                    String[][] rows = deserialize(contents.toByteArray());
                    return new MockFieldReader(rows) {
                        @Override
                        public void close() throws IOException {
                            input.close();
                        }
                    };
                }

                @Override
                public FieldWriter open(OutputStream output) throws IOException {
                    return new MockFieldWriter() {
                        @Override
                        public void close() throws IOException {
                            // the collected rows are flushed only once, on close
                            try {
                                byte[] bytes = serialize(get());
                                output.write(bytes);
                            } catch (IOException e) {
                                throw new AssertionError(e);
                            } finally {
                                output.close();
                            }
                        }
                    };
                }

                @Override
                public FieldReader open(Reader input) throws IOException {
                    throw new UnsupportedOperationException();
                }

                @Override
                public FieldWriter open(Writer output) throws IOException {
                    throw new UnsupportedOperationException();
                }
            };
        }

        @Override
        protected RecordDefinition<String[]> createRecordDefinition() {
            return definition;
        }

        @Override
        protected Class<? extends CompressionCodec> getCompressionCodecClass() {
            if (codecClass != null) {
                return codecClass;
            }
            return super.getCompressionCodecClass();
        }

        @Override
        protected InputSplitter getInputSplitter() {
            if (inputSplitter != null) {
                return inputSplitter;
            }
            return super.getInputSplitter();
        }
    }
}