/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.api.java.hadoop.mapred;

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.hadoop.mapred.wrapper.HadoopInputSplit;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

import org.junit.Test;

import java.io.IOException;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertThat;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.anyInt;
import static org.mockito.Mockito.anyLong;
import static org.mockito.Mockito.anyString;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

/**
 * Tests for {@link HadoopInputFormat}.
 */
public class HadoopInputFormatTest {

	@Test
	public void testConfigureWithConfigurableInstance() {
		ConfigurableDummyInputFormat inputFormat = mock(ConfigurableDummyInputFormat.class);

		HadoopInputFormat<String, Long> hadoopInputFormat =
				new HadoopInputFormat<>(inputFormat, String.class, Long.class, new JobConf());
		// The constructor already hands the JobConf to a Configurable input format once.
		verify(inputFormat, times(1)).setConf(any(JobConf.class));

		hadoopInputFormat.configure(new org.apache.flink.configuration.Configuration());
		// configure() must pass the JobConf to the wrapped input format a second time.
		verify(inputFormat, times(2)).setConf(any(JobConf.class));
	}

	@Test
	public void testConfigureWithJobConfigurableInstance() {
		JobConfigurableDummyInputFormat inputFormat = mock(JobConfigurableDummyInputFormat.class);

		HadoopInputFormat<String, Long> hadoopInputFormat =
				new HadoopInputFormat<>(inputFormat, String.class, Long.class, new JobConf());
		// Same contract as above, but via the JobConfigurable interface.
		verify(inputFormat, times(1)).configure(any(JobConf.class));

		hadoopInputFormat.configure(new org.apache.flink.configuration.Configuration());
		verify(inputFormat, times(2)).configure(any(JobConf.class));
	}

	@Test
	public void testOpenClose() throws Exception {
		DummyRecordReader recordReader = mock(DummyRecordReader.class);
		DummyInputFormat inputFormat = mock(DummyInputFormat.class);
		when(inputFormat.getRecordReader(any(InputSplit.class), any(JobConf.class), any(Reporter.class)))
				.thenReturn(recordReader);

		HadoopInputFormat<String, Long> hadoopInputFormat =
				new HadoopInputFormat<>(inputFormat, String.class, Long.class, new JobConf());
		hadoopInputFormat.open(getHadoopInputSplit());

		// open() obtains a record reader for the split and creates the key/value instances.
		verify(inputFormat, times(1)).getRecordReader(any(InputSplit.class), any(JobConf.class), any(Reporter.class));
		verify(recordReader, times(1)).createKey();
		verify(recordReader, times(1)).createValue();

		assertThat(hadoopInputFormat.fetched, is(false));

		hadoopInputFormat.close();
		verify(recordReader, times(1)).close();
	}

	@Test
	public void testOpenWithConfigurableReader() throws Exception {
		ConfigurableDummyRecordReader recordReader = mock(ConfigurableDummyRecordReader.class);
		DummyInputFormat inputFormat = mock(DummyInputFormat.class);
		when(inputFormat.getRecordReader(any(InputSplit.class), any(JobConf.class), any(Reporter.class)))
				.thenReturn(recordReader);

		HadoopInputFormat<String, Long> hadoopInputFormat =
				new HadoopInputFormat<>(inputFormat, String.class, Long.class, new JobConf());
		hadoopInputFormat.open(getHadoopInputSplit());

		verify(inputFormat, times(1)).getRecordReader(any(InputSplit.class), any(JobConf.class), any(Reporter.class));
		// A record reader that implements Configurable must additionally receive the JobConf.
		verify(recordReader, times(1)).setConf(any(JobConf.class));
		verify(recordReader, times(1)).createKey();
		verify(recordReader, times(1)).createValue();

		assertThat(hadoopInputFormat.fetched, is(false));
	}

	@Test
	public void testCreateInputSplits() throws Exception {
		FileSplit[] result = new FileSplit[1];
		result[0] = getFileSplit();
		DummyInputFormat inputFormat = mock(DummyInputFormat.class);
		when(inputFormat.getSplits(any(JobConf.class), anyInt())).thenReturn(result);

		HadoopInputFormat<String, Long> hadoopInputFormat =
				new HadoopInputFormat<>(inputFormat, String.class, Long.class, new JobConf());
		hadoopInputFormat.createInputSplits(2);

		// Split creation must be delegated to the wrapped Hadoop input format.
		verify(inputFormat, times(1)).getSplits(any(JobConf.class), anyInt());
	}

	@Test
	public void testReachedEndWithElementsRemaining() throws IOException {
		HadoopInputFormat<String, Long> hadoopInputFormat =
				new HadoopInputFormat<>(new DummyInputFormat(), String.class, Long.class, new JobConf());
		hadoopInputFormat.fetched = true;
		hadoopInputFormat.hasNext = true;

		assertThat(hadoopInputFormat.reachedEnd(), is(false));
	}

	@Test
	public void testReachedEndWithNoElementsRemaining() throws IOException {
		HadoopInputFormat<String, Long> hadoopInputFormat =
				new HadoopInputFormat<>(new DummyInputFormat(), String.class, Long.class, new JobConf());
		hadoopInputFormat.fetched = true;
		hadoopInputFormat.hasNext = false;

		assertThat(hadoopInputFormat.reachedEnd(), is(true));
	}

	@Test
	public void testFetchNext() throws IOException {
		DummyRecordReader recordReader = mock(DummyRecordReader.class);
		when(recordReader.next(anyString(), anyLong())).thenReturn(true);

		DummyInputFormat inputFormat = mock(DummyInputFormat.class);
		when(inputFormat.getRecordReader(any(InputSplit.class), any(JobConf.class), any(Reporter.class)))
				.thenReturn(recordReader);

		HadoopInputFormat<String, Long> hadoopInputFormat =
				new HadoopInputFormat<>(inputFormat, String.class, Long.class, new JobConf());
		hadoopInputFormat.open(getHadoopInputSplit());
		hadoopInputFormat.fetchNext();

		// fetchNext() reads one record and records whether more input is available.
		verify(recordReader, times(1)).next(anyString(), anyLong());
		assertThat(hadoopInputFormat.hasNext, is(true));
		assertThat(hadoopInputFormat.fetched, is(true));
	}

	@Test
	public void checkTypeInformation() throws Exception {
		HadoopInputFormat<Void, Long> hadoopInputFormat = new HadoopInputFormat<>(
				new DummyVoidKeyInputFormat<Long>(), Void.class, Long.class, new JobConf());

		// The produced type must be a Tuple2 of the key and value classes.
		TypeInformation<Tuple2<Void, Long>> tupleType = hadoopInputFormat.getProducedType();
		TypeInformation<Tuple2<Void, Long>> expectedType =
				new TupleTypeInfo<>(BasicTypeInfo.VOID_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO);
		assertThat(tupleType.isTupleType(), is(true));
		assertThat(tupleType, is(equalTo(expectedType)));
	}

	@Test
	public void testCloseWithoutOpen() throws Exception {
		HadoopInputFormat<Void, Long> hadoopInputFormat = new HadoopInputFormat<>(
				new DummyVoidKeyInputFormat<Long>(), Void.class, Long.class, new JobConf());
		// close() must not fail when open() was never called.
		hadoopInputFormat.close();
	}

	private HadoopInputSplit getHadoopInputSplit() {
		return new HadoopInputSplit(1, getFileSplit(), new JobConf());
	}

	private FileSplit getFileSplit() {
		return new FileSplit(new Path("path"), 1, 2, new String[]{});
	}

	/** A file input format with a Void key, used to check the produced type information. */
	private static class DummyVoidKeyInputFormat<T> extends FileInputFormat<Void, T> {

		public DummyVoidKeyInputFormat() {}

		@Override
		public RecordReader<Void, T> getRecordReader(InputSplit inputSplit, JobConf jobConf, Reporter reporter) throws IOException {
			return null;
		}
	}

	/** A no-op record reader to be mocked in the tests above. */
	private static class DummyRecordReader implements RecordReader<String, Long> {

		@Override
		public float getProgress() throws IOException {
			return 0;
		}

		@Override
		public boolean next(String s, Long aLong) throws IOException {
			return false;
		}

		@Override
		public String createKey() {
			return null;
		}

		@Override
		public Long createValue() {
			return null;
		}

		@Override
		public long getPos() throws IOException {
			return 0;
		}

		@Override
		public void close() throws IOException {
		}
	}

	/** A no-op record reader that also implements Configurable. */
	private static class ConfigurableDummyRecordReader implements RecordReader<String, Long>, Configurable {

		@Override
		public void setConf(Configuration configuration) {}

		@Override
		public Configuration getConf() {
			return null;
		}

		@Override
		public boolean next(String s, Long aLong) throws IOException {
			return false;
		}

		@Override
		public String createKey() {
			return null;
		}

		@Override
		public Long createValue() {
			return null;
		}

		@Override
		public long getPos() throws IOException {
			return 0;
		}

		@Override
		public void close() throws IOException {
		}

		@Override
		public float getProgress() throws IOException {
			return 0;
		}
	}

	/** A no-op input format to be mocked in the tests above. */
	private static class DummyInputFormat implements InputFormat<String, Long> {

		@Override
		public InputSplit[] getSplits(JobConf jobConf, int i) throws IOException {
			return new InputSplit[0];
		}

		@Override
		public RecordReader<String, Long> getRecordReader(InputSplit inputSplit, JobConf jobConf, Reporter reporter) throws IOException {
			return null;
		}
	}

	/** A dummy input format that additionally implements Configurable. */
	private static class ConfigurableDummyInputFormat extends DummyInputFormat implements Configurable {

		@Override
		public void setConf(Configuration configuration) {}

		@Override
		public Configuration getConf() {
			return null;
		}
	}

	/** A dummy input format that additionally implements JobConfigurable. */
	private static class JobConfigurableDummyInputFormat extends DummyInputFormat implements JobConfigurable {

		@Override
		public void configure(JobConf jobConf) {}
	}
}