/*******************************************************************************
 * Copyright 2013
 * TU Darmstadt, FG Sprachtechnologie
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package org.dkpro.bigdata.io.hadoop;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.dkpro.bigdata.io.hadoop.ARCInputFormat.ARCRecordReader;
import org.junit.Before;
import org.junit.Test;

/**
 * Unit tests for {@link ARCInputFormat.ARCRecordReader}.
 *
 * <p>
 * Each test builds a {@link FileSplit} over one of the ARC fixture files, opens a record reader
 * on it and hands the reader to {@code checkNRecordsRemaining} (inherited from
 * {@link InputFormatTest}) together with the number of records the test expects.
 *
 * @author Johannes Simon
 *
 */
public class ARCInputFormatTest
    extends InputFormatTest
{
    /** Fixture archive used by the offset tests and the simple read test. */
    final static String ARCHIVE_SIMPLE = "src/test/resources/arc/simple-archive.arc";

    /** Fixture archive used by the nine-record read test. */
    final static String ARCHIVE_SEMI_COMPLEX = "src/test/resources/arc/semi-complex-archive.arc";

    // Split start offsets for the offset tests. Going by the names they presumably fall inside
    // the first and the second record of the simple archive -- TODO confirm against the fixture.
    final static int offsetInRecord1 = 10;
    final static int offsetInRecord2 = 200;

    /** Job configuration passed to every reader; re-created before each test by {@link #init()}. */
    JobConf jobConf;

    /**
     * Creates a fresh {@link JobConf} for the test about to run.
     */
    @Before
    public void init()
    {
        jobConf = new JobConf(ARCInputFormatTest.class);
    }

    /**
     * Reads the simple archive with a split starting at {@code offsetInRecord1} and expects two
     * records.
     *
     * @throws IOException
     *             if the reader cannot be created or read
     */
    @Test
    public void testReadFromOffset1()
        throws IOException
    {
        checkNRecordsRemaining(newArcReader(ARCHIVE_SIMPLE, offsetInRecord1, 639), 2);
    }

    /**
     * Reads the simple archive with a split starting at {@code offsetInRecord2} and expects one
     * record.
     *
     * @throws IOException
     *             if the reader cannot be created or read
     */
    @Test
    public void testReadFromOffset2()
        throws IOException
    {
        checkNRecordsRemaining(newArcReader(ARCHIVE_SIMPLE, offsetInRecord2, 639), 1);
    }

    /**
     * Reads the simple archive with a split starting at offset 0 and expects two records.
     *
     * @throws IOException
     *             if the reader cannot be created or read
     */
    @Test
    public void testReadSimpleArchive()
        throws IOException
    {
        checkNRecordsRemaining(newArcReader(ARCHIVE_SIMPLE, 0, 639), 2);
    }

    /**
     * Reads the semi-complex archive with a split starting at offset 0 and expects nine records.
     *
     * @throws IOException
     *             if the reader cannot be created or read
     */
    @Test
    public void testReadSemiComplexArchive()
        throws IOException
    {
        checkNRecordsRemaining(newArcReader(ARCHIVE_SEMI_COMPLEX, 0, 2382), 9);
    }

    /**
     * Opens a record reader over one split of an archive file, using the current
     * {@link #jobConf}.
     *
     * @param archive
     *            path of the archive file
     * @param start
     *            start offset handed to the {@link FileSplit}
     * @param length
     *            length handed to the {@link FileSplit}; the tests pass 639 and 2382, presumably
     *            the byte sizes of the two fixtures -- TODO confirm
     * @return a new reader for the described split
     * @throws IOException
     *             if the reader cannot be created
     */
    private ARCRecordReader newArcReader(String archive, long start, long length)
        throws IOException
    {
        Path archivePath = new Path(archive);
        // No host list is supplied; the cast picks the String[] constructor overload.
        FileSplit split = new FileSplit(archivePath, start, length, (String[]) null);
        return new ARCRecordReader(split, jobConf);
    }
}