/*
 * Copyright 2012
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.dkpro.bigdata.hadoop;

import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;

import java.io.File;
import java.io.FileFilter;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.NullOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.component.CasDumpWriter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.resource.ResourceInitializationException;
import org.dkpro.bigdata.hadoop.DkproHadoopDriver;
import org.dkpro.bigdata.io.hadoop.Text2CASInputFormat;
import org.junit.Assert;
import org.junit.Test;

import com.google.common.io.Files;

/**
 * Runs a small pipeline through {@link DkproHadoopDriver} and checks that the file written by the
 * CAS consumer ({@link CasDumpWriter}) shows up below the job's output directory.
 *
 * @author Steffen Remus
 **/
public class CasConsumerOutputTest
{

    /**
     * Runs {@link CasConsumerOutputPipeline} on the test input and verifies that
     * <ul>
     * <li>a {@code _SUCCESS} marker exists in the output directory,</li>
     * <li>exactly one sub-directory whose name starts with {@code uima_output_attempt} exists,
     * and</li>
     * <li>that directory contains a non-empty {@code cas_consumer_output.txt}.</li>
     * </ul>
     *
     * @throws Exception
     *             if the job (or the test set-up) fails; propagated so that JUnit reports the
     *             test as an error instead of silently passing.
     */
    @Test
    public void test()
        throws Exception
    {
        // Relative path: resolved against the working directory the test is started from.
        String inputdir = "src/test/resources/test-input";
        String outputdir = Files.createTempDir().getAbsolutePath();
        String[] args = { inputdir, outputdir };
        System.out.println(Arrays.asList(args));

        ToolRunner.run(new Configuration(), new CasConsumerOutputPipeline(), args);

        Assert.assertTrue("missing _SUCCESS marker in " + outputdir,
                new File(outputdir, "_SUCCESS").exists());

        File[] attemptDirs = new File(outputdir).listFiles(new FileFilter()
        {
            @Override
            public boolean accept(File pathname)
            {
                return pathname.isDirectory()
                        && pathname.getName().startsWith("uima_output_attempt");
            }
        });
        // listFiles returns null (not an empty array) if the directory cannot be listed.
        Assert.assertNotNull("cannot list output directory " + outputdir, attemptDirs);

        for (File attemptDir : attemptDirs) {
            File outputFile = new File(attemptDir, "cas_consumer_output.txt");
            Assert.assertTrue("missing " + outputFile, outputFile.exists());
            Assert.assertTrue("empty " + outputFile, outputFile.length() > 0);
        }
        Assert.assertEquals(1, attemptDirs.length);
    }

    /**
     * Driver under test: the mapper engine consists of a single {@link CasDumpWriter}, there is
     * no reducer engine, and the Hadoop job itself is configured with
     * {@link NullOutputFormat}.
     */
    public static class CasConsumerOutputPipeline
        extends DkproHadoopDriver
    {

        /**
         * Builds the mapper engine: a {@link CasDumpWriter} whose output file is
         * {@code $dir/cas_consumer_output.txt}.
         *
         * @param job
         *            the job configuration (not used here)
         * @return the engine description wrapping the writer
         * @throws ResourceInitializationException
         *             if the description cannot be created
         */
        @Override
        public AnalysisEngineDescription buildMapperEngine(Configuration job)
            throws ResourceInitializationException
        {
            // NOTE(review): "$dir" is presumably substituted by the driver with the per-attempt
            // output directory (the test looks for the file below "uima_output_attempt*") -
            // confirm against DkproHadoopDriver.
            AnalysisEngineDescription wri = AnalysisEngineFactory.createEngineDescription(
                    CasDumpWriter.class,
                    CasDumpWriter.PARAM_OUTPUT_FILE, "$dir/cas_consumer_output.txt");
            return createEngineDescription(wri);
        }

        /**
         * No reducer engine is provided.
         *
         * @param job
         *            the job configuration (not used here)
         * @return always {@code null}
         */
        @Override
        public AnalysisEngineDescription buildReducerEngine(Configuration job)
            throws ResourceInitializationException
        {
            return null;
        }

        /**
         * @return {@link Text2CASInputFormat}, the input format used to read the test input
         */
        @Override
        public Class<?> getInputFormatClass()
        {
            return Text2CASInputFormat.class;
        }

        /**
         * Sets {@link NullOutputFormat} as the job's output format.
         *
         * @param job
         *            the job configuration to modify
         */
        @Override
        public void configure(JobConf job)
        {
            job.setOutputFormat(NullOutputFormat.class);
        }
    }
}