package skywriting.examples.wordcount;

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.StringTokenizer;
import java.io.*;
import java.net.URL;

import skywriting.examples.grep.IncrementerCombiner;
import skywriting.examples.grep.IntWritable;
import skywriting.examples.grep.PartialHashOutputCollector;
import skywriting.examples.grep.Text;
import uk.co.mrry.mercator.task.Task;

/**
 * Map-side task of the word-count example.
 *
 * <p>Reads text line by line from its first input stream, splits each line
 * into whitespace-delimited tokens and hands one {@code (token, 1)} pair per
 * token to a {@link PartialHashOutputCollector} built over the task's output
 * streams.
 *
 * <p>NOTE(review): how pairs are combined and distributed among the outputs is
 * decided by {@code PartialHashOutputCollector} and {@code IncrementerCombiner},
 * which are defined elsewhere - confirm against those classes.
 */
public class WordCountMapper implements Task {

    /** Shared count value emitted for every token. */
    private final static IntWritable one = new IntWritable(1);

    /**
     * Runs the mapper.
     *
     * <p>Only {@code inputs[0]} is consumed; any further input streams are
     * ignored. The text is decoded with the platform default charset. The
     * input stream is not closed here; every output stream is closed once all
     * input has been processed successfully.
     *
     * <p>If an {@link IOException} occurs, a message and stack trace are
     * printed and the JVM is terminated with exit status 1.
     *
     * @param inputs  input streams; must contain at least one element
     * @param outputs output streams, one per reducer
     * @param args    task arguments (unused)
     * @throws IllegalArgumentException if {@code inputs} is empty
     */
    public void invoke(InputStream[] inputs, OutputStream[] outputs, String[] args) {
        if (inputs.length == 0) {
            throw new IllegalArgumentException(
                    "WordCountMapper requires at least one input stream");
        }

        int nReducers = outputs.length;
        DataOutputStream[] dos = new DataOutputStream[nReducers];
        for (int i = 0; i < nReducers; i++) {
            dos[i] = new DataOutputStream(new BufferedOutputStream(outputs[i]));
        }

        // Only the first input is ever read, so only that one is wrapped.
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputs[0]));

        try {
            IncrementerCombiner comb = new IncrementerCombiner();
            // NOTE(review): 1000 is presumably the collector's in-memory
            // threshold - confirm against PartialHashOutputCollector.
            PartialHashOutputCollector<Text, IntWritable> outMap =
                    new PartialHashOutputCollector<Text, IntWritable>(dos, nReducers, 1000, comb);

            String line;
            while ((line = reader.readLine()) != null) {
                StringTokenizer itr = new StringTokenizer(line);
                while (itr.hasMoreTokens()) {
                    // A fresh Text per token: the collector receives the
                    // object itself, so instances are not reused here.
                    Text word = new Text();
                    word.set(itr.nextToken());
                    outMap.collect(word, one);
                }
            }

            outMap.flushAll();
            for (DataOutputStream d : dos) {
                d.close();
            }
        } catch (IOException e) {
            System.out.println("IOException while running mapper");
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Stand-alone driver: runs the mapper over four remote input files,
     * writing two output files ({@code wc_map_out_<i>_<j>}) per input.
     *
     * <p>All streams opened for an input are closed before the next input is
     * processed, including when opening a stream or running the mapper throws.
     * (If {@code invoke} terminates the JVM via {@code System.exit}, the
     * cleanup is skipped, but the process is ending anyway.)
     *
     * @param args passed through to {@link #invoke}
     * @throws Exception if an input URL or an output file cannot be opened
     */
    public static void main(String[] args) throws Exception {
        int nMappers = 4;
        int nOutputs = 2;

        for (int i = 0; i < nMappers; i++) {
            // Fresh arrays per iteration so cleanup never sees stale streams
            // left over from a previous input.
            InputStream[] fis = new InputStream[1];
            FileOutputStream[] fos = new FileOutputStream[nOutputs];
            try {
                URL u = new URL("http://www.cl.cam.ac.uk/~ms705/sw/wc_input_" + i);
                fis[0] = u.openStream();
                for (int j = 0; j < fos.length; j++) {
                    fos[j] = new FileOutputStream("wc_map_out_" + i + "_" + j);
                }

                WordCountMapper m = new WordCountMapper();
                m.invoke(fis, fos, args);
            } finally {
                closeQuietly(fis[0]);
                for (int j = 0; j < fos.length; j++) {
                    closeQuietly(fos[j]);
                }
            }
        }
    }

    /**
     * Closes {@code c} if it is non-null, ignoring any failure.
     *
     * <p>Used only for cleanup: on the success path {@code invoke} has already
     * flushed and closed the outputs, and on the failure path the original
     * exception is the one worth propagating.
     */
    private static void closeQuietly(Closeable c) {
        if (c == null) {
            return;
        }
        try {
            c.close();
        } catch (IOException ignored) {
            // Best-effort cleanup; nothing useful can be done here.
        }
    }
}