/* * JBoss, Home of Professional Open Source * Copyright 2011 Red Hat Inc. and/or its affiliates and other contributors * as indicated by the @author tag. All rights reserved. * See the copyright.txt in the distribution for a * full listing of individual contributors. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions * of the GNU Lesser General Public License, v. 2.1. * This program is distributed in the hope that it will be useful, but WITHOUT A * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. * You should have received a copy of the GNU Lesser General Public License, * v.2.1 along with this distribution; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ package org.infinispan.demo.mapreduce; import com.martiansoftware.jsap.FlaggedOption; import com.martiansoftware.jsap.JSAP; import com.martiansoftware.jsap.JSAPException; import com.martiansoftware.jsap.Parameter; import com.martiansoftware.jsap.SimpleJSAP; import org.infinispan.Cache; import org.infinispan.demo.Demo; import org.infinispan.distexec.mapreduce.MapReduceTask; import org.infinispan.util.Util; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.nio.Buffer; import java.nio.CharBuffer; import java.util.List; import java.util.Map.Entry; import java.util.concurrent.locks.LockSupport; /** * Infinispan MapReduceTask demo * * @author Vladimir Blagojevic */ public class WordCountDemo extends Demo { private String textFile; private int numPopularWords; public static void main(String... args) throws Exception { new WordCountDemo(args).run(); } public WordCountDemo(String[] args) throws Exception { super(args); textFile = commandLineOptions.getString("textFile"); numPopularWords = commandLineOptions.getInt("mostPopularWords"); } private void run() throws Exception { // Step 1: start cache. Cache<String, String> cache = startCache(); // Step 2: load up data file if (textFile != null) loadData(cache); // Step 3: if slave, wait. Else, start Map/Reduce task. try { if (isMaster) { long start = System.currentTimeMillis(); MapReduceTask<String, String, String, Integer> mapReduceTask = new MapReduceTask<String, String, String, Integer>(cache); List<Entry<String, Integer>> topList = mapReduceTask .mappedWith(new WordCountMapper()) .reducedWith(new WordCountReducer()) .execute(new WordCountCollator(numPopularWords)); System.out.printf(" ---- RESULTS: Top %s words in %s ---- %n%n", numPopularWords, textFile); int z = 0; for (Entry<String, Integer> e : topList) System.out.printf(" %s) %s [ %,d occurences ]%n", ++z, e.getKey(), e.getValue()); System.out.printf("%nCompleted in %s%n%n", Util.prettyPrintTime(System.currentTimeMillis() - start)); } else { System.out.println("Slave node waiting for Map/Reduce tasks. Ctrl-C to exit."); LockSupport.park(); } } finally { cache.getCacheManager().stop(); } } private void loadData(Cache<String, String> cache) throws IOException { FileReader in = new FileReader(textFile); try { BufferedReader bufferedReader = new BufferedReader(in); //chunk and insert into cache int chunkSize = 10; // 10K int chunkId = 0; CharBuffer cbuf = CharBuffer.allocate(1024 * chunkSize); while (bufferedReader.read(cbuf) >= 0) { Buffer buffer = cbuf.flip(); String textChunk = buffer.toString(); cache.put(textFile + (chunkId++), textChunk); cbuf.clear(); if (chunkId % 100 == 0) System.out.printf(" Inserted %s chunks from %s into grid%n", chunkId, textFile); } } finally { Util.close(in); } } @Override protected SimpleJSAP buildCommandLineOptions() throws JSAPException { return new SimpleJSAP( "WordCountDemo", "Count words in Infinispan cache usin MapReduceTask ", new Parameter[]{ new FlaggedOption("configFile", JSAP.STRING_PARSER, "config-samples/distributed-ec2.xml", JSAP.NOT_REQUIRED, 'c', "configFile", "Infinispan transport config file"), new FlaggedOption("nodeType", JSAP.STRING_PARSER, "slave", JSAP.REQUIRED, 't', "nodeType", "Node type as either master or slave"), new FlaggedOption("textFile", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED, 'f', "textFile", "Input text file to distribute onto grid"), new FlaggedOption("mostPopularWords", JSAP.INTEGER_PARSER, "15", JSAP.NOT_REQUIRED, 'n', "mostPopularWords", "Number of most popular words to find")}); } }