/*
* Copyright (c) 2012 GigaSpaces Technologies Ltd. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openspaces.bigdata.processor;
import static com.google.common.collect.Maps.newHashMap;
import static com.j_spaces.core.client.UpdateModifiers.UPDATE_OR_WRITE;
import java.util.Map;
import java.util.Map.Entry;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.annotation.Resource;
import org.openspaces.bigdata.processor.events.TokenCounter;
import org.openspaces.bigdata.processor.events.TokenizedTweet;
import org.openspaces.core.GigaSpace;
import org.openspaces.events.EventDriven;
import org.openspaces.events.EventTemplate;
import org.openspaces.events.TransactionalEvent;
import org.openspaces.events.adapter.SpaceDataEvent;
import org.openspaces.events.polling.Polling;
import org.openspaces.events.polling.ReceiveHandler;
import org.openspaces.events.polling.receive.MultiTakeReceiveOperationHandler;
import org.openspaces.events.polling.receive.ReceiveOperationHandler;
/**
 * Polling container processor that performs a token count on bulks of {@link TokenizedTweet}.
 * <p>
 * Each bulk of filtered tweets is reduced to one aggregated count per token, and a
 * {@link TokenCounter} is then written for every distinct token through {@link #clusteredGigaSpace}.
 * The container is declared with a single consumer and the listener is annotated with
 * {@link TransactionalEvent}.
 *
 * @author Dotan Horovits
 */
@EventDriven
@Polling(gigaSpace = "gigaSpace", passArrayAsIs = true, concurrentConsumers = 1, maxConcurrentConsumers = 1, receiveTimeout = 1000)
@TransactionalEvent
public class LocalTokenCounter {

    private static final Logger log = Logger.getLogger(LocalTokenCounter.class.getName());

    /** Maximum number of entries requested by the multi-take receive handler per poll. */
    private static final int BATCH_SIZE = 5;

    /** Timeout argument passed to every {@code write} call (presumably milliseconds -- confirm against the GigaSpace API). */
    private static final int WRITE_TIMEOUT = 1000;

    /** Lease argument passed to every {@code write} call (presumably milliseconds -- confirm against the GigaSpace API). */
    private static final int LEASE_TTL = 5000;

    /** Space proxy the resulting {@link TokenCounter}s are written to. */
    @Resource(name = "clusteredGigaSpace")
    GigaSpace clusteredGigaSpace;

    /** Space proxy injected under the same bean name that the {@link Polling} annotation refers to. */
    @Resource(name = "gigaSpace")
    GigaSpace gigaSpace;

    /**
     * Builds the receive handler used by the polling container: a multi-take handler limited to
     * {@link #BATCH_SIZE} entries, configured as non-blocking with a non-blocking factor of 1.
     *
     * @return the configured receive operation handler
     */
    @ReceiveHandler
    ReceiveOperationHandler receiveHandler() {
        MultiTakeReceiveOperationHandler receiveHandler = new MultiTakeReceiveOperationHandler();
        receiveHandler.setMaxEntries(BATCH_SIZE);
        receiveHandler.setNonBlocking(true);
        receiveHandler.setNonBlockingFactor(1);
        return receiveHandler;
    }

    /**
     * This method returns the template of a filtered {@link TokenizedTweet}.
     *
     * @return template for the event container
     */
    @EventTemplate
    TokenizedTweet tokenizedFilteredTweet() {
        TokenizedTweet template = new TokenizedTweet();
        template.setFiltered(true);
        return template;
    }

    /**
     * Event handler that takes a bulk of {@link TokenizedTweet}, counts appearances of tokens in the
     * bulk, and writes a corresponding {@link TokenCounter} for each distinct token.
     * <p>
     * A {@code null} or empty bulk is ignored. Tweets that are {@code null} or carry no token map
     * are skipped with a warning instead of failing the whole bulk.
     *
     * @param tokenizedTweets
     *            array of {@link TokenizedTweet} matching the event template
     */
    @SpaceDataEvent
    public void eventListener(TokenizedTweet[] tokenizedTweets) {
        if (tokenizedTweets == null || tokenizedTweets.length == 0) {
            return;
        }
        log.info("local counting of a bulk of " + tokenizedTweets.length + " tweets");
        Map<String, Integer> tokenMap = countTokens(tokenizedTweets);
        log.info("writing " + tokenMap.size() + " TokenCounters across the cluster");
        writeTokenCounters(tokenMap);
    }

    /**
     * Sums the per-tweet token counts of a bulk into a single map keyed by token.
     *
     * @param tokenizedTweets
     *            non-null bulk of tweets; individual elements may be {@code null}
     * @return aggregated count per token, never {@code null}
     */
    private static Map<String, Integer> countTokens(TokenizedTweet[] tokenizedTweets) {
        // Resolve the log levels once so that the hot loops do not build message
        // strings (including a toString() per tweet) that would be discarded anyway.
        final boolean fineEnabled = log.isLoggable(Level.FINE);
        final boolean finestEnabled = log.isLoggable(Level.FINEST);

        Map<String, Integer> totals = newHashMap();
        for (TokenizedTweet tokenizedTweet : tokenizedTweets) {
            if (fineEnabled) {
                log.fine("--processing " + tokenizedTweet);
            }
            Map<String, Integer> tweetTokens = tokenizedTweet == null ? null : tokenizedTweet.getTokenMap();
            if (tweetTokens == null) {
                // An exception here would fail the whole bulk; skip the bad entry instead.
                log.warning("skipping tweet without a token map: " + tokenizedTweet);
                continue;
            }
            for (Entry<String, Integer> entry : tweetTokens.entrySet()) {
                String token = entry.getKey();
                Integer count = entry.getValue();
                if (count == null) {
                    // Nothing to add; unboxing a null count would throw.
                    continue;
                }
                Integer previous = totals.get(token);
                int newCount = previous == null ? count : previous + count;
                if (finestEnabled) {
                    log.finest("put token " + token + " with count " + newCount);
                }
                totals.put(token, newCount);
            }
        }
        return totals;
    }

    /**
     * Writes one {@link TokenCounter} per map entry through {@link #clusteredGigaSpace}, using
     * {@link #LEASE_TTL}, {@link #WRITE_TIMEOUT} and the {@code UPDATE_OR_WRITE} modifier.
     *
     * @param tokenMap
     *            aggregated count per token
     */
    private void writeTokenCounters(Map<String, Integer> tokenMap) {
        final boolean fineEnabled = log.isLoggable(Level.FINE);
        for (Entry<String, Integer> entry : tokenMap.entrySet()) {
            String token = entry.getKey();
            Integer count = entry.getValue();
            if (fineEnabled) {
                log.fine("writing new TokenCounter: token=" + token + ", count=" + count);
            }
            clusteredGigaSpace.write(new TokenCounter(token, count), LEASE_TTL, WRITE_TIMEOUT, UPDATE_OR_WRITE);
        }
    }
}