package org.p7h.storm.sentimentanalysis.bolts;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import com.google.common.base.Optional;
import org.p7h.storm.sentimentanalysis.utils.BingMapsLookup;
import org.p7h.storm.sentimentanalysis.utils.Constants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import twitter4j.GeoLocation;
import twitter4j.Place;
import twitter4j.Status;
import java.io.IOException;
import java.util.Map;
import java.util.Properties;
/**
* Gets the location of tweet by all 3 means and then fwds the State code with the tweet to the next Bolt.
* There are three different objects within a tweet that we can use to determine it’s origin.
* This Class utilizes all the three of them and prioritizes in the following order [high to low]:
* 1. The coordinates object
* 2. The place object
* 3. The user object
*
* @author - Prashanth Babu
*/
public final class StateLocatorBolt extends BaseRichBolt {
private static final Logger LOGGER = LoggerFactory.getLogger(StateLocatorBolt.class);
private static final long serialVersionUID = 791846845769636712L;
private OutputCollector _outputCollector;
public StateLocatorBolt() {
//No op
}
@Override
public final void prepare(final Map map, final TopologyContext topologyContext,
final OutputCollector outputCollector) {
this._outputCollector = outputCollector;
final Properties properties = new Properties();
try {
properties.load(StateLocatorBolt.class.getClassLoader()
.getResourceAsStream(Constants.CONFIG_PROPERTIES_FILE));
} catch (final IOException ioException) {
//Should not occur. If it does, we cant continue. So exiting the program!
LOGGER.error(ioException.getMessage(), ioException);
System.exit(1);
}
//Bolt reads the Bing Maps API Value and stores the same to BING_MAPS_API_KEY_VALUE of Constants.java so that the Bolt can use it.
//For the lack of time I am using this Constant or else using a good Design Pattern, this can be fine-tuned.
Constants.BING_MAPS_API_KEY_VALUE = properties.getProperty(Constants.BING_MAPS_API_KEY);
}
@Override
public final void declareOutputFields(final OutputFieldsDeclarer outputFieldsDeclarer) {
//Emit the state and also the complete tweet to the next Bolt.
outputFieldsDeclarer.declare(new Fields("state", "tweet"));
}
@Override
@SuppressWarnings("unchecked")
public final void execute(final Tuple input) {
final Status status = (Status) input.getValueByField("tweet");
final Optional<String> stateOptional = getStateFromTweet(status);
if(stateOptional.isPresent()) {
final String state = stateOptional.get();
//Emit the state and also the complete tweet to the next Bolt.
this._outputCollector.emit(new Values(state, status));
}
}
/**
* Tries to get the State of the tweet by checking first GeoLocation Object, then Place Object and finally User Object.
*
* @param status -- Status Object.
* @return State of the Tweet.
*/
private final Optional<String> getStateFromTweet(final Status status) {
String state = getStateFromTweetGeoLocation(status);
state = getStateFromTweetPlaceObject(status, state);
state = getStateFromTweetUserObject(status, state);
if(null == state || !Constants.CONSOLIDATED_STATE_CODES.contains(state)) {
// LOGGER.info("Skipping invalid State: {}.", state);
return Optional.absent();
}
LOGGER.debug("State:{}", state);
return Optional.of(state);
}
/**
* Retrieves the State from User Object of the Tweet.
*
* @param status -- Status Object.
* @param state -- Current State.
* @return State of tweet.
*/
private final String getStateFromTweetUserObject(final Status status, String state) {
String stateFromUserObject = status.getUser().getLocation();
if(null == state && null != stateFromUserObject && 1 < stateFromUserObject.length()) {
String stateUser = stateFromUserObject.substring(stateFromUserObject.length() - 2).toUpperCase();
LOGGER.debug("State from User:{}", stateFromUserObject);
//Retry to get the State of the User if the last 2 chars are US for the User's Location object.
//This is just a pro-active check.
//This assumes the format: NY, US
if("US".equalsIgnoreCase(stateUser) && 5 < stateFromUserObject.length()){
stateUser = stateFromUserObject.substring(stateFromUserObject.length() - 6, stateFromUserObject.length() - 4);
LOGGER.debug("State from User again:{}", stateFromUserObject);
}
state = (2 == stateUser.length())? stateUser.toUpperCase(): null;
}
return state;
}
/**
* Retrieves the State from Place Object of the Tweet.
*
* @param status -- Status Object.
* @param state -- Current State.
* @return State of tweet.
*/
private final String getStateFromTweetPlaceObject(final Status status, String state) {
final Place place = status.getPlace();
if (null == state && null != place) {
final String placeName = place.getFullName();
if (null != placeName && 2 < placeName.length()) {
final String stateFromPlaceObject = placeName.substring(placeName.length() - 2);
LOGGER.debug("State from Place:{}", stateFromPlaceObject);
state = (2 == stateFromPlaceObject.length())? stateFromPlaceObject.toUpperCase(): null;
}
}
return state;
}
/**
* Retrieves the State from GeoLocation Object of the Tweet.
* This is considered as the primary and correct value for the State of the tweet.
*
* @param status -- Status Object.
* @return State of tweet.
*/
private final String getStateFromTweetGeoLocation(final Status status) {
String state = null;
final double latitude;
final double longitude;
final GeoLocation geoLocation = status.getGeoLocation();
if (null != geoLocation) {
latitude = geoLocation.getLatitude();
longitude = geoLocation.getLongitude();
LOGGER.debug("LatLng for BingMaps:{} and {}", latitude, longitude);
final Optional<String> stateGeoOptional = BingMapsLookup.reverseGeocodeFromLatLong(latitude, longitude);
if(stateGeoOptional.isPresent()){
final String stateFromGeoLocation = stateGeoOptional.get();
LOGGER.debug("State from BingMaps:{}", stateFromGeoLocation);
state = (2 == stateFromGeoLocation.length())? stateFromGeoLocation.toUpperCase(): null;
}
}
return state;
}
}