package com.produban.openbus.analysis; import backtype.storm.tuple.Values; import org.json.JSONArray; import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import storm.trident.operation.BaseFunction; import storm.trident.operation.TridentCollector; import storm.trident.tuple.TridentTuple; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import static com.produban.openbus.util.Common.join; /** * Class Description */ public class TweetJsonDecoder extends BaseFunction { private static final Logger logger = LoggerFactory.getLogger(TweetJsonDecoder.class); @Override public void execute(TridentTuple objects, TridentCollector tridentCollector) { byte[] jsonBytes = objects.getBinary(0); String jsonString = new String(jsonBytes); if (jsonBytes.length == 0) { logger.warn("EMPTY json bytes received!"); return; } logger.info("DECODING JSON:"); logger.info(jsonString); logger.info("FIRST CHAR:" + jsonString.charAt(0)); JSONObject jsonObj = new JSONObject(jsonString.trim()); String tweetId = jsonObj.getString("id_str"); String rawDate = jsonObj.getString("created_at"); String twitterDateFormat = "EEE MMM dd HH:mm:ss Z yyyy"; Date tweetDate = null; try { tweetDate = new SimpleDateFormat(twitterDateFormat).parse(rawDate); } catch (ParseException e) { logger.error("Error parsing tweet date: "+rawDate); logger.error(e.toString()); } String text = jsonObj.getString("text"); String lang = jsonObj.isNull("lang") ? null : jsonObj.getString("lang"); int retweetCount = jsonObj.getInt("retweet_count"); double longitude = 0; double latitude = 0; if (!jsonObj.isNull("coordinates")) { JSONArray coordinates = jsonObj.getJSONObject("coordinates").getJSONArray("coordinates"); longitude = coordinates.getDouble(0); latitude = coordinates.getDouble(1); } JSONObject user = jsonObj.getJSONObject("user"); int userFollowerCount = user.getInt("followers_count"); String userLocation = user.getString("location"); String userName = user.getString("screen_name"); String userId = user.getString("id_str"); String userImgUrl = user.getString("profile_image_url"); JSONObject entities = jsonObj.getJSONObject("entities"); JSONArray entityUrls = entities.getJSONArray("urls"); ArrayList<String> urls = new ArrayList<>(); for (int i = 0; i < entityUrls.length(); i++) { JSONObject entityUrl = entityUrls.getJSONObject(i); urls.add(entityUrl.getString("url")); } JSONArray entityUserMentions = entities.getJSONArray("user_mentions"); ArrayList<String> mentionedUsers = new ArrayList<>(); for (int i = 0; i < entityUserMentions.length(); i++) { JSONObject mention = entityUserMentions.getJSONObject(i); mentionedUsers.add(mention.getString("screen_name")); } JSONArray entityHashTags = entities.getJSONArray("hashtags"); ArrayList<String> hashtags = new ArrayList<>(); for (int i = 0; i < entityHashTags.length(); i++) { JSONObject hashtag = entityHashTags.getJSONObject(i); hashtags.add(hashtag.getString("text")); } Values tridentValues = new Values(); tridentValues.add(tweetId); tridentValues.add(rawDate); tridentValues.add(tweetDate); tridentValues.add(text); tridentValues.add(lang); tridentValues.add(retweetCount); tridentValues.add(longitude); tridentValues.add(latitude); tridentValues.add(userFollowerCount); tridentValues.add(userLocation); tridentValues.add(userName); tridentValues.add(userId); tridentValues.add(userImgUrl); tridentValues.add(join(urls, "|")); tridentValues.add(join(mentionedUsers, "|")); tridentValues.add(join(hashtags, "|")); tridentCollector.emit(tridentValues); } }