package com.ebottabi.bolt; import java.io.IOException; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.util.Arrays; import java.util.LinkedList; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import org.apache.zookeeper.proto.SyncRequest; import org.json.simple.JSONObject; import com.ebottabi.bolt.RedisBolt.OnDynamicConfigurationListener; import java.util.Map; //import storm.starter.utils.Utils; import twitter4j.Status; import twitter4j.URLEntity; /** * * Bolt for publishing into a redis channel the results for those tweets that * contains a link. * * Will try to expand the url if it's shorten. * * @author ebot * */ public class RedisLinksPublisherBolt extends RedisBolt implements OnDynamicConfigurationListener { public static final String CHANNEL = "links"; private final List<String> forbiddenDomains = new LinkedList<String>(); public RedisLinksPublisherBolt() { super(CHANNEL); } @Override protected void setupNonSerializableAttributes() { // TODO Auto-generated method stub super.setupNonSerializableAttributes(); setupDynamicConfiguration(this); } @Override public List<Object> filter(Status status) { URLEntity urls[] = status.getURLEntities(); if (urls == null) { return null; } URL finalUrl = null; List<Object> marketUrls = new LinkedList<Object>(); for (URLEntity url : urls) { try { //finalUrl = getFinalUrl(url.getURL()); finalUrl = new URL(url.getExpandedURL()); if (finalUrl == null) { continue; } String extra = null; if (!url.getURL().toString().equals(finalUrl.toString())) { extra = ", \"original\": \"" + url.getURL().toString() + "\""; } JSONObject msg = new JSONObject(); msg.put("link", finalUrl.toString()); msg.put("host", finalUrl.getHost()); if (extra != null) { msg.put("original", url.getURL().toString()); } publish(msg.toJSONString()); if ("play.google.com".equals(finalUrl.getHost()) && finalUrl.getPath().contains("details")) { marketUrls.add(msg.toJSONString()); } } catch (MalformedURLException ex) { Logger.getLogger(RedisLinksPublisherBolt.class.getName()).log(Level.SEVERE, null, ex); } } return marketUrls.size() == 0 ? null : marketUrls; } private URL getFinalUrl(URL url, int deep) { if (url == null) { return null; } if (deep <= 0) { return url; } synchronized (forbiddenDomains) { if (forbiddenDomains.contains(url.getHost())) { log.debug("Forbidden link: " + url.toString()); return null; } } if (url.toString().length() > 30) { return url; } try { HttpURLConnection connection = (HttpURLConnection) url.openConnection(); connection.setInstanceFollowRedirects(false); String location = connection.getHeaderField("Location"); if (location != null && location.length() > 0) { URL newURL = new URL(location); return getFinalUrl(newURL, deep - 1); } } catch (IOException e) { return url; } return url; } private URL getFinalUrl(URL url) { return getFinalUrl(url, 5); } public void onConfigurationChange(String conf) { //Utils.StringToList(conf, forbiddenDomains); } @Override public Map<String, Object> getComponentConfiguration() { return null; } }