package qa.qcri.aidr.collector.collectors;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.codehaus.jackson.map.ObjectMapper;
import qa.qcri.aidr.collector.beans.FacebookCollectionTask;
import qa.qcri.aidr.collector.beans.FacebookEntityType;
import qa.qcri.aidr.collector.beans.FacebookProfile;
import qa.qcri.aidr.collector.utils.CollectorConfigurationProperty;
import qa.qcri.aidr.collector.utils.CollectorConfigurator;
import qa.qcri.aidr.collector.utils.GenericCache;
import qa.qcri.aidr.common.redis.LoadShedder;
import com.google.gson.Gson;
import facebook4j.Facebook;
import facebook4j.FacebookException;
import facebook4j.FacebookFactory;
import facebook4j.Ordering;
import facebook4j.Post;
import facebook4j.Reading;
import facebook4j.ResponseList;
import facebook4j.conf.Configuration;
import facebook4j.conf.ConfigurationBuilder;
import facebook4j.internal.logging.Logger;
import facebook4j.internal.org.json.JSONException;
import facebook4j.internal.org.json.JSONObject;
/**
 * Pulls posts from Facebook feeds for one collection task and publishes each
 * post as a JSON document through a {@link JedisPublisher}.
 *
 * <p>The profiles whose feeds are read come from a keyword search (pages,
 * groups and events) and from the task's list of followed profiles. Progress
 * and stop signalling are coordinated through {@link GenericCache}.
 */
public class FacebookFeedTracker implements Closeable {
// Logger taken from facebook4j's internal logging package.
private static Logger logger = Logger.getLogger(FacebookFeedTracker.class);
// Collector configuration: Facebook app credentials, channel name and API rate-limit settings are read from it.
private static CollectorConfigurator configProperties = CollectorConfigurator.getInstance();
// Destination for collected posts; not set by the access-token-only constructor.
private JedisPublisher publisher;
// facebook4j client bound to the access token passed to the constructor.
private Facebook facebook;
// Collection being tracked; null when the tracker was built from an access token alone.
private final FacebookCollectionTask task;
// Throttles feed requests; null when the tracker was built from an access token alone.
private final LoadShedder fbApiHitShedder;
// Page size for feed and search requests; a full page means another page is requested.
private static final int DEFAULT_LIMIT = 100;
// Multiplier applied to task.getFetchFrom() to get a look-back window in milliseconds.
private static final Long HOUR_IN_MILLISECS = 60 * 60 * 1000L;
// Comma-separated list of post fields requested from the feed endpoint.
private static String FIELDS_TO_FETCH = "id,updated_time,message_tags,scheduled_publish_time,"
+ "created_time, full_picture,object_id,with_tags, is_published, "
+ "from,to,message,picture,link,name,caption,description,source,properties,"
+ "icon,actions,privacy,type,shares,status_type,place,story,"
+ "application,targeting,likes.summary(true),comments.summary(true)";
/**
 * Builds a tracker that owns only a Facebook client, with no collection task
 * and no rate limiter attached.
 *
 * @param accessToken OAuth access token used for every Facebook API call
 */
public FacebookFeedTracker(String accessToken){
    this.facebook = getFacebookInstance(accessToken);
    // No collection is attached to this instance.
    this.task = null;
    this.fbApiHitShedder = null;
}
public FacebookFeedTracker(FacebookCollectionTask task) {
logger.info("Waiting to aquire Jedis connection for collection " + task.getCollectionCode());
this.facebook = getFacebookInstance(task.getAccessToken());
this.publisher = JedisPublisher.newInstance();
logger.info("Jedis connection acquired for collection " + task.getCollectionCode());
fbApiHitShedder = new LoadShedder(Integer.parseInt(configProperties.getProperty(CollectorConfigurationProperty.FACEBOOK_MAX_API_HITS_HOURLY_PER_USER)),
Integer.parseInt(configProperties.getProperty(CollectorConfigurationProperty.FACEBOOK_LOAD_CHECK_INTERVAL_MINUTES)),
true, "FACEBOOK_API_CALL_SHEDDER." + task.getCollectionCode());
this.task = task;
}
/**
 * Starts a new thread that runs {@link #collectFacebookData()} while holding
 * the monitor of this collection's sync object.
 */
public void start() {
new Thread(new Runnable() {
@Override
public void run() {
// Use the sync object already registered for this collection, or fall back to Boolean.TRUE.
// NOTE(review): Boolean.TRUE is a single JVM-wide instance, so every collection that takes
// the fallback locks on the same monitor — confirm this is intended.
Boolean syncObj = GenericCache.getInstance().getFbSyncObjMap(task.getCollectionCode()) == null ? Boolean.TRUE : GenericCache.getInstance().getFbSyncObjMap(task.getCollectionCode());
synchronized (syncObj) {
// Register the lock and set the state to 0, the value the collection loops test before continuing.
GenericCache.getInstance().setFbSyncObjMap(task.getCollectionCode(), syncObj);
GenericCache.getInstance().setFbSyncStateMap(task.getCollectionCode(), 0);
// Runs inside the monitor; the notifyAll() calls made during collection need it to be held.
collectFacebookData();
}
}
}).start();
}
/**
 * Shuts down the Facebook client and closes the Jedis publisher, if one was
 * created.
 *
 * <p>The publisher is closed even when shutting down the client throws.
 *
 * @throws IOException declared by {@link Closeable#close()}
 */
@Override
public void close() throws IOException {
    try {
        facebook.shutdown();
    } finally {
        // The access-token-only constructor never creates a publisher.
        if (publisher != null) {
            publisher.close();
        }
    }
    // Name the collection when there is one; otherwise the message is unchanged.
    String trackedCollection = task != null ? task.getCollectionCode() : "";
    logger.info("AIDR-Fetcher: Collection stopped which was tracking " + trackedCollection);
}
/**
 * Creates a facebook4j client configured with the application credentials from
 * the collector configuration and the given user access token.
 *
 * @param accessToken OAuth access token to attach to the client
 * @return a new {@link Facebook} instance with JSON store enabled and debug disabled
 */
private static Facebook getFacebookInstance(String accessToken) {
    String appId = configProperties.getProperty(CollectorConfigurationProperty.FACEBOOK_CONSUMER_KEY);
    String appSecret = configProperties.getProperty(CollectorConfigurationProperty.FACEBOOK_CONSUMER_SECRET);

    ConfigurationBuilder settings = new ConfigurationBuilder();
    settings.setDebugEnabled(false);
    settings.setOAuthAppId(appId);
    settings.setOAuthAppSecret(appSecret);
    settings.setJSONStoreEnabled(true);
    settings.setOAuthAccessToken(accessToken);

    FacebookFactory factory = new FacebookFactory(settings.build());
    return factory.getInstance();
}
/**
 * Runs one collection pass for the task: resolves the profiles to read
 * (keyword search plus followed profiles), works out the time window, and
 * hands both to {@code processPost}.
 *
 * <p>On success the task's last execution time is moved to the start of this
 * pass and the task is stored back in {@link GenericCache}. If a
 * {@link FacebookException} escapes the pass, the task is registered as a
 * failed collection instead. In both cases the pull-in-progress flag is
 * cleared.
 *
 * <p>Must be called while holding the monitor of the collection's sync object
 * (as {@code start()} does), because the stop path calls {@code notifyAll()}
 * on it.
 */
public void collectFacebookData() {
    // A publisher may already exist (the task constructor creates one). Release it once the
    // fresh one is in place so its connection is not leaked.
    JedisPublisher previousPublisher = this.publisher;
    this.publisher = JedisPublisher.newInstance();
    if (previousPublisher != null && previousPublisher != this.publisher) {
        try {
            previousPublisher.close();
        } catch (Exception e) {
            logger.error("Unable to close previous Jedis publisher for collection "
                    + task.getCollectionCode(), e);
        }
    }
    logger.info("Jedis connection acquired for collection " + task.getCollectionCode());

    Date toTimestamp = new Date();
    long fetchFromInMiliSecs = task.getFetchFrom() * HOUR_IN_MILLISECS;
    Date fromTimestamp = new Date(System.currentTimeMillis() - fetchFromInMiliSecs);
    try {
        task.setPullInProgress(true);

        // Search all profiles by keywords
        List<FacebookProfile> fbProfiles = new ArrayList<FacebookProfile>();
        if (StringUtils.isNotBlank(task.getToTrack())) {
            fbProfiles = searchProfiles(task.getToTrack(), -1, 0);
        }

        // Add subscribed profiles to the list
        if (StringUtils.isNotBlank(task.getToFollow())) {
            try {
                ObjectMapper mapper = new ObjectMapper();
                List<FacebookProfile> subscribedProfiles = mapper.readValue(task.getToFollow(),
                        mapper.getTypeFactory().constructCollectionType(List.class, FacebookProfile.class));
                // A JSON "null" document deserializes to null; addAll(null) would throw.
                if (subscribedProfiles != null) {
                    fbProfiles.addAll(subscribedProfiles);
                }
            } catch (IOException e) {
                logger.error("Exception while parsing facebook subscribed page json", e);
            }
        }

        // If the previous pass ended inside the look-back window, resume from it
        // instead of re-reading the whole window.
        if (task.getLastExecutionTime() != null
                && (System.currentTimeMillis() - task.getLastExecutionTime().getTime()) <= fetchFromInMiliSecs) {
            fromTimestamp = task.getLastExecutionTime();
        }

        if (GenericCache.getInstance().getFbSyncStateMap(task.getCollectionCode()) == 0) {
            this.processPost(toTimestamp, fromTimestamp, fbProfiles);
        } else {
            // State is no longer 0: skip the pass and wake any thread waiting on the sync object.
            GenericCache.getInstance().getFbSyncObjMap(task.getCollectionCode()).notifyAll();
        }

        task.setPullInProgress(false);
        task.setLastExecutionTime(toTimestamp);
        GenericCache.getInstance().setFbConfigMap(task.getCollectionCode(), task);
    } catch (FacebookException e) {
        // The pass is over even though it failed; do not leave the flag stuck at true.
        task.setPullInProgress(false);
        GenericCache.getInstance().setFailedCollection(task.getCollectionCode(), task);
    }
}
/**
 * Reads the feed of every distinct profile in {@code fbProfiles} for the given
 * time window, page by page, and publishes each post to the collection's
 * channel.
 *
 * <p>Before each request the method waits for the load shedder to allow
 * another API hit. It stops early when the collection's sync state is no
 * longer 0, waking any thread waiting on the collection's sync object.
 *
 * @param toTimestamp upper bound of the window ({@code until})
 * @param since lower bound of the window
 * @param fbProfiles profiles whose feeds are read; repeated ids are read once
 * @throws FacebookException when {@code handleFacebookException} decides the
 *         error is fatal for the collection
 */
private void processPost(Date toTimestamp, Date since, List<FacebookProfile> fbProfiles)
        throws FacebookException {
    String channelName = configProperties.getProperty(CollectorConfigurationProperty.COLLECTOR_CHANNEL) + "."
            + task.getCollectionCode();
    Gson gson = new Gson();
    Set<String> processedFbProfileIdsSet = new HashSet<String>();

    for (FacebookProfile fbProfile : fbProfiles) {
        // Set.add returns false when the id is already present, i.e. the profile was handled.
        if (!processedFbProfileIdsSet.add(fbProfile.getId())) {
            continue;
        }
        if (isStopRequested()) {
            notifyStopWaiters();
            break;
        }

        int postsOffset = 0;
        while (postsOffset >= 0) {
            waitForApiQuota();
            if (isStopRequested()) {
                // Leaves the paging loop; the check at the top of the next profile ends the outer loop.
                notifyStopWaiters();
                break;
            }
            try {
                ResponseList<Post> feed = facebook.getFeed(fbProfile.getId(),
                        new Reading().fields(FIELDS_TO_FETCH).since(since).until(toTimestamp)
                                .order(Ordering.CHRONOLOGICAL).limit(DEFAULT_LIMIT).offset(postsOffset));
                // A full page means there may be more; anything shorter ends paging for this profile.
                postsOffset = feed.size() == DEFAULT_LIMIT ? postsOffset + DEFAULT_LIMIT : -1;

                for (Post post : feed) {
                    publishPost(channelName, gson, fbProfile, post);
                }
                GenericCache.getInstance().incrCounter(task.getCollectionCode(), (long) feed.size());
                recordLastDownloadedDoc(feed);
            } catch (FacebookException e) {
                logger.warn("Exception while fetching feeds for id: " + fbProfile.getId());
                // Rethrows for errors it treats as fatal; otherwise move on to the next profile.
                handleFacebookException(e, task.getCollectionCode());
                postsOffset = -1;
            }
        }
    }
}

/** Returns true once the collection's sync state in GenericCache is no longer 0. */
private boolean isStopRequested() {
    return GenericCache.getInstance().getFbSyncStateMap(task.getCollectionCode()) != 0;
}

/** Wakes every thread waiting on the collection's sync object; the caller must hold its monitor. */
private void notifyStopWaiters() {
    GenericCache.getInstance().getFbSyncObjMap(task.getCollectionCode()).notifyAll();
}

/** Sleeps in 5-second steps until the load shedder allows another API hit or the state leaves 0. */
private void waitForApiQuota() {
    while (!fbApiHitShedder.canProcess() && !isStopRequested()) {
        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
            // Logged only; the loop re-checks the shedder and the state on the next iteration.
            logger.warn("Interrupted exception while sleeping in load shedder for collection code: "
                    + task.getCollectionCode());
        }
    }
}

/** Wraps one post with collection metadata and like/comment counts, then publishes it. */
private void publishPost(String channelName, Gson gson, FacebookProfile fbProfile, Post post) {
    try {
        JSONObject aidrJson = new JSONObject();
        aidrJson.put("doctype", "facebook");
        aidrJson.put("crisis_code", task.getCollectionCode());
        aidrJson.put("crisis_name", task.getCollectionName());
        aidrJson.put("parent_type", fbProfile.getType().name().toLowerCase());

        JSONObject docJson = new JSONObject(gson.toJson(post));
        docJson.put("aidr", aidrJson);

        // A missing list, summary or total is reported as 0 rather than failing the pass.
        int likeCount = 0;
        if (post.getLikes() != null && post.getLikes().getSummary() != null) {
            Integer totalLikes = post.getLikes().getSummary().getTotalCount();
            if (totalLikes != null) {
                likeCount = totalLikes;
            }
        }
        docJson.put("likesCount", likeCount);

        int commentCount = 0;
        if (post.getComments() != null && post.getComments().getSummary() != null) {
            Integer totalComments = post.getComments().getSummary().getTotalCount();
            if (totalComments != null) {
                commentCount = totalComments;
            }
        }
        docJson.put("commentsCount", commentCount);

        publisher.publish(channelName, docJson.toString());
    } catch (JSONException e) {
        // Only this post is skipped; include the reason so the failure can be diagnosed.
        logger.warn("Post error for parent id : " + fbProfile.getId() + " and type : "
                + fbProfile.getType().name() + " : " + e.getMessage());
    }
}

/** Stores the message of the last post in the page as the collection's last downloaded document. */
private void recordLastDownloadedDoc(ResponseList<Post> feed) {
    if (feed.isEmpty()) {
        return;
    }
    String lastDownloadedDoc = feed.get(feed.size() - 1).getMessage();
    // Messages longer than 500 characters are cut to their first 250 plus an ellipsis.
    if (lastDownloadedDoc != null && lastDownloadedDoc.length() > 500) {
        lastDownloadedDoc = lastDownloadedDoc.substring(0, 250) + "...";
    }
    GenericCache.getInstance().setLastDownloadedDoc(task.getCollectionCode(), lastDownloadedDoc);
}
/**
 * Searches Facebook pages, groups and events for a keyword.
 *
 * <p>With a {@code limit} other than -1, one request per entity type is made
 * using the given limit and offset. With {@code limit == -1}, every entity
 * type is paged through in steps of {@code DEFAULT_LIMIT} until a page comes
 * back short; pages start at {@code offset}, groups and events at 0.
 *
 * @param keyword search term
 * @param limit page size, or -1 to fetch everything
 * @param offset starting offset
 * @return matching profiles in the order pages, groups, events
 */
public List<FacebookProfile> searchProfiles(String keyword, Integer limit, Integer offset){
    List<FacebookProfile> found = new ArrayList<FacebookProfile>();

    if (limit != -1) {
        // Single page of each entity type.
        found.addAll(fetchPages(keyword, limit, offset));
        found.addAll(fetchGroups(keyword, limit, offset));
        found.addAll(fetchEvents(keyword, limit, offset));
        return found;
    }

    // To fetch all profiles: keep paging while pages come back full.
    final Integer pageSize = DEFAULT_LIMIT;

    for (int cursor = offset; cursor >= 0; ) {
        List<FacebookProfile> batch = fetchPages(keyword, pageSize, cursor);
        found.addAll(batch);
        cursor = (batch.size() == pageSize) ? cursor + pageSize : -1;
    }

    for (int cursor = 0; cursor >= 0; ) {
        List<FacebookProfile> batch = fetchGroups(keyword, pageSize, cursor);
        found.addAll(batch);
        cursor = (batch.size() == pageSize) ? cursor + pageSize : -1;
    }

    for (int cursor = 0; cursor >= 0; ) {
        List<FacebookProfile> batch = fetchEvents(keyword, pageSize, cursor);
        found.addAll(batch);
        cursor = (batch.size() == pageSize) ? cursor + pageSize : -1;
    }

    return found;
}
/**
 * Searches Facebook pages matching a keyword and converts one result page
 * into {@link FacebookProfile}s of type {@code PAGE}.
 *
 * <p>A failed search is logged with its cause and yields an empty list. A
 * result that is missing any of the fields read here is logged and skipped.
 *
 * @param keyword search term
 * @param limit page size; values above {@code DEFAULT_LIMIT} or below 0 are replaced by {@code DEFAULT_LIMIT}
 * @param offset result offset; a negative value returns an empty list without calling the API
 * @return the profiles parsed from the response, possibly empty, never null
 */
public List<FacebookProfile> fetchPages(String keyword, Integer limit, Integer offset) {
    if (limit > DEFAULT_LIMIT || limit < 0) {
        limit = DEFAULT_LIMIT;
    }
    List<FacebookProfile> fbProfiles = new ArrayList<FacebookProfile>();
    if (offset < 0) {
        return fbProfiles;
    }

    ResponseList<JSONObject> pageList = null;
    try {
        pageList = facebook.search(keyword, "page",
                new Reading().fields("id,name,link,likes.summary(true),fan_count,picture").limit(limit).offset(offset));
    } catch (FacebookException e) {
        // Keep the cause in the log; the caller only ever sees an empty list.
        logger.error("Exception while searching Facebook pages for keyword: " + keyword, e);
    }

    if (CollectionUtils.isNotEmpty(pageList)) {
        for (JSONObject jsonObject : pageList) {
            try {
                FacebookProfile facebookProfile = new FacebookProfile();
                facebookProfile.setId(jsonObject.getString("id"));
                facebookProfile.setLink(jsonObject.getString("link"));
                facebookProfile.setName(jsonObject.getString("name"));
                facebookProfile.setFans(jsonObject.getInt("fan_count"));
                facebookProfile.setType(FacebookEntityType.PAGE);
                facebookProfile.setImageUrl(
                        jsonObject.getJSONObject("picture").getJSONObject("data").getString("url"));
                fbProfiles.add(facebookProfile);
            } catch (JSONException e) {
                logger.warn("Exception while parsing page Json: " + e.getMessage());
            }
        }
    }
    return fbProfiles;
}
/**
 * Searches Facebook groups matching a keyword and converts one result page
 * into {@link FacebookProfile}s of type {@code GROUP}.
 *
 * <p>The profile link is built from the group id rather than read from the
 * response. A failed search is logged with its cause and yields an empty
 * list. A result that is missing any of the fields read here is logged and
 * skipped.
 *
 * @param keyword search term
 * @param limit page size; values above {@code DEFAULT_LIMIT} or below 0 are replaced by {@code DEFAULT_LIMIT}
 * @param offset result offset; a negative value returns an empty list without calling the API
 * @return the profiles parsed from the response, possibly empty, never null
 */
public List<FacebookProfile> fetchGroups(String keyword, Integer limit, Integer offset) {
    if (limit > DEFAULT_LIMIT || limit < 0) {
        limit = DEFAULT_LIMIT;
    }
    List<FacebookProfile> fbProfiles = new ArrayList<FacebookProfile>();
    if (offset < 0) {
        return fbProfiles;
    }

    ResponseList<JSONObject> groupList = null;
    try {
        groupList = facebook.search(keyword, "group",
                new Reading().fields("id,name,link,picture").limit(limit).offset(offset));
    } catch (FacebookException e) {
        // Keep the cause in the log; the caller only ever sees an empty list.
        logger.error("Exception while searching Facebook groups for keyword: " + keyword, e);
    }

    if (CollectionUtils.isNotEmpty(groupList)) {
        for (JSONObject jsonObject : groupList) {
            try {
                FacebookProfile facebookProfile = new FacebookProfile();
                facebookProfile.setId(jsonObject.getString("id"));
                facebookProfile.setLink("https://www.facebook.com/groups/" + facebookProfile.getId());
                facebookProfile.setName(jsonObject.getString("name"));
                facebookProfile.setImageUrl(
                        jsonObject.getJSONObject("picture").getJSONObject("data").getString("url"));
                facebookProfile.setType(FacebookEntityType.GROUP);
                fbProfiles.add(facebookProfile);
            } catch (JSONException e) {
                logger.warn("Exception while parsing group Json: " + e.getMessage());
            }
        }
    }
    return fbProfiles;
}
/**
 * Searches Facebook events matching a keyword and converts one result page
 * into {@link FacebookProfile}s of type {@code EVENT}.
 *
 * <p>The profile link is built from the event id rather than read from the
 * response. A failed search is logged with its cause and yields an empty
 * list. A result that is missing any of the fields read here is logged and
 * skipped.
 *
 * @param keyword search term
 * @param limit page size; values above {@code DEFAULT_LIMIT} or below 0 are replaced by {@code DEFAULT_LIMIT}
 * @param offset result offset; a negative value returns an empty list without calling the API
 * @return the profiles parsed from the response, possibly empty, never null
 */
public List<FacebookProfile> fetchEvents(String keyword, Integer limit, Integer offset) {
    if (limit > DEFAULT_LIMIT || limit < 0) {
        limit = DEFAULT_LIMIT;
    }
    List<FacebookProfile> fbProfiles = new ArrayList<FacebookProfile>();
    if (offset < 0) {
        return fbProfiles;
    }

    ResponseList<JSONObject> eventList = null;
    try {
        eventList = facebook.search(keyword, "event",
                new Reading().fields("id,name,link,picture").limit(limit).offset(offset));
    } catch (FacebookException e) {
        // Keep the cause in the log; the caller only ever sees an empty list.
        logger.error("Exception while searching Facebook events for keyword: " + keyword, e);
    }

    if (CollectionUtils.isNotEmpty(eventList)) {
        for (JSONObject jsonObject : eventList) {
            try {
                FacebookProfile facebookProfile = new FacebookProfile();
                facebookProfile.setId(jsonObject.getString("id"));
                facebookProfile.setLink("https://www.facebook.com/events/" + facebookProfile.getId());
                facebookProfile.setName(jsonObject.getString("name"));
                facebookProfile.setImageUrl(
                        jsonObject.getJSONObject("picture").getJSONObject("data").getString("url"));
                facebookProfile.setType(FacebookEntityType.EVENT);
                fbProfiles.add(facebookProfile);
            } catch (JSONException e) {
                logger.warn("Exception while parsing event Json: " + e.getMessage());
            }
        }
    }
    return fbProfiles;
}
/**
 * Classifies a Facebook API error and decides whether the collection can
 * carry on.
 *
 * <ul>
 * <li>Error codes 1, 2, 4, 17 and 341 are logged as rate limiting and
 *     swallowed.</li>
 * <li>Error codes 102 and 190, and error subcodes 458, 459, 460, 463, 464 and
 *     467, are logged as an OAuth/access-token problem and rethrown wrapped in
 *     a new {@link FacebookException}.</li>
 * <li>Anything else is logged and swallowed.</li>
 * </ul>
 *
 * @param e the error returned by the Facebook API
 * @param collectionCode collection the failing call belonged to; used in log messages
 * @throws FacebookException wrapping {@code e} when the error is an OAuth/access-token failure
 */
private void handleFacebookException(FacebookException e, String collectionCode) throws FacebookException {
    switch (e.getErrorCode()) {
    case 1:
    case 2:
    case 4:
    case 17:
    case 341:
        logger.error("Facebook api is rate limited for collectionCode: " + collectionCode, e);
        // Rate limiting is not fatal; the subcode is not inspected.
        return;
    case 102:
    case 190:
        // 190 is the Graph API code for an invalid or expired access token. It is fatal
        // whether or not one of the subcodes below accompanies it.
        logger.error("Oauth Exception. May be access token got expired for collectionCode: " + collectionCode, e);
        throw new FacebookException(e);
    default:
        break;
    }

    switch (e.getErrorSubcode()) {
    case 458:
    case 459:
    case 460:
    case 463:
    case 464:
    case 467:
        logger.error("Oauth Exception. May be access token got expired for collectionCode: " + collectionCode,
                e);
        throw new FacebookException(e);
    default:
        logger.error("Facebook Exception", e);
    }
}
}