package clustering;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.carrot2.clustering.lingo.LingoClusteringAlgorithm;
import org.carrot2.core.Cluster;
import org.carrot2.core.Controller;
import org.carrot2.core.ControllerFactory;
import org.carrot2.core.Document;
import org.carrot2.core.ProcessingResult;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import net.sf.json.JSONSerializer;
import tathya.text.tokenizer.TwitterTokenizer;
import twitter.ConsoleFormatter;
import twitter.HTTPRequest;
/**
 * Downloads a Twitter user timeline as JSON, tokenizes the text of every
 * tweet and clusters the resulting documents with the Carrot2 Lingo algorithm.
 */
public class CarrotClustering {

    /** Timeline endpoint queried by {@link #main(String[])}. */
    private static final String TIMELINE_URL =
            "http://twitter.com/statuses/user_timeline/rsumbaly.json?count=300";

    /** JSON key under which the tweet text is looked up. */
    private static final String TEXT_KEY = "text";

    /**
     * Returns the value stored under the {@code "text"} key of a JSON object.
     *
     * @param jsonObject the JSON object to inspect; may be {@code null}
     * @return the text value, or {@code null} when the object is {@code null},
     *         has no {@code "text"} key, or the value is not a string
     */
    public static String extractText(JSONObject jsonObject) {
        if (jsonObject == null || !jsonObject.containsKey(TEXT_KEY)) {
            return null;
        }
        // Guard the cast: the stored value may be a JSON null or a non-string type.
        final Object value = jsonObject.get(TEXT_KEY);
        return (value instanceof String) ? (String) value : null;
    }

    /**
     * Fetches the timeline, builds one Carrot2 document per token returned by
     * the tokenizer, clusters the documents and prints the clusters.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final HTTPRequest request = new HTTPRequest();
        final String response = request.getHTTPResponse(TIMELINE_URL);

        // The payload is only usable when it parses to a JSON array; anything
        // else (e.g. a single JSON object) cannot be iterated as a tweet list.
        final Object parsed = JSONSerializer.toJSON(response);
        if (!(parsed instanceof JSONArray)) {
            System.err.println("Expected a JSON array from " + TIMELINE_URL + " but got: " + parsed);
            return;
        }

        final List<Document> documents = buildDocuments((JSONArray) parsed);

        final Controller controller = ControllerFactory.createSimple();
        final ProcessingResult byTopicClusters =
                controller.process(documents, null, LingoClusteringAlgorithm.class);
        final List<Cluster> clustersByTopic = byTopicClusters.getClusters();
        ConsoleFormatter.displayClusters(clustersByTopic);
    }

    /** Converts every tweet in the array into Carrot2 documents, skipping entries without text. */
    private static List<Document> buildDocuments(JSONArray tweets) {
        final List<Document> documents = new ArrayList<Document>();
        final Iterator<?> elements = tweets.iterator();
        while (elements.hasNext()) {
            final Object element = elements.next();
            if (!(element instanceof JSONObject)) {
                continue; // not a JSON object, so it cannot carry a "text" field
            }
            final String tweetText = extractText((JSONObject) element);
            if (tweetText == null) {
                continue; // nothing to tokenize
            }
            final TwitterTokenizer tweetTokenizer = new TwitterTokenizer();
            for (String cleanTweet : tweetTokenizer.tokenize(tweetText)) {
                // Each token becomes a document with an empty title.
                documents.add(new Document("", cleanTweet));
            }
        }
        return documents;
    }
}