package com.personalityextractor.commons.data;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
/**
 * A tweet that is broken down, at construction time, into its links, hashtags
 * and plain-text sentences.
 *
 * <p>Tokens equal to "rt" (ignoring case) and tokens starting with '@' are
 * dropped. Tokens that look like links or hashtags are collected separately
 * and are not part of the sentences.
 */
public class Tweet {

    /** Separator used to break text into tokens. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Matches a token that begins with at least one ASCII upper-case letter. */
    private static final Pattern CAPITALIZED_TOKEN = Pattern.compile("^[A-Z]+.*");

    /** Runs of these punctuation characters separate sentences. */
    private static final Pattern SENTENCE_DELIMITERS =
            Pattern.compile("[:;\"?/><,\\.!@%^()\\-+=~`{}|]+");

    private final String text;
    private final List<String> sentences;
    private final List<String> links;
    private final List<String> hashTags;
    private final List<String> analyzedhashTags;
    private final boolean isReply;

    /**
     * Creates a tweet and immediately tokenizes its text.
     *
     * @param text the raw tweet text; must not be null
     * @throws NullPointerException if {@code text} is null
     */
    public Tweet(String text) {
        if (text == null) {
            throw new NullPointerException("text must not be null");
        }
        this.text = text;
        this.hashTags = new ArrayList<String>();
        this.links = new ArrayList<String>();
        this.sentences = new ArrayList<String>();
        this.analyzedhashTags = new ArrayList<String>();
        // A tweet whose text starts with '@' is treated as a reply.
        this.isReply = text.startsWith("@");
        tokenize();
    }

    /**
     * Reports whether more than half of the whitespace-separated tokens of
     * {@code text} start with an ASCII upper-case letter.
     *
     * <p>This helper is not called anywhere in this class at present.
     *
     * @param text the text to inspect
     * @return true if the share of capitalized tokens exceeds 0.5
     */
    private boolean isNewsArticle(String text) {
        String[] tokens = WHITESPACE.split(text);
        double capitalized = 0.0;
        for (String token : tokens) {
            if (CAPITALIZED_TOKEN.matcher(token).matches()) {
                capitalized++;
            }
        }
        return (capitalized / ((double) tokens.length)) > 0.5;
    }

    /**
     * Classifies a character by case.
     *
     * @param c the character to classify
     * @return "u" if {@code c} is upper case, otherwise "l"
     */
    private static String determineCase(char c) {
        return Character.isUpperCase(c) ? "u" : "l";
    }

    /**
     * Splits a hashtag body into words at upper/lower case transitions and
     * joins the words with single spaces, e.g. "WhatEverthatIS" becomes
     * "What Everthat IS".
     *
     * <p>A word is only closed once it holds more than one character, so a
     * single leading character stays attached to the run that follows it.
     *
     * @param tag the hashtag text without the leading '#', in its original case
     * @return the space-separated words, trimmed
     */
    private static String analyzeHashTags(String tag) {
        List<String> words = new ArrayList<String>();
        StringBuilder word = new StringBuilder();
        int length = tag.length();
        for (int i = 0; i < length; i++) {
            char current = tag.charAt(i);
            word.append(current);
            String currentCase = determineCase(current);
            // The empty marker for "no next character" never equals "u" or "l",
            // so the last character always counts as a transition.
            String nextCase = (i < length - 1) ? determineCase(tag.charAt(i + 1)) : "";
            if (!currentCase.equalsIgnoreCase(nextCase) && word.length() > 1) {
                words.add(word.toString());
                word = new StringBuilder();
            }
        }
        // Whatever is left over (possibly empty, removed by trim below).
        words.add(word.toString());

        StringBuilder joined = new StringBuilder();
        for (String w : words) {
            joined.append(w).append(' ');
        }
        return joined.toString().trim();
    }

    /** Returns true if the token begins with an http or https scheme. */
    private static boolean startsWithUrlScheme(String token) {
        return token.startsWith("http://") || token.startsWith("https://");
    }

    /** Returns true if the token contains an http or https scheme anywhere. */
    private static boolean containsUrlScheme(String token) {
        return token.contains("http://") || token.contains("https://");
    }

    /**
     * Walks over the whitespace-separated tokens of the text, filling the
     * link and hashtag lists, and then splits the remaining plain text into
     * sentences on punctuation.
     */
    private void tokenize() {
        StringBuilder plainText = new StringBuilder();
        for (String token : WHITESPACE.split(this.text)) {
            // Retweet markers and mentions carry no content.
            if (token.equalsIgnoreCase("rt") || token.startsWith("@")) {
                continue;
            }
            if (startsWithUrlScheme(token) || token.endsWith(".com")) {
                this.links.add(token);
                continue;
            }
            // A link glued to other characters is dropped without being recorded.
            if (containsUrlScheme(token) || token.contains(".com")) {
                continue;
            }
            if (token.startsWith("#")) {
                this.hashTags.add(token.toLowerCase(Locale.ROOT));
                // Analyze the original-case token: the word split relies on
                // case transitions, which lower-casing would erase.
                this.analyzedhashTags.add(analyzeHashTags(token.replace("#", "")));
                continue;
            }
            plainText.append(token).append(' ');
        }

        String cleaned = plainText.toString().replace("'s", "");
        for (String sentence : SENTENCE_DELIMITERS.split(cleaned.trim())) {
            String trimmed = sentence.trim();
            if (trimmed.length() != 0) {
                this.sentences.add(trimmed);
            }
        }
    }

    /**
     * @return the tokens recognized as links; this is the internal list, not a copy
     */
    public List<String> getLinks() {
        return this.links;
    }

    /**
     * @return the non-empty, trimmed sentences of the plain text; this is the
     *         internal list, not a copy
     */
    public List<String> getSentences() {
        return this.sentences;
    }

    /**
     * @return the hashtags split into space-separated words, in the same order
     *         as {@link #getHashTags()}; this is the internal list, not a copy
     */
    public List<String> getHashTagsEntities() {
        return this.analyzedhashTags;
    }

    /**
     * @return the lower-cased hashtags including the leading '#'; this is the
     *         internal list, not a copy
     */
    public List<String> getHashTags() {
        return this.hashTags;
    }

    /**
     * @return true if the tweet text starts with '@'
     */
    public boolean isReply() {
        return this.isReply;
    }

    /**
     * Small manual demo: prints the word split of a sample hashtag.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s = "WhatEverthatIS";
        System.out.println(analyzeHashTags(s));
    }
}