package ldbc.snb.datagen.generator;
import java.io.IOException;
import java.util.Properties;
import java.util.Random;
import java.util.TreeSet;
import ldbc.snb.datagen.dictionary.TagDictionary;
import ldbc.snb.datagen.objects.Person.PersonSummary;
import ldbc.snb.datagen.util.DistributionKey;
/**
 * Text generator that assembles tweet-like content by sampling hashtags,
 * sentiment words and popular words from {@link DistributionKey} instances
 * loaded from CSV files under {@code DatagenParams.SPARKBENCH_DIRECTORY}.
 */
public class TweetGenerator extends TextGenerator {

    /** Fraction of a sentence's sampled word count that is emitted as hashtags. */
    private static final double HASHTAG_RATIO = 0.4;

    /** Fraction of a sentence's sampled word count that is emitted as sentiment words. */
    private static final double SENTIMENT_RATIO = 0.4;

    private final DistributionKey hashtag;
    private final DistributionKey sentiment;
    private final DistributionKey popularword;
    // Distribution of popular, negative and neutral tweets.
    // Currently never initialised nor read; kept as a placeholder.
    private DistributionKey proportion;
    // Sampled once per sentence to obtain the sentence length (in words).
    private final DistributionKey lengthsentence;
    // Sampled once per tweet to obtain the number of sentences.
    private final DistributionKey lengthtweet;

    /**
     * Creates the generator and loads every distribution it samples from.
     *
     * @param random random source handed to the superclass
     * @param tagDic tag dictionary handed to the superclass
     * @throws NumberFormatException declared for callers; presumably raised while
     *                               parsing the distribution files (confirm in DistributionKey)
     * @throws IOException           declared for callers; presumably raised when a
     *                               distribution file cannot be read (confirm in DistributionKey)
     */
    public TweetGenerator(Random random, TagDictionary tagDic) throws NumberFormatException, IOException {
        super(random, tagDic);
        // Read the input files and build one distribution per file.
        hashtag = new DistributionKey(DatagenParams.SPARKBENCH_DIRECTORY + "/hashtags.csv");
        sentiment = new DistributionKey(DatagenParams.SPARKBENCH_DIRECTORY + "/sentiment.csv");
        popularword = new DistributionKey(DatagenParams.SPARKBENCH_DIRECTORY + "/words.csv");
        //proportion = new DistributionKey(DatagenParams.SPARKBENCH_DIRECTORY + "/sentiment.csv");
        lengthsentence = new DistributionKey(DatagenParams.SPARKBENCH_DIRECTORY + "/sentence_lengths.csv");
        lengthtweet = new DistributionKey(DatagenParams.SPARKBENCH_DIRECTORY + "/sentence_count.csv");
    }

    /**
     * Intentionally empty: all distributions are loaded in the constructor.
     */
    @Override
    protected void load() {
    }

    /**
     * Builds the text of one tweet.
     *
     * <p>The number of sentences is sampled from the tweet-length distribution.
     * For each sentence a word count is sampled; {@link #HASHTAG_RATIO} of it is
     * emitted as hashtags, {@link #SENTIMENT_RATIO} as sentiment words and the
     * remainder as popular words. Every emitted token is preceded by a single
     * space.
     *
     * @param member not used by this implementation
     * @param tags   not used by this implementation
     * @param prop   not used by this implementation
     * @return the generated text; empty when no token is emitted
     */
    @Override
    public String generateText(PersonSummary member, TreeSet<Integer> tags, Properties prop) {
        // Must be a real buffer: the previous null initialisation made every call throw.
        StringBuilder content = new StringBuilder();

        // Number of sentences in this tweet.
        double numSentences = Double.valueOf(lengthtweet.nextDouble(this.random));
        for (int i = 0; i < numSentences; ++i) {
            // Number of words in this sentence.
            double numWords = Double.valueOf(lengthsentence.nextDouble(this.random));

            // TODO: derive the hashtag count from a per-sentence hashtag distribution.
            int numHashtags = (int) (numWords * HASHTAG_RATIO);
            for (int j = 0; j < numHashtags; ++j) {
                content.append(' ').append(hashtag.nextDouble(this.random));
            }

            // TODO: derive the sentiment-word count from a per-sentence distribution.
            int numSentimentWords = (int) (numWords * SENTIMENT_RATIO);
            // Bounded by the sentiment count (was mistakenly bounded by the hashtag count).
            for (int j = 0; j < numSentimentWords; ++j) {
                content.append(' ').append(sentiment.nextDouble(this.random));
            }

            // Whatever is left of the sentence is filled with popular words.
            double remainingWords = numWords - (numSentimentWords + numHashtags);
            for (int j = 0; j < remainingWords; ++j) {
                content.append(' ').append(popularword.nextDouble(this.random));
            }
        }
        return content.toString();
    }
}