package ldbc.snb.datagen.util;
import ldbc.snb.datagen.dictionary.Dictionaries;
import ldbc.snb.datagen.generator.DatagenParams;
import ldbc.snb.datagen.objects.*;
import java.io.IOException;
import java.io.OutputStream;
import java.util.*;
/**
* Created by aprat on 1/8/15.
*/
public class FactorTable {
static public class PersonCounts {
private long numFriends_ = 0;
private long numPosts_ = 0;
private long numLikes_ = 0;
private long numTagsOfMessages_ = 0;
private long numForums_ = 0;
private long numWorkPlaces_ = 0;
private long numComments_ = 0;
private int country_= 0;
private String name_ = null;
private ArrayList<Long> numMessagesPerMonth_ = null;
private ArrayList<Long> numForumsPerMonth_ = null;
public PersonCounts() {
numMessagesPerMonth_ = new ArrayList<Long>(36+1);
for( int i = 0; i < 36+1; ++i ) {
numMessagesPerMonth_.add(new Long(0));
}
numForumsPerMonth_ = new ArrayList<Long>(36+1);
for( int i = 0; i < 36+1; ++i ) {
numForumsPerMonth_.add(new Long(0));
}
}
public int country() {
return country_;
}
public String name() {
return name_;
}
public void country(int country) {
this.country_ = country;
}
public void name(String name) {
this.name_ = name;
}
public long numFriends() {
return numFriends_;
}
public void numFriends( long numFriends ) {
numFriends_ = numFriends;
}
public long numPosts() {
return numPosts_;
}
public void numPosts(long numPosts) {
numPosts_ = numPosts;
}
public void incrNumPosts() {
numPosts_++;
}
public long numLikes() {
return numLikes_;
}
public void numLikes( long numLikes ) {
numLikes_ = numLikes;
}
public void incrNumLikes() {
numLikes_++;
}
public long numTagsOfMessages () {
return numTagsOfMessages_;
}
public void numTagsOfMessages( long numTagsOfMessages ) {
numTagsOfMessages_ = numTagsOfMessages;
}
public long numForums() {
return numForums_;
}
public void incrNumForums() {
numForums_++;
}
public void numForums( long numForums ) {
numForums_ = numForums;
}
public long numWorkPlaces () {
return numWorkPlaces_;
}
public void numWorkPlaces( long numWorkPlaces ) {
numWorkPlaces_ = numWorkPlaces;
}
public long numComments() {
return numComments_;
}
public void numComments( long numComments ) {
numComments_ = numComments;
}
public void incrNumComments() {
numComments_++;
}
public ArrayList<Long> numMessagesPerMonth( ) {
return numMessagesPerMonth_;
}
public void numMessagesPerMonth( ArrayList<Long> numMessagesPerMonth) {
numMessagesPerMonth_.clear();
numMessagesPerMonth_.addAll(numMessagesPerMonth);
}
public void incrNumMessagesPerMonth( int month) {
numMessagesPerMonth_.set(month,numMessagesPerMonth_.get(month)+1);
}
public ArrayList<Long> numForumsPerMonth() {
return numForumsPerMonth_;
}
public void numGroupsPerMonth( ArrayList<Long> numForumsPerMonth) {
numForumsPerMonth_.clear();
numForumsPerMonth_ = numForumsPerMonth;
}
public void incrNumForumsPerMonth( int month) {
numForumsPerMonth_.set(month,numForumsPerMonth_.get(month)+1);
}
}
private HashMap<Long, PersonCounts > personCounts_;
private HashMap<Integer, Long> postsPerCountry_;
private HashMap<Integer, Long> tagClassCount_;
private HashMap<String,Long> firstNameCount_;
private HashMap<Integer,Long> tagCount_;
private HashMap<Long, String> medianFirstName_;
private long minWorkFrom_ = Long.MAX_VALUE;
private long maxWorkFrom_ = Long.MIN_VALUE;
public FactorTable() {
personCounts_ = new HashMap<Long, PersonCounts >();
postsPerCountry_ = new HashMap<Integer, Long>();
tagClassCount_ = new HashMap<Integer, Long> ();
firstNameCount_ = new HashMap<String, Long>();
tagCount_ = new HashMap<Integer, Long>();
medianFirstName_ = new HashMap<Long, String>();
}
private PersonCounts personCounts(Long id) {
PersonCounts ret = personCounts_.get(id);
if(ret == null) {
ret = new FactorTable.PersonCounts();
personCounts_.put(id, ret);
}
return ret;
}
private void incrPostPerCountry( int country ) {
Long num = postsPerCountry_.get(country);
if( num == null ) {
num = new Long(0);
}
postsPerCountry_.put(country, ++num);
}
private void incrTagClassCount( int tagClass ) {
Long num = tagClassCount_.get(tagClass);
if( num == null ) {
num = new Long(0);
}
tagClassCount_.put(tagClass,++num);
}
private void incrTagCount( int tag ) {
Long num = tagCount_.get(tag);
if( num == null ) {
num = new Long(0);
}
tagCount_.put(tag, ++num);
}
private void incrFirstNameCount( String name ) {
Long num = firstNameCount_.get(name);
if( num == null ) {
num = new Long(0);
}
firstNameCount_.put(name, ++num);
}
public void extractFactors( Person person ) {
if( person.creationDate() < Dictionaries.dates.getUpdateThreshold() || !DatagenParams.updateStreams ) {
personCounts(person.accountId()).country(person.countryId());
personCounts(person.accountId()).name(person.firstName());
personCounts(person.accountId()).numFriends(person.knows().size());
personCounts(person.accountId()).numWorkPlaces(person.companies().size());
for (Map.Entry<Long, Long> e : person.companies().entrySet()) {
if (minWorkFrom_ > e.getValue()) minWorkFrom_ = e.getValue();
if (maxWorkFrom_ < e.getValue()) maxWorkFrom_ = e.getValue();
}
incrFirstNameCount(person.firstName());
String medianName = Dictionaries.names.getMedianGivenName(person.countryId(), person.gender() == 1,
Dictionaries.dates.getBirthYear(person.birthDay()));
medianFirstName_.put(person.accountId(), medianName);
}
}
public void extractFactors( ForumMembership member ) {
if( member.creationDate() < Dictionaries.dates.getUpdateThreshold() || !DatagenParams.updateStreams ) {
long memberId = member.person().accountId();
personCounts(memberId).incrNumForums();
int bucket = Dictionaries.dates.getNumberOfMonths(member.creationDate(), DatagenParams.startMonth, DatagenParams.startYear);
if (bucket < 36 + 1)
personCounts(memberId).incrNumForumsPerMonth(bucket);
}
}
public void extractFactors( Comment comment ) {
if( comment.creationDate() < Dictionaries.dates.getUpdateThreshold() || !DatagenParams.updateStreams ) {
assert personCounts_.get(comment.author().accountId()) != null : "Person counts does not exist when extracting factors from comment";
extractFactors((Message) comment);
personCounts(comment.author().accountId()).incrNumComments();
}
}
public void extractFactors( Post post ) {
if( post.creationDate() < Dictionaries.dates.getUpdateThreshold() || !DatagenParams.updateStreams ) {
assert(personCounts_.get(post.author().accountId()) != null): "Person counts does not exist when extracting factors from post";
extractFactors((Message) post);
personCounts(post.author().accountId()).incrNumPosts();
}
}
public void extractFactors( Photo photo ) {
if( photo.creationDate() < Dictionaries.dates.getUpdateThreshold() || !DatagenParams.updateStreams ) {
assert(personCounts_.get(photo.author().accountId()) != null): "Person counts does not exist when extracting factors from photo";
extractFactors((Message) photo);
personCounts(photo.author().accountId()).incrNumPosts();
}
}
private void extractFactors( Message message ) {
if( message.creationDate() < Dictionaries.dates.getUpdateThreshold() || !DatagenParams.updateStreams ) {
assert(personCounts_.get(message.author().accountId()) != null): "Person counts does not exist when extracting factors from message";
long authorId = message.author().accountId();
long current = personCounts(authorId).numTagsOfMessages();
personCounts(authorId).numTagsOfMessages(current + message.tags().size());
int bucket = Dictionaries.dates.getNumberOfMonths(message.creationDate(), DatagenParams.startMonth, DatagenParams.startYear);
if (bucket < 36 + 1)
personCounts(authorId).incrNumMessagesPerMonth(bucket);
incrPostPerCountry(message.countryId());
for (Integer t : message.tags()) {
Integer tagClass = Dictionaries.tags.getTagClass(t);
incrTagClassCount(tagClass);
incrTagCount(t);
}
}
}
public void extractFactors( Like like ) {
if( like.date < Dictionaries.dates.getUpdateThreshold() || !DatagenParams.updateStreams ) {
assert(personCounts_.get(like.user) != null): "Person counts does not exist when extracting factors from like";
personCounts(like.user).incrNumLikes();
}
}
public void writePersonFactors(OutputStream writer ) {
try {
Map<Integer,List<String>> countryNames = new TreeMap<Integer,List<String>>();
for (Map.Entry<Long, PersonCounts> c: personCounts_.entrySet()) {
if(c.getValue().name() != null) {
List<String> names = countryNames.get(c.getValue().country());
if (names == null) {
names = new ArrayList<String>();
countryNames.put(c.getValue().country(), names);
}
names.add(c.getValue().name());
}
}
Map<Integer,String> medianNames = new TreeMap<Integer,String>();
for (Map.Entry<Integer,List<String>> entry : countryNames.entrySet()) {
entry.getValue().sort( (a ,b) -> a.compareTo(b));
medianNames.put(entry.getKey(),entry.getValue().get(entry.getValue().size()/2));
}
for (Map.Entry<Long, PersonCounts> c: personCounts_.entrySet()){
PersonCounts count = c.getValue();
// correct the group counts
//count.numberOfGroups += count.numberOfFriends;
//String name = medianFirstName_.get(c.getKey());
String name = medianNames.get(c.getValue().country());
if( name != null ) {
StringBuffer strbuf = new StringBuffer();
strbuf.append(c.getKey()); strbuf.append(",");
strbuf.append(name);
strbuf.append(",");
strbuf.append(count.numFriends());
strbuf.append(",");
strbuf.append(count.numPosts());
strbuf.append(",");
strbuf.append(count.numLikes());
strbuf.append(",");
strbuf.append(count.numTagsOfMessages());
strbuf.append(",");
strbuf.append(count.numForums());
strbuf.append(",");
strbuf.append(count.numWorkPlaces());
strbuf.append(",");
strbuf.append(count.numComments());
strbuf.append(",");
for (Long bucket : count.numMessagesPerMonth()) {
strbuf.append(bucket);
strbuf.append(",");
}
for (Long bucket : count.numForumsPerMonth()) {
strbuf.append(bucket);
strbuf.append(",");
}
strbuf.setCharAt(strbuf.length() - 1, '\n');
writer.write(strbuf.toString().getBytes("UTF8"));
}
}
personCounts_.clear();
medianFirstName_.clear();
} catch (AssertionError e) {
System.err.println("Unable to write parameter counts");
System.err.println(e.getMessage());
e.printStackTrace();
} catch (IOException e) {
System.err.println("Unable to write parameter counts");
System.err.println(e.getMessage());
e.printStackTrace();
}
}
public void writeActivityFactors(OutputStream writer ) {
try {
writer.write(Integer.toString(postsPerCountry_.size()).getBytes("UTF8"));
writer.write("\n".getBytes("UTF8"));
for (Map.Entry<Integer, Long> c: postsPerCountry_.entrySet()){
StringBuffer strbuf = new StringBuffer();
strbuf.append(Dictionaries.places.getPlaceName(c.getKey()));
strbuf.append(",");
strbuf.append(c.getValue());
strbuf.append("\n");
writer.write(strbuf.toString().getBytes("UTF8"));
}
writer.write(Integer.toString(tagClassCount_.size()).getBytes("UTF8"));
writer.write("\n".getBytes("UTF8"));
for (Map.Entry<Integer, Long> c: tagClassCount_.entrySet()){
StringBuffer strbuf = new StringBuffer();
strbuf.append(Dictionaries.tags.getClassName(c.getKey()));
strbuf.append(",");
strbuf.append(Dictionaries.tags.getClassName(c.getKey()));
strbuf.append(",");
strbuf.append(c.getValue());
strbuf.append("\n");
writer.write(strbuf.toString().getBytes("UTF8"));
}
writer.write(Integer.toString(tagCount_.size()).getBytes("UTF8"));
writer.write("\n".getBytes("UTF8"));
for (Map.Entry<Integer, Long> c: tagCount_.entrySet()){
StringBuffer strbuf = new StringBuffer();
strbuf.append(Dictionaries.tags.getName(c.getKey()));
strbuf.append(",");
//strbuf.append(tagDictionary.getClassName(c.getKey()));
//strbuf.append(",");
strbuf.append(c.getValue());
strbuf.append("\n");
writer.write(strbuf.toString().getBytes("UTF8"));
}
writer.write(Integer.toString(firstNameCount_.size()).getBytes("UTF8"));
writer.write("\n".getBytes("UTF8"));
for (Map.Entry<String, Long> c: firstNameCount_.entrySet()){
StringBuffer strbuf = new StringBuffer();
strbuf.append(c.getKey());
strbuf.append(",");
strbuf.append(c.getValue());
strbuf.append("\n");
writer.write(strbuf.toString().getBytes("UTF8"));
}
StringBuffer strbuf = new StringBuffer();
strbuf.append(DatagenParams.startMonth); strbuf.append("\n");
strbuf.append(DatagenParams.startYear); strbuf.append("\n");
strbuf.append(Dictionaries.dates.formatYear(minWorkFrom_)); strbuf.append("\n");
strbuf.append(Dictionaries.dates.formatYear(maxWorkFrom_)); strbuf.append("\n");
writer.write(strbuf.toString().getBytes("UTF8"));
writer.flush();
writer.close();
} catch (IOException e) {
System.err.println("Unable to write parameter counts");
System.err.println(e.getMessage());
e.printStackTrace();
}
}
}