/*
* Copyright (c) 2013 LDBC
* Linked Data Benchmark Council (http://ldbc.eu)
*
* This file is part of ldbc_socialnet_dbgen.
*
* ldbc_socialnet_dbgen is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ldbc_socialnet_dbgen is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ldbc_socialnet_dbgen. If not, see <http://www.gnu.org/licenses/>.
*
* Copyright (C) 2011 OpenLink Software <bdsmt@openlinksw.com>
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; only Version 2 of the License dated
* June 1991.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package ldbc.snb.datagen.serializer.snb.interactive;
import ldbc.snb.datagen.dictionary.Dictionaries;
import ldbc.snb.datagen.objects.Knows;
import ldbc.snb.datagen.objects.Person;
import ldbc.snb.datagen.objects.StudyAt;
import ldbc.snb.datagen.objects.WorkAt;
import ldbc.snb.datagen.serializer.HDFSCSVWriter;
import ldbc.snb.datagen.serializer.PersonSerializer;
import org.apache.hadoop.conf.Configuration;
import java.util.ArrayList;
import java.util.Iterator;
public class CSVPersonSerializer extends PersonSerializer {
private HDFSCSVWriter [] writers;
private enum FileNames {
PERSON ("person"),
PERSON_SPEAKS_LANGUAGE ("person_speaks_language"),
PERSON_HAS_EMAIL ("person_email_emailaddress"),
PERSON_LOCATED_IN_PLACE ("person_isLocatedIn_place"),
PERSON_HAS_INTEREST_TAG ("person_hasInterest_tag"),
PERSON_WORK_AT ("person_workAt_organisation"),
PERSON_STUDY_AT ("person_studyAt_organisation"),
PERSON_KNOWS_PERSON("person_knows_person");
private final String name;
private FileNames( String name ) {
this.name = name;
}
public String toString() {
return name;
}
}
public CSVPersonSerializer() {
}
public void initialize(Configuration conf, int reducerId) {
int numFiles = FileNames.values().length;
writers = new HDFSCSVWriter[numFiles];
for( int i = 0; i < numFiles; ++i) {
writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),FileNames.values()[i].toString()+"_"+reducerId,conf.getInt("ldbc.snb.datagen.serializer.numPartitions",1),conf.getBoolean("ldbc.snb.datagen.serializer.compressed",false),"|",conf.getBoolean("ldbc.snb.datagen.serializer.endlineSeparator",false));
}
ArrayList<String> arguments = new ArrayList<String>();
arguments.add("id");
arguments.add("firstName");
arguments.add("lastName");
arguments.add("gender");
arguments.add("birthday");
arguments.add("creationDate");
arguments.add("locationIP");
arguments.add("browserUsed");
writers[FileNames.PERSON.ordinal()].writeEntry(arguments);
arguments.clear();
arguments.add("Person.id");
arguments.add("language");
writers[FileNames.PERSON_SPEAKS_LANGUAGE.ordinal()].writeEntry(arguments);
arguments.clear();
arguments.add("Person.id");
arguments.add("email");
writers[FileNames.PERSON_HAS_EMAIL.ordinal()].writeEntry(arguments);
arguments.clear();
arguments.add("Person.id");
arguments.add("Place.id");
writers[FileNames.PERSON_LOCATED_IN_PLACE.ordinal()].writeEntry(arguments);
arguments.clear();
arguments.add("Person.id");
arguments.add("Tag.id");
writers[FileNames.PERSON_HAS_INTEREST_TAG.ordinal()].writeEntry(arguments);
arguments.clear();
arguments.add("Person.id");
arguments.add("Organisation.id");
arguments.add("workFrom");
writers[FileNames.PERSON_WORK_AT.ordinal()].writeEntry(arguments);
arguments.clear();
arguments.add("Person.id");
arguments.add("Organisation.id");
arguments.add("classYear");
writers[FileNames.PERSON_STUDY_AT.ordinal()].writeEntry(arguments);
arguments.clear();
arguments.add("Person.id");
arguments.add("Person.id");
arguments.add("creationDate");
writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeEntry(arguments);
}
@Override
public void close() {
int numFiles = FileNames.values().length;
for(int i = 0; i < numFiles; ++i) {
writers[i].close();
}
}
@Override
protected void serialize(final Person p) {
ArrayList<String> arguments = new ArrayList<String>();
arguments.add(Long.toString(p.accountId()));
arguments.add(p.firstName());
arguments.add(p.lastName());
if(p.gender() == 1) {
arguments.add("male");
} else {
arguments.add("female");
}
String dateString = Dictionaries.dates.formatDate(p.birthDay());
arguments.add(dateString);
dateString = Dictionaries.dates.formatDateTime(p.creationDate());
arguments.add(dateString);
arguments.add(p.ipAddress().toString());
arguments.add(Dictionaries.browsers.getName(p.browserId()));
writers[FileNames.PERSON.ordinal()].writeEntry(arguments);
ArrayList<Integer> languages = p.languages();
for (int i = 0; i < languages.size(); i++) {
arguments.clear();
arguments.add(Long.toString(p.accountId()));
arguments.add(Dictionaries.languages.getLanguageName(languages.get(i)));
writers[FileNames.PERSON_SPEAKS_LANGUAGE.ordinal()].writeEntry(arguments);
}
Iterator<String> itString = p.emails().iterator();
while (itString.hasNext()) {
arguments.clear();
String email = itString.next();
arguments.add(Long.toString(p.accountId()));
arguments.add(email);
writers[FileNames.PERSON_HAS_EMAIL.ordinal()].writeEntry(arguments);
}
arguments.clear();
arguments.add(Long.toString(p.accountId()));
arguments.add(Integer.toString(p.cityId()));
writers[FileNames.PERSON_LOCATED_IN_PLACE.ordinal()].writeEntry(arguments);
Iterator<Integer> itInteger = p.interests().iterator();
while (itInteger.hasNext()) {
arguments.clear();
Integer interestIdx = itInteger.next();
arguments.add(Long.toString(p.accountId()));
arguments.add(Integer.toString(interestIdx));
writers[FileNames.PERSON_HAS_INTEREST_TAG.ordinal()].writeEntry(arguments);
}
}
@Override
protected void serialize(final StudyAt studyAt) {
ArrayList<String> arguments = new ArrayList<String>();
String dateString = Dictionaries.dates.formatYear(studyAt.year);
arguments.add(Long.toString(studyAt.user));
arguments.add(Long.toString(studyAt.university));
arguments.add(dateString);
writers[FileNames.PERSON_STUDY_AT.ordinal()].writeEntry(arguments);
}
@Override
protected void serialize(final WorkAt workAt) {
ArrayList<String> arguments = new ArrayList<String>();
String dateString = Dictionaries.dates.formatYear(workAt.year);
arguments.add(Long.toString(workAt.user));
arguments.add(Long.toString(workAt.company));
arguments.add(dateString);
writers[FileNames.PERSON_WORK_AT.ordinal()].writeEntry(arguments);
}
@Override
protected void serialize(final Person p, Knows knows) {
ArrayList<String> arguments = new ArrayList<String>();
String dateString = Dictionaries.dates.formatDateTime(knows.creationDate());
arguments.add(Long.toString(p.accountId()));
arguments.add(Long.toString(knows.to().accountId()));
arguments.add(dateString);
writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeEntry(arguments);
}
@Override
public void reset() {
}
}