package ldbc.snb.datagen.hadoop;
import ldbc.snb.datagen.dictionary.Dictionaries;
import ldbc.snb.datagen.generator.DatagenParams;
import ldbc.snb.datagen.generator.LDBCDatagen;
import ldbc.snb.datagen.objects.Organization;
import ldbc.snb.datagen.objects.Place;
import ldbc.snb.datagen.objects.Tag;
import ldbc.snb.datagen.objects.TagClass;
import ldbc.snb.datagen.serializer.InvariantSerializer;
import ldbc.snb.datagen.util.StringUtils;
import org.apache.hadoop.conf.Configuration;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
/**
 * Created by aprat on 12/17/14.
 *
 * Exports the entities held in the static {@link Dictionaries} (places, tags
 * together with their tag classes, and organizations) through a set of
 * {@link InvariantSerializer} instances.
 *
 * One serializer is created per {@code DatagenParams.numThreads}; exported
 * entities are handed to the serializers in rotation (see {@link #nextFile()}).
 */
public class HadoopInvariantSerializer {

    /** Configuration property naming the {@link InvariantSerializer} implementation class. */
    private static final String SERIALIZER_CLASS_KEY = "ldbc.snb.datagen.serializer.invariantSerializer";

    /** Limit passed to {@code StringUtils.clampString} for every exported name. */
    private static final int MAX_NAME_LENGTH = 256;

    private InvariantSerializer[] invariantSerializer_;
    private final TreeSet<Integer> exportedClasses_;
    private int currentFile_ = 0;
    private final Configuration conf_;

    /**
     * Copies the given configuration and initializes the data generator with the copy.
     *
     * @param conf configuration to copy; the caller's instance is not retained
     */
    public HadoopInvariantSerializer(Configuration conf) {
        conf_ = new Configuration(conf);
        exportedClasses_ = new TreeSet<Integer>();
        LDBCDatagen.init(conf_);
    }

    /**
     * Creates the serializers, exports places, tags and organizations (in that
     * order) and closes the serializers.
     *
     * A failure while creating or initializing a serializer is propagated to the
     * caller instead of being printed and ignored, and the serializers are closed
     * even when an export fails.
     *
     * @throws Exception if a serializer cannot be created, initialized or closed,
     *                   or if an export fails
     */
    public void run() throws Exception {
        openSerializers();
        try {
            exportPlaces();
            exportTags();
            exportOrganizations();
        } finally {
            closeSerializers();
        }
    }

    /**
     * Instantiates and initializes one serializer per configured thread.
     *
     * A slot of {@code invariantSerializer_} is only filled once its serializer
     * has been initialized, so that a partial failure can close exactly the
     * serializers that were opened.
     */
    private void openSerializers() throws Exception {
        String className = conf_.get(SERIALIZER_CLASS_KEY);
        if (className == null) {
            throw new IllegalStateException("Missing configuration property " + SERIALIZER_CLASS_KEY);
        }
        invariantSerializer_ = new InvariantSerializer[DatagenParams.numThreads];
        try {
            for (int i = 0; i < invariantSerializer_.length; ++i) {
                InvariantSerializer serializer = (InvariantSerializer) Class.forName(className).newInstance();
                serializer.initialize(conf_, i);
                invariantSerializer_[i] = serializer;
            }
        } catch (Exception e) {
            try {
                closeSerializers();
            } catch (Exception ignored) {
                // The initialization failure is the one worth reporting.
            }
            throw e;
        }
    }

    /**
     * Closes every serializer that was opened. All of them are attempted even if
     * one fails; the first failure is rethrown afterwards.
     */
    private void closeSerializers() throws Exception {
        Exception firstFailure = null;
        for (int i = 0; i < invariantSerializer_.length; ++i) {
            InvariantSerializer serializer = invariantSerializer_[i];
            if (serializer == null) {
                continue; // never initialized
            }
            try {
                serializer.close();
            } catch (Exception e) {
                if (firstFailure == null) {
                    firstFailure = e;
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }

    /**
     * Returns the index of the serializer to use for the next entity and advances
     * the cursor, wrapping around after {@code DatagenParams.numThreads} entries.
     */
    private int nextFile() {
        int ret = currentFile_;
        currentFile_ = (currentFile_ + 1) % DatagenParams.numThreads;
        return ret;
    }

    /**
     * Exports the tag class of the given tag and then each parent class in turn.
     *
     * The walk stops at a class id of -1 or at the first class that has already
     * been exported, so every tag class is exported at most once.
     */
    private void exportTagHierarchy(Tag tag) {
        int classId = tag.tagClass;
        // add() returns false for a class that was already exported.
        while (classId != -1 && exportedClasses_.add(classId)) {
            TagClass tagClass = new TagClass();
            tagClass.id = classId;
            tagClass.name = StringUtils.clampString(Dictionaries.tags.getClassName(classId), MAX_NAME_LENGTH);
            tagClass.parent = Dictionaries.tags.getClassParent(tagClass.id);
            invariantSerializer_[nextFile()].export(tagClass);
            classId = tagClass.parent;
        }
    }

    /**
     * Exports every place of the place dictionary.
     *
     * Note that the clamped name is written back into the {@link Place} object
     * returned by the dictionary.
     */
    public void exportPlaces() {
        Set<Integer> locations = Dictionaries.places.getPlaces();
        for (Integer locationId : locations) {
            Place place = Dictionaries.places.getLocation(locationId);
            place.setName(StringUtils.clampString(place.getName(), MAX_NAME_LENGTH));
            invariantSerializer_[nextFile()].export(place);
        }
    }

    /**
     * Exports every company, then every university, as an {@link Organization}.
     *
     * A company's location is its country; a university's location is its city.
     */
    public void exportOrganizations() {
        Set<Long> companies = Dictionaries.companies.getCompanies();
        for (Long companyId : companies) {
            Organization company = new Organization();
            company.id = companyId;
            company.type = Organization.OrganisationType.company;
            company.name = StringUtils.clampString(Dictionaries.companies.getCompanyName(company.id), MAX_NAME_LENGTH);
            company.location = Dictionaries.companies.getCountry(company.id);
            invariantSerializer_[nextFile()].export(company);
        }

        Set<Long> universities = Dictionaries.universities.getUniversities();
        for (Long universityId : universities) {
            Organization university = new Organization();
            university.id = universityId;
            university.type = Organization.OrganisationType.university;
            university.name = StringUtils.clampString(Dictionaries.universities.getUniversityName(university.id), MAX_NAME_LENGTH);
            university.location = Dictionaries.universities.getUniversityCity(university.id);
            invariantSerializer_[nextFile()].export(university);
        }
    }

    /**
     * Exports every tag of the tag dictionary, each followed by the part of its
     * tag class hierarchy that has not been exported yet.
     */
    public void exportTags() {
        Set<Integer> tags = Dictionaries.tags.getTags();
        for (Integer tagId : tags) {
            Tag tag = new Tag();
            tag.id = tagId;
            tag.name = StringUtils.clampString(Dictionaries.tags.getName(tag.id), MAX_NAME_LENGTH);
            // String is immutable: the escaped name has to be assigned back,
            // otherwise the double quotes are exported unescaped.
            tag.name = tag.name.replace("\"", "\\\"");
            tag.tagClass = Dictionaries.tags.getTagClass(tag.id);
            invariantSerializer_[nextFile()].export(tag);
            exportTagHierarchy(tag);
        }
    }
}