/* * Copyright (c) 2013 LDBC * Linked Data Benchmark Council (http://ldbc.eu) * * This file is part of ldbc_socialnet_dbgen. * * ldbc_socialnet_dbgen is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * ldbc_socialnet_dbgen is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ldbc_socialnet_dbgen. If not, see <http://www.gnu.org/licenses/>. * * Copyright (C) 2011 OpenLink Software <bdsmt@openlinksw.com> * All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; only Version 2 of the License dated * June 1991. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ package ldbc.snb.datagen.serializer.snb.interactive; import ldbc.snb.datagen.dictionary.Dictionaries; import ldbc.snb.datagen.objects.Knows; import ldbc.snb.datagen.objects.Person; import ldbc.snb.datagen.objects.StudyAt; import ldbc.snb.datagen.objects.WorkAt; import ldbc.snb.datagen.serializer.HDFSWriter; import ldbc.snb.datagen.serializer.PersonSerializer; import ldbc.snb.datagen.serializer.Turtle; import ldbc.snb.datagen.vocabulary.*; import org.apache.hadoop.conf.Configuration; import java.text.SimpleDateFormat; import java.util.Date; public class TurtlePersonSerializer extends PersonSerializer { private HDFSWriter [] writers; private long workAtId = 0; private long studyAtId = 0; private long knowsId = 0; private SimpleDateFormat dateTimeFormat = null; private enum FileNames { SOCIAL_NETWORK ("social_network_person"); private final String name; private FileNames( String name ) { this.name = name; } public String toString() { return name; } } public TurtlePersonSerializer() { } public void initialize(Configuration conf, int reducerId) { dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"); int numFiles = FileNames.values().length; writers = new HDFSWriter[numFiles]; for( int i = 0; i < numFiles; ++i) { writers[i] = new HDFSWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"), FileNames.values()[i].toString()+"_"+reducerId,conf.getInt("ldbc.snb.datagen.numPartitions",1),conf.getBoolean("ldbc.snb.datagen.serializer.compressed",false),"ttl"); writers[i].writeAllPartitions(Turtle.getNamespaces()); writers[i].writeAllPartitions(Turtle.getStaticNamespaces()); } } @Override public void close() { int numFiles = FileNames.values().length; for(int i = 0; i < numFiles; ++i) { writers[i].close(); } } @Override protected void serialize(final Person p) { StringBuffer result = new StringBuffer(19000); String prefix = SN.getPersonURI(p.accountId()); Turtle.AddTriple(result, true, false, prefix, RDF.type, SNVOC.Person); Turtle.AddTriple(result, false, false, prefix, SNVOC.id, Turtle.createDataTypeLiteral(Long.toString(p.accountId()), XSD.Long)); Turtle.AddTriple(result, false, false, prefix, SNVOC.firstName, Turtle.createLiteral(p.firstName())); Turtle.AddTriple(result, false, false, prefix, SNVOC.lastName, Turtle.createLiteral(p.lastName())); if(p.gender() == 1) { Turtle.AddTriple(result, false, false, prefix, SNVOC.gender, Turtle.createLiteral("male")); } else { Turtle.AddTriple(result, false, false, prefix, SNVOC.gender, Turtle.createLiteral("female")); } Turtle.AddTriple(result, false, false, prefix, SNVOC.birthday, Turtle.createDataTypeLiteral(Dictionaries.dates.formatDate(p.birthDay()), XSD.Date)); Turtle.AddTriple(result, false, false, prefix, SNVOC.ipaddress, Turtle.createLiteral(p.ipAddress().toString())); Turtle.AddTriple(result, false, false, prefix, SNVOC.browser, Turtle.createLiteral(Dictionaries.browsers.getName(p.browserId()))); Turtle.AddTriple(result, false, true, prefix, SNVOC.creationDate, Turtle.createDataTypeLiteral(dateTimeFormat.format(p.creationDate()), XSD.DateTime)); Turtle.createTripleSPO(result, prefix, SNVOC.locatedIn, DBP.fullPrefixed(Dictionaries.places.getPlaceName(p.cityId()))); for (Integer i : p.languages()) { Turtle.createTripleSPO(result, prefix, SNVOC.speaks, Turtle.createLiteral(Dictionaries.languages.getLanguageName(i))); } for( String email : p.emails()) { Turtle.createTripleSPO(result, prefix, SNVOC.email, Turtle.createLiteral(email)); } for(Integer tag : p.interests()) { String interest = Dictionaries.tags.getName(tag); Turtle.createTripleSPO(result, prefix, SNVOC.hasInterest, SNTAG.fullPrefixed(interest)); } writers[FileNames.SOCIAL_NETWORK.ordinal()].write(result.toString()); } @Override protected void serialize(final StudyAt studyAt) { String prefix = SN.getPersonURI(studyAt.user); StringBuffer result = new StringBuffer(19000); long id = SN.formId(studyAtId); Turtle.createTripleSPO(result, prefix, SNVOC.studyAt, SN.getStudyAtURI(id)); Turtle.createTripleSPO(result, SN.getStudyAtURI(id), SNVOC.hasOrganisation, SN.getUnivURI(studyAt.university)); String yearString = Dictionaries.dates.formatYear(studyAt.year); Turtle.createTripleSPO(result, SN.getStudyAtURI(id), SNVOC.classYear, Turtle.createDataTypeLiteral(yearString, XSD.Integer)); studyAtId++; writers[FileNames.SOCIAL_NETWORK.ordinal()].write(result.toString()); } @Override protected void serialize(final WorkAt workAt) { String prefix = SN.getPersonURI(workAt.user); StringBuffer result = new StringBuffer(19000); long id = SN.formId(workAtId); Turtle.createTripleSPO(result, prefix, SNVOC.workAt, SN.getWorkAtURI(id)); Turtle.createTripleSPO(result, SN.getWorkAtURI(id), SNVOC.hasOrganisation, SN.getCompURI(workAt.company)); String yearString = Dictionaries.dates.formatYear(workAt.year); Turtle.createTripleSPO(result, SN.getWorkAtURI(id), SNVOC.workFrom, Turtle.createDataTypeLiteral(yearString, XSD.Integer)); workAtId++; writers[FileNames.SOCIAL_NETWORK.ordinal()].write(result.toString()); } @Override protected void serialize(final Person p, Knows knows) { String prefix = SN.getPersonURI(p.accountId()); StringBuffer result = new StringBuffer(19000); long id = SN.formId(knowsId); Turtle.createTripleSPO(result, prefix, SNVOC.knows, SN.getKnowsURI(id)); Turtle.createTripleSPO(result, SN.getKnowsURI(id), SNVOC.hasPerson, SN.getPersonURI(knows.to().accountId())); Turtle.createTripleSPO(result, SN.getKnowsURI(id), SNVOC.creationDate, Turtle.createDataTypeLiteral(dateTimeFormat.format(knows.creationDate()), XSD.DateTime)); writers[FileNames.SOCIAL_NETWORK.ordinal()].write(result.toString()); knowsId++; } public void reset() { workAtId = 0; studyAtId = 0; knowsId = 0; } }