/*
 * Copyright (c) 2013 LDBC
 * Linked Data Benchmark Council (http://ldbc.eu)
 *
 * This file is part of ldbc_socialnet_dbgen.
 *
 * ldbc_socialnet_dbgen is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * ldbc_socialnet_dbgen is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ldbc_socialnet_dbgen. If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) 2011 OpenLink Software <bdsmt@openlinksw.com>
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; only Version 2 of the License dated
 * June 1991.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
package ldbc.snb.datagen.serializer;

import ldbc.snb.datagen.dictionary.Dictionaries;
import ldbc.snb.datagen.generator.DatagenParams;
import ldbc.snb.datagen.hadoop.TupleKey;
import ldbc.snb.datagen.hadoop.UpdateEventKey;
import ldbc.snb.datagen.objects.*;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.Properties;

/**
 * Created by aprat on 3/27/14.
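 *
 * Serializes update-stream events (person, friendship, forum, forum membership,
 * post, photo, comment and like additions) as pipe-separated records into one
 * Hadoop SequenceFile per partition, and writes the stream statistics
 * (minimum/maximum event time, event count, update interleave) to a companion
 * .properties file when the writers are closed.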
 */
public class UpdateEventSerializer {

    // Running statistics of the generated stream, flushed to a .properties file on close().
    private class UpdateStreamStats {
        public long minDate_ = Long.MAX_VALUE;
        public long maxDate_ = Long.MIN_VALUE;
        public long count_ = 0;
    }

    private SequenceFile.Writer[] streamWriter_;
    private ArrayList<String> data_;
    private ArrayList<String> list_;
    private UpdateEvent currentEvent_;
    private int numPartitions_ = 1;
    private int nextPartition_ = 0;
    private StringBuilder stringBuffer_;
    private long currentDependantDate_ = 0;
    private Configuration conf_;
    private UpdateStreamStats stats_;
    private String fileNamePrefix_;
    private int reducerId_;

    public UpdateEventSerializer(Configuration conf, String fileNamePrefix, int reducerId, int numPartitions) throws IOException {
        conf_ = conf;
        reducerId_ = reducerId;
        stringBuffer_ = new StringBuilder(512);
        data_ = new ArrayList<String>();
        list_ = new ArrayList<String>();
        currentEvent_ = new UpdateEvent(-1, -1, UpdateEvent.UpdateEventType.NO_EVENT, "");
        numPartitions_ = numPartitions;
        stats_ = new UpdateStreamStats();
        fileNamePrefix_ = fileNamePrefix;

        // If a previous pass left statistics behind, load them once and remove the file.
        FileSystem fs = FileSystem.get(conf);
        Path propertiesFile = new Path(fileNamePrefix_ + ".properties");
        if (fs.exists(propertiesFile)) {
            FSDataInputStream file = fs.open(propertiesFile);
            Properties properties = new Properties();
            properties.load(file);
            stats_.minDate_ = Long.parseLong(properties.getProperty("ldbc.snb.interactive.min_write_event_start_time"));
            stats_.maxDate_ = Long.parseLong(properties.getProperty("ldbc.snb.interactive.max_write_event_start_time"));
            stats_.count_ = Long.parseLong(properties.getProperty("ldbc.snb.interactive.num_events"));
            file.close();
            fs.delete(propertiesFile, true);
        }

        // One SequenceFile writer per output partition.
        streamWriter_ = new SequenceFile.Writer[numPartitions_];
        FileContext fc = FileContext.getFileContext(conf);
        for (int i = 0; i < numPartitions_; ++i) {
            Path outFile = new Path(fileNamePrefix_ + "_" + i);
            streamWriter_[i] = SequenceFile.createWriter(fc, conf, outFile,
                    UpdateEventKey.class, Text.class, CompressionType.NONE,
                    new DefaultCodec(), new SequenceFile.Metadata(),
                    EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
                    Options.CreateOpts.checksumParam(Options.ChecksumOpt.createDisabled()));
        }
    }

    // Round-robin over the output partitions.
    public void changePartition() {
        nextPartition_ = (nextPartition_ + 1) % numPartitions_;
    }

    public void writeKeyValue(UpdateEvent event) throws IOException {
        // Events dated after the end of the simulation interval are dropped.
        if (event.date <= Dictionaries.dates.getEndDateTime()) {
            StringBuilder string = new StringBuilder();
            string.append(Long.toString(event.date));
            string.append("|");
            string.append(Long.toString(event.dependantDate));
            string.append("|");
            string.append(Integer.toString(event.type.ordinal() + 1));
            string.append("|");
            string.append(event.eventData);
            string.append("\n");
            streamWriter_[nextPartition_].append(new UpdateEventKey(event.date, reducerId_, nextPartition_),
                    new Text(string.toString()));
        }
    }

    private String formatStringArray(ArrayList<String> array, String separator) {
        if (array.size() == 0) return "";
        stringBuffer_.setLength(0);
        for (String s : array) {
            stringBuffer_.append(s);
            stringBuffer_.append(separator);
        }
        // Drop the trailing separator.
        return stringBuffer_.substring(0, stringBuffer_.length() - 1);
    }
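    /*
     * Record layout produced by beginEvent()/endEvent():
     *   eventDate|dependantDate|eventType|field1|field2|...
     * List-valued fields are joined with ';' and nested tuples (studyAt, workAt)
     * with ',', as built by beginList()/endList() and formatStringArray().
     */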
    private void beginEvent(long date, UpdateEvent.UpdateEventType type) {
        stats_.minDate_ = Math.min(stats_.minDate_, date);
        stats_.maxDate_ = Math.max(stats_.maxDate_, date);
        stats_.count_++;
        currentEvent_.date = date;
        currentEvent_.dependantDate = currentDependantDate_;
        currentEvent_.type = type;
        currentEvent_.eventData = null;
        data_.clear();
    }

    private void endEvent() throws IOException {
        currentEvent_.eventData = formatStringArray(data_, "|");
        writeKeyValue(currentEvent_);
    }

    private void beginList() {
        list_.clear();
    }

    private void endList() {
        data_.add(formatStringArray(list_, ";"));
    }

    public void close() {
        try {
            FileSystem fs = FileSystem.get(conf_);
            for (int i = 0; i < numPartitions_; ++i) {
                streamWriter_[i].close();
            }
            if (DatagenParams.updateStreams) {
                // Publish the stream statistics consumed by the interactive workload driver.
                OutputStream output = fs.create(new Path(fileNamePrefix_ + ".properties"), true);
                output.write(("ldbc.snb.interactive.gct_delta_duration:" + DatagenParams.deltaTime + "\n").getBytes());
                output.write(("ldbc.snb.interactive.min_write_event_start_time:" + stats_.minDate_ + "\n").getBytes());
                output.write(("ldbc.snb.interactive.max_write_event_start_time:" + stats_.maxDate_ + "\n").getBytes());
                if (stats_.count_ != 0) {
                    output.write(("ldbc.snb.interactive.update_interleave:" + (stats_.maxDate_ - stats_.minDate_) / stats_.count_ + "\n").getBytes());
                } else {
                    output.write(("ldbc.snb.interactive.update_interleave:0\n").getBytes());
                }
                output.write(("ldbc.snb.interactive.num_events:" + stats_.count_).getBytes());
                output.close();
            }
        } catch (IOException e) {
            System.err.println(e.getMessage());
            System.exit(-1);
        }
    }

    public void export(Person person) throws IOException {
        currentDependantDate_ = 0;
        beginEvent(person.creationDate(), UpdateEvent.UpdateEventType.ADD_PERSON);
        data_.add(Long.toString(person.accountId()));
        data_.add(person.firstName());
        data_.add(person.lastName());
        if (person.gender() == 1) {
            data_.add("male");
        } else {
            data_.add("female");
        }
        data_.add(Long.toString(person.birthDay()));
        data_.add(Long.toString(person.creationDate()));
        data_.add(person.ipAddress().toString());
        data_.add(Dictionaries.browsers.getName(person.browserId()));
        data_.add(Integer.toString(person.cityId()));

        beginList();
        for (Integer l : person.languages()) {
            list_.add(Dictionaries.languages.getLanguageName(l));
        }
        endList();

        beginList();
        for (String e : person.emails()) {
            list_.add(e);
        }
        endList();

        beginList();
        for (Integer tag : person.interests()) {
            list_.add(Integer.toString(tag));
        }
        endList();

        beginList();
        int universityId = person.universityLocationId();
        if (universityId != -1 && person.classYear() != -1) {
            ArrayList<String> studyAtData = new ArrayList<String>();
            studyAtData.add(Long.toString(Dictionaries.universities.getUniversityFromLocation(universityId)));
            studyAtData.add(Dictionaries.dates.formatYear(person.classYear()));
            list_.add(formatStringArray(studyAtData, ","));
        }
        endList();

        beginList();
        for (Long companyId : person.companies().keySet()) {
            ArrayList<String> workAtData = new ArrayList<String>();
            workAtData.add(Long.toString(companyId));
            workAtData.add(Dictionaries.dates.formatYear(person.companies().get(companyId)));
            list_.add(formatStringArray(workAtData, ","));
        }
        endList();

        endEvent();
    }

    public void export(Person p, Knows k) throws IOException {
        // Serialize each friendship once, from the lower to the higher person id.
        if (p.accountId() < k.to().accountId()) {
            currentDependantDate_ = Math.max(p.creationDate(), k.to().creationDate());
            beginEvent(k.creationDate(), UpdateEvent.UpdateEventType.ADD_FRIENDSHIP);
            data_.add(Long.toString(p.accountId()));
            data_.add(Long.toString(k.to().accountId()));
            data_.add(Long.toString(k.creationDate()));
            endEvent();
        }
    }
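    /*
     * Posts and photos share the ADD_POST event type: a post leaves the second
     * field empty, while a photo (see export(Photo) below) places its content in
     * that field and leaves the language and content fields empty with length 0.
     */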
    public void export(Post post) throws IOException {
        currentDependantDate_ = post.author().creationDate();
        beginEvent(post.creationDate(), UpdateEvent.UpdateEventType.ADD_POST);
        String empty = "";
        data_.add(Long.toString(post.messageId()));
        data_.add(empty);
        data_.add(Long.toString(post.creationDate()));
        data_.add(post.ipAddress().toString());
        data_.add(Dictionaries.browsers.getName(post.browserId()));
        data_.add(Dictionaries.languages.getLanguageName(post.language()));
        data_.add(post.content());
        data_.add(Long.toString(post.content().length()));
        data_.add(Long.toString(post.author().accountId()));
        data_.add(Long.toString(post.forumId()));
        data_.add(Long.toString(Dictionaries.ips.getLocation(post.ipAddress())));
        beginList();
        for (int tag : post.tags()) {
            list_.add(Integer.toString(tag));
        }
        endList();
        endEvent();
    }

    public void export(Like like) throws IOException {
        currentDependantDate_ = like.userCreationDate;
        if (like.type == Like.LikeType.COMMENT) {
            beginEvent(like.date, UpdateEvent.UpdateEventType.ADD_LIKE_COMMENT);
        } else {
            beginEvent(like.date, UpdateEvent.UpdateEventType.ADD_LIKE_POST);
        }
        data_.add(Long.toString(like.user));
        data_.add(Long.toString(like.messageId));
        data_.add(Long.toString(like.date));
        endEvent();
    }

    public void export(Photo photo) throws IOException {
        currentDependantDate_ = photo.author().creationDate();
        beginEvent(photo.creationDate(), UpdateEvent.UpdateEventType.ADD_POST);
        String empty = "";
        data_.add(Long.toString(photo.messageId()));
        data_.add(photo.content());
        data_.add(Long.toString(photo.creationDate()));
        data_.add(photo.ipAddress().toString());
        data_.add(Dictionaries.browsers.getName(photo.browserId()));
        data_.add(empty);
        data_.add(empty);
        data_.add("0");
        data_.add(Long.toString(photo.author().accountId()));
        data_.add(Long.toString(photo.forumId()));
        data_.add(Long.toString(Dictionaries.ips.getLocation(photo.ipAddress())));
        beginList();
        for (int tag : photo.tags()) {
            list_.add(Integer.toString(tag));
        }
        endList();
        endEvent();
    }

    public void export(Comment comment) throws IOException {
        currentDependantDate_ = comment.author().creationDate();
        beginEvent(comment.creationDate(), UpdateEvent.UpdateEventType.ADD_COMMENT);
        data_.add(Long.toString(comment.messageId()));
        data_.add(Long.toString(comment.creationDate()));
        data_.add(comment.ipAddress().toString());
        data_.add(Dictionaries.browsers.getName(comment.browserId()));
        data_.add(comment.content());
        data_.add(Integer.toString(comment.content().length()));
        data_.add(Long.toString(comment.author().accountId()));
        data_.add(Long.toString(Dictionaries.ips.getLocation(comment.ipAddress())));
        if (comment.replyOf() == comment.postId()) {
            data_.add(Long.toString(comment.postId()));
            data_.add("-1");
        } else {
            data_.add("-1");
            data_.add(Long.toString(comment.replyOf()));
        }
        beginList();
        for (int tag : comment.tags()) {
            list_.add(Integer.toString(tag));
        }
        endList();
        endEvent();
    }

    public void export(Forum forum) throws IOException {
        currentDependantDate_ = forum.moderator().creationDate();
        beginEvent(forum.creationDate(), UpdateEvent.UpdateEventType.ADD_FORUM);
        data_.add(Long.toString(forum.id()));
        data_.add(forum.title());
        data_.add(Long.toString(forum.creationDate()));
        data_.add(Long.toString(forum.moderator().accountId()));
        beginList();
        for (int tag : forum.tags()) {
            list_.add(Integer.toString(tag));
        }
        endList();
        endEvent();
    }
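    /**
     * Serializes an ADD_FORUM_MEMBERSHIP event: forum id, member person id and join date.
     */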
    public void export(ForumMembership membership) throws IOException {
        currentDependantDate_ = membership.person().creationDate();
        beginEvent(membership.creationDate(), UpdateEvent.UpdateEventType.ADD_FORUM_MEMBERSHIP);
        data_.add(Long.toString(membership.forumId()));
        data_.add(Long.toString(membership.person().accountId()));
        data_.add(Long.toString(membership.creationDate()));
        endEvent();
    }
}