/*
* Copyright (c) 2013 LDBC
* Linked Data Benchmark Council (http://ldbc.eu)
*
* This file is part of ldbc_socialnet_dbgen.
*
* ldbc_socialnet_dbgen is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ldbc_socialnet_dbgen is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ldbc_socialnet_dbgen. If not, see <http://www.gnu.org/licenses/>.
*
* Copyright (C) 2011 OpenLink Software <bdsmt@openlinksw.com>
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; only Version 2 of the License dated
* June 1991.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package ldbc.snb.datagen.dictionary;
import ldbc.snb.datagen.generator.DatagenParams;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Random;
import java.util.TreeMap;
import java.util.TreeSet;
public class TagMatrix {
private static final String SEPARATOR = " ";
private TreeMap<Integer, ArrayList<Integer>> relatedTags;
/**
* < @brief An array of related tags per tag.
*/
private TreeMap<Integer, ArrayList<Double>> cumulative;
private ArrayList<Integer> nonZeroTags;
/**
* < @brief The list of tags.
*/
public TagMatrix() {
cumulative = new TreeMap<Integer, ArrayList<Double>>();
relatedTags = new TreeMap<Integer, ArrayList<Integer>>();
nonZeroTags = new ArrayList<Integer>();
load(DatagenParams.tagMatrixFile);
}
/**
* @param tagMatrixFileName The tag matrix file name.
* @brief Loads the tag matrix from a file.
*/
private void load(String tagMatrixFileName) {
try {
BufferedReader dictionary = new BufferedReader(new InputStreamReader(getClass().getResourceAsStream(tagMatrixFileName), "UTF-8"));
String line;
while ((line = dictionary.readLine()) != null) {
String data[] = line.split(SEPARATOR);
int celebrityId = Integer.parseInt(data[0]);
int topicId = Integer.parseInt(data[1]);
double cumuluative = Double.parseDouble(data[2]);
ArrayList<Double> cum = cumulative.get(celebrityId);
if(cum == null ) cumulative.put(celebrityId,new ArrayList<Double>());
cumulative.get(celebrityId).add(cumuluative);
ArrayList<Integer> related = relatedTags.get(celebrityId);
if(related == null) relatedTags.put(celebrityId,new ArrayList<Integer>());
relatedTags.get(celebrityId).add(topicId);
}
for(Integer tag : relatedTags.keySet()) {
nonZeroTags.add(tag);
}
dictionary.close();
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* @param randomTag The random tag number generator.
* @param tag The tag identifier.
* @return The related tag identifier.
* @brief Gets a random related tag.
*/
public Integer getRandomRelated(Random randomTag, int tag) {
int tagId = tag;
if(relatedTags.get(tagId) == null) {
tagId = nonZeroTags.get(randomTag.nextInt(nonZeroTags.size()));
}
return relatedTags.get(tagId).get(randomTag.nextInt(relatedTags.get(tagId).size()));
}
/**
* @param randomTopic The random number generator used to select aditional popular tags
* @param randomTag The random number generator used to select related tags.
* @param popularTagId The popular tag identifier.
* @param numTags The number of related tags to retrieve.
* @return The set of related tags.
* @brief Get a set of related tags.
*/
public TreeSet<Integer> getSetofTags(Random randomTopic, Random randomTag, int popularTagId, int numTags) {
TreeSet<Integer> resultTags = new TreeSet<Integer>();
resultTags.add(popularTagId);
while (resultTags.size() < numTags) {
int tagId;
tagId = popularTagId;
if(relatedTags.get(tagId) == null) {
tagId = nonZeroTags.get(randomTag.nextInt(nonZeroTags.size()));
}
// Doing binary search for finding the tag
double randomDis = randomTag.nextDouble();
int lowerBound = 0;
int upperBound = relatedTags.get(tagId).size();
int midPoint = (upperBound + lowerBound) / 2;
while (upperBound > (lowerBound + 1)) {
if (cumulative.get(tagId).get(midPoint) > randomDis) {
upperBound = midPoint;
} else {
lowerBound = midPoint;
}
midPoint = (upperBound + lowerBound) / 2;
}
resultTags.add(relatedTags.get(tagId).get(midPoint));
}
return resultTags;
}
}