/*
* Copyright (c) 2013 LDBC
* Linked Data Benchmark Council (http://ldbc.eu)
*
* This file is part of ldbc_socialnet_dbgen.
*
* ldbc_socialnet_dbgen is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ldbc_socialnet_dbgen is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ldbc_socialnet_dbgen. If not, see <http://www.gnu.org/licenses/>.
*
* Copyright (C) 2011 OpenLink Software <bdsmt@openlinksw.com>
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; only Version 2 of the License dated
* June 1991.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package ldbc.snb.datagen.dictionary;
import ldbc.snb.datagen.generator.DatagenParams;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Random;
import java.util.Set;
/**
 * @brief Dictionary of tags and tag classes, loaded from classpath resources.
 *
 * The dictionary keeps, per country, a list of popular tags together with a
 * cumulative distribution used to draw random tags, plus the tag and tag
 * class metadata (names, labels, descriptions and class hierarchy).
 */
public class TagDictionary {

    /** Column separator of the tag, tag class and tag class hierarchy files. */
    private static final String SEPARATOR = "\t";

    /** Column separator of the popular-tags-by-country file. */
    private static final String POPULAR_TAGS_SEPARATOR = " ";

    /** Character set used to decode every dictionary resource. */
    private static final String CHARSET = "UTF-8";

    /** @brief The number of popular tags (largest popular tag id plus one). */
    private int numPopularTags;

    /** @brief Whether at least one country has a popular tag loaded. */
    private boolean anyCountryHasTags;

    /**
     * @brief The probability of keeping the requested country when asking for
     * a tag; another country is drawn when the random draw exceeds this value.
     */
    private final double tagCountryCorrProb;

    /** @brief The popular tag ids, indexed by country id. */
    private final ArrayList<ArrayList<Integer>> tagsByCountry;

    /** @brief The cumulative distribution of the popular tags, indexed by country id. */
    private final ArrayList<ArrayList<Double>> tagCummulativeDist;

    /** @brief The tag class names, by tag class id. */
    private final HashMap<Integer, String> tagClassName;

    /** @brief The tag class labels, by tag class id. */
    private final HashMap<Integer, String> tagClassLabel;

    /** @brief The tag class hierarchy: tag class id to parent tag class id. */
    private final HashMap<Integer, Integer> tagClassHierarchy;

    /** @brief The tag class id of each tag, by tag id. */
    private final HashMap<Integer, Integer> tagTagClass;

    /** @brief The tag names, by tag id. */
    private final HashMap<Integer, String> tagNames;

    /** @brief The tag descriptions, by tag id. */
    private final HashMap<Integer, String> tagDescription;

    /**
     * @brief Constructor. Creates the per-country structures and loads the
     * dictionary from the files configured in DatagenParams.
     *
     * @param numCountries       The number of countries.
     * @param tagCountryCorrProb The probability of keeping the requested country
     *                           when drawing a tag by country.
     * @throws IllegalStateException if a dictionary resource is missing or cannot be read.
     */
    public TagDictionary(int numCountries, double tagCountryCorrProb) {
        this.tagCountryCorrProb = tagCountryCorrProb;
        this.tagCummulativeDist = new ArrayList<ArrayList<Double>>(numCountries);
        this.tagsByCountry = new ArrayList<ArrayList<Integer>>(numCountries);
        this.tagNames = new HashMap<Integer, String>();
        this.tagTagClass = new HashMap<Integer, Integer>();
        this.tagDescription = new HashMap<Integer, String>();
        this.tagClassName = new HashMap<Integer, String>();
        this.tagClassLabel = new HashMap<Integer, String>();
        this.tagClassHierarchy = new HashMap<Integer, Integer>();
        for (int i = 0; i < numCountries; i++) {
            tagCummulativeDist.add(new ArrayList<Double>());
            tagsByCountry.add(new ArrayList<Integer>());
        }
        this.numPopularTags = 0;
        this.anyCountryHasTags = false;
        load(DatagenParams.tagsFile,
             DatagenParams.popularTagByCountryFile,
             DatagenParams.tagClassFile,
             DatagenParams.tagClassHierarchyFile);
    }

    /**
     * @brief Gets the name of a tag.
     *
     * @param id The tag identifier.
     * @return The name of the tag, or null if the tag is unknown.
     */
    public String getName(int id) {
        return tagNames.get(id);
    }

    /**
     * @brief Gets the class of a tag.
     *
     * @param id The tag identifier.
     * @return The tag's class identifier, or null if the tag is unknown.
     */
    public Integer getTagClass(int id) {
        return tagTagClass.get(id);
    }

    /**
     * @brief Gets the name of a tag class.
     *
     * @param id The tag class identifier.
     * @return The tag class's name, or null if the tag class is unknown.
     */
    public String getClassName(int id) {
        return tagClassName.get(id);
    }

    /**
     * @brief Gets the label of a tag class.
     *
     * @param id The tag class identifier.
     * @return The label of the tag class, or null if the tag class is unknown.
     */
    public String getClassLabel(int id) {
        return tagClassLabel.get(id);
    }

    /**
     * @brief Gets the tag class parent.
     *
     * @param id The id of the tag class.
     * @return The parent tag class id, or -1 if the class has no registered parent.
     */
    public Integer getClassParent(int id) {
        Integer parentId = tagClassHierarchy.get(id);
        if (parentId == null) {
            return -1;
        }
        return parentId;
    }

    /**
     * @brief Loads the tag dictionary from files.
     *
     * The files are read in this order: tag classes, tag class hierarchy,
     * tags, popular tags by country.
     *
     * @param tagsFileName                The tags file name.
     * @param popularTagByCountryFileName The popular tags by country file name.
     * @param tagClassFileName            The tag classes file name.
     * @param tagClassHierarchyFileName   The tag hierarchy file name.
     * @throws IllegalStateException if a resource is missing or cannot be read;
     *                               a partially loaded dictionary is never kept silently.
     */
    private void load(String tagsFileName, String popularTagByCountryFileName, String tagClassFileName, String tagClassHierarchyFileName) {
        try {
            loadTagClasses(tagClassFileName);
            loadTagClassHierarchy(tagClassHierarchyFileName);
            loadTags(tagsFileName);
            loadPopularTagsByCountry(popularTagByCountryFileName);
        } catch (IOException e) {
            throw new IllegalStateException("Unable to load the tag dictionary", e);
        }
    }

    /** Opens a classpath resource as a UTF-8 reader, failing with the resource name if it is absent. */
    private BufferedReader openResource(String resourceName) throws IOException {
        java.io.InputStream stream = getClass().getResourceAsStream(resourceName);
        if (stream == null) {
            throw new IOException("Dictionary resource not found: " + resourceName);
        }
        return new BufferedReader(new InputStreamReader(stream, CHARSET));
    }

    /** Reads "classId, name, label" rows (tab separated) into the tag class name and label maps. */
    private void loadTagClasses(String resourceName) throws IOException {
        BufferedReader reader = openResource(resourceName);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split(SEPARATOR);
                Integer classId = Integer.valueOf(fields[0]);
                tagClassName.put(classId, fields[1]);
                tagClassLabel.put(classId, fields[2]);
            }
        } finally {
            // Close even when a row fails to parse, so the stream is not leaked.
            reader.close();
        }
    }

    /** Reads "classId, parentId" rows (tab separated) into the tag class hierarchy map. */
    private void loadTagClassHierarchy(String resourceName) throws IOException {
        BufferedReader reader = openResource(resourceName);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split(SEPARATOR);
                Integer classId = Integer.valueOf(fields[0]);
                Integer parentId = Integer.valueOf(fields[1]);
                tagClassHierarchy.put(classId, parentId);
            }
        } finally {
            reader.close();
        }
    }

    /** Reads "tagId, classId, name, description" rows (tab separated) into the tag maps. */
    private void loadTags(String resourceName) throws IOException {
        BufferedReader reader = openResource(resourceName);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split(SEPARATOR);
                int tagId = Integer.parseInt(fields[0]);
                Integer classId = Integer.valueOf(fields[1]);
                tagTagClass.put(tagId, classId);
                tagNames.put(tagId, fields[2]);
                tagDescription.put(tagId, fields[3]);
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Reads "countryId tagId cumulative" rows (space separated) into the
     * per-country tag and cumulative distribution lists, tracking the largest
     * popular tag id seen.
     */
    private void loadPopularTagsByCountry(String resourceName) throws IOException {
        BufferedReader reader = openResource(resourceName);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split(POPULAR_TAGS_SEPARATOR);
                int countryId = Integer.parseInt(fields[0]);
                int tagId = Integer.parseInt(fields[1]);
                double cumulative = Double.parseDouble(fields[2]);
                tagCummulativeDist.get(countryId).add(cumulative);
                tagsByCountry.get(countryId).add(tagId);
                anyCountryHasTags = true;
                numPopularTags = Math.max(numPopularTags, tagId + 1);
            }
        } finally {
            reader.close();
        }
    }

    /**
     * @brief Gets a random tag by country.
     *
     * If the requested country has no popular tags, or the draw from
     * randomTagOtherCountry exceeds the correlation probability, a different
     * country with at least one popular tag is drawn uniformly (by rejection)
     * from randomTagOtherCountry. The tag is then selected by binary search
     * over that country's cumulative distribution.
     *
     * @param randomTagOtherCountry The random number generator for choosing another country.
     * @param randomTagCountryProb  The random number generator for choosing a tag within the country.
     * @param countryId             The country id.
     * @return The random tag id.
     * @throws IllegalStateException if another country must be drawn but no country has popular tags.
     */
    public Integer getaTagByCountry(Random randomTagOtherCountry, Random randomTagCountryProb, int countryId) {
        int selectedCountry = countryId;
        // Short-circuit matters: the generator is only consumed when the country has tags.
        if (tagsByCountry.get(selectedCountry).isEmpty() || randomTagOtherCountry.nextDouble() > tagCountryCorrProb) {
            if (!anyCountryHasTags) {
                // Without this guard the rejection loop below would never terminate.
                throw new IllegalStateException("No popular tags are loaded for any country");
            }
            do {
                selectedCountry = randomTagOtherCountry.nextInt(tagsByCountry.size());
            } while (tagsByCountry.get(selectedCountry).isEmpty());
        }

        ArrayList<Integer> countryTags = tagsByCountry.get(selectedCountry);
        ArrayList<Double> cumulative = tagCummulativeDist.get(selectedCountry);
        double randomDis = randomTagCountryProb.nextDouble();

        // Binary search. With non-decreasing cumulative values this ends on the
        // last index whose cumulative value is <= randomDis, or on index 0 when
        // there is none. NOTE(review): this selection rule is kept unchanged on
        // purpose, since generated data depends on it; confirm it matches the
        // convention of the cumulative values stored in the input file.
        int lowerBound = 0;
        int upperBound = countryTags.size();
        int curIdx = (upperBound + lowerBound) / 2;
        while (upperBound > (lowerBound + 1)) {
            if (cumulative.get(curIdx) > randomDis) {
                upperBound = curIdx;
            } else {
                lowerBound = curIdx;
            }
            curIdx = (upperBound + lowerBound) / 2;
        }
        return countryTags.get(curIdx);
    }

    /**
     * @brief Gets the number of popular tags.
     *
     * @return The number of popular tags (largest popular tag id loaded plus one).
     */
    public int getNumPopularTags() {
        return numPopularTags;
    }

    /**
     * @brief Gets a number of random tags.
     *
     * Each tag is drawn by picking a country uniformly at random, retrying
     * while the country has no popular tags, and then picking one of its
     * popular tags uniformly at random.
     *
     * @param random The random number generator.
     * @param num    The number of tags to retrieve.
     * @return The array of random tags.
     * @throws IllegalStateException if num is positive but no country has popular tags.
     */
    public Integer[] getRandomTags(Random random, int num) {
        Integer[] result = new Integer[num];
        if (num > 0 && !anyCountryHasTags) {
            // Without this guard the retry loop below would never terminate.
            throw new IllegalStateException("No popular tags are loaded for any country");
        }
        int filled = 0;
        while (filled < num) {
            int randomCountry = random.nextInt(tagsByCountry.size());
            ArrayList<Integer> tags = tagsByCountry.get(randomCountry);
            if (!tags.isEmpty()) {
                result[filled] = tags.get(random.nextInt(tags.size()));
                ++filled;
            }
        }
        return result;
    }

    /**
     * @brief Gets all the tag identifiers.
     *
     * @return The set of tag ids; this is the key-set view of the internal
     *         tag name map, not a copy.
     */
    public Set<Integer> getTags() {
        return tagNames.keySet();
    }
}