/*
* Copyright (c) 2013 LDBC
* Linked Data Benchmark Council (http://ldbc.eu)
*
* This file is part of ldbc_socialnet_dbgen.
*
* ldbc_socialnet_dbgen is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ldbc_socialnet_dbgen is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ldbc_socialnet_dbgen. If not, see <http://www.gnu.org/licenses/>.
*
* Copyright (C) 2011 OpenLink Software <bdsmt@openlinksw.com>
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; only Version 2 of the License dated
* June 1991.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package ldbc.snb.datagen.dictionary;
import ldbc.snb.datagen.generator.DatagenParams;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Random;
import java.util.TreeSet;
/**
 * Dictionary mapping tag identifiers to a body of text, used to produce
 * text snippets about a set of tags.
 *
 * generateText reuses a single internal buffer, so concurrent calls on the
 * same instance would interfere with each other.
 */
public class TagTextDictionary {

    /** Separator between the tag identifier and the tag text in a dictionary line. */
    private static final String SEPARATOR = " ";

    /** The tag dictionary, used to resolve tag names. */
    private final TagDictionary tagDic;

    /** The tag text, keyed by tag identifier. */
    private final HashMap<Integer, String> tagText;

    /** Threshold a uniform random draw is compared against to decide whether a reduced size range is used. */
    private final double reducedTextRatio;

    /** Scratch buffer reused by every generateText call. */
    StringBuilder returnString = null;

    /**
     * @param tagDic           The tag dictionary used to resolve tag names.
     * @param reducedTextRatio The threshold used by getRandomTextSize to pick the reduced size range.
     * @brief Creates the dictionary and loads the tag texts from DatagenParams.tagTextFile.
     */
    public TagTextDictionary(TagDictionary tagDic, double reducedTextRatio) {
        this.tagText = new HashMap<Integer, String>();
        this.tagDic = tagDic;
        this.reducedTextRatio = reducedTextRatio;
        this.returnString = new StringBuilder(1000);
        load(DatagenParams.tagTextFile);
    }

    /**
     * @param fileName The tag text dictionary file name (a classpath resource).
     * @brief Loads the dictionary.
     *
     * Each line is expected to be "<id><SEPARATOR><text>". Loading is best effort:
     * a failure is reported on stderr and the entries read so far are kept.
     */
    private void load(String fileName) {
        BufferedReader dictionary = null;
        try {
            dictionary = new BufferedReader(new InputStreamReader(getClass().getResourceAsStream(fileName), "UTF-8"));
            String line;
            while ((line = dictionary.readLine()) != null) {
                // Limit of 2: only the first separator delimits the id, the text itself may contain it.
                String[] data = line.split(SEPARATOR, 2);
                Integer id = Integer.valueOf(data[0]);
                tagText.put(id, data[1]);
            }
        } catch (Exception e) {
            // Deliberately broad: a missing resource (null stream) or a malformed line must not abort construction.
            System.err.println("Failed to load tag text dictionary from " + fileName);
            e.printStackTrace();
        } finally {
            if (dictionary != null) {
                try {
                    dictionary.close();
                } catch (Exception ignored) {
                    // Nothing useful can be done if closing a read-only resource fails.
                }
            }
        }
    }

    /**
     * @param id The tag identifier.
     * @return The tag's text, or null if no text was loaded for the tag.
     * @brief Gets the text associated with the tag.
     */
    public String getTagText(int id) {
        return tagText.get(id);
    }

    /**
     * @param randomTextSize    The random number generator to generate the text's size.
     * @param randomReducedText The random number generator used to decide whether a small text size is generated.
     * @param minSize           The minimum size to generate (inclusive).
     * @param maxSize           The maximum size to generate (exclusive).
     * @return A size in [minSize, maxSize) when the draw from randomReducedText exceeds the
     * reduced text ratio, otherwise a size in [minSize, maxSize / 2).
     * @throws IllegalArgumentException if the selected range is empty (thrown by Random.nextInt).
     * @brief Gets a random tag text size.
     */
    public int getRandomTextSize(Random randomTextSize, Random randomReducedText, int minSize, int maxSize) {
        if (randomReducedText.nextDouble() > reducedTextRatio) {
            return randomTextSize.nextInt(maxSize - minSize) + minSize;
        }
        return randomTextSize.nextInt((maxSize >> 1) - minSize) + minSize;
    }

    /**
     * @param randomTextSize The random number generator to generate the size.
     * @param minSize        The minimum text size (inclusive).
     * @param maxSize        The maximum text size (exclusive).
     * @return A size in [minSize, maxSize).
     * @throws IllegalArgumentException if maxSize is not greater than minSize (thrown by Random.nextInt).
     * @brief Gets a random large text size.
     */
    public int getRandomLargeTextSize(Random randomTextSize, int minSize, int maxSize) {
        return randomTextSize.nextInt(maxSize - minSize) + minSize;
    }

    /**
     * @param randomTextSize The random number generator used to pick the excerpt taken from each tag's text.
     * @param tags           The set of tags to generate the text from.
     * @param textSize       The final text size.
     * @return The final text; empty if textSize is not positive or none of the tags has any text.
     * @throws IllegalArgumentException if textSize is positive and tags is null or empty.
     * @brief Generates a text given a set of tags.
     *
     * The tags are visited in order, repeatedly if needed, each contributing roughly
     * textSize / tags.size() characters until at least textSize characters were produced.
     */
    public String generateText(Random randomTextSize, TreeSet<Integer> tags, int textSize) {
        returnString.setLength(0);
        if (textSize <= 0) {
            return "";
        }
        if (tags == null || tags.isEmpty()) {
            // Without tags the fill loop below could never make progress.
            throw new IllegalArgumentException("Cannot generate a text of size " + textSize + " without tags");
        }
        int textSizePerTag = (int) Math.ceil(textSize / (double) tags.size());
        while (returnString.length() < textSize) {
            int lengthBeforePass = returnString.length();
            for (Integer tag : tags) {
                if (returnString.length() >= textSize) {
                    break;
                }
                appendTagText(randomTextSize, tag, Math.min(textSizePerTag, textSize - returnString.length()));
            }
            if (returnString.length() == lengthBeforePass) {
                // No tag has any text: another pass would append nothing either.
                return "";
            }
        }
        if (returnString.charAt(returnString.length() - 1) != '.') {
            returnString.append(".");
        }
        if (returnString.length() > textSize) {
            // Keeps textSize - 1 characters, exactly as the previous delete(textSize - 1, length) did.
            returnString.setLength(textSize - 1);
        }
        return returnString.toString().replace("|", " ");
    }

    /**
     * @param randomTextSize The random number generator used to pick the excerpt's starting position.
     * @param tag            The tag whose text is appended.
     * @param requestedSize  The number of characters of tag text requested (positive).
     * @brief Appends one tag's contribution to the buffer: the whole text when it is short,
     * otherwise "About <name>, " followed by a random excerpt of requestedSize characters.
     */
    private void appendTagText(Random randomTextSize, Integer tag, int requestedSize) {
        String content = getTagText(tag);
        if (content == null) {
            // Tag without an entry in the text dictionary: contribute nothing.
            return;
        }
        String tagName = tagDic.getName(tag).replace("_", " ").replace("\"", "\\\"");
        String prefix = "About " + tagName + ", ";
        if (requestedSize + prefix.length() >= content.length()) {
            returnString.append(content);
            return;
        }
        // The random generator is only consumed on this path; the bound is positive because
        // content is longer than requestedSize + prefix.length().
        int startingPos = randomTextSize.nextInt(content.length() - requestedSize);
        returnString.append(prefix);
        returnString.append(content.substring(startingPos, startingPos + requestedSize));
    }
}