/*
* Copyright 2013 SciFY NPO <info@scify.org>.
*
* This product is part of the NewSum Free Software.
* For more information about NewSum visit
*
* http://www.scify.gr/site/en/our-projects/completed-projects/newsum-menu-en
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* If this code or its output is used, extended, re-engineered, integrated,
* or embedded to any extent in another software or hardware, there MUST be
* an explicit attribution to this work in the resulting source code,
* the packaging (where such packaging exists), or user interface
* (where such an interface exists).
* The attribution must be of the form "Powered by NewSum, SciFY"
*/
package org.scify.NewSumServer.Server.Utils;
import gr.demokritos.iit.jinsect.structs.Pair;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.scify.NewSumServer.Server.Comms.Communicator;
import org.scify.NewSumServer.Server.Storage.IDataStorage;
import org.scify.NewSumServer.Server.Structures.Article;
import org.scify.NewSumServer.Server.Structures.Sentence;
import org.scify.NewSumServer.Server.Structures.Topic;
import static org.scify.NewSumServer.Server.Utils.Utilities.isAlphabetic;
import static org.scify.NewSumServer.Server.Utils.Utilities.isGreekLetter;
import static org.scify.NewSumServer.Server.Utils.Utilities.writeStringListToFile;
/**
* Contains Various utility methods
*
* @author George K. <gkiom@scify.org>
*/
public class Utilities {
/**
 * Value of the communicator switch named "sCatsDaysFile". Within this class it
 * is used as the path of the file that getSourcesFromFile appends
 * "category=days" lines to.
 */
public static final String sCatsDaysFile = Communicator.getSwitches().get("sCatsDaysFile");
/**
 * Logger used by the utility methods of this class, obtained from Main.
 */
private static final Logger LOGGER = Main.getLogger();
/**
 * The delimiter used in the sources file, in ./data/Sources/RSSSources.txt,
 * written as a regular expression so that it can be passed to String.split.
 */
private static final String sDelimiter = "[*]{3}"; // regex pattern, for split
// The same delimiter as a literal string, for String.contains checks.
private static final String sDelimiterSimple = "***"; // simple format, 'contains'
// Synchronized raw list of Pair objects.
// NOTE(review): not referenced by any method in this class as visible here - confirm whether it is still needed.
private static List<Pair> lsArticlePairs = Collections.synchronizedList(new LinkedList());
/**
 * Checks whether a string can be parsed as a URL.
 *
 * @param sURL A string containing the URL to check
 * @return true if {@link URL} accepts the string, false if it is rejected as
 * malformed (the reason is logged at INFO level)
 */
public static boolean ValidURL(String sURL) {
    try {
        // Constructing the URL is the validity check; the instance is not needed.
        new URL(sURL);
        return true;
    } catch (MalformedURLException me) {
        // The {0} placeholder is required: without it the logger silently
        // drops the parameter and the reason never reaches the log.
        LOGGER.log(Level.INFO, "Malformed URL ignored: {0}", me.getMessage());
        return false;
    }
}
/**
 * Reads a UTF-8 text file line by line and returns its lines, each one
 * followed by the given delimiter.
 *
 * @param sPathToFile The path to the file
 * @param del The text appended after every line that is read
 * @return The file contents; an empty string if the file cannot be read;
 * null if an I/O error occurs while reading (the error is logged)
 */
public static String readFromFile(String sPathToFile, String del) {
    File fFile = new File(sPathToFile);
    StringBuilder sb = new StringBuilder();
    if (!fFile.canRead()) {
        LOGGER.log(Level.SEVERE, "Unable To Read From File {0}", fFile.getName());
        return sb.toString(); // empty, not null
    }
    BufferedReader br = null;
    try {
        br = new BufferedReader(new InputStreamReader(
                new FileInputStream(fFile), Charset.forName("UTF-8")));
        String sLine;
        while ((sLine = br.readLine()) != null) {
            sb.append(sLine).append(del);
        }
        return sb.toString();
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, ex.getMessage(), ex);
        return null;
    } finally {
        // br is still null when opening the file failed, so guard the close.
        // A failing close is only logged: it must not replace content that
        // has already been read successfully.
        if (br != null) {
            try {
                br.close();
            } catch (IOException ex) {
                LOGGER.log(Level.SEVERE, ex.getMessage(), ex);
            }
        }
    }
}
/**
 * Parses the sources file into a (feed link, category) map.
 * <p>
 * Lines starting with '?' are comments. A line starting with "http" is a feed
 * link (the part before the "***" delimiter is used) and belongs to the most
 * recent category line above it. Any other line is a category line; when it
 * contains the delimiter, the part after it is appended to the categories-days
 * file as "category=days".
 *
 * @param sPathToFile The path to the file where the sources are saved
 * @return The map containing the (RSSFeed, category) data, or null if the file
 * exists but cannot be read
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException if reading the file fails
 */
public static HashMap<String, String> getSourcesFromFile(String sPathToFile)
        throws FileNotFoundException, IOException {
    File fFile = new File(sPathToFile);
    if (!fFile.exists()) {
        throw new FileNotFoundException(fFile.getAbsolutePath() + " cannot be found.");
    }
    if (!fFile.canRead()) {
        LOGGER.log(Level.SEVERE, "Unable To Read From File {0}", fFile.getName());
        return null;
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(
            new FileInputStream(fFile), Charset.forName("UTF-8")));
    try {
        HashMap<String, String> hmSources = new HashMap<String, String>();
        // The category the following links belong to. Tracking it directly
        // (instead of "last element of the list of distinct categories") keeps
        // links correct when a category header appears more than once.
        String sCurrentCategory = null;
        String sLine;
        while ((sLine = br.readLine()) != null) {
            if (sLine.startsWith("?")) {
                continue; // comment line
            }
            if (!sLine.startsWith("http")) {
                // Category line, optionally "name***days".
                if (sLine.contains(sDelimiterSimple)) {
                    String[] aParts = sLine.split(sDelimiter);
                    sCurrentCategory = aParts[0];
                    // The number after the delimiter is the number of days
                    // of old news to fetch for that category.
                    appendToFile(sCatsDaysFile, sCurrentCategory + "=" + aParts[1]);
                } else {
                    sCurrentCategory = sLine;
                }
                continue;
            }
            if (sCurrentCategory == null) {
                // A link before any category line cannot be classified.
                LOGGER.log(Level.WARNING, "Ignoring link without a preceding category: {0}", sLine);
                continue;
            }
            hmSources.put(sLine.split(sDelimiter)[0], sCurrentCategory);
        }
        return hmSources;
    } finally {
        // Release the file handle even when parsing throws.
        br.close();
    }
}
/**
 * Parses the sources file into a (feed link, "category-label") map that keeps
 * the order of the file.
 * <p>
 * Lines starting with '?' are comments. A line starting with "http" is a feed
 * link of the form "link***label" and belongs to the most recent category line
 * above it. Any other line is a category line, of which only the part before
 * the delimiter is used.
 *
 * @param sPathToFile the file containing the sources
 * @return the mapping between each link and its "category-label" text, or null
 * if the file cannot be read
 * @throws FileNotFoundException declared for callers; not thrown explicitly by
 * this method
 * @throws IOException if reading the file fails
 */
public static HashMap<String, String> getLinkLabelsFromFile(String sPathToFile)
        throws FileNotFoundException, IOException {
    File fFile = new File(sPathToFile);
    if (!fFile.canRead()) {
        LOGGER.log(Level.SEVERE, "Unable To Read From File {0}", fFile.getName());
        return null;
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(
            new FileInputStream(fFile), Charset.forName("UTF-8")));
    try {
        LinkedHashMap<String, String> hmLinkLabels = new LinkedHashMap<String, String>();
        // The category the following links belong to; tracked directly so that
        // a category header that appears twice still labels its own links.
        String sCurrentCategory = null;
        String sLine;
        while ((sLine = br.readLine()) != null) {
            if (sLine.startsWith("?")) {
                continue; // comment line
            }
            if (!sLine.startsWith("http")) {
                sCurrentCategory = sLine.contains(sDelimiterSimple)
                        ? sLine.split(sDelimiter)[0]
                        : sLine;
                continue;
            }
            if (sCurrentCategory == null) {
                // A link before any category line cannot be labelled.
                LOGGER.log(Level.WARNING, "Ignoring link without a preceding category: {0}", sLine);
                continue;
            }
            String[] aParts = sLine.split(sDelimiter);
            // A link line without a label still fails here, as it did before.
            hmLinkLabels.put(aParts[0], sCurrentCategory + "-" + aParts[1]);
        }
        return hmLinkLabels;
    } finally {
        // Release the file handle even when parsing throws.
        br.close();
    }
}
/**
 * Reads the (feed link, label) pairs of the sources file, in file order. Only
 * lines starting with "http" are used; each must have the form "link***label".
 *
 * @param sPathToFile the file containing the sources
 * @return the mapping between the rssFeedLinks and their applied labels, or
 * null if the file cannot be read
 * @throws FileNotFoundException declared for callers; not thrown explicitly by
 * this method
 * @throws IOException if reading the file fails
 */
public static HashMap<String, String> getSourceLabelsFromFile(String sPathToFile)
        throws FileNotFoundException, IOException {
    File fFile = new File(sPathToFile);
    if (!fFile.canRead()) {
        LOGGER.log(Level.SEVERE, "Unable To Read From File {0}", fFile.getName());
        return null;
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(
            new FileInputStream(fFile), Charset.forName("UTF-8")));
    try {
        LinkedHashMap<String, String> hmSourceLabels = new LinkedHashMap<String, String>();
        String sLine;
        while ((sLine = br.readLine()) != null) {
            if (sLine.startsWith("http")) {
                String[] aParts = sLine.split(sDelimiter);
                hmSourceLabels.put(aParts[0], aParts[1]);
            }
        }
        return hmSourceLabels;
    } finally {
        // Release the file handle even when a malformed line makes parsing throw.
        br.close();
    }
}
/**
 * Builds a textual dump of the delimiter-separated pairs found in the sources
 * file. Every line that does not start with '?' is split on the "***"
 * delimiter; the first part (prefixed with a double quote) is mapped to the
 * second part (suffixed with a double quote), and the resulting map is
 * returned through its toString() form.
 * <p>
 * Every non-comment line must therefore contain the delimiter; a line without
 * it makes the method throw an ArrayIndexOutOfBoundsException.
 *
 * @param sPathToFile the file containing the sources
 * @return the string form of the collected map, or null if the file cannot be
 * read
 * @throws FileNotFoundException declared for callers; not thrown explicitly by
 * this method
 * @throws IOException if reading the file fails
 */
protected static String createSourceLabelsFromFile(String sPathToFile)
        throws FileNotFoundException, IOException {
    File fFile = new File(sPathToFile);
    if (!fFile.canRead()) {
        LOGGER.log(Level.SEVERE, "Unable To Read From File {0}", fFile.getName());
        return null;
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(
            new FileInputStream(fFile), Charset.forName("UTF-8")));
    try {
        LinkedHashMap<String, String> hmSourceLabels = new LinkedHashMap<String, String>();
        String sLine;
        while ((sLine = br.readLine()) != null) {
            if (!sLine.startsWith("?")) {
                String[] aParts = sLine.split(sDelimiter);
                hmSourceLabels.put("\"" + aParts[0], aParts[1] + "\"");
            }
        }
        return hmSourceLabels.toString();
    } finally {
        // Release the file handle even when a malformed line makes parsing throw.
        br.close();
    }
}
/**
 * Collects the keys of a map that are mapped to a given value.
 *
 * @param <T> Map key
 * @param <E> Map value
 * @param map The map to filter
 * @param value The value to filter by; may be null, in which case the keys
 * mapped to null are returned
 * @return A set containing the keys of the map assigned to the specified
 * value (empty if there are none)
 */
public static <T, E> Set<T> getKeysByValue(Map<T, E> map, E value) {
    Set<T> keys = new HashSet<T>();
    for (Entry<T, E> entry : map.entrySet()) {
        E candidate = entry.getValue();
        // Null-safe comparison: maps may legitimately hold null values.
        boolean matches = (value == null) ? candidate == null : value.equals(candidate);
        if (matches) {
            keys.add(entry.getKey());
        }
    }
    return keys;
}
/**
 * Returns the entries of a map ordered by value, highest value first. The
 * sizes of the input map and of the resulting set are logged at INFO level.
 * <p>
 * The comparator deliberately never reports two entries as equal when their
 * values are equal, so that entries sharing a value are all kept. As a
 * consequence the comparator is not consistent with equals, and lookups on
 * the returned set (contains/remove) should not be relied upon; iterate it
 * instead.
 *
 * @param <K> Map key
 * @param <V> Map value; must be comparable to itself
 * @param map The map whose entries are sorted
 * @return the entries of the map, sorted by descending value
 */
public static <K, V extends Comparable<? super V>> SortedSet<Map.Entry<K, V>> entriesSortedByValues(Map<K, V> map) {
    LOGGER.log(Level.INFO, "Initial Map: {0}", map.size());
    SortedSet<Map.Entry<K, V>> sortedEntries = new TreeSet<Map.Entry<K, V>>(
            new Comparator<Map.Entry<K, V>>() {
                @Override
                public int compare(Map.Entry<K, V> e1, Map.Entry<K, V> e2) {
                    if (e2.getValue().equals(e1.getValue())) {
                        // Never 0: a TreeSet would otherwise drop the entry
                        // as a duplicate.
                        return 1;
                    } else {
                        // Arguments swapped on purpose: descending order.
                        return e2.getValue().compareTo(e1.getValue());
                    }
                }
            });
    sortedEntries.addAll(map.entrySet());
    LOGGER.log(Level.INFO, "Sorted Map: {0}", sortedEntries.size());
    return sortedEntries;
}
/**
 * Concatenates the elements of an array, placing the separator between
 * consecutive elements.
 *
 * @param aStr The array of strings to be joined
 * @param sSeparator The separator to be used to distinguish the strings
 * @return A separator-delimited string containing all the elements of the
 * array; an empty string for an empty array
 */
public static String joinArrayToString(String[] aStr, String sSeparator) {
    final StringBuilder joined = new StringBuilder();
    for (int idx = 0; idx < aStr.length; idx++) {
        // Every element but the first is preceded by the separator.
        if (idx > 0) {
            joined.append(sSeparator);
        }
        joined.append(aStr[idx]);
    }
    return joined.toString();
}
/**
 * Concatenates the string forms of the elements of a list, placing the
 * separator between consecutive elements.
 *
 * @param lsStr The list whose elements are joined
 * @param sSeparator The separator placed between the elements
 * @return The joined string; an empty string for an empty list or for a list
 * whose only element equals the empty string
 */
public static String joinListToString(List<? extends Object> lsStr, String sSeparator) {
    // A list holding just one empty string joins to nothing.
    if (lsStr.size() == 1 && lsStr.get(0).equals("")) {
        return "";
    }
    final StringBuilder joined = new StringBuilder();
    final Iterator<? extends Object> items = lsStr.iterator();
    while (items.hasNext()) {
        joined.append(items.next().toString());
        // The separator goes only between elements, never after the last one.
        if (items.hasNext()) {
            joined.append(sSeparator);
        }
    }
    return joined.toString();
}
/**
 * Renders a map as text: each entry becomes "key" + sMidSeparator + "value",
 * and consecutive entries are separated by sSeparator. Entries appear in the
 * iteration order of the map.
 *
 * @param map The map to render
 * @param sSeparator The text placed between two entries
 * @param sMidSeparator The text placed between a key and its value
 * @return The rendered map; an empty string for an empty map
 */
public static String joinMapToString(Map<? extends Object, ? extends Object> map,
        String sSeparator, String sMidSeparator) {
    final StringBuilder rendered = new StringBuilder();
    // Nothing precedes the first entry; every later entry is preceded by the separator.
    String lead = "";
    for (Map.Entry<? extends Object, ? extends Object> pair : map.entrySet()) {
        rendered.append(lead);
        rendered.append(pair.getKey());
        rendered.append(sMidSeparator);
        rendered.append(pair.getValue());
        lead = sSeparator;
    }
    return rendered.toString();
}
/**
 * Debug helper: prints the string form of an object to standard output,
 * followed by a line break.
 *
 * @param O the object to print; must not be null
 */
public static void print(Object O) { //debug
    final String text = O.toString();
    System.out.println(text);
}
/**
 * Appends one line, encoded as UTF-8, to the file
 * {@code <user.dir>/data/<sCat>-ClusterCheck.csv}. Failures are logged and
 * not propagated. According to the original note, used only by dumpClusterer.
 *
 * @param sCat the category of interest; it becomes part of the file name
 * @param line the line to append to the file
 */
public static void writeClusterCheckFile(String sCat, String line) {
    final String sep = System.getProperty("file.separator");
    final File target = new File(
            System.getProperty("user.dir") + sep + "data" + sep + sCat + "-ClusterCheck.csv");
    BufferedWriter out = null;
    try {
        // Second constructor argument 'true' opens the file in append mode.
        final FileOutputStream raw = new FileOutputStream(target, true);
        out = new BufferedWriter(new OutputStreamWriter(raw, Charset.forName("UTF-8")));
        out.write(line);
        out.newLine();
    } catch (Exception e) {
        LOGGER.log(Level.SEVERE, "Error: {0}", e.getMessage());
    } finally {
        try {
            if (out != null) {
                out.close();
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, ex.getMessage(), ex);
        }
    }
}
/**
 * Writes a list of strings, one per line and encoded as UTF-8, to
 * {@code <user.dir>/data/Tools/PatternCheck.txt}. Any previous content of the
 * file is replaced. Failures are logged and not propagated.
 *
 * @param lsToWrite the list of strings to store to file, line by line for
 * each entry.
 */
public static void writeStringListToFile(List<String> lsToWrite) {
    final String sep = System.getProperty("file.separator");
    final File target = new File(System.getProperty("user.dir")
            + sep + "data" + sep + "Tools" + sep + "PatternCheck.txt");
    BufferedWriter out = null;
    try {
        // No append flag: the file is truncated when it is opened.
        out = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(target), Charset.forName("UTF-8")));
        final Iterator<String> lines = lsToWrite.iterator();
        while (lines.hasNext()) {
            out.write(lines.next());
            out.newLine();
        }
    } catch (Exception e) {
        LOGGER.log(Level.SEVERE, "Error: {0}", e.getMessage());
    } finally {
        try {
            if (out != null) {
                out.close();
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, ex.getMessage(), ex);
        }
    }
}
/**
 * Appends one line, encoded as UTF-8, to
 * {@code <user.dir>/data/MachineLearningData/Classification_Results.txt}.
 * Failures are logged and not propagated. According to the original note,
 * used for training the classifier.
 *
 * @param sToWrite a single line for the train set of the classifier
 */
public static void appendToFile(String sToWrite) {
    final String sep = System.getProperty("file.separator");
    final File target = new File(System.getProperty("user.dir")
            + sep + "data" + sep + "MachineLearningData" + sep + "Classification_Results.txt");
    BufferedWriter out = null;
    try {
        // Second constructor argument 'true' opens the file in append mode.
        final FileOutputStream raw = new FileOutputStream(target, true);
        out = new BufferedWriter(new OutputStreamWriter(raw, Charset.forName("UTF-8")));
        out.write(sToWrite);
        out.newLine();
    } catch (Exception e) {
        LOGGER.log(Level.SEVERE, "Error: {0}", e.getMessage());
    } finally {
        try {
            if (out != null) {
                out.close();
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, ex.getMessage(), ex);
        }
    }
}
/**
 * Appends a single line, encoded as UTF-8, to the given text file. The file
 * is created first when it does not exist. Failures are logged and not
 * propagated.
 *
 * @param sPathToFile the full path to the file
 * @param sToWrite the line to write to the file
 */
public static void appendToFile(String sPathToFile, String sToWrite) {
    final File target = new File(sPathToFile);
    if (!target.exists()) {
        try {
            target.createNewFile();
        } catch (Exception ex) {
            // Creation problems are only a warning; the write below is still attempted.
            LOGGER.log(Level.WARNING, ex.getMessage());
        }
    }
    BufferedWriter out = null;
    try {
        // Second constructor argument 'true' opens the file in append mode.
        final FileOutputStream raw = new FileOutputStream(target, true);
        out = new BufferedWriter(new OutputStreamWriter(raw, Charset.forName("UTF-8")));
        out.write(sToWrite);
        out.newLine();
    } catch (Exception e) {
        LOGGER.log(Level.SEVERE, "Error: {0}", e.getMessage());
    } finally {
        try {
            if (out != null) {
                out.close();
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, ex.getMessage(), ex);
        }
    }
}
/**
 * Reads a file of "category=days" lines and returns it as a map. Lines
 * without '=' are ignored; when a category appears more than once, the last
 * line wins.
 * <p>
 * The file is decoded as UTF-8, matching the encoding used by the two-argument
 * appendToFile, which getSourcesFromFile uses to write such lines.
 *
 * @param sPathToFile the path where the file is located
 * @return the mapping between the category and its max days to keep
 * articles, or null if the file exists but cannot be read
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException if reading the file fails
 */
public static HashMap<String, Integer> readDaysPerCategoryFile(String sPathToFile)
        throws FileNotFoundException, IOException {
    String sDel = "=";
    File fFile = new File(sPathToFile);
    if (!fFile.exists()) {
        throw new FileNotFoundException(fFile.getAbsolutePath() + " cannot be found.");
    }
    if (!fFile.canRead()) {
        LOGGER.log(Level.SEVERE, "Unable To Read From File {0}", fFile.getName());
        return null;
    }
    // Explicit charset: the platform default would garble non-ASCII category
    // names on systems whose default encoding is not UTF-8.
    BufferedReader br = new BufferedReader(new InputStreamReader(
            new FileInputStream(fFile), Charset.forName("UTF-8")));
    try {
        HashMap<String, Integer> hmDaysPerCateg = new HashMap<String, Integer>();
        String sLine;
        while ((sLine = br.readLine()) != null) {
            if (sLine.contains(sDel)) {
                String[] aParts = sLine.split(sDel);
                hmDaysPerCateg.put(aParts[0], Integer.valueOf(aParts[1]));
            }
        }
        return hmDaysPerCateg;
    } finally {
        // Release the file handle even when a malformed line makes parsing throw.
        br.close();
    }
}
/**
 * Builds one separator-delimited line out of four similarity figures and a
 * trailing text field, in the order: ValueSimilarity, ContainmentSimilarity,
 * SizeSimilarity, NVS, sMatches.
 *
 * @param sSep the separator placed between the fields
 * @param ValueSimilarity first numeric field
 * @param ContainmentSimilarity second numeric field
 * @param SizeSimilarity third numeric field
 * @param NVS fourth numeric field
 * @param sMatches the last field, appended as is
 * @return the assembled line
 */
public static String MakeTmpHumanLine(String sSep,
        double ValueSimilarity, double ContainmentSimilarity,
        double SizeSimilarity, double NVS, String sMatches) {
    final StringBuilder line = new StringBuilder();
    final double[] figures = {ValueSimilarity, ContainmentSimilarity, SizeSimilarity, NVS};
    for (double figure : figures) {
        // Each number is followed by the separator; the text field closes the line.
        line.append(Double.toString(figure)).append(sSep);
    }
    line.append(sMatches);
    return line.toString();
}
/**
 * Appends an object to a list unless the list already contains it, so that
 * the list stays free of duplicates.
 *
 * @param <T> a Type that extends Object
 * @param lsArt the list to add to
 * @param toAdd the object to add to the list
 */
public static <T extends Object> void addItemToList(List<T> lsArt, T toAdd) {
    final boolean alreadyPresent = !lsArt.isEmpty() && lsArt.contains(toAdd);
    if (alreadyPresent) {
        return;
    }
    lsArt.add(toAdd);
}
/**
 * Counts how many entries of a summary introduce a source that has not been
 * seen before. Each entry from index 1 on is split with the sentence
 * separator and its second part is treated as the source; a source counts as
 * seen when it occurs as a substring of the concatenation of all earlier
 * sources.
 *
 * @param Summary the summary entries; index 0 is not inspected
 * @return 1 for a summary of at most two entries, otherwise 1 plus the number
 * of entries (from index 2 on) whose source was not seen before
 */
public static int countDiffArticles(String[] Summary) {
    if (Summary.length <= 2) {
        return 1;
    }
    int distinct = 1;
    // Running concatenation of every source met so far, starting with entry 1.
    final StringBuilder seenSources =
            new StringBuilder(Summary[1].split(Sentence.getSentenceSeparator())[1]);
    int pos = 2;
    while (pos < Summary.length) {
        final String source = Summary[pos].split(Sentence.getSentenceSeparator())[1];
        if (seenSources.indexOf(source) < 0) {
            distinct++;
        }
        seenSources.append(source);
        pos++;
    }
    return distinct;
}
/**
 * Looks for text fragments that are repeated between articles of the same
 * feed. Every pair of articles with equal feeds has its texts split on
 * ';', ',' and '.'; fragments that match (ignoring case, after trimming) are
 * collected. The collected fragments are then ranked by sortByOccurencies
 * and, if any remain, they are logged as a warning, printed to standard
 * output and written through writeStringListToFile.
 *
 * @param lsArticleList the articles to inspect
 */
public static void checkForPossibleSpam(List<Article> lsArticleList) {
    final List<String> repeated = new LinkedList<String>();
    final int total = lsArticleList.size();
    for (int i = 0; i < total - 1; i++) {
        final Article left = lsArticleList.get(i);
        for (int j = i + 1; j < total; j++) {
            final Article right = lsArticleList.get(j);
            // Only articles coming from the same feed are compared.
            if (!left.getFeed().equals(right.getFeed())) {
                continue;
            }
            final List<String> leftParts = splitNoEmpty(left.getText(), "[;,.]");
            final List<String> rightParts = splitNoEmpty(right.getText(), "[;,.]");
            // Texts made of a single fragment are not considered.
            if (leftParts.size() <= 1 || rightParts.size() <= 1) {
                continue;
            }
            for (String leftPart : leftParts) {
                final String candidate = leftPart.trim();
                for (String rightPart : rightParts) {
                    if (candidate.equalsIgnoreCase(rightPart.trim())) {
                        repeated.add(0, candidate);
                    }
                }
            }
        }
    }
    // sortByOccurencies yields null when no fragment is frequent enough.
    final ArrayList<String> report = repeated.isEmpty() ? null : sortByOccurencies(repeated);
    if (report == null) {
        LOGGER.info("No SPAM occurencies");
        return;
    }
    if (!report.isEmpty()) {
        LOGGER.log(Level.WARNING, "Found possible SPAM sentences, check 'Tools' folder");
        System.out.println(report.toString());
        writeStringListToFile(report);
    }
}
/**
 * Counts how often each string occurs in a list, drops the strings that occur
 * fewer than 4 times and returns the rest as "text=count" items, most
 * frequent first.
 *
 * @param lsSame the strings to count
 * @return the "text=count" items ordered by descending count, or null when no
 * string occurs at least 4 times
 */
private static ArrayList<String> sortByOccurencies(List<String> lsSame) {
    final HashMap<String, Integer> counts = new HashMap<String, Integer>();
    for (String text : lsSame) {
        final Integer soFar = counts.get(text);
        counts.put(text, soFar == null ? 1 : soFar + 1);
    }
    // Discard everything seen fewer than 4 times.
    final Iterator<Map.Entry<String, Integer>> cursor = counts.entrySet().iterator();
    while (cursor.hasNext()) {
        if (cursor.next().getValue() < 4) {
            cursor.remove();
        }
    }
    if (counts.isEmpty()) {
        return null;
    }
    final Comparator<Map.Entry<String, Integer>> byCountDescending =
            new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                    return o2.getValue().compareTo(o1.getValue());
                }
            };
    final ArrayList<Map.Entry<String, Integer>> ranked =
            new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
    Collections.sort(ranked, byCountDescending);
    final ArrayList<String> formatted = new ArrayList<String>();
    for (Map.Entry<String, Integer> item : ranked) {
        formatted.add(item.getKey() + "=" + item.getValue());
    }
    return formatted;
}
/**
 * Splits a string with a regular expression and keeps only the pieces that
 * are longer than one character and do not consist solely of whitespace.
 * Kept pieces are not trimmed.
 *
 * @param sStr the string to split
 * @param regex the regular expression to split on
 * @return the kept pieces; a list holding one empty string when the split
 * itself yields no pieces at all
 */
private static ArrayList<String> splitNoEmpty(String sStr, String regex) {
    final ArrayList<String> kept = new ArrayList<String>();
    final String[] pieces = sStr.split(regex);
    if (pieces.length == 0) {
        // E.g. an input made only of delimiters.
        kept.add("");
        return kept;
    }
    for (String piece : pieces) {
        final boolean longEnough = piece.length() > 1;
        if (longEnough && !piece.matches("\\s+")) {
            kept.add(piece);
        }
    }
    return kept;
}
/**
 * Counts the articles of each category and returns the smallest of those
 * counts. Only articles whose getToWrap() is true are counted. E.g. if a list
 * named myList contains 20 such articles from the "science" category and 10
 * from the "Europe" category, getLeastOccurencies(myList) returns 10.
 * <p>
 * The result never exceeds 10000; that value is also returned when no article
 * is counted at all (empty list, or no article with getToWrap() true).
 *
 * @param lsList the list of articles
 * @return the number of counted articles in the category that has the fewest
 * of them, capped at 10000
 */
public static Integer getLeastOccurencies(List<Article> lsList) {
    // Upper bound of the result, and the answer when nothing is counted.
    int least = 10000;
    // Per-category totals. A map counts every article exactly once and also
    // compares the last category, which a running counter over consecutive
    // articles missed.
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (Article each : lsList) {
        if (each.getToWrap()) {
            String sCategory = each.getCategory();
            Integer soFar = counts.get(sCategory);
            counts.put(sCategory, soFar == null ? 1 : soFar + 1);
        }
    }
    for (Integer count : counts.values()) {
        if (count < least) {
            least = count;
        }
    }
    return least;
}
/**
 * Tells whether a character lies in one of the two code point ranges this
 * class treats as Greek: U+0370..U+03FF or U+1F00..U+1FFF.
 *
 * @param c Character to check for being a Greek letter.
 * @return true if the character falls in one of those ranges.
 */
public static boolean isGreekLetter(char c) {
    final boolean inFirstRange = c >= 0x0370 && c <= 0x03ff;
    final boolean inSecondRange = c >= 0x1f00 && c <= 0x1fff;
    return inFirstRange || inSecondRange;
}
/**
 * Tells whether a string contains at least one character accepted by
 * isGreekLetter.
 *
 * @param s String to check for Greek letters.
 * @return true if any character of the trimmed string is a Greek letter.
 */
public static boolean hasGreekLetters(String s) {
    final String trimmed = s.trim();
    int pos = 0;
    while (pos < trimmed.length()) {
        // The first hit settles the answer.
        if (isGreekLetter(trimmed.charAt(pos))) {
            return true;
        }
        pos++;
    }
    return false;
}
/**
 * Imitates the Java7 isAlphabetic function by inspecting the general
 * category reported by {@link Character#getType(char)}.
 *
 * @param c The character to test.
 * @return True if the character is an uppercase, lowercase, titlecase,
 * modifier or other letter, or a letter number.
 */
public static boolean isAlphabetic(Character c) {
    final int category = Character.getType(c);
    return category == Character.UPPERCASE_LETTER
            || category == Character.LOWERCASE_LETTER
            || category == Character.TITLECASE_LETTER
            || category == Character.MODIFIER_LETTER
            || category == Character.OTHER_LETTER
            || category == Character.LETTER_NUMBER;
}
/**
 * Tells whether every alphabetic character of a string is accepted by
 * isGreekLetter. Whitespace and non-alphabetic characters (digits,
 * punctuation) are ignored, so a string without any alphabetic character
 * yields true.
 *
 * @param s the text to check
 * @return false as soon as an alphabetic, non-Greek character is found; true
 * otherwise
 */
public static boolean isGreekWord(String s) {
    final char[] symbols = s.trim().toCharArray();
    for (char symbol : symbols) {
        // Only letters take part in the decision.
        if (Character.isWhitespace(symbol) || !isAlphabetic(symbol)) {
            continue;
        }
        if (!isGreekLetter(symbol)) {
            return false;
        }
    }
    return true;
}
/**
 * Wraps a date in a calendar of the default time zone and locale.
 *
 * @param date The date in Date format
 * @return A new calendar instance set to the instant of the specified date
 */
public static Calendar convertDateToCalendar(Date date) {
    final Calendar calendar = Calendar.getInstance();
    // Same effect as setTime(date): both position the calendar on the date's instant.
    calendar.setTimeInMillis(date.getTime());
    return calendar;
}
/**
 * Extracts the text of every article, keeping the order of the input list.
 *
 * @param lsArts the articles to read
 * @return a new list with the result of getText() of each article
 */
public static List<String> getListOfStrings(List<Article> lsArts) {
    final ArrayList<String> texts = new ArrayList<String>();
    final Iterator<Article> cursor = lsArts.iterator();
    while (cursor.hasNext()) {
        texts.add(cursor.next().getText());
    }
    return texts;
}
/**
 * Reads the clustered topics from the given storage and prints the resulting
 * map to standard output.
 *
 * @param ids the storage to read the clustered topics from
 * @return the map returned by the storage, or null if reading or printing it
 * throws (the exception is printed to standard error)
 */
public static HashMap<String, Topic> getTopicsMap(IDataStorage ids) {
    final HashMap<String, Topic> topics;
    try {
        topics = ids.readClusteredTopics();
        final String dump = topics.toString();
        System.out.println(dump);
    } catch (Exception ex) {
        System.err.println(ex);
        return null;
    }
    return topics;
}
/**
 * Dumps every topic of a map into its own text file,
 * {@code <user.dir>/data/<sFolderName>/<topic id>.txt}. The folder is created
 * when missing, and all files already in it are deleted first.
 * <p>
 * Each file holds the cluster id, the topic title, a divider line and one
 * "title: text---date" line per article of the topic. Files are written as
 * UTF-8, like the other files written by this class.
 *
 * @param hsTopics the topics to write, keyed by their id
 * @param sFolderName the name of the folder, below the "data" directory, that
 * receives the files
 * @throws IOException if a topic file cannot be written
 */
public static void writeTopicsToFile(HashMap<String, Topic> hsTopics, String sFolderName)
        throws IOException {
    String sSep = System.getProperty("file.separator");
    String sTopicPath = System.getProperty("user.dir") + sSep
            + "data" + sSep + sFolderName + sSep;
    File f = new File(sTopicPath);
    if (!f.exists()) {
        System.err.println("FILE " + sTopicPath + " DOES NOT EXIST");
        if (!f.mkdirs()) {
            System.err.println("FILE " + sTopicPath + " Could not be created");
        }
    }
    if (f.isDirectory()) {
        f.setWritable(true);
        // listFiles() returns null when the directory cannot be listed.
        File[] aOldFiles = f.listFiles();
        if (aOldFiles != null) {
            for (File k : aOldFiles) {
                k.delete();
            }
        }
    }
    for (Map.Entry<String, Topic> entry : hsTopics.entrySet()) {
        String tmpID = entry.getKey();
        Topic tmpTopic = entry.getValue();
        File fFile = new File(sTopicPath + tmpID + ".txt");
        // Explicit UTF-8: the platform default charset may not be able to
        // represent the article text.
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(fFile, false), Charset.forName("UTF-8")));
        try {
            bw.write("ClusterID: " + tmpID);
            bw.newLine();
            bw.write("Title: " + tmpTopic.getTitle());
            bw.write(("\n========================================\n"));
            StringBuilder sb = new StringBuilder();
            ListIterator<Article> li = tmpTopic.listIterator();
            while (li.hasNext()) {
                Article sCur = li.next();
                sb.append(sCur.getTitle()).append(": ").append(sCur.getText()).append("---").append(sCur.getDatetoString()).append("\n");
            }
            bw.write(sb.toString());
        } finally {
            // Close the file even when writing one of the topics fails.
            bw.close();
        }
    }
}
/**
 * Extracts a number from a title: the first match of the regular expression
 * is located and its first capturing group is parsed as an integer.
 *
 * @param sTitle the text to search
 * @param sRegex the regular expression; its first capturing group must
 * capture the digits of the number
 * @return the parsed number, or 0 when the expression does not match
 */
public static int getSourcesNum(String sTitle, String sRegex) {
    final Pattern pattern = Pattern.compile(sRegex);
    final Matcher matcher = pattern.matcher(sTitle);
    if (!matcher.find()) {
        return 0;
    }
    return Integer.parseInt(matcher.group(1));
}
/**
 * Compares two {@link Topic} objects through their sortable dates and returns
 * the result of that compareTo call.
 * <p>
 * NOTE(review): despite the method name, the returned value is whatever
 * compareTo yields for the type returned by getSortableDate(). For the usual
 * Comparable types that is an ordering indicator (negative, zero, positive),
 * not a number of days - confirm against Topic and the callers.
 *
 * @param early First {@link Topic} object
 * @param late Second {@link Topic} object
 * @return the result of comparing the sortable date of {@code early} with the
 * sortable date of {@code late}
 */
public static int getDiffInDays(Topic early, Topic late) {
// Compare using formatted date
return early.getSortableDate().compareTo(late.getSortableDate());
}
/**
 * Prints the entries of a map to standard output, one per line, as
 * "n: key=value" where n is a running number starting at 1.
 *
 * @param hsMap the map to print
 * @param sDel currently not used: the key and the value are always joined
 * with "="
 */
public static void printStringMap(Map<String, String> hsMap, String sDel) {
    int lineNo = 0;
    final Iterator<Map.Entry<String, String>> cursor = hsMap.entrySet().iterator();
    while (cursor.hasNext()) {
        final Map.Entry<String, String> pair = cursor.next();
        lineNo++;
        final StringBuilder line = new StringBuilder();
        line.append(lineNo).append(": ").append(pair.getKey()).append("=").append(pair.getValue());
        System.out.println(line.toString());
    }
}
/**
 * Manual test entry point. It currently does nothing: the only content is a
 * commented-out search check that is kept for reference.
 *
 * @param args command line arguments; not used
 */
public static void main(String[] args) {
///////////////////CHECK SEARCH/////////////////////////////////////
// String sBaseDir = Main.sBaseDir;
// System.out.println("Enter Search String\n");
// Scanner imp = new Scanner(System.in);
// String term = imp.next();
// IDataStorage ids = new InsectFileIO(sBaseDir);
// ArticleClusterer ac = new ArticleClusterer(
// (ArrayList<Article>) ids.loadObject("AllArticles", "feeds"), ids, Main.sArticlePath);
// Locale loc = Main.sPathToSources.endsWith("GR.txt") ? new Locale("el")
// : new Locale("en");
// Indexer ind = new Indexer(Main.sArticlePath, Main.sindexPath, loc);
// INSECTDB idb = new INSECTFileDBWithDir("", Main.sSummaryPath);
// Summariser sum = new Summariser(new HashSet<Topic>(
// ac.getArticlesPerCluster().values()), idb);
// Communicator cm = new Communicator(ids, ac, sum, ind);
// String sTop = cm.getTopicsByKeyword(ind, term, "All");
// System.out.println(sTop);
/////////////////CHECK SEARCH END///////////////////////////////////
}
// public class debugLogger {
//
// public void log(String sMessage, String sPathToFile) {
// PrintWriter out = null;
// try {
// out = new PrintWriter(new FileWriter(sPathToFile), true);
// out.write(sMessage);
// out.close();
// } catch (IOException ex) {
// LOGGER.log(Level.SEVERE, null, ex);
// } finally {
// out.close();
// }
// }
// }
}