/* * Copyright 2013 SciFY NPO <info@scify.org>. * * This product is part of the NewSum Free Software. * For more information about NewSum visit * * http://www.scify.gr/site/en/our-projects/completed-projects/newsum-menu-en * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * If this code or its output is used, extended, re-engineered, integrated, * or embedded to any extent in another software or hardware, there MUST be * an explicit attribution to this work in the resulting source code, * the packaging (where such packaging exists), or user interface * (where such an interface exists). * The attribution must be of the form "Powered by NewSum, SciFY" */ package org.scify.NewSumServer.Server.Comms; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.DataInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.ListIterator; import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import static org.scify.NewSumServer.Server.Comms.Communicator.LOGGER; import static org.scify.NewSumServer.Server.Comms.Communicator.readConfigFile; import org.scify.NewSumServer.Server.Searching.Indexer; import org.scify.NewSumServer.Server.Storage.IDataStorage; import org.scify.NewSumServer.Server.Structures.Article; import org.scify.NewSumServer.Server.Structures.Sentence; import org.scify.NewSumServer.Server.Structures.Topic; import org.scify.NewSumServer.Server.Summarisation.ArticleClusterer; import org.scify.NewSumServer.Server.Summarisation.RedundancyRemover; import org.scify.NewSumServer.Server.Summarisation.Summariser; import org.scify.NewSumServer.Server.Utils.Main; import org.scify.NewSumServer.Server.Utils.Utilities; import static org.scify.NewSumServer.Server.Utils.Utilities.getDiffInDays; /** * The Class that contains all the methods required for interaction with * the Client. * @author George K. <gkiom@scify.org> */ public class Communicator { /** * The Logger class, inherited from main */ protected static Logger LOGGER = (Main.getLogger() != null) ? Main.getLogger() : Logger.getAnonymousLogger(); private static File Config = new File("./data/BaseDir/ServerConfig.txt"); private static HashMap Switches = readConfigFile(); private static String sSourcesPath = (String) Switches.get("PathToSources"); /** * The Data Storage Interface */ protected IDataStorage ids; /** * The Indexer Used */ protected Indexer ind; /** * The Article Clusterer */ protected ArticleClusterer ac; /** * The Summariser */ protected Summariser sum; /** * Max Sentences per summary fetched */ public static int iOutputSize = Main.iOutputSize; public static String NO_IDS_FOUND = sSourcesPath.endsWith("EN.txt") ? "No relevant Topic IDs found for the specified User Sources" : "Δε βρέθηκαν ID για τις συγκεκριμένες πηγές"; public static String NO_TOPICS_FOUND = sSourcesPath.endsWith("EN.txt") ? "No relevant Topic Titles found for the specified User Sources" : "Δε βρέθηκαν άρθρα για τις συγκεκριμένες πηγές που δώσατε"; public static String NO_IDS_ONSEARCH_FOUND = sSourcesPath.endsWith("EN.txt") ? "No relevant Topic IDs found for the specified Keyword" : "Δε βρέθηκαν IDs για τις συκεκριμένες πηγές"; public static String NO_TOPICS_ONSEARCH_FOUND = sSourcesPath.endsWith("EN.txt") ? "No relevant Topic Titles found for the specified Keyword" : "Δε βρέθηκαν άρθρα για τη συγκεκριμένη αναζήτηση"; private static final String FIRST_LEVEL_SEPARATOR = ";,;"; private static final String SECOND_LEVEL_SEPARATOR = Sentence.getSentenceSeparator(); private static final String THIRD_LEVEL_SEPARATOR = "=;="; /** * Reads the configuration file saved by the NewSum server */ public static void initStaticVariables() { LOGGER.info("initStaticVariables"); Config = new File("./data/BaseDir/ServerConfig.txt"); Switches = readConfigFile(); sSourcesPath = (String) Switches.get("PathToSources"); iOutputSize = Main.iOutputSize; //Max Sentences per summary LOGGER = (Main.getLogger() != null) ? Main.getLogger() : Logger.getAnonymousLogger(); // TODO: Use RESOURCES of some kind. Do NOT use hardcoded messages. NO_IDS_FOUND = sSourcesPath.endsWith("EN.txt") ? "No relevant Topic IDs found for the specified User Sources" : "Δε βρέθηκαν ID για τις συγκεκριμένες πηγές"; NO_TOPICS_FOUND = sSourcesPath.endsWith("EN.txt") ? "No relevant Topic Titles found for the specified User Sources" : "Δε βρέθηκαν άρθρα για τις συγκεκριμένες πηγές που δώσατε"; NO_IDS_ONSEARCH_FOUND = sSourcesPath.endsWith("EN.txt") ? "No relevant Topic IDs found for the specified Keyword" : "Δε βρέθηκαν IDs για τις συκεκριμένες πηγές"; NO_TOPICS_ONSEARCH_FOUND = sSourcesPath.endsWith("EN.txt") ? "No relevant Topic Titles found for the specified Keyword" : "Δε βρέθηκαν άρθρα για τη συγκεκριμένη αναζήτηση"; LOGGER.info("initStaticVariables done"); } private ArrayList<String> lsSortedIDs=null; /** * Main constructor * @param ids The data storage module * @param ac The Article Clusterer * @param sum The Summariser * @param ind The indexer */ public Communicator(IDataStorage ids, ArticleClusterer ac, Summariser sum, Indexer ind) { this.ids = ids; this.ac = ac; this.sum = sum; this.ind = ind; } /** * * @return The First Level Separator */ public String getFirstLevelSeparator() { return FIRST_LEVEL_SEPARATOR; } /** * * @return The Second level Separator, used in the getSummary and * getLinkLabels methods */ public String getSecondLevelSeparator() { return SECOND_LEVEL_SEPARATOR; } /** * The Third level separator is used only in the first element of the getSummary * returned String, which is for the (SourceLink=;=Label) data. * @return the link separator */ public String getThirdLevelSeparator() { return THIRD_LEVEL_SEPARATOR; } /** * * @param sCategory The category of interest * @return The Sources in the specified category, * or null if error occurs. The Sources are URL patterns */ public String getCategorySources(String sCategory) { try{ if (this.ids.objectExists(sCategory, "Links")) { HashSet<String> cSources = (HashSet<String>) this.ids.getLinksByCategory(sCategory); StringBuilder sStr = new StringBuilder(); boolean firstOcc = true; for (String s : cSources) { if (firstOcc) { firstOcc = false; } else { sStr.append(getFirstLevelSeparator()); } sStr.append(s); } return sStr.toString(); } else { LOGGER.log(Level.SEVERE, "Error: file containing Sources for {0} not found", sCategory); } } catch (Exception ex) { LOGGER.log(Level.SEVERE, ex.getMessage()); } return null; } /** * Using the User's Sources, returns the Categories that these Sources * belong to * @param sUserSources A separator-delimited String containing * the User URL paths. if null or "All", all Sources are accepted * @return The Categories for the specified user, or null on Error. */ public String getCategories(String sUserSources) { try { //for every item in the array, if item is in the generic //sources map, add it to the categories set if (sUserSources == null || "All".equals(sUserSources) || areAllSources(sUserSources)) { //Accept all sources and return categories from saved file Collection<String> genericCategs = this.ids.readGenericCategories(); //TODO IGNORE UNCLASSIFIED CATEGORY String[] aCats = genericCategs.toArray(new String[0]); return Utilities.joinArrayToString(aCats, getFirstLevelSeparator()); } else { //filter using User Sources HashSet<String> hsCats = new HashSet<String>(); //the categories to send HashMap<String, String> hsSources = this.ids.readSources("generic"); // transform the string to an array, using the separator delimiter String[] aUserSources = sUserSources.trim().split(getFirstLevelSeparator()); for (String each: aUserSources) { if (hsSources.containsKey(each)) { hsCats.add(hsSources.get(each)); } } String[] aCats = hsCats.toArray(new String[0]);//convert to array //transform to string and return return Utilities.joinArrayToString(aCats, getFirstLevelSeparator()); } } catch (Exception ex) { LOGGER.log(Level.SEVERE, "Could not load categories: " + ex.getMessage(), ex.getMessage()); return null; } } /** * @param sUserSources A separator-delimited String containing the user * URL paths. if null or "All", all sources are considered valid and * all Topic IDs are returned for the specified category * @param sCategory The category the client is interested in * @return The topic IDs contained in this category */ public String getTopicIDs(String sUserSources, String sCategory) { //load the Clustered Topics Map HashMap<String, Topic> ClusteredTopics = this.ac.getArticlesPerCluster(); ArrayList<String> TopicIDs = new ArrayList<String>(); //The Topic IDs to return if (ClusteredTopics == null) { LOGGER.log(Level.WARNING, "Could Not get Articles Per Cluster"); TopicIDs.add(NO_IDS_FOUND); return Utilities.joinListToString(TopicIDs, getFirstLevelSeparator()); } //Accept all user sources if (sUserSources == null || "All".equals(sUserSources) || areAllSources(sUserSources)) { Iterator it = ClusteredTopics.entrySet().iterator(); while (it.hasNext()) { Map.Entry t = (Map.Entry) it.next(); String sID = (String) t.getKey(); Topic tp = (Topic) t.getValue(); //filter by category if (tp.getCategory().equals(sCategory)) { //every Topic in each Topic ID is in the same category TopicIDs.add(sID); } } } else { // filter according to user sources for (String each : ClusteredTopics.keySet()) { Topic To = ClusteredTopics.get(each); if (To.getCategory().equals(sCategory)) { Iterator l = To.iterator(); while (l.hasNext()) { Article ls = (Article) l.next(); if (sUserSources.contains(ls.getFeed())) { if (!TopicIDs.contains(each)) { TopicIDs.add(each); } } } } } } //If no Topic IDs Found for the specified User Sources, return smth logical if (TopicIDs.isEmpty()) { LOGGER.log(Level.INFO, "No Topic IDs found for the specified User Sources"); TopicIDs.add(NO_IDS_FOUND); // return Utilities.joinListToString(TopicIDs, getFirstLevelSeparator()); } final HashMap hsIDsTitles = getTopicsMap(); //the (ID, title) map //sort the IDs according to their titles Collections.sort(TopicIDs, new Comparator<String>() { @Override public int compare(String o1, String o2) { String tmpTit1 = (String) hsIDsTitles.get(o1); String tmpTit2 = (String) hsIDsTitles.get(o2); String sRegex = ".*\\((\\d+)\\)$"; int i1 = Utilities.getSourcesNum(tmpTit1, sRegex); int i2 = Utilities.getSourcesNum(tmpTit2, sRegex); return i2-i1; } }); lsSortedIDs = TopicIDs; return Utilities.joinListToString(TopicIDs, getFirstLevelSeparator()); } /** * @param sUserSources A separator-delimited String containing the user * URL paths. If null or "All", all sources are considered valid and * all Topic Titles are returned for the specified category * @param sCategory The category of interest * @return The Topic Titles contained in this category, or null on error */ public String getTopicTitles(String sUserSources, String sCategory) { HashMap IDTopicsMap = getTopicsMap(); //The (ID, TopicTitles) Map //load the Clustered Topics Map HashMap<String, Topic> ClusteredTopics = this.ac.getArticlesPerCluster(); //Populate the Topic Titles List according to the ID Map ArrayList<String> TopicTitles = new ArrayList<String>(); if (lsSortedIDs == null) { getTopicIDs(sUserSources, sCategory); } //If no Topic IDs Found for the specified User Sources, return smth logical if (lsSortedIDs.isEmpty() || lsSortedIDs.get(0).equals(NO_IDS_FOUND)) { LOGGER.log(Level.INFO, "No Topic Titles found for the specified User Sources"); TopicTitles.add(NO_TOPICS_FOUND); LOGGER.info(TopicTitles.toString()); } else { for (int i=0; i<lsSortedIDs.size(); i++) { // get the title for this ID and add it to the list TopicTitles.add(i, (String) lsSortedIDs.get(i) + getSecondLevelSeparator() + IDTopicsMap.get(lsSortedIDs.get(i)) + getSecondLevelSeparator() + ClusteredTopics.get(lsSortedIDs.get(i)).getDate().getTimeInMillis()); // Also add date for each topic title } } return Utilities.joinListToString(TopicTitles, getFirstLevelSeparator()); } /** * @param sUserSources A separator-delimited String containing the user * URL paths. If "All", all sources are considered valid and * all Topics are returned for the specified category * @param sCategory The category of interest * @return A string containing the (ID-Title-Date) info for each * Topic contained in this category, or null on error. * Uses {@link #getFirstLevelSeparator()} for splitting Topics, * and {@link #getSecondLevelSeparator()}for splitting data for each Topic * @since 1.0 */ public String getTopics(String sUserSources, String sCategory) { //load the Clustered Topics Map HashMap<String, Topic> ClusteredTopics = this.ac.getArticlesPerCluster(); //The Topics to return ArrayList<Topic> tTopics = new ArrayList<Topic>(); if (ClusteredTopics == null) { LOGGER.log(Level.WARNING, "Could Not Load Articles Per Cluster"); return ""; } //Accept all user sources if (sUserSources == null || "All".equals(sUserSources) || areAllSources(sUserSources)) { Iterator it = ClusteredTopics.entrySet().iterator(); while (it.hasNext()) { Map.Entry t = (Map.Entry) it.next(); String sID = (String) t.getKey(); Topic tp = (Topic) t.getValue(); //filter by category if (tp.getCategory().equals(sCategory)) { tTopics.add(tp); } } } else { // filter according to user sources // Create a Temporary ID Set and fill it // with the appropriate data. HashSet<String> hsIDs = new HashSet<String>(); for (Map.Entry each : ClusteredTopics.entrySet()) { String sID = (String) each.getKey(); Topic tmpTopic = (Topic) each.getValue(); // if we reach the specified category if (tmpTopic.getCategory().equals(sCategory)) { // keep only Topics from accepted User Sources Iterator l = tmpTopic.iterator(); while (l.hasNext()) { Article ls = (Article) l.next(); if (sUserSources.contains(ls.getFeed())) { // if even only one source from this topic // interests the user, we keep it. // the final filtering goes on in getSummary hsIDs.add(sID); } } } } // if user Sources if (!hsIDs.isEmpty()) { // fill the Topics with Appropriate info Iterator<String> iit = hsIDs.iterator(); while (iit.hasNext()) { String tmpID = iit.next(); // add to the topics List, according to Topic ID tTopics.add(ClusteredTopics.get(tmpID)); } } } //If no Topic IDs Found for the specified User Sources, return smth logical if (tTopics.isEmpty()) { LOGGER.log(Level.INFO, "No Topic IDs found for the specified User Sources"); // tTopics.add(NO_IDS_FOUND); return ""; } // sort Topics According to Date and Article Count Collections.sort(tTopics, new Comparator<Topic>() { @Override public int compare(Topic o1, Topic o2) { // get the date difference in days int iDiff = Utilities.getDiffInDays(o2, o1); // First compare their dates if (iDiff != 0) { return iDiff; } // get the articles number difference int iSourceDiff = o2.getArticlesCount() - o1.getArticlesCount(); // else compare their Source Count if (iSourceDiff != 0) { return iSourceDiff; } // else Sort Alphabetically return o1.getTitle().compareTo(o2.getTitle()); } }); //debugging // for (Topic each : tTopics) { // works well // System.out.println("Date: " + each.getSortableDate() + " : " + each.getTitle()); // } return getTopicsAsString(tTopics); } /** * * @param sTopicID The topic ID of interest * @param sUserSources A separator-delimited list of acceptable sources. * if null or "All", the User Accepts all known sources. * The sources are in fact URL patterns. * @param iMaxSnippets The maximum number of sentences for the summary * @return The summary for that topic, using only sentences from acceptable sources * @deprecated */ protected String getSummary(String sTopicID, String sUserSources, int iMaxSnippets) { // return the Summary of the given topic ID String sRes = ""; // Returns the Summary of the given topic ID, according to user sources // Get the topic from the topic ID Topic tp = null; try { tp = this.ac.getArticlesPerCluster().get(sTopicID); //get The topic } catch (NullPointerException ex) { //if sTopicID not contained in map LOGGER.log(Level.SEVERE, ex.getMessage()); return ""; } catch (Exception ex) { LOGGER.log(Level.SEVERE, ex.getMessage()); return ""; } LinkedList<Sentence> lsSen = //Get the Summary (LinkedList<Sentence>) this.sum.getSummary(tp); if (lsSen == null || lsSen.isEmpty()) { LOGGER.log(Level.WARNING, "No summary for Topic {0}", sTopicID); // lsSen.add(new Sentence("No Summary Found", " "," ")); return ""; } else { //ommit small sentences for (Iterator<Sentence> ite = lsSen.iterator(); ite.hasNext();) { Sentence ob = ite.next(); if (ob.getSnippet().split("[;,. ]").length < 5) { ite.remove(); } } } //keep only iOutputSize Sentences in the List while (lsSen.size() > iMaxSnippets) { lsSen.removeLast(); } // the List containing all the article snippets of interest LinkedList<Sentence> lsSummary = new LinkedList<Sentence>(); if (sUserSources == null || "All".equals(sUserSources) || areAllSources(sUserSources)) { //Accept all user sources //Add all source links at start of sRes String sRes = getSummarySources(lsSen); // Remove redundant sentences and apply result sRes += Utilities.joinListToString( new RedundancyRemover().removeRedundantSentences(lsSen), getFirstLevelSeparator()); } else { //filter Summary using User Sources for (Sentence each: lsSen) { if (sUserSources.contains(each.getFeed())) { if (!lsSummary.contains(each)) { lsSummary.add(each); } } } if (!lsSummary.isEmpty()) { //Keep only iMaxSnippets Sentences from the List while (lsSummary.size() > iMaxSnippets) { lsSummary.removeLast(); } //Add all source links at start of sRes String sRes = getSummarySources(lsSummary); // Remove redundant sentences and apply result sRes += Utilities.joinListToString( new RedundancyRemover().removeRedundantSentences(lsSummary), getFirstLevelSeparator()); } else { LOGGER.log(Level.INFO, "No Summary from these user sources in the specified topic"); // lsSummary.add(new Sentence("No Summary Found", " "," ")); return ""; } } //Get the Feed link of each sentence and //Append Source Label For this feed to the sentence String[] aRes = sRes.split(getFirstLevelSeparator()); String toRes = aRes[0] + getFirstLevelSeparator(); //keep 1st element (all sources - labels) for (int i=1; i<aRes.length; i++) { String[] sStr = aRes[i].split(getSecondLevelSeparator()); String sLabel = appendSourceLabel(sStr[2]); // get the label for this feed toRes += aRes[i] + getSecondLevelSeparator() + sLabel + getFirstLevelSeparator(); // append to string } // System.err.println(toRes); // debug return toRes; } /** * * @param sTopicID The topic ID of interest * @param sUserSources A separator-delimited list of acceptable sources. * if "All", the User Accepts all known sources. * The sources are in fact URL patterns. * @return The summary for that topic, using only sentences from acceptable sources */ public String getSummary(String sTopicID, String sUserSources) { String sRes = ""; // Returns the Summary of the given topic ID, according to user sources // Get the topic from the topic ID Topic tp = null; try { tp = this.ac.getArticlesPerCluster().get(sTopicID); //get The topic } catch (NullPointerException ex) { //if sTopicID not contained in map, then news have been updated LOGGER.log(Level.WARNING, ex.getMessage()); return ""; } catch (Exception ex) { // TODO This ERROR should be logged elsewhere? LOGGER.log(Level.SEVERE, ex.getMessage()); return ""; } LinkedList<Sentence> lsSen = //Get the Summary (LinkedList<Sentence>) this.sum.getSummary(tp); if (lsSen == null || lsSen.isEmpty()) { LOGGER.log(Level.WARNING, "No summary for Topic {0}", sTopicID); // lsSen.add(new Sentence("No Summary Found", " "," ")); return ""; } else { for (Iterator<Sentence> ite = lsSen.iterator(); ite.hasNext();) { Sentence ob = ite.next(); // Ommit small sentences if (ob.getSnippet().split("[;,. ]").length < 5) { ite.remove(); } } } // keep only iOutputSize Sentences in the List while (lsSen.size() > iOutputSize) { lsSen.removeLast(); } // the List containing all the article snippets of interest LinkedList<Sentence> lsSummary = new LinkedList<Sentence>(); // Accept all user sources if (sUserSources == null || "All".equals(sUserSources) || areAllSources(sUserSources)) { // Add all source links at start of sRes String, // independently of what will get removed from filtering sRes = getSummarySources(lsSen); // Filter scrap sentences, remove redundant sentences and apply result sRes += Utilities.joinListToString( new RedundancyRemover().removeRedundantSentences(filterScrapSentences(lsSen)), getFirstLevelSeparator()); } else { // if user has limited Sources preference //filter Summary using User Sources for (Sentence each: lsSen) { if (sUserSources.contains(each.getFeed())) { if (!lsSummary.contains(each)) { lsSummary.add(each); } } } if (!lsSummary.isEmpty()) { //Add all source links at start of sRes String sRes = getSummarySources(lsSummary); // Filter scrap sentences, // remove redundant sentences and apply result sRes += Utilities.joinListToString( new RedundancyRemover().removeRedundantSentences(filterScrapSentences(lsSummary)), getFirstLevelSeparator()); } else { // User unlucky, all sources he kept had scrap content // TODO Possibly alter result in order to Inform USER!? LOGGER.log(Level.INFO, "No Summary from these user sources in the specified topic"); // lsSummary.add(new Sentence("No Summary Found", " "," ")); return ""; } } //Get the Feed link of each sentence and //Append Source Label For this feed to the sentence String[] aRes = sRes.split(getFirstLevelSeparator()); String toRes = aRes[0] + getFirstLevelSeparator(); //keep 1st element (all sources - labels) for (int i=1; i<aRes.length; i++) { String[] sStr = aRes[i].split(getSecondLevelSeparator()); String sLabel = appendSourceLabel(sStr[2]); // get the label for this feed toRes += aRes[i] + getSecondLevelSeparator() + sLabel + getFirstLevelSeparator(); // append to string } return toRes; } /** * Searches the Articles folder with the specified keyword * @param ind The indexer that is used for indexing * @param sKeyword The search query that the user enters. * @param sUserSources The separator-delimited String containing the acceptable * User Sources. if null or "All", All URL sources are considered acceptable * @return A separator-delimited String containing a List of the Topic IDs * in relation to the search term, in descending order, * or null if no result is found * @deprecated */ public String getTopicIDsByKeyword(Indexer ind, String sKeyword, String sUserSources) { String sRes = null; try {//Call the Clusterer to get the topic IDs for the specified keyword ArrayList<String> topicIDsByKeyword=null; if (sSourcesPath.endsWith("GR.txt")) { topicIDsByKeyword = this.ac.getTopicIDsByKeyword(ind, sKeyword.trim(), sUserSources, 8, new Locale("el")); } else if (sSourcesPath.endsWith("EN.txt")) { topicIDsByKeyword = this.ac.getTopicIDsByKeyword(ind, sKeyword.trim(), sUserSources, 8, new Locale("en")); } ArrayList<String> NullTopicIDs = new ArrayList<String>(); //handle no output if (topicIDsByKeyword == null) { //if nothing found NullTopicIDs.add(NO_IDS_ONSEARCH_FOUND); sRes = NullTopicIDs.get(0); // No relevant topic found // sRes = ""; // No relevant topic found } else { //pack to String and return it sRes = Utilities.joinListToString(topicIDsByKeyword, getFirstLevelSeparator()); } } catch (FileNotFoundException ex) { LOGGER.log(Level.SEVERE, ex.getMessage(), ex); List nulls = new ArrayList<String>(); nulls.add("Server Error"); //trying not to mess too much with the Client sRes = Utilities.joinListToString(nulls, getFirstLevelSeparator()); } catch (IOException ex) { LOGGER.log(Level.SEVERE, ex.getMessage(), ex); List nulls = new ArrayList<String>(); nulls.add("Server Error"); sRes = Utilities.joinListToString(nulls, getFirstLevelSeparator()); } return sRes; } /** * Searches the Articles folder with the specified keyword and returns the * articles containing the keyword sorted by the number of occurrences. * @param ind The indexer that is used for indexing * @param sKeyword The search query that the user enters. * @param sUserSources The separator-delimited String containing the acceptable * User Sources. if "All", All URL sources are considered acceptable * @return A {@link #getFirstLevelSeparator() } delimited String containing * the Topics in relation to the search term, in descending order, * or null if no result is found. Each Topic is splitted by * {@link #getSecondLevelSeparator() } to it's ID - Title - Date values. * @since 1.0 */ public String getTopicsByKeyword(Indexer ind, String sKeyword, String sUserSources) { String sRes = null; try {//Call the Clusterer to get the topic IDs for the specified keyword ArrayList<String> topicIDsByKeyword=null; if (sSourcesPath.endsWith("GR.txt")) { topicIDsByKeyword = this.ac.getTopicIDsByKeyword(ind, sKeyword.trim(), sUserSources, 8, new Locale("el")); } else if (sSourcesPath.endsWith("EN.txt")) { topicIDsByKeyword = this.ac.getTopicIDsByKeyword(ind, sKeyword.trim(), sUserSources, 8, new Locale("en")); } ArrayList<String> NullTopicIDs = new ArrayList<String>(); //handle no output if (topicIDsByKeyword == null) { //if nothing found NullTopicIDs.add(NO_IDS_ONSEARCH_FOUND); sRes = NullTopicIDs.get(0); // No relevant topic found // sRes = ""; // No relevant topic found } else { // create Topic for each TopicID, pack it and return it // load the Clustered Topics Map HashMap<String, Topic> ClusteredTopics = this.ac.getArticlesPerCluster(); // Init the Topics to return ArrayList<Topic> tTopics = new ArrayList<Topic>(); // for each ID, add the Topic to the List for (String eachID : topicIDsByKeyword) { if (ClusteredTopics.containsKey(eachID)) { tTopics.add(ClusteredTopics.get(eachID)); } } // debug for (Topic eT : tTopics) { System.out.println(eT.getID() + " " + eT.getTitle() + " " + eT.getSortableDate()); } // Convert to string sRes = getTopicsAsString(tTopics); } } catch (FileNotFoundException ex) { LOGGER.log(Level.SEVERE, ex.getMessage(), ex); List nulls = new ArrayList<String>(); nulls.add("Server Error"); //trying not to mess too much with the Client sRes = Utilities.joinListToString(nulls, getFirstLevelSeparator()); } catch (IOException ex) { LOGGER.log(Level.SEVERE, ex.getMessage(), ex); List nulls = new ArrayList<String>(); nulls.add("Server Error"); sRes = Utilities.joinListToString(nulls, getFirstLevelSeparator()); } return sRes; } /** * * @param sTopicID The UUID of interest * @return A Separator delimited String composed from the List of Clustered Topics * that correspond to the specified UUID * @deprecated */ public String getTopicTitlesByID(String sTopicID) { List<Article> lsArticles= this.ac.getArticlesPerCluster().get(sTopicID); List<String> lsTopicTitles = new ArrayList<String>(); for (Article each : lsArticles) { lsTopicTitles.add(each.getTitle()); } return Utilities.joinListToString(lsTopicTitles, getFirstLevelSeparator()); } /** * * @param sTopicID The Cluster ID of interest * @return The Topic Title for the Specified Cluster, with the date * in milliseconds appended after {@link #getSecondLevelSeparator()} */ public String getTopicTitleByID(String sTopicID) { HashMap ClArts = this.ac.getArticlesPerCluster(); if (ClArts == null) { return NO_TOPICS_FOUND; } if (ClArts.containsKey(sTopicID)) { Topic to = (Topic) ClArts.get(sTopicID); String STitleAndDate = to.getTitle() + getSecondLevelSeparator() + to.getDate().getTimeInMillis(); return STitleAndDate; } else { return NO_TOPICS_FOUND; } } /** * * @param sTopicIDs The Separator-delimited String containing all the UUIDs * @return A separator-delimited String containing the Topic for each UUID * @deprecated */ public String getTopicTitlesByIDs(String sTopicIDs) { String sRes; StringBuilder sb = new StringBuilder(); if (sTopicIDs == null || sTopicIDs.equals(NO_IDS_ONSEARCH_FOUND) || sTopicIDs.equals("")) { LOGGER.log(Level.WARNING, "No Topic IDs Passed"); sRes = ""; } else { String[] IDs = sTopicIDs.split(getFirstLevelSeparator()); String tmpTitleAndDate=null; boolean First = true; for (int i = 0; i<IDs.length; i++) { tmpTitleAndDate = getTopicTitleByID(IDs[i]); if (First) { First = false; } else { sb.append(getFirstLevelSeparator()); } sb.append(tmpTitleAndDate); } sRes = sb.toString(); } return sRes; } /** * * @return A Separator delimited String containing all the links * and their associated labels */ public String getLinkLabelsFromFile() { HashMap<String, String> hsLabels = (HashMap<String, String>) this.ids.loadObject("LinkLabels", "generic"); // construct a Sentence-Separator - Separator delimited String (as // in the getSummary() method and return it if (hsLabels != null) { return Utilities.joinMapToString(hsLabels, getFirstLevelSeparator(), getSecondLevelSeparator()); } else { LOGGER.log(Level.SEVERE, "Unable to load Link Labels. Returning Null"); return null; } } /** * * @param sTopicIDs the topic IDs * @return a map containing (ID, topic title) for the specified IDs */ public HashMap<String, String> getTopicsMap(String sTopicIDs) { HashMap<String, String> Titles = new HashMap<String, String>(); String[] IDs = sTopicIDs.split(getFirstLevelSeparator()); for (int i=0; i<IDs.length; i++) { Topic TmpTopic = this.ac.getArticlesPerCluster().get(IDs[i]); Titles.put(IDs[i], TmpTopic.getTitle()); } return Titles; } /** * * @return A map containing (clusterID, TopicTitle) data */ protected HashMap<String, String> getTopicsMap() { HashMap<String, Topic> hsTopics = this.ac.getArticlesPerCluster(); HashMap<String, String> hsTitles = new HashMap<String, String>(); for (Map.Entry each : hsTopics.entrySet()) { Topic top = (Topic) each.getValue(); hsTitles.put((String) each.getKey(), top.getTitle()); } return hsTitles; } /** * * @param alTopics the topics to process * @return the (ID - title - date In milliseconds) data for each topic, separated * by {@link #getFirstLevelSeparator()} for each topic, and by * {@link #getSecondLevelSeparator() } for each value in a topic. * @since 1.0 */ private String getTopicsAsString(ArrayList<Topic> alTopics) { StringBuilder sb = new StringBuilder(); boolean firstOcc = true; for (Topic each : alTopics) { if (firstOcc) { firstOcc = false; } else { sb.append(getFirstLevelSeparator()); } // append ID, Title, Date separated sb.append(each.getID()).append(getSecondLevelSeparator()); sb.append(each.getTitle()).append(getSecondLevelSeparator()); sb.append(each.getDate().getTimeInMillis()); } return sb.toString(); } /** * Just for debugging matters. * @param sUserSources the user sources * @param sCategory the category of interest * @deprecated (from v1.0+) */ public void checkTopicTitles(String sUserSources, String sCategory) { String sIDs = getTopicIDs(sUserSources, sCategory); String[] aIDs = sIDs.split(getFirstLevelSeparator()); String sTopicTitles = getTopicTitles(sUserSources, sCategory); String[] aTitles = sTopicTitles.split(getFirstLevelSeparator()); if (sTopicTitles.equals(NO_TOPICS_FOUND) || sIDs.equals(NO_IDS_FOUND)) { LOGGER.log(Level.INFO, "Please Alter your Sources Preferences"); return; } if (aIDs.length != aTitles.length) { System.err.println("LENGTH DIFF"); System.err.println(aIDs.length + " : " + aTitles.length); } HashMap<String, String> titlesPerCluster = getTopicsMap(); for (int i=0; i<aIDs.length; i++) { String sID = aIDs[i]; String sTit = aTitles[i]; if (!titlesPerCluster.containsKey(sID)) { System.err.println(sID + " not Contained in Full Map"); } else { if (!titlesPerCluster.get(sID).equals(sTit)) { LOGGER.log(Level.SEVERE, "MISMATCH"); System.out.println("Index: " + i); System.out.println("ID in IDArray: " + sID); System.out.println("Title in TitleArray: " + sTit); System.out.println("ID in Map: " + sID); System.out.println("Title in Map: " + titlesPerCluster.get(sID)); } else { continue; } } } } /** * * @return The Map containing the needed static variables for the communicator * to perform */ public static HashMap<String, String> readConfigFile() { HashMap switches = new HashMap<String, String>(); LOGGER.log(Level.INFO, "Looking for settings file: {0}", Config.getAbsolutePath()); if (Config.canRead()) { FileInputStream fstream = null; try { fstream = new FileInputStream(Config); DataInputStream in = new DataInputStream(fstream); BufferedReader br = new BufferedReader(new InputStreamReader(in)); String sLine; while ((sLine = br.readLine()) != null) { switches.put(sLine.split("=")[0].trim(), sLine.split("=")[1].trim()); } in.close(); LOGGER.info("Settings loaded."); return switches; } catch (IOException ex) { LOGGER.log(Level.SEVERE, ex.getMessage(), ex); return null; } finally { try { fstream.close(); } catch (IOException ex) { LOGGER.log(Level.SEVERE, ex.getMessage(), ex); } } } else { LOGGER.log(Level.SEVERE, "Error: Cannot read from file: {0}", Config.toString()); return null; } } /** * Reads the switches configuration file and returns the map containing them * @return The map containing the path Switches */ public static HashMap<String, String> getSwitches() { return (Switches != null) ? Switches : readConfigFile(); } /** * * @param sSourceFeed the source feed of the article * @return the Label of the Source URL link */ private String appendSourceLabel(String sSourceFeed) { HashMap<String, String> hsLabels = (HashMap<String, String>) this.ids.loadObject("SourceLabels", "generic"); Iterator it = hsLabels.entrySet().iterator(); while (it.hasNext()) { Map.Entry mp = (Map.Entry) it.next(); String sFeed = (String) mp.getKey(); if (sFeed.equals(sSourceFeed)) { return (String) mp.getValue(); } } try { return new java.net.URL(sSourceFeed).getHost(); } catch (MalformedURLException ex) { LOGGER.log(Level.INFO, "Could not get Host for {0}", sSourceFeed); LOGGER.log(Level.INFO, ex.getMessage()); return "Source"; } } /** * Gets all different source URL's and relevant labels for the summary * specified * @return A link-separated string containing all the URL's and labels for * the specified summary */ private String getSummarySources(LinkedList<Sentence> lsSummary) { String sRes = ""; boolean firstOcc = true; for (Sentence eachSen : lsSummary) { if (!sRes.contains(eachSen.getLinkToSource())) { if (firstOcc) { firstOcc = false; } else { sRes += getSecondLevelSeparator(); } sRes += eachSen.getLinkToSource(); sRes += getThirdLevelSeparator(); sRes += appendSourceLabel(eachSen.getFeed()); } } sRes += getFirstLevelSeparator(); return sRes; } /** * * @param sUserSources the user sources preferences * @return true if userSources are equal to all sources, therefore * user has not removed any sources. */ private boolean areAllSources(String sUserSources) { HashMap<String, String> hsAll = (HashMap<String, String>) this.ids.loadObject("Links", "generic"); Set<String> hsUserSources = hsAll.keySet(); String[] saUserSources = sUserSources.split(getFirstLevelSeparator()); if (saUserSources.length == hsUserSources.size()) { return true; } return false; } private LinkedList<Sentence> filterScrapSentences(LinkedList<Sentence> lsSen) { for (Iterator<Sentence> iSi = lsSen.iterator(); iSi.hasNext();) { Sentence ob = iSi.next(); // remove unmeaningfull sentences that have [...] at their ending // e.g. "He stated that he does not [...]" if (ob.getSnippet().matches(".*\\[\\.{3}\\]\\Z") // or "He stated that he does not..." (... ... ... ...) || ob.getSnippet().matches(".*\\.{3}\\s*(\\s*\\.{2,})*\\s*\\Z")) { if (lsSen.size() > 1) { iSi.remove(); } } } return lsSen; } }