package edu.uncc.cs.watsonsim.search;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import edu.uncc.cs.watsonsim.Environment;
import edu.uncc.cs.watsonsim.Log;
import edu.uncc.cs.watsonsim.Passage;
import edu.uncc.cs.watsonsim.Score;
import edu.uncc.cs.watsonsim.scorers.Merge;
public class Anagrams extends Searcher {
private final Map<String, List<String>> mp = new HashMap<>();
private Log log;
public Anagrams(Environment env) {
super(env);
log = env.log.kid(getClass());
try
{
for (String line : Files.readAllLines(Paths.get("data", "words"))) {
// condition of different anagram questions:
// usually anagram questions are coming for word coming after :
// regular expression for searching if a : is coming in the question
char[] charArray = line.toLowerCase().toCharArray();
Arrays.sort(charArray);
String source = String.valueOf(charArray);
List<String> targets = mp.get(source);
if (targets == null) {
targets = new ArrayList<>();
mp.put(source, targets);
}
targets.add(line);
}
}
catch(IOException e)
{
e.printStackTrace();
}
Score.register("IS_ONLY_ANAGRAM", 0.0, Merge.Min);
}
public static void main(String args[]) throws IOException {
Anagrams ta = new Anagrams(new Environment());
System.out.println("Enter the Jeopardy Anagram Question:");
BufferedReader br2 = new BufferedReader(
new InputStreamReader(System.in));
String question = br2.readLine();
ta.query(question);
}
public static List<String> search_key(String keys,Map<String, List<String>> mp)
{
char[] charArray = keys.toLowerCase().toCharArray();
Arrays.sort(charArray);
// String searchKey = String.valueOf(charArray);
List<String> entries = mp.get(String.valueOf(charArray));
if (entries == null)
{
entries = new ArrayList<>();
}
entries.remove(keys);
return entries;
}
@Override
public List<Passage> query(String query) {
// Some anagrams come in a very clear syntax:
// either in quotes, or after a colon. Find them.
Matcher matcher = Pattern.compile("\"([A-z ]+)\"|: ([A-z ]+)")
.matcher(query);
List<String> entries = new ArrayList<>();
if (matcher.find() && matcher.group(1) != null) {
// Good news. We found a quoted string to generate anagrams from.
entries.addAll(search_key(matcher.group(1), mp));
if (!entries.isEmpty()) {
log.info("Found " + entries.size()
+ " quoted anagrams");
}
} else {
// Bad news. We have to guess all the words.
String[] words = query.split(" ");
if (words.length <= 2) {
// When there are so few words, the whole question is likely
// an anagram. For example, "Nuke Air" -> "Ukariane"
entries.addAll(search_key(query.replace(" ", ""), mp));
} else {
// Otherwise, consider each word separately.
for (String word : words) {
entries.addAll(search_key(word, mp));
}
}
}
entries.removeAll(Arrays.asList("Si","shit","Ni"));
List<Passage> results = new ArrayList<>();
for (String text : entries) {
results.add(new edu.uncc.cs.watsonsim.Passage("lucene", // Engine
text, // Title
text, // Text
"anagram:" + text).score("IS_ONLY_ANAGRAM", 1.0));
}
return results;
}
}