package org.adsabs.solr.analysis;
import static org.junit.Assert.*;
import java.io.*;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import org.junit.Test;
/**
 * Unit tests for the static helpers {@code parseGroups} and {@code transformGroup}
 * of {@link ProcessCuratedAuthorSynonyms}.
 *
 * <p>Fixtures are built from plain collections (no double-brace initialization) so
 * that no anonymous collection subclasses are created, and every assertion uses
 * JUnit's {@code (expected, actual)} argument order so failure messages are labelled
 * correctly.
 */
public class TestProcessCuratedAuthorSynonyms {

  /** Charset used both to encode the fixture text and to decode it in the reader. */
  private static final Charset UTF8 = Charset.forName("UTF-8");

  /**
   * Builds a mutable {@link HashSet} holding the given strings.
   *
   * @param items the elements of the set; duplicates collapse as usual
   * @return a new set containing {@code items}
   */
  private static HashSet<String> setOf(String... items) {
    return new HashSet<String>(Arrays.asList(items));
  }

  /**
   * Feeds two blank-line-separated blocks of names to {@code parseGroups} and expects
   * one list per block, with the names in input order.
   */
  @Test
  public void testParseGroups() {
    String inputString = "Stern, Carolyn\nGrant, Carolyn\n\nMiller, Bill\nMiller, William\n";
    InputStream is = new ByteArrayInputStream(inputString.getBytes(UTF8));
    // Decode with the same charset the bytes were encoded with instead of relying on
    // the platform default. The stream is an in-memory byte array, so closing it
    // would release nothing.
    BufferedReader inputReader = new BufferedReader(new InputStreamReader(is, UTF8));

    ArrayList<List<String>> groups = ProcessCuratedAuthorSynonyms.parseGroups(inputReader);

    List<List<String>> expected = new ArrayList<List<String>>();
    expected.add(Arrays.asList("Stern, Carolyn", "Grant, Carolyn"));
    expected.add(Arrays.asList("Miller, Bill", "Miller, William"));

    assertEquals(expected, groups);
  }

  /**
   * Transforms a two-name ASCII group and expects each name to be mapped to the
   * variants of the other name (full name, full name followed by {@code \b.*},
   * surname plus first initial, and bare surname with trailing comma).
   */
  @Test
  public void testTransformGroup1() {
    // A mutable list is passed, as in the original fixture, in case the callee modifies it.
    List<String> group = new ArrayList<String>(Arrays.asList(
        "STERN, CAROLYN",
        "GRANT, CAROLYN"));

    HashMap<String,HashSet<String>> transformed = ProcessCuratedAuthorSynonyms.transformGroup(group);

    HashMap<String,HashSet<String>> expected = new HashMap<String,HashSet<String>>();
    expected.put("STERN, CAROLYN", setOf(
        "GRANT, C", "GRANT, CAROLYN\\b.*", "GRANT,", "GRANT, CAROLYN"));
    expected.put("GRANT, CAROLYN", setOf(
        "STERN, C", "STERN, CAROLYN", "STERN, CAROLYN\\b.*", "STERN,"));

    assertEquals(expected, transformed);
  }

  /**
   * Transforms a two-name group whose surname contains an umlaut and expects keys for
   * the {@code MÜLLER}, {@code MULLER} and {@code MUELLER} spellings of both names,
   * each mapped to the set of variants listed below.
   */
  @Test
  public void testTransformGroup2() {
    // A mutable list is passed, as in the original fixture, in case the callee modifies it.
    List<String> group = new ArrayList<String>(Arrays.asList(
        "MÜLLER, WILLIAM",
        "MÜLLER, BILL"));

    HashMap<String,HashSet<String>> transformed = ProcessCuratedAuthorSynonyms.transformGroup(group);

    HashMap<String,HashSet<String>> expected = new HashMap<String,HashSet<String>>();
    expected.put("MUELLER, BILL", setOf(
        "MÜLLER,", "MUELLER, WILLIAM\\b.*", "MULLER, BILL",
        "MUELLER, WILLIAM", "MULLER, B", "MULLER, WILLIAM\\b.*",
        "MÜLLER, WILLIAM\\b.*", "MÜLLER, W", "MULLER, BILL\\b.*",
        "MUELLER, W", "MULLER, WILLIAM", "MUELLER,",
        "MÜLLER, BILL\\b.*", "MÜLLER, BILL", "MULLER,",
        "MULLER, W", "MÜLLER, B", "MÜLLER, WILLIAM"));
    expected.put("MULLER, BILL", setOf(
        "MUELLER, BILL", "MÜLLER,", "MUELLER, WILLIAM\\b.*",
        "MUELLER, B", "MÜLLER, W", "MUELLER, WILLIAM",
        "MÜLLER, WILLIAM", "MULLER, WILLIAM\\b.*", "MÜLLER, WILLIAM\\b.*",
        "MUELLER, W", "MULLER, WILLIAM", "MUELLER,",
        "MÜLLER, BILL\\b.*", "MÜLLER, BILL", "MULLER,",
        "MULLER, W", "MÜLLER, B", "MUELLER, BILL\\b.*"));
    expected.put("MUELLER, WILLIAM", setOf(
        "MUELLER, BILL", "MÜLLER,", "MUELLER, B",
        "MULLER, BILL", "MULLER, B", "MULLER, WILLIAM\\b.*",
        "MÜLLER, WILLIAM\\b.*", "MÜLLER, W", "MULLER, BILL\\b.*",
        "MÜLLER, WILLIAM", "MULLER, WILLIAM", "MUELLER,",
        "MÜLLER, BILL\\b.*", "MÜLLER, BILL", "MULLER,",
        "MULLER, W", "MÜLLER, B", "MUELLER, BILL\\b.*"));
    expected.put("MULLER, WILLIAM", setOf(
        "MUELLER, BILL", "MÜLLER,", "MUELLER, B",
        "MULLER, BILL", "MUELLER, WILLIAM", "MULLER, B",
        "MUELLER, WILLIAM\\b.*", "MÜLLER, WILLIAM\\b.*", "MÜLLER, W",
        "MULLER, BILL\\b.*", "MUELLER, W", "MÜLLER, WILLIAM",
        "MUELLER,", "MÜLLER, BILL\\b.*", "MÜLLER, BILL",
        "MULLER,", "MÜLLER, B", "MUELLER, BILL\\b.*"));
    expected.put("MÜLLER, BILL", setOf(
        "MUELLER, BILL", "MÜLLER,", "MUELLER, WILLIAM\\b.*",
        "MUELLER, B", "MULLER, BILL", "MUELLER, WILLIAM",
        "MÜLLER, WILLIAM", "MULLER, B", "MULLER, WILLIAM\\b.*",
        "MÜLLER, WILLIAM\\b.*", "MÜLLER, W", "MULLER, BILL\\b.*",
        "MUELLER, W", "MULLER, WILLIAM", "MUELLER,",
        "MULLER,", "MULLER, W", "MUELLER, BILL\\b.*"));
    expected.put("MÜLLER, WILLIAM", setOf(
        "MUELLER, BILL", "MÜLLER,", "MUELLER, WILLIAM\\b.*",
        "MUELLER, B", "MULLER, BILL", "MUELLER, WILLIAM",
        "MULLER, B", "MULLER, WILLIAM\\b.*", "MULLER, BILL\\b.*",
        "MUELLER, W", "MULLER, WILLIAM", "MUELLER,",
        "MÜLLER, BILL\\b.*", "MÜLLER, BILL", "MULLER,",
        "MULLER, W", "MÜLLER, B", "MUELLER, BILL\\b.*"));

    assertEquals(expected, transformed);
  }
}