package org.apache.solr.handler.batch;
import java.io.File;
import org.adsabs.solr.AdsConfig.F;
import org.apache.solr.request.SolrQueryRequest;
/**
 * Test for {@link BatchProviderDumpAuthorNames}: indexes a handful of author
 * names, runs the provider against the {@code author} field and checks the
 * contents of the file written for the job.
 */
public class TestBatchProviderDumpAuthorNames extends BatchProviderTest {

  /**
   * Indexes documents with various author name shapes (accented surnames,
   * initials, empty value, stray commas), runs the dump provider for job
   * {@code 00000} and verifies the resulting file via {@code checkFile}.
   *
   * @throws Exception if indexing, the provider run or the file check fails
   */
  public void test() throws Exception {

    // index some data
    assertU(adoc(F.ID, "1", F.BIBCODE, "xxxxxxxxxxxxx", F.AUTHOR, "Adamčuk,"));
    assertU(adoc(F.ID, "2", F.BIBCODE, "xxxxxxxxxxxxx", F.AUTHOR, "Adamčuk, M."));
    assertU(adoc(F.ID, "3", F.BIBCODE, "xxxxxxxxxxxxx", F.AUTHOR, "Adamčuk, Marel"));
    assertU(adoc(F.ID, "4", F.BIBCODE, "xxxxxxxxxxxxx", F.AUTHOR, "Adamčuk, Molja"));
    assertU(adoc(F.ID, "5", F.BIBCODE, "xxxxxxxxxxxxx", F.AUTHOR, "Adamčuk, Molja Karel"));
    // intermediate commit: the remaining documents are added after it
    assertU(commit());
    assertU(adoc(F.ID, "7", F.BIBCODE, "xxxxxxxxxxxxx", F.AUTHOR, "Adamčuk, Molja K"));
    assertU(adoc(F.ID, "8", F.BIBCODE, "xxxxxxxxxxxxx", F.AUTHOR, "Adamčuk, M K"));
    assertU(adoc(F.ID, "9", F.BIBCODE, "xxxxxxxxxxxxx", F.AUTHOR, "Adamčuk, Karel Molja"));
    assertU(adoc(F.ID, "10", F.BIBCODE, "xxxxxxxxxxxxx", F.AUTHOR, "Adamčuk, Karel M"));
    assertU(adoc(F.ID, "11", F.BIBCODE, "xxxxxxxxxxxxx", F.AUTHOR, "Adamčuk, K Molja"));
    assertU(adoc(F.ID, "12", F.BIBCODE, "xxxxxxxxxxx12", F.AUTHOR, "ǎguşan, Adrian, , Dr"));
    assertU(adoc(F.ID, "13", F.BIBCODE, "xxxxxxxxxxx13", F.AUTHOR, "")); // no author
    assertU(adoc(F.ID, "14", F.BIBCODE, "xxxxxxxxxxx14", F.AUTHOR, "á"));
    assertU(adoc(F.ID, "15", F.BIBCODE, "xxxxxxxxxxx15", F.AUTHOR, "sárname \\, name ,,,,")); // try to confuse it
    assertU(adoc(F.ID, "16", F.BIBCODE, "xxxxxxxxxxxxx", F.AUTHOR, "Adamčuk"));
    assertU(adoc(F.ID, "17", F.BIBCODE, "xxxxxxxxxxxxx", F.AUTHOR, "Adamčuk "));
    assertU(commit());

    BatchHandlerRequestQueue queue = new BatchHandlerRequestQueue();
    String tmpDir = new File("./temp").getAbsolutePath();
    BatchProviderI provider = new BatchProviderDumpAuthorNames();

    SolrQueryRequest req = req("jobid", "00000", "#workdir", tmpDir, "sourceField", "author", "analyzerField", "author_collector");
    try {
      provider.run(req, queue);
    } finally {
      // close the request even when the provider throws, so a failing run
      // does not leave the request open
      req.close();
    }

    // NOTE(review): entries starting with "!" are presumably lines that must
    // NOT appear in the output - confirm against BatchProviderTest.checkFile
    checkFile(tmpDir + "/00000",
        new String[]{
          "Adamcuk, Molja Karel=>Adamčuk, Molja Karel",
          "Adamcuk, Molja K=>Adamčuk, Molja K",
          //"Adamcuk, M Karel=>Adamčuk, M Karel",
          "Adamcuk, M K=>Adamčuk, M K",
          "Adamcuk, M=>Adamčuk, M",
          "Adamchuk, Molja Karel=>Adamčuk, Molja Karel",
          "Adamchuk, Molja K=>Adamčuk, Molja K",
          //"Adamczuk, M Karel=>Adamčuk, M Karel",
          "Adamchuk, M K=>Adamčuk, M K",
          "Adamchuk, M=>Adamčuk, M",
          "Adamcuk, Karel Molja=>Adamčuk, Karel Molja",
          "Adamcuk, Karel M=>Adamčuk, Karel M",
          "!ahguşan, Adrian, , Dr=>ǎguşan, Adrian, , Dr",
          "!agusan, Adrian, ,=>ǎguşan, Adrian, ,",
          "!ahguşan, A ,=>ǎguşan, A ,",
          "!agusan, A , D=>ǎguşan, A , D",
          "agusan, Adrian Dr=>ǎguşan, Adrian Dr",
          "agusan, A=>ǎguşan, A",
          "ahguşan, Adrian D=>ǎguşan, Adrian D",
          "ahguşan, Adrian Dr=>ǎguşan, Adrian Dr",
          "ahguşan, A D=>ǎguşan, A D",
          "!a=>á,"
        }
    );
  }

  /**
   * Rewrites human-readable synonym lines into their escaped form.
   *
   * <p>Each line is split on {@code =>}; every side is split on {@code ;}
   * into individual names. Each name is trimmed, its spaces and commas are
   * prefixed with a backslash, and the names of one side are joined with
   * {@code ,}. The sides are joined back with {@code =>}.
   *
   * <p>Splitting uses {@link String#split(String)}, so trailing empty
   * segments are dropped.
   *
   * @param strings the lines to convert; must not be {@code null} nor
   *        contain {@code null} elements
   * @return a new array of the same length holding the converted lines in
   *         the same order
   */
  public static String[] formatSynonyms(String[] strings) {
    String[] formatted = new String[strings.length];
    for (int lineNo = 0; lineNo < strings.length; lineNo++) {
      StringBuilder out = new StringBuilder();
      String[] sides = strings[lineNo].split("=>");
      for (int s = 0; s < sides.length; s++) {
        if (s > 0) {
          out.append("=>");
        }
        String[] names = sides[s].split(";");
        for (int n = 0; n < names.length; n++) {
          if (n > 0) {
            out.append(",");
          }
          out.append(escapeName(names[n]));
        }
      }
      formatted[lineNo] = out.toString();
    }
    return formatted;
  }

  /** Trims a single name and backslash-escapes its spaces, then its commas. */
  private static String escapeName(String name) {
    return name.trim().replace(" ", "\\ ").replace(",", "\\,");
  }
}