package org.gbif.nub.lookup.fuzzy;
import org.gbif.api.model.checklistbank.NameUsageMatch;
import org.gbif.api.service.checklistbank.NameParser;
import org.gbif.api.util.VocabularyUtils;
import org.gbif.api.vocabulary.Rank;
import org.gbif.api.vocabulary.TaxonomicStatus;
import org.gbif.nameparser.GBIFNameParser;
import org.gbif.utils.file.csv.CSVReader;
import org.gbif.utils.file.csv.CSVReaderFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.io.Resources;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
public class NubIndexTest {
private static NubIndex index;
@BeforeClass
public static void buildMatcher() throws IOException {
HigherTaxaComparator syn = new HigherTaxaComparator();
syn.loadClasspathDicts("dicts");
index = NubIndex.newMemoryIndex(readTestNames());
}
public static List<NameUsageMatch> readTestNames() throws IOException {
List<NameUsageMatch> usages = Lists.newArrayList();
NameParser parser = new GBIFNameParser();
try(InputStream testFile = Resources.getResource("testNames.txt").openStream()) {
CSVReader reader = CSVReaderFactory.build(testFile, "UTF8", "\t", null, 0);
for (String[] row : reader) {
NameUsageMatch n = new NameUsageMatch();
n.setUsageKey(Integer.valueOf(row[0]));
n.setScientificName(row[1]);
n.setCanonicalName(parser.parseToCanonical(n.getScientificName(), null));
n.setFamily(row[2]);
n.setOrder(row[3]);
n.setClazz(row[4]);
n.setPhylum(row[5]);
n.setKingdom(row[6]);
boolean isSynonym = Boolean.parseBoolean(row[7]);
n.setStatus(isSynonym ? TaxonomicStatus.SYNONYM : TaxonomicStatus.ACCEPTED);
n.setRank(VocabularyUtils.lookupEnum(row[8], Rank.class));
usages.add(n);
}
Preconditions.checkArgument(usages.size() == 10, "Wrong number of test names");
}
return usages;
}
@Test
public void testMatchByName() throws Exception {
final Integer abiesAlbaKey = 7;
NameUsageMatch m = index.matchByUsageId(abiesAlbaKey);
assertEquals(abiesAlbaKey, m.getUsageKey());
assertEquals("Abies alba Mill.", m.getScientificName());
assertEquals(Rank.SPECIES, m.getRank());
assertFalse(m.isSynonym());
m = index.matchByName("Abies alba", true, 2).get(0);
assertEquals(abiesAlbaKey, m.getUsageKey());
m = index.matchByName("abies alba", true, 2).get(0);
assertEquals(abiesAlbaKey, m.getUsageKey());
m = index.matchByName("Abbies alba", true, 2).get(0);
assertEquals(abiesAlbaKey, m.getUsageKey());
m = index.matchByName("abyes alba", true, 2).get(0);
assertEquals(abiesAlbaKey, m.getUsageKey());
m = index.matchByName(" apies alba", true, 2).get(0);
assertEquals(abiesAlbaKey, m.getUsageKey());
// sciname soundalike filter enables this
m = index.matchByName("Abies alllbbbbaaa", true, 2).get(0);
assertEquals(abiesAlbaKey, m.getUsageKey());
m = index.matchByName("Aebies allba", true, 2).get(0);
assertEquals(abiesAlbaKey, m.getUsageKey());
// fuzzy searches use a minPrefix=1
assertTrue(index.matchByName("Obies alba", true, 2).isEmpty());
assertTrue(index.matchByName("Abies elba", false, 2).isEmpty());
// synonym matching
m = index.matchByName("Picea abies", false, 2).get(0);
assertTrue(m.isSynonym());
}
}