package org.gbif.nub.lookup; import org.gbif.api.model.checklistbank.NameUsageMatch; import org.gbif.api.service.checklistbank.NameParser; import org.gbif.api.service.checklistbank.NameUsageMatchingService; import org.gbif.nameparser.GBIFNameParser; import org.gbif.nub.lookup.fuzzy.HigherTaxaComparator; import org.gbif.nub.lookup.fuzzy.NubIndex; import org.gbif.nub.lookup.fuzzy.NubMatchingServiceImpl; import org.gbif.nub.lookup.straight.IdLookup; import org.gbif.nub.lookup.straight.IdLookupImpl; import org.gbif.nub.lookup.straight.LookupUsage; import org.gbif.nub.lookup.straight.LookupUsageMatch; import org.gbif.utils.file.InputStreamUtils; import java.io.IOException; import java.io.InputStream; import java.util.List; import java.util.Map; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.inject.PrivateModule; import com.google.inject.Provides; import com.google.inject.Singleton; import org.codehaus.jackson.map.DeserializationConfig; import org.codehaus.jackson.map.ObjectMapper; import org.junit.Assert; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Guice module setting up all dependencies to expose the NubMatching service. */ public class NubMatchingTestModule extends PrivateModule { private static final Logger LOG = LoggerFactory.getLogger(NubMatchingTestModule.class); @Override protected void configure() { bind(NameUsageMatchingService.class).to(NubMatchingServiceImpl.class).asEagerSingleton(); expose(NameUsageMatchingService.class); } @Provides @Singleton public static NubIndex provideIndex() throws IOException { return NubIndex.newMemoryIndex(loadIndexJson()); } @Provides @Singleton public static IdLookup provideLookup() throws IOException { return IdLookupImpl.temp().load(loadLookupJson()); } @Provides @Singleton public static HigherTaxaComparator provideSynonyms() throws IOException { LOG.info("Loading synonym dictionaries from classpath ..."); HigherTaxaComparator syn = new HigherTaxaComparator(); syn.loadClasspathDicts("dicts"); return syn; } @Provides @Singleton public NameParser provideParser() { NameParser parser = new GBIFNameParser(); return parser; } /** * Load all nubXX.json files from the index resources into a distinct list of NameUsage instances. * The individual nubXX.json files are regular results of a NameUsageMatch and can be added to the folder * to be picked up here. */ private static List<NameUsageMatch> loadIndexJson() { Map<Integer, NameUsageMatch> usages = Maps.newHashMap(); InputStreamUtils isu = new InputStreamUtils(); ObjectMapper mapper = new ObjectMapper(); mapper.disable(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES); int id = 0; while (id < 275) { String file = "index/nub"+id+".json"; InputStream json = isu.classpathStream(file); if (json != null) { try { int before = usages.size(); NameUsageMatch m = mapper.readValue(json, NameUsageMatch.class); for (NameUsageMatch u : extractUsages(m)) { if (u != null) { usages.put(u.getUsageKey(), u); } } System.out.println("Loaded " + (usages.size() - before) + " new usage(s) from " + file); } catch (IOException e) { Assert.fail("Failed to read " + file + ": " + e.getMessage()); } } id++; } return Lists.newArrayList(usages.values()); } /** * Load all uXX.json files from the lookup resources into a distinct list of LookupUsage instances. * The individual uXX.json files are regular results of a LookupUsageMatch and can be added to the folder * to be picked up here. */ private static List<LookupUsage> loadLookupJson() { Map<Integer, LookupUsage> usages = Maps.newHashMap(); InputStreamUtils isu = new InputStreamUtils(); ObjectMapper mapper = new ObjectMapper(); mapper.disable(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES); int id = 1; while (id < 100) { String file = String.format("lookup/u%03d.json", id); InputStream json = isu.classpathStream(file); if (json != null) { try { int before = usages.size(); LookupUsageMatch m = mapper.readValue(json, LookupUsageMatch.class); for (LookupUsage u : extractUsages(m)) { if (u != null) { usages.put(u.getKey(), u); } } System.out.println("Loaded " + (usages.size() - before) + " new usage(s) from " + file); } catch (IOException e) { Assert.fail("Failed to read " + file + ": " + e.getMessage()); } } id++; } return Lists.newArrayList(usages.values()); } private static List<NameUsageMatch> extractUsages(NameUsageMatch m) { List<NameUsageMatch> usages = Lists.newArrayList(); usages.add(m); if (m.getAlternatives() != null) { usages.addAll(m.getAlternatives()); } return usages; } private static List<LookupUsage> extractUsages(LookupUsageMatch m) { List<LookupUsage> usages = Lists.newArrayList(); usages.add(m.getMatch()); if (m.getAlternatives() != null) { usages.addAll(m.getAlternatives()); } return usages; } }