package eu.dnetlib.iis.wf.affmatching.match; import static eu.dnetlib.iis.wf.affmatching.match.voter.AffOrgMatchVotersFactory.createNameCountryStrictMatchVoter; import static eu.dnetlib.iis.wf.affmatching.match.voter.AffOrgMatchVotersFactory.createNameStrictCountryLooseMatchVoter; import static eu.dnetlib.iis.wf.affmatching.match.voter.AffOrgMatchVotersFactory.createSectionedNameLevenshteinCountryLooseMatchVoter; import static eu.dnetlib.iis.wf.affmatching.match.voter.AffOrgMatchVotersFactory.createSectionedNameStrictCountryLooseMatchVoter; import java.util.List; import java.util.function.Function; import com.google.common.collect.ImmutableList; import eu.dnetlib.iis.wf.affmatching.bucket.AffOrgHashBucketJoiner; import eu.dnetlib.iis.wf.affmatching.bucket.AffiliationOrgNameBucketHasher; import eu.dnetlib.iis.wf.affmatching.bucket.MainSectionBucketHasher; import eu.dnetlib.iis.wf.affmatching.bucket.MainSectionBucketHasher.FallbackSectionPickStrategy; import eu.dnetlib.iis.wf.affmatching.bucket.OrganizationNameBucketHasher; import eu.dnetlib.iis.wf.affmatching.match.voter.AffOrgMatchVoter; import eu.dnetlib.iis.wf.affmatching.match.voter.GetOrgAlternativeNamesFunction; import eu.dnetlib.iis.wf.affmatching.match.voter.GetOrgNameFunction; import eu.dnetlib.iis.wf.affmatching.match.voter.GetOrgShortNameFunction; import eu.dnetlib.iis.wf.affmatching.model.AffMatchAffiliation; import eu.dnetlib.iis.wf.affmatching.model.AffMatchOrganization; /** * A factory of {@link AffOrgMatcher}s that join organizations and affiliations into buckets based on hashes produced from * the main sections of organization names. * * @author Ɓukasz Dumiszewski */ public final class MainSectionHashBucketMatcherFactory { // ---------------------- CONSTRUCTORS ---------------------- private MainSectionHashBucketMatcherFactory() {} //------------------------ LOGIC -------------------------- /** * Returns {@link AffOrgMatcher} that uses hashing of affiliations and organizations to create buckets.<br/> * Hashes are computed based on main section of {@link AffMatchAffiliation#getOrganizationName()} * and {@link AffMatchOrganization#getName()}. * * @see MainSectionBucketHasher#hash(String) */ public static AffOrgMatcher createNameMainSectionHashBucketMatcher() { // joiner AffOrgHashBucketJoiner mainSectionHashBucketJoiner = createMainSectionAffOrgHashJoiner(new GetOrgNameFunction()); // computer AffOrgMatchComputer mainSectionHashMatchComputer = new AffOrgMatchComputer(); mainSectionHashMatchComputer.setAffOrgMatchVoters(createNameMainSectionHashBucketMatcherVoters()); // matcher return new AffOrgMatcher(mainSectionHashBucketJoiner, mainSectionHashMatchComputer); } /** * Creates {@link AffOrgMatchVoter}s for {@link #createNameMainSectionHashBucketMatcher()} */ public static ImmutableList<AffOrgMatchVoter> createNameMainSectionHashBucketMatcherVoters() { return ImmutableList.of( createNameCountryStrictMatchVoter(0.981f, new GetOrgNameFunction()), createNameStrictCountryLooseMatchVoter(0.966f, new GetOrgNameFunction()), createSectionedNameStrictCountryLooseMatchVoter(0.988f, new GetOrgNameFunction()), createSectionedNameLevenshteinCountryLooseMatchVoter(0.983f, new GetOrgNameFunction()), createSectionedNameStrictCountryLooseMatchVoter(0.937f, new GetOrgShortNameFunction()) ); } /** * Returns {@link AffOrgMatcher} that uses hashing of affiliations and organizations to create buckets.<br/> * Hashes are computed based on the main section {@link AffMatchAffiliation#getAlternativeNames()} * and {@link AffMatchOrganization#getName()}. * * @see MainSectionBucketHasher#hash(String) */ public static AffOrgMatcher createAlternativeNameMainSectionHashBucketMatcher() { // joiner AffOrgHashBucketJoiner mainSectionHashBucketJoiner = createMainSectionAffOrgHashJoiner(new GetOrgAlternativeNamesFunction()); // computer AffOrgMatchComputer mainSectionHashMatchComputer = new AffOrgMatchComputer(); mainSectionHashMatchComputer.setAffOrgMatchVoters(createAlternativeNameMainSectionHashBucketMatcherVoters()); // matcher return new AffOrgMatcher(mainSectionHashBucketJoiner, mainSectionHashMatchComputer); } /** * Creates {@link AffOrgMatchVoter}s for {@link #createAlternativeNameMainSectionHashBucketMatcher()} */ public static ImmutableList<AffOrgMatchVoter> createAlternativeNameMainSectionHashBucketMatcherVoters() { return ImmutableList.of( createNameCountryStrictMatchVoter(1f, new GetOrgAlternativeNamesFunction()), createNameStrictCountryLooseMatchVoter(1f, new GetOrgAlternativeNamesFunction()), createSectionedNameStrictCountryLooseMatchVoter(1f, new GetOrgAlternativeNamesFunction()), createSectionedNameLevenshteinCountryLooseMatchVoter(1f, new GetOrgAlternativeNamesFunction()) ); } /** * Returns {@link AffOrgMatcher} that uses hashing of affiliations and organizations to create buckets.<br/> * Hashes are computed based on main section of {@link AffMatchAffiliation#getOrganizationName()} * and {@link AffMatchOrganization#getShortName()}. * * @see MainSectionBucketHasher#hash(String) */ public static AffOrgMatcher createShortNameMainSectionHashBucketMatcher() { // joiner AffOrgHashBucketJoiner mainSectionHashBucketJoiner = createMainSectionAffOrgHashJoiner(new GetOrgShortNameFunction()); // computer AffOrgMatchComputer mainSectionHashMatchComputer = new AffOrgMatchComputer(); mainSectionHashMatchComputer.setAffOrgMatchVoters(createShortNameMainSectionHashBucketMatcherVoters()); // matcher return new AffOrgMatcher(mainSectionHashBucketJoiner, mainSectionHashMatchComputer); } /** * Creates {@link AffOrgMatchVoter}s for {@link #createShortNameMainSectionHashBucketMatcher()} */ public static ImmutableList<AffOrgMatchVoter> createShortNameMainSectionHashBucketMatcherVoters() { return ImmutableList.of( createNameCountryStrictMatchVoter(1f, new GetOrgShortNameFunction()), createNameStrictCountryLooseMatchVoter(0.962f, new GetOrgShortNameFunction()) ); } //------------------------ PRIVATE -------------------------- private static AffOrgHashBucketJoiner createMainSectionAffOrgHashJoiner(Function<AffMatchOrganization, List<String>> getOrgNamesFunction) { // affiliation hasher AffiliationOrgNameBucketHasher mainSectionAffBucketHasher = new AffiliationOrgNameBucketHasher(); MainSectionBucketHasher mainSectionStringAffBucketHasher = new MainSectionBucketHasher(); mainSectionStringAffBucketHasher.setFallbackSectionPickStrategy(FallbackSectionPickStrategy.LAST_SECTION); mainSectionAffBucketHasher.setStringHasher(mainSectionStringAffBucketHasher); // organization hasher OrganizationNameBucketHasher mainSectionOrgBucketHasher = new OrganizationNameBucketHasher(); mainSectionOrgBucketHasher.setGetOrgNamesFunction(getOrgNamesFunction); MainSectionBucketHasher mainSectionStringOrgBucketHasher = new MainSectionBucketHasher(); mainSectionStringOrgBucketHasher.setFallbackSectionPickStrategy(FallbackSectionPickStrategy.FIRST_SECTION); mainSectionOrgBucketHasher.setStringHasher(mainSectionStringOrgBucketHasher); // joiner AffOrgHashBucketJoiner mainSectionHashBucketJoiner = new AffOrgHashBucketJoiner(); mainSectionHashBucketJoiner.setAffiliationBucketHasher(mainSectionAffBucketHasher); mainSectionHashBucketJoiner.setOrganizationBucketHasher(mainSectionOrgBucketHasher); return mainSectionHashBucketJoiner; } }