package eu.dnetlib.iis.wf.affmatching.bucket; import java.util.List; import eu.dnetlib.iis.wf.affmatching.orgsection.OrganizationSection; import eu.dnetlib.iis.wf.affmatching.orgsection.OrganizationSectionsSplitter; import eu.dnetlib.iis.wf.affmatching.orgsection.OrganizationSection.OrgSectionType; /** * An implementation of {@link BucketHasher} that hashes organization name string. * Hash is generated from 'main' (most significant) section of organization name. * * @author madryk */ public class MainSectionBucketHasher implements BucketHasher<String> { private static final long serialVersionUID = 1L; private OrganizationSectionsSplitter sectionsSplitter = new OrganizationSectionsSplitter(); private OrganizationSectionHasher sectionHasher = new OrganizationSectionHasher(); private OrgSectionType mainSectionType = OrgSectionType.UNIVERSITY; private FallbackSectionPickStrategy fallbackSectionPickStrategy = FallbackSectionPickStrategy.FIRST_SECTION; public enum FallbackSectionPickStrategy { FIRST_SECTION, LAST_SECTION } //------------------------ LOGIC -------------------------- /** * Returns a hash of the passed organization name string.<br/> * Hash is generated from main (most significant) section of organization name * using {@link OrganizationSectionHasher}.<br/> * <br/> * Section is considered to be most significant when it is the first section * with type {@link #setMainSectionType(OrgSectionType)} * ({@link OrgSectionType#UNIVERSITY} by default).<br/> * <br/> * If no section meets this criteria then a fallback section will be taken. * Fallback section is taken based on {@link FallbackSectionPickStrategy}. * It is the first section if fallback strategy is {@link FallbackSectionPickStrategy#FIRST_SECTION} * or the last section if fallback strategy is {@link FallbackSectionPickStrategy#LAST_SECTION}. * First section is the default behavior. */ public String hash(String organizationName) { List<OrganizationSection> sections = sectionsSplitter.splitToSectionsDetailed(organizationName); if (sections.isEmpty()) { return ""; } OrganizationSection mainSection = pickMainSection(sections); return sectionHasher.hash((mainSection != null) ? mainSection : pickFallbackSection(sections)); } //------------------------ PRIVATE -------------------------- private OrganizationSection pickMainSection(List<OrganizationSection> sections) { return sections.stream() .filter(x -> x.getType() == mainSectionType) .findFirst().orElse(null); } private OrganizationSection pickFallbackSection(List<OrganizationSection> sections) { switch (fallbackSectionPickStrategy) { case FIRST_SECTION: return sections.get(0); case LAST_SECTION: return sections.get(sections.size() - 1); default: throw new UnsupportedOperationException(fallbackSectionPickStrategy.name() + " is unsupported FallbackSectionPickStrategy"); } } //------------------------ SETTERS -------------------------- public void setSectionsSplitter(OrganizationSectionsSplitter sectionsSplitter) { this.sectionsSplitter = sectionsSplitter; } public void setSectionHasher(OrganizationSectionHasher sectionHasher) { this.sectionHasher = sectionHasher; } public void setMainSectionType(OrgSectionType mainSectionType) { this.mainSectionType = mainSectionType; } public void setFallbackSectionPickStrategy(FallbackSectionPickStrategy fallbackSectionPickStrategy) { this.fallbackSectionPickStrategy = fallbackSectionPickStrategy; } }