/** * This file is part of General Entity Annotator Benchmark. * * General Entity Annotator Benchmark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * General Entity Annotator Benchmark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>. */ package org.aksw.gerbil.semantic.sameas.impl.wiki; import java.util.HashSet; import java.util.Set; import org.aksw.gerbil.semantic.sameas.SingleUriSameAsRetriever; import org.aksw.gerbil.semantic.sameas.impl.DomainBasedSameAsRetrieverManager; import org.aksw.gerbil.semantic.sameas.impl.SimpleDomainExtractor; public class WikiDbPediaBridgingSameAsRetriever implements SingleUriSameAsRetriever { private static final String URI_PROTOCOL_PART = "http://"; private static final String BG_DBPEDIA_DOMAIN = "bg.dbpedia.org"; private static final String CA_DBPEDIA_DOMAIN = "ca.dbpedia.org"; private static final String CS_DBPEDIA_DOMAIN = "cs.dbpedia.org"; private static final String DE_DBPEDIA_DOMAIN = "de.dbpedia.org"; private static final String EN_DBPEDIA_DOMAIN = "dbpedia.org"; private static final String ES_DBPEDIA_DOMAIN = "es.dbpedia.org"; private static final String EU_DBPEDIA_DOMAIN = "eu.dbpedia.org"; private static final String FR_DBPEDIA_DOMAIN = "fr.dbpedia.org"; private static final String HU_DBPEDIA_DOMAIN = "hu.dbpedia.org"; private static final String ID_DBPEDIA_DOMAIN = "id.dbpedia.org"; private static final String IT_DBPEDIA_DOMAIN = "it.dbpedia.org"; private static final String JA_DBPEDIA_DOMAIN = "ja.dbpedia.org"; private static final String KO_DBPEDIA_DOMAIN = "ko.dbpedia.org"; private static final String NL_DBPEDIA_DOMAIN = "nl.dbpedia.org"; private static final String PL_DBPEDIA_DOMAIN = "pl.dbpedia.org"; private static final String PT_DBPEDIA_DOMAIN = "pt.dbpedia.org"; private static final String RU_DBPEDIA_DOMAIN = "ru.dbpedia.org"; private static final String TR_DBPEDIA_DOMAIN = "tr.dbpedia.org"; private static final String DBPEDIA_PATH = "/resource/"; private static final String BG_WIKIPEDIA_DOMAIN = "bg.wikipedia.org"; private static final String CA_WIKIPEDIA_DOMAIN = "ca.wikipedia.org"; private static final String CS_WIKIPEDIA_DOMAIN = "cs.wikipedia.org"; private static final String DE_WIKIPEDIA_DOMAIN = "de.wikipedia.org"; private static final String EN_WIKIPEDIA_DOMAIN = "en.wikipedia.org"; private static final String ES_WIKIPEDIA_DOMAIN = "es.wikipedia.org"; private static final String EU_WIKIPEDIA_DOMAIN = "eu.wikipedia.org"; private static final String FR_WIKIPEDIA_DOMAIN = "fr.wikipedia.org"; private static final String HU_WIKIPEDIA_DOMAIN = "hu.wikipedia.org"; private static final String ID_WIKIPEDIA_DOMAIN = "id.wikipedia.org"; private static final String IT_WIKIPEDIA_DOMAIN = "it.wikipedia.org"; private static final String JA_WIKIPEDIA_DOMAIN = "ja.wikipedia.org"; private static final String KO_WIKIPEDIA_DOMAIN = "ko.wikipedia.org"; private static final String NL_WIKIPEDIA_DOMAIN = "nl.wikipedia.org"; private static final String PL_WIKIPEDIA_DOMAIN = "pl.wikipedia.org"; private static final String PT_WIKIPEDIA_DOMAIN = "pt.wikipedia.org"; private static final String RU_WIKIPEDIA_DOMAIN = "ru.wikipedia.org"; private static final String TR_WIKIPEDIA_DOMAIN = "tr.wikipedia.org"; private static final String WIKIPDIA_PATH = "/wiki/"; @Override public Set<String> retrieveSameURIs(String uri) { return retrieveSameURIs(SimpleDomainExtractor.extractDomain(uri), uri); } @Override public Set<String> retrieveSameURIs(String domain, String uri) { if ((domain == null) || (uri == null)) { return null; } switch (domain) { /* * DBpedia domains */ case BG_DBPEDIA_DOMAIN: return replaceDomain(BG_DBPEDIA_DOMAIN, BG_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case CA_DBPEDIA_DOMAIN: return replaceDomain(CA_DBPEDIA_DOMAIN, CA_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case CS_DBPEDIA_DOMAIN: return replaceDomain(CS_DBPEDIA_DOMAIN, CS_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case DE_DBPEDIA_DOMAIN: return replaceDomain(DE_DBPEDIA_DOMAIN, DE_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case EN_DBPEDIA_DOMAIN: return replaceDomain(EN_DBPEDIA_DOMAIN, EN_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case ES_DBPEDIA_DOMAIN: return replaceDomain(ES_DBPEDIA_DOMAIN, ES_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case EU_DBPEDIA_DOMAIN: return replaceDomain(EU_DBPEDIA_DOMAIN, EU_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case FR_DBPEDIA_DOMAIN: return replaceDomain(FR_DBPEDIA_DOMAIN, FR_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case HU_DBPEDIA_DOMAIN: return replaceDomain(HU_DBPEDIA_DOMAIN, HU_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case ID_DBPEDIA_DOMAIN: return replaceDomain(ID_DBPEDIA_DOMAIN, ID_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case IT_DBPEDIA_DOMAIN: return replaceDomain(IT_DBPEDIA_DOMAIN, IT_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case JA_DBPEDIA_DOMAIN: return replaceDomain(JA_DBPEDIA_DOMAIN, JA_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case KO_DBPEDIA_DOMAIN: return replaceDomain(KO_DBPEDIA_DOMAIN, KO_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case NL_DBPEDIA_DOMAIN: return replaceDomain(NL_DBPEDIA_DOMAIN, NL_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case PL_DBPEDIA_DOMAIN: return replaceDomain(PL_DBPEDIA_DOMAIN, PL_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case PT_DBPEDIA_DOMAIN: return replaceDomain(PT_DBPEDIA_DOMAIN, PT_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case RU_DBPEDIA_DOMAIN: return replaceDomain(RU_DBPEDIA_DOMAIN, RU_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); case TR_DBPEDIA_DOMAIN: return replaceDomain(TR_DBPEDIA_DOMAIN, TR_WIKIPEDIA_DOMAIN, DBPEDIA_PATH, WIKIPDIA_PATH, uri); /* * Wikipedia domains */ case BG_WIKIPEDIA_DOMAIN: return replaceDomain(BG_WIKIPEDIA_DOMAIN, BG_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case CA_WIKIPEDIA_DOMAIN: return replaceDomain(CA_WIKIPEDIA_DOMAIN, CA_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case CS_WIKIPEDIA_DOMAIN: return replaceDomain(CS_WIKIPEDIA_DOMAIN, CS_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case DE_WIKIPEDIA_DOMAIN: return replaceDomain(DE_WIKIPEDIA_DOMAIN, DE_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case EN_WIKIPEDIA_DOMAIN: return replaceDomain(EN_WIKIPEDIA_DOMAIN, EN_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case ES_WIKIPEDIA_DOMAIN: return replaceDomain(ES_WIKIPEDIA_DOMAIN, ES_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case EU_WIKIPEDIA_DOMAIN: return replaceDomain(EU_WIKIPEDIA_DOMAIN, EU_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case FR_WIKIPEDIA_DOMAIN: return replaceDomain(FR_WIKIPEDIA_DOMAIN, FR_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case HU_WIKIPEDIA_DOMAIN: return replaceDomain(HU_WIKIPEDIA_DOMAIN, HU_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case ID_WIKIPEDIA_DOMAIN: return replaceDomain(ID_WIKIPEDIA_DOMAIN, ID_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case IT_WIKIPEDIA_DOMAIN: return replaceDomain(IT_WIKIPEDIA_DOMAIN, IT_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case JA_WIKIPEDIA_DOMAIN: return replaceDomain(JA_WIKIPEDIA_DOMAIN, JA_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case KO_WIKIPEDIA_DOMAIN: return replaceDomain(KO_WIKIPEDIA_DOMAIN, KO_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case NL_WIKIPEDIA_DOMAIN: return replaceDomain(NL_WIKIPEDIA_DOMAIN, NL_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case PL_WIKIPEDIA_DOMAIN: return replaceDomain(PL_WIKIPEDIA_DOMAIN, PL_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case PT_WIKIPEDIA_DOMAIN: return replaceDomain(PT_WIKIPEDIA_DOMAIN, PT_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case RU_WIKIPEDIA_DOMAIN: return replaceDomain(RU_WIKIPEDIA_DOMAIN, RU_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); case TR_WIKIPEDIA_DOMAIN: return replaceDomain(TR_WIKIPEDIA_DOMAIN, TR_DBPEDIA_DOMAIN, WIKIPDIA_PATH, DBPEDIA_PATH, uri); default: { return null; } } } private Set<String> replaceDomain(String oldDomain, String newDomain, String oldPath, String newPath, String uri) { int pos = uri.indexOf(oldDomain); if (pos < 0) { return null; } pos += oldDomain.length(); // check that the expected path is there if (!uri.substring(pos).startsWith(oldPath)) { return null; } pos += oldPath.length(); StringBuilder builder = new StringBuilder(); builder.append(URI_PROTOCOL_PART); builder.append(newDomain); builder.append(newPath); builder.append(uri.substring(pos)); Set<String> result = new HashSet<String>(); result.add(builder.toString()); return result; } public void addToManager(DomainBasedSameAsRetrieverManager manager) { manager.addDomainSpecificRetriever(BG_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(CA_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(CS_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(DE_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(EN_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(ES_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(EU_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(FR_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(HU_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(ID_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(IT_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(JA_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(KO_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(NL_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(PL_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(PT_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(RU_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(TR_DBPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(BG_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(CA_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(CS_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(DE_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(EN_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(ES_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(EU_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(FR_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(HU_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(ID_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(IT_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(JA_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(KO_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(NL_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(PL_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(PT_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(RU_WIKIPEDIA_DOMAIN, this); manager.addDomainSpecificRetriever(TR_WIKIPEDIA_DOMAIN, this); } }