/**
* This file is part of General Entity Annotator Benchmark.
*
* General Entity Annotator Benchmark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* General Entity Annotator Benchmark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>.
*/
package org.aksw.gerbil.utils;
import java.util.HashSet;
import java.util.Set;
public class WikipediaHelper {
private static final String WIKIPEPIA_URI_PROTOCOL_PART = "http://";
private static final String WIKIPEPIA_URI_WIKI_PART = "/wiki/";
/**
* Extracts the Wikipedia article title from a given Wikpedia URI. Note that
* this method expects that the title is preceded by
* {@link #WIKIPEPIA_URI_WIKI_PART} = {@value #WIKIPEPIA_URI_WIKI_PART}.
* Note that if there is a query or an ancher inside the URI they won't be
* removed from the title.
*
* @param uri
* the Wikipedia URI
* @return the Wikipedia article title or null if it couldn't be found.
*/
public static String getWikipediaTitle(String uri) {
if (uri == null) {
return null;
}
int startPos = uri.indexOf(WIKIPEPIA_URI_WIKI_PART);
if (startPos < 0) {
return null;
} else {
startPos += WIKIPEPIA_URI_WIKI_PART.length();
return uri.substring(startPos).replace('_', ' ');
}
}
/**
* Generates the URI for a Wikipedia entity with the given domain and the
* given title.
*
* @param domain
* the domain of the Wikipedia
* @param title
* the title of the Wikipedia article
* @return the URI of the Wikipedia entity or null if one of the two
* parameters is null.
*/
public static String getWikipediaUri(String domain, String title) {
if ((domain == null) || (title == null)) {
return null;
}
StringBuilder uriBuilder = new StringBuilder();
uriBuilder.append(WIKIPEPIA_URI_PROTOCOL_PART);
uriBuilder.append(domain);
uriBuilder.append(WIKIPEPIA_URI_WIKI_PART);
uriBuilder.append(title.replace(' ', '_'));
return uriBuilder.toString();
}
/**
* Transforms the given Wikipedia title into a DBpedia URI. Not that this
* method always translates to the English DBpedia.
*
* @param title
* @return a DBpedia URI or null if the input title is empty or null
*/
public static String getDBPediaUri(String title) {
if ((title != null) && (!title.isEmpty())) {
return "http://dbpedia.org/resource/" + title.replace(' ', '_');
} else {
return null;
}
}
/**
* Transforms the given Wikipedia title into a DBpedia URI.
*
* @param title
* @return a Set containing a DBpedia URI or null if the input title is
* empty or null
*/
public static Set<String> generateUriSet(String title) {
Set<String> uris = new HashSet<String>();
String uri = getDBPediaUri(title);
if (uri != null) {
uris.add(uri);
}
return uris;
}
}