/** * This file is part of General Entity Annotator Benchmark. * * General Entity Annotator Benchmark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * General Entity Annotator Benchmark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>. */ package org.aksw.gerbil.semantic.sameas.impl; /** * Very simple approach to find the domain inside a given URI. * * @author Michael Röder (roeder@informatik.uni-leipzig.de) * */ public class SimpleDomainExtractor { private static final String DOMAIN_PREFIX = "://"; private static final int DOMAIN_PREFIX_LENGTH = DOMAIN_PREFIX.length(); public static String extractDomain(String uri) { if (uri == null) { return null; } // get the start position of the domain int startPos = uri.indexOf(DOMAIN_PREFIX); if (startPos < 0) { startPos = 0; } else { startPos += DOMAIN_PREFIX_LENGTH; } // find the end position of the String char chars[] = uri.toCharArray(); for (int i = startPos; i < chars.length; ++i) { switch (chars[i]) { // if this is a character that is not part of the domain, anymore case '/': case ':': { return uri.substring(startPos, i); } default: { // nothing to do } } } // we couldn't find the end, but maybe we have found a start return uri.substring(startPos); } }