/** * This file is part of General Entity Annotator Benchmark. * * General Entity Annotator Benchmark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * General Entity Annotator Benchmark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>. */ package org.aksw.gerbil.dataset.check.impl; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Set; import org.aksw.gerbil.dataset.check.EntityChecker; import org.aksw.gerbil.dataset.check.EntityCheckerManager; import org.aksw.gerbil.datatypes.marking.MeaningsContainingMarking; import org.aksw.gerbil.transfer.nif.Marking; import org.aksw.gerbil.transfer.nif.Meaning; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.carrotsearch.hppc.ObjectObjectOpenHashMap; /** * <p> * Standard implementation of the {@link EntityCheckerManager} interface. * Internally it uses a cache for storing the results of the * {@link EntityChecker}. * </p> * TODO The current implementation is not thread safe if * {@link #registerEntityChecker(String, EntityChecker)} is called while another * thread already is inside the {@link #checkMeanings(Collection)} method. 
* * @author Michael Röder (roeder@informatik.uni-leipzig.de) * */
public class EntityCheckerManagerImpl implements EntityCheckerManager {

    private static final Logger LOGGER = LoggerFactory.getLogger(EntityCheckerManagerImpl.class);

    /** Prefix of the replacement URIs generated for URIs that a checker reported as non-existing. */
    private static final String SYNTHETIC_URI_NAME_SPACE = "http://aksw.org/unknown_entity/";

    /** Maps a URI namespace (matched as a prefix of the URI) to the checker responsible for it. */
    private ObjectObjectOpenHashMap<String, EntityChecker> registeredCheckers = new ObjectObjectOpenHashMap<String, EntityChecker>();

    /**
     * Registers the given checker for all URIs starting with the given
     * namespace. A checker that has been registered before for exactly the
     * same namespace is replaced. This method performs no synchronization.
     *
     * @param namespace
     *            the URI prefix the checker is responsible for
     * @param checker
     *            the checker used for URIs starting with the namespace
     */
    @Override
    public void registerEntityChecker(String namespace, EntityChecker checker) {
        registeredCheckers.put(namespace, checker);
    }

    /**
     * Checks the URIs of all given markings that are either a {@link Meaning}
     * or a {@link MeaningsContainingMarking}. Markings of other types are
     * ignored.
     *
     * @param markings
     *            the markings that should be checked; {@code null} is treated
     *            like an empty collection
     */
    @Override
    public void checkMarkings(Collection<? extends Marking> markings) {
        if (markings == null) {
            return;
        }
        for (Marking marking : markings) {
            if (marking instanceof Meaning) {
                checkMeaning((Meaning) marking);
            } else if (marking instanceof MeaningsContainingMarking) {
                checkMeanings(((MeaningsContainingMarking) marking).getMeanings());
            }
        }
    }

    /**
     * Checks the URIs of all given meanings using
     * {@link #checkMeaning(Meaning)}.
     *
     * @param meanings
     *            the meanings that should be checked; {@code null} is treated
     *            like an empty collection
     */
    @Override
    public void checkMeanings(Collection<? extends Meaning> meanings) {
        if (meanings == null) {
            return;
        }
        for (Meaning meaning : meanings) {
            checkMeaning(meaning);
        }
    }

    /**
     * Checks every URI of the given meaning with {@link #checkUri(String)}.
     * Each URI that fails the check is removed from the URI set returned by
     * {@link Meaning#getUris()} and replaced by a synthetic URI created with
     * {@link #generateNewUri(String)}. {@code null} URIs are left untouched.
     *
     * @param meaning
     *            the meaning whose URIs should be checked; a {@code null}
     *            meaning or a {@code null} URI set is ignored
     */
    public void checkMeaning(Meaning meaning) {
        if (meaning == null) {
            return;
        }
        Set<String> uris = meaning.getUris();
        if (uris == null) {
            return;
        }
        // The lists are created lazily since most meanings are expected to
        // pass the check. The set is only changed after the iteration to
        // avoid modifying it while iterating over it.
        List<String> wrongUris = null;
        List<String> newUris = null;
        for (String uri : uris) {
            // If the URI does not exist
            if ((uri != null) && (!checkUri(uri))) {
                if (wrongUris == null) {
                    wrongUris = new ArrayList<String>(3);
                    newUris = new ArrayList<String>(3);
                }
                wrongUris.add(uri);
                newUris.add(generateNewUri(uri));
            }
        }
        if (wrongUris != null) {
            LOGGER.info("Couldn't find an entity with the URIs={}.", wrongUris);
            uris.removeAll(wrongUris);
            uris.addAll(newUris);
        }
    }

    /**
     * Creates a synthetic URI for the given URI by replacing every
     * {@code '.'}, {@code ':'} and {@code '/'} with {@code '_'} and appending
     * the result to the synthetic URI namespace.
     *
     * @param uri
     *            the URI for which a replacement should be generated (must not
     *            be {@code null})
     * @return the generated synthetic URI
     */
    protected String generateNewUri(String uri) {
        StringBuilder newUri = new StringBuilder(SYNTHETIC_URI_NAME_SPACE.length() + uri.length());
        newUri.append(SYNTHETIC_URI_NAME_SPACE);
        for (int i = 0; i < uri.length(); ++i) {
            char c = uri.charAt(i);
            switch (c) {
            case '.': // falls through
            case ':': // falls through
            case '/': {
                newUri.append('_');
                break;
            }
            default: {
                newUri.append(c);
            }
            }
        }
        return newUri.toString();
    }

    /**
     * Checks the given URI with the checker registered for its namespace. If
     * several registered namespaces are a prefix of the URI, the checker of
     * the longest (i.e., most specific) namespace is used, so the result does
     * not depend on the internal order of the map.
     *
     * @param uri
     *            the URI that should be checked
     * @return the result of {@link EntityChecker#entityExists(String)} of the
     *         matching checker, or {@code true} if the URI is {@code null} or
     *         no checker is available for it
     */
    public boolean checkUri(String uri) {
        if (uri == null) {
            // Consistent with checkMeaning, which never reports null URIs as wrong
            return true;
        }
        // Direct iteration over the internal arrays of the map. The arrays are
        // accessed as Object[] since that is their type after erasure.
        final boolean[] allocated = registeredCheckers.allocated;
        final Object[] keys = (Object[]) registeredCheckers.keys;
        final Object[] values = (Object[]) registeredCheckers.values;
        int matchingId = -1;
        int matchingLength = -1;
        String namespace;
        for (int i = 0; i < allocated.length; ++i) {
            if (allocated[i]) {
                namespace = (String) keys[i];
                // Only a strictly longer namespace can replace the current match
                if ((namespace != null) && (namespace.length() > matchingLength) && uri.startsWith(namespace)) {
                    matchingId = i;
                    matchingLength = namespace.length();
                }
            }
        }
        // If there is no checker available for this URI
        if (matchingId < 0) {
            return true;
        }
        EntityChecker checker = (EntityChecker) values[matchingId];
        // Return whether this URI does exist (a missing checker can not reject it)
        return (checker == null) || checker.entityExists(uri);
    }
}