/* * Licensed under the Apache License, Version 2.0 (the "License"); * * You may not use this file except in compliance with the License. * * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * See the License for the specific language governing permissions and * limitations under the License. * * Contributions from 2013-2017 where performed either by US government * employees, or under US Veterans Health Administration contracts. * * US Veterans Health Administration contributions by government employees * are work of the U.S. Government and are not subject to copyright * protection in the United States. Portions contributed by government * employees are USGovWork (17USC ยง105). Not subject to copyright. * * Contribution by contractors to the US Veterans Health Administration * during this period are contractually contributed under the * Apache License, Version 2.0. * * See: https://www.usa.gov/government-works * * Contributions prior to 2013: * * Copyright (C) International Health Terminology Standards Development Organisation. * Licensed under the Apache License, Version 2.0. * */ package sh.isaac.provider.identifier; //~--- JDK imports ------------------------------------------------------------ import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Optional; import java.util.UUID; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.IntStream; import javax.annotation.PostConstruct; import javax.annotation.PreDestroy; //~--- non-JDK imports -------------------------------------------------------- import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.glassfish.hk2.runlevel.RunLevel; import org.jvnet.hk2.annotations.Service; import sh.isaac.api.ConfigurationService; import sh.isaac.api.Get; import sh.isaac.api.IdentifiedObjectService; import sh.isaac.api.IdentifierService; import sh.isaac.api.LookupService; import sh.isaac.api.SystemStatusService; import sh.isaac.api.chronicle.LatestVersion; import sh.isaac.api.chronicle.ObjectChronology; import sh.isaac.api.chronicle.ObjectChronologyType; import sh.isaac.api.collections.ConceptSequenceSet; import sh.isaac.api.collections.LruCache; import sh.isaac.api.collections.NidSet; import sh.isaac.api.collections.SememeSequenceSet; import sh.isaac.api.collections.UuidIntMapMap; import sh.isaac.api.component.concept.ConceptSpecification; import sh.isaac.api.component.sememe.SememeSnapshotService; import sh.isaac.api.component.sememe.version.StringSememe; import sh.isaac.api.coordinate.StampCoordinate; import sh.isaac.api.identity.StampedVersion; //~--- classes ---------------------------------------------------------------- /** * The Class IdentifierProvider. * * @author kec */ @Service @RunLevel(value = 0) public class IdentifierProvider implements IdentifierService, IdentifiedObjectService { /** The Constant LOG. */ private static final Logger LOG = LogManager.getLogger(); /** * For debugging... */ private static HashSet<UUID> watchSet = new HashSet<>(); // { // watchSet.add(UUID.fromString("0418a591-f75b-39ad-be2c-3ab849326da9")); // watchSet.add(UUID.fromString("4459d8cf-5a6f-3952-9458-6d64324b27b7")); /** The thread local cache. */ // } private static ThreadLocal<LinkedHashMap<UUID, Integer>> THREAD_LOCAL_CACHE = new ThreadLocal() { @Override protected LruCache<UUID, Integer> initialValue() { return new LruCache<>(50); } }; //~--- fields -------------------------------------------------------------- /** The load required. */ private final AtomicBoolean loadRequired = new AtomicBoolean(); /** The database validity. */ private DatabaseValidity databaseValidity = DatabaseValidity.NOT_SET; /** The folder path. */ private final Path folderPath; /** The uuid int map map. */ private final UuidIntMapMap uuidIntMapMap; /** The concept sequence map. */ private final SequenceMap conceptSequenceMap; /** The sememe sequence map. */ private final SequenceMap sememeSequenceMap; //~--- constructors -------------------------------------------------------- /** * Instantiates a new identifier provider. * * @throws IOException Signals that an I/O exception has occurred. */ private IdentifierProvider() throws IOException { // for HK2 LOG.info("IdentifierProvider constructed"); this.folderPath = LookupService.getService(ConfigurationService.class) .getChronicleFolderPath() .resolve("identifier-provider"); if (!Files.exists(this.folderPath)) { this.databaseValidity = DatabaseValidity.MISSING_DIRECTORY; } this.loadRequired.set(!Files.exists(this.folderPath)); Files.createDirectories(this.folderPath); this.uuidIntMapMap = UuidIntMapMap.create(new File(this.folderPath.toAbsolutePath().toFile(), "uuid-nid-map")); this.conceptSequenceMap = new SequenceMap(450000); this.sememeSequenceMap = new SequenceMap(3000000); } //~--- methods ------------------------------------------------------------- /** * Adds the uuid for nid. * * @param uuid the uuid * @param nid the nid */ @Override public void addUuidForNid(UUID uuid, int nid) { this.uuidIntMapMap.put(uuid, nid); } /** * Clear database validity value. */ @Override public void clearDatabaseValidityValue() { // Reset to enforce analysis this.databaseValidity = DatabaseValidity.NOT_SET; } /** * A method to remove refs to sememe or concept sequences that never had data stored. * This should not be necessary in normal operation. This supports patterns where objects are * being deserialized from an ibdf file (causing refs to be stored here) but then not loaded into the DB. */ @Override public void clearUnusedIds() { final AtomicInteger cleaned = new AtomicInteger(); this.conceptSequenceMap.getSequenceStream().parallel().forEach((conceptSequence) -> { if (!Get.conceptService() .hasConcept(conceptSequence)) { final int nid = this.conceptSequenceMap.getNid(conceptSequence) .getAsInt(); this.conceptSequenceMap.removeNid(nid); cleaned.incrementAndGet(); } }); LOG.info("Removed " + cleaned.get() + " unused concept references"); cleaned.set(0); this.sememeSequenceMap.getSequenceStream().parallel().forEach((sememeSequence) -> { if (!Get.sememeService() .hasSememe(sememeSequence)) { final int nid = this.sememeSequenceMap.getNid(sememeSequence) .getAsInt(); this.sememeSequenceMap.removeNid(nid); cleaned.incrementAndGet(); } }); LOG.info("Removed " + cleaned.get() + " unused sememe references"); // We could also clear refs from the uuid map here... but that would take longer / // provide minimal gain } /** * Reset. */ protected static void reset() { THREAD_LOCAL_CACHE = new ThreadLocal() { @Override protected LruCache<UUID, Integer> initialValue() { return new LruCache<>(50); } }; } /** * Start me. */ @PostConstruct private void startMe() { try { LOG.info("Starting IdentifierProvider post-construct - reading from " + this.folderPath); if (!this.loadRequired.get()) { final String conceptSequenceMapBaseName = "concept-sequence.map"; LOG.info("Loading {} from dir {}.", conceptSequenceMapBaseName, this.folderPath.toAbsolutePath() .normalize() .toString()); this.conceptSequenceMap.read(new File(this.folderPath.toFile(), conceptSequenceMapBaseName)); final String sememeSequenceMapBaseName = "sememe-sequence.map"; LOG.info("Loading {} from dir {}.", sememeSequenceMapBaseName, this.folderPath.toAbsolutePath() .normalize() .toString()); this.sememeSequenceMap.read(new File(this.folderPath.toFile(), sememeSequenceMapBaseName)); // uuid-nid-map can do dynamic load, no need to read all at the beginning. // LOG.info("Loading uuid-nid-map."); // uuidIntMapMap.read(); if (isPopulated()) { this.databaseValidity = DatabaseValidity.POPULATED_DIRECTORY; } } } catch (final Exception e) { LookupService.getService(SystemStatusService.class) .notifyServiceConfigurationFailure("Identifier Provider", e); throw new RuntimeException(e); } } /** * Stop me. */ @PreDestroy private void stopMe() { try { this.uuidIntMapMap.setShutdown(true); LOG.info("conceptSequence: {}", this.conceptSequenceMap.getNextSequence()); LOG.info("writing concept-sequence.map."); this.conceptSequenceMap.write(new File(this.folderPath.toFile(), "concept-sequence.map")); LOG.info("writing sememe-sequence.map."); this.sememeSequenceMap.write(new File(this.folderPath.toFile(), "sememe-sequence.map")); LOG.info("writing uuid-nid-map."); this.uuidIntMapMap.write(); } catch (final IOException e) { throw new RuntimeException(e); } } //~--- get methods --------------------------------------------------------- /** * Gets the chronology type for nid. * * @param nid the nid * @return the chronology type for nid */ @Override public ObjectChronologyType getChronologyTypeForNid(int nid) { if (this.sememeSequenceMap.containsNid(nid)) { return ObjectChronologyType.SEMEME; } if (this.conceptSequenceMap.containsNid(nid)) { return ObjectChronologyType.CONCEPT; } return ObjectChronologyType.UNKNOWN_NID; } /** * Gets the concept identifier for authority. * * @param conceptId the concept id * @param identifierAuthorityUuid the identifier authority uuid * @param stampCoordinate the stamp coordinate * @return the concept identifier for authority */ @Override public Optional<LatestVersion<String>> getConceptIdentifierForAuthority(int conceptId, UUID identifierAuthorityUuid, StampCoordinate stampCoordinate) { conceptId = getConceptNid(conceptId); return getIdentifierForAuthority(conceptId, identifierAuthorityUuid, stampCoordinate); } /** * Gets the concept nid. * * @param conceptSequence the concept sequence * @return the concept nid */ @Override public int getConceptNid(int conceptSequence) { if (conceptSequence < 0) { return conceptSequence; } final int conceptNid = this.conceptSequenceMap.getNidFast(conceptSequence); if ((conceptSequence != 0) && (conceptNid == 0)) { LOG.warn("retrieved nid=" + conceptNid + " for sequence=" + conceptSequence); } return conceptNid; } /** * Gets the concept nids for concept sequences. * * @param conceptSequences the concept sequences * @return the concept nids for concept sequences */ @Override public IntStream getConceptNidsForConceptSequences(IntStream conceptSequences) { return conceptSequences.map((sequence) -> { return getConceptNid(sequence); }); } /** * Gets the concept sequence. * * @param nid the nid * @return the concept sequence */ @Override public int getConceptSequence(int nid) { if (nid >= 0) { return nid; } return this.conceptSequenceMap.addNidIfMissing(nid); } /** * Gets the concept sequence for proxy. * * @param conceptProxy the concept proxy * @return the concept sequence for proxy */ @Override public int getConceptSequenceForProxy(ConceptSpecification conceptProxy) { return getConceptSequence(getNidForProxy(conceptProxy)); } /** * Gets the concept sequence for uuids. * * @param uuids the uuids * @return the concept sequence for uuids */ @Override public int getConceptSequenceForUuids(Collection<UUID> uuids) { return getConceptSequenceForUuids(uuids.toArray(new UUID[uuids.size()])); } /** * Gets the concept sequence for uuids. * * @param uuids the uuids * @return the concept sequence for uuids */ @Override public int getConceptSequenceForUuids(UUID... uuids) { return getConceptSequence(getNidForUuids(uuids)); } /** * Gets the concept sequence stream. * * @return the concept sequence stream */ @Override public IntStream getConceptSequenceStream() { return this.conceptSequenceMap.getSequenceStream(); } /** * Gets the concept sequences for concept nids. * * @param conceptNidArray the concept nid array * @return the concept sequences for concept nids */ @Override public ConceptSequenceSet getConceptSequencesForConceptNids(int[] conceptNidArray) { final ConceptSequenceSet sequences = new ConceptSequenceSet(); IntStream.of(conceptNidArray) .forEach((nid) -> sequences.add(this.conceptSequenceMap.getSequenceFast(nid))); return sequences; } /** * Gets the concept sequences for concept nids. * * @param conceptNidSet the concept nid set * @return the concept sequences for concept nids */ @Override public ConceptSequenceSet getConceptSequencesForConceptNids(NidSet conceptNidSet) { final ConceptSequenceSet sequences = new ConceptSequenceSet(); conceptNidSet.stream() .forEach((nid) -> sequences.add(this.conceptSequenceMap.getSequenceFast(nid))); return sequences; } /** * Gets the database folder. * * @return the database folder */ @Override public Path getDatabaseFolder() { return this.folderPath; } /** * Gets the database validity status. * * @return the database validity status */ @Override public DatabaseValidity getDatabaseValidityStatus() { return this.databaseValidity; } /** * Gets the identified object chronology. * * @param nid the nid * @return the identified object chronology */ @Override public Optional<? extends ObjectChronology<? extends StampedVersion>> getIdentifiedObjectChronology(int nid) { switch (getChronologyTypeForNid(nid)) { case CONCEPT: return Get.conceptService() .getOptionalConcept(nid); case SEMEME: return Get.sememeService() .getOptionalSememe(nid); case UNKNOWN_NID: return Optional.empty(); } throw new UnsupportedOperationException("Unknown chronology type: " + getChronologyTypeForNid(nid)); } /** * Gets the identifier for authority. * * @param nid the nid * @param identifierAuthorityUuid the identifier authority uuid * @param stampCoordinate the stamp coordinate * @return the identifier for authority */ @Override public Optional<LatestVersion<String>> getIdentifierForAuthority(int nid, UUID identifierAuthorityUuid, StampCoordinate stampCoordinate) { if (nid >= 0) { throw new IllegalStateException("Not a nid: " + nid); } final int authoritySequence = getConceptSequenceForUuids(identifierAuthorityUuid); final SememeSnapshotService<StringSememe> snapshot = Get.sememeService() .getSnapshot(StringSememe.class, stampCoordinate); return snapshot.getLatestSememeVersionsForComponentFromAssemblage(nid, authoritySequence) .findAny() .map((LatestVersion<StringSememe> latestSememe) -> { final LatestVersion<String> latestString = new LatestVersion<>(latestSememe.value().getString()); if (latestSememe.contradictions() .isPresent()) { for (final StringSememe version: latestSememe.contradictions() .get()) { latestString.addLatest(version.getString()); } } return latestString; }); } /** * Gets the max nid. * * @return the max nid */ @Override public int getMaxNid() { return UuidIntMapMap.getNextNidProvider() .get(); } /** * Gets the nid for proxy. * * @param conceptProxy the concept proxy * @return the nid for proxy */ @Override public int getNidForProxy(ConceptSpecification conceptProxy) { return getNidForUuids(conceptProxy.getUuids()); } /** * Gets the nid for uuids. * * @param uuids the uuids * @return the nid for uuids */ @Override public int getNidForUuids(Collection<UUID> uuids) { return getNidForUuids(uuids.toArray(new UUID[uuids.size()])); } /** * Gets the nid for uuids. * * @param uuids the uuids * @return the nid for uuids */ @Override public int getNidForUuids(UUID... uuids) { final LinkedHashMap<UUID, Integer> cacheMap = THREAD_LOCAL_CACHE.get(); final Integer cacheNid = cacheMap.get(uuids[0]); if (cacheNid != null) { return cacheNid; } for (final UUID uuid: uuids) { // if (watchSet.contains(uuid)) { // System.out.println("Found watch: " + Arrays.asList(uuids)); // watchSet.remove(uuid); // } final int nid = this.uuidIntMapMap.get(uuid); if (nid != Integer.MAX_VALUE) { cacheMap.put(uuids[0], nid); return nid; } } final int nid = this.uuidIntMapMap.getWithGeneration(uuids[0]); cacheMap.put(uuids[0], nid); for (int i = 1; i < uuids.length; i++) { this.uuidIntMapMap.put(uuids[i], nid); } return nid; } /** * Gets the parallel concept sequence stream. * * @return the parallel concept sequence stream */ @Override public IntStream getParallelConceptSequenceStream() { return this.conceptSequenceMap.getSequenceStream() .parallel(); } /** * Gets the parallel sememe sequence stream. * * @return the parallel sememe sequence stream */ @Override public IntStream getParallelSememeSequenceStream() { return this.sememeSequenceMap.getSequenceStream() .parallel(); } /** * Checks if populated. * * @return true, if populated */ /* * Investigate if "uuid-nid-map" directory is populated with at least one *.map file. */ private boolean isPopulated() { final File segmentDirectory = new File(this.folderPath.toAbsolutePath().toFile(), "uuid-nid-map"); final int numberOfSegmentFiles = segmentDirectory.list((segmentDirectory1, name) -> (name.endsWith("map"))).length; return numberOfSegmentFiles > 0; } /** * Gets the sememe nid. * * @param sememeId the sememe id * @return the sememe nid */ @Override public int getSememeNid(int sememeId) { if (sememeId < 0) { return sememeId; } return this.sememeSequenceMap.getNidFast(sememeId); } /** * Gets the sememe nids for sememe sequences. * * @param sememSequences the semem sequences * @return the sememe nids for sememe sequences */ @Override public IntStream getSememeNidsForSememeSequences(IntStream sememSequences) { return sememSequences.map((sequence) -> { return getSememeNid(sequence); }); } /** * Gets the sememe sequence. * * @param sememeId the sememe id * @return the sememe sequence */ @Override public int getSememeSequence(int sememeId) { if (sememeId >= 0) { return sememeId; } return this.sememeSequenceMap.addNidIfMissing(sememeId); } /** * Gets the sememe sequence for uuids. * * @param uuids the uuids * @return the sememe sequence for uuids */ @Override public int getSememeSequenceForUuids(Collection<UUID> uuids) { return getSememeSequence(getNidForUuids(uuids)); } /** * Gets the sememe sequence for uuids. * * @param uuids the uuids * @return the sememe sequence for uuids */ @Override public int getSememeSequenceForUuids(UUID... uuids) { return getSememeSequence(getNidForUuids(uuids)); } /** * Gets the sememe sequence stream. * * @return the sememe sequence stream */ @Override public IntStream getSememeSequenceStream() { return this.sememeSequenceMap.getSequenceStream(); } /** * Gets the sememe sequences for sememe nids. * * @param sememeNidArray the sememe nid array * @return the sememe sequences for sememe nids */ @Override public SememeSequenceSet getSememeSequencesForSememeNids(int[] sememeNidArray) { final SememeSequenceSet sequences = new SememeSequenceSet(); IntStream.of(sememeNidArray) .forEach((nid) -> sequences.add(this.sememeSequenceMap.getSequenceFast(nid))); return sequences; } /** * Checks for uuid. * * @param uuids the uuids * @return true, if successful */ @Override public boolean hasUuid(Collection<UUID> uuids) { if (uuids == null) { throw new IllegalArgumentException("A UUID must be specified."); } final LinkedHashMap<UUID, Integer> cacheMap = THREAD_LOCAL_CACHE.get(); // Check the cache to (hopefully) avoid a potential disk read final boolean cacheHit = uuids.stream() .anyMatch((uuid) -> (cacheMap.get(uuid) != null)); if (cacheHit) { return true; } return uuids.stream() .anyMatch((uuid) -> (this.uuidIntMapMap.containsKey(uuid))); } /** * Checks for uuid. * * @param uuids the uuids * @return true, if successful */ @Override public boolean hasUuid(UUID... uuids) { if (uuids == null) { throw new IllegalArgumentException("A UUID must be specified."); } return Arrays.stream(uuids) .anyMatch((uuid) -> (this.uuidIntMapMap.containsKey(uuid))); } /** * Gets the uuid primordial for nid. * * @param nid the nid * @return the uuid primordial for nid */ @Override public Optional<UUID> getUuidPrimordialForNid(int nid) { if (nid > 0) { throw new RuntimeException("Sequence passed to a function that expects a nid!"); } // If we have a cache in uuidIntMapMap, read from there, it is faster. // If we don't have a cache, then uuidIntMapMap will be extremely slow, so try this first. if (!this.uuidIntMapMap.cacheContainsNid(nid)) { final Optional<? extends ObjectChronology<? extends StampedVersion>> optionalObj = Get.identifiedObjectService() .getIdentifiedObjectChronology(nid); if (optionalObj.isPresent()) { return Optional.of(optionalObj.get() .getPrimordialUuid()); } } final UUID[] uuids = this.uuidIntMapMap.getKeysForValue(nid); // In the use case of directly writing files (converting terminology) this is a normal occurrence LOG.debug("[1] No object for nid: " + nid + " Found uuids: " + Arrays.asList(uuids)); if (uuids.length > 0) { return Optional.of(uuids[0]); } return Optional.empty(); } /** * Gets the uuid primordial from concept id. * * @param conceptId the concept id * @return the uuid primordial from concept id */ @Override public Optional<UUID> getUuidPrimordialFromConceptId(int conceptId) { return getUuidPrimordialForNid(getConceptNid(conceptId)); } /** * Gets the uuid primordial from sememe id. * * @param sememeId the sememe id * @return the uuid primordial from sememe id */ @Override public Optional<UUID> getUuidPrimordialFromSememeId(int sememeId) { return getUuidPrimordialForNid(getSememeNid(sememeId)); } /** * Gets the uuids for nid. * * @param nid the nid * @return A list of uuids corresponding with a nid. */ @Override public List<UUID> getUuidsForNid(int nid) { if (nid > 0) { throw new RuntimeException("Method expected nid!"); } final Optional<? extends ObjectChronology<? extends StampedVersion>> optionalObj = Get.identifiedObjectService() .getIdentifiedObjectChronology( nid); if (optionalObj.isPresent()) { return optionalObj.get() .getUuidList(); } final UUID[] uuids = this.uuidIntMapMap.getKeysForValue(nid); LOG.warn("[3] No object for nid: " + nid + " Found uuids: " + Arrays.asList(uuids)); return Arrays.asList(uuids); } }