/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 *
 * You may not use this file except in compliance with the License.
 *
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributions from 2013-2017 were performed either by US government
 * employees, or under US Veterans Health Administration contracts.
 *
 * US Veterans Health Administration contributions by government employees
 * are work of the U.S. Government and are not subject to copyright
 * protection in the United States. Portions contributed by government
 * employees are USGovWork (17USC §105). Not subject to copyright.
 *
 * Contribution by contractors to the US Veterans Health Administration
 * during this period are contractually contributed under the
 * Apache License, Version 2.0.
 *
 * See: https://www.usa.gov/government-works
 *
 * Contributions prior to 2013:
 *
 * Copyright (C) International Health Terminology Standards Development Organisation.
 * Licensed under the Apache License, Version 2.0.
* */ package sh.isaac.mojo; //~--- JDK imports ------------------------------------------------------------ import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.UUID; import java.util.concurrent.BlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; //~--- non-JDK imports -------------------------------------------------------- import org.apache.maven.plugin.AbstractMojo; import org.apache.maven.plugin.MojoExecutionException; import org.apache.maven.plugins.annotations.LifecyclePhase; import org.apache.maven.plugins.annotations.Mojo; import org.apache.maven.plugins.annotations.Parameter; import com.cedarsoftware.util.io.JsonWriter; import sh.isaac.api.ConceptProxy; import sh.isaac.api.DataTarget; import sh.isaac.api.Get; import sh.isaac.api.State; import sh.isaac.api.bootstrap.TermAux; import sh.isaac.api.chronicle.ObjectChronology; import sh.isaac.api.chronicle.ObjectChronologyType; import sh.isaac.api.collections.SememeSequenceSet; import sh.isaac.api.component.concept.ConceptChronology; import sh.isaac.api.component.sememe.SememeChronology; import sh.isaac.api.component.sememe.SememeType; import sh.isaac.api.component.sememe.version.LogicGraphSememe; import sh.isaac.api.component.sememe.version.MutableLogicGraphSememe; import sh.isaac.api.externalizable.BinaryDataReaderQueueService; import sh.isaac.api.externalizable.OchreExternalizable; import sh.isaac.api.externalizable.StampAlias; import sh.isaac.api.externalizable.StampComment; import sh.isaac.api.identity.StampedVersion; import sh.isaac.api.logic.IsomorphicResults; import sh.isaac.api.logic.LogicalExpression; //~--- classes ---------------------------------------------------------------- /* * Copyright 2001-2005 
The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * Goal which loads a database from eConcept files. */ @Mojo( name = "load-termstore", defaultPhase = LifecyclePhase.PROCESS_RESOURCES ) public class LoadTermstore extends AbstractMojo { /** The active only. */ @Parameter(required = false) private boolean activeOnly = false; /** The sememe types to skip. */ private final HashSet<SememeType> sememeTypesToSkip = new HashSet<>(); /** The skipped items. */ private final HashSet<Integer> skippedItems = new HashSet<>(); /** The skipped any. */ private boolean skippedAny = false; /** * The preferred mechanism for specifying ibdf files - provide a folder that contains IBDF files, all found IBDF files in this * folder will be processed. */ @Parameter(required = false) private File ibdfFileFolder; /** * The optional (old) way to specify ibdf files - requires each file to be listed one by one. */ @Parameter(required = false) private File[] ibdfFiles; /** The item failure. */ private int conceptCount, sememeCount, stampAliasCount, stampCommentCount, itemCount, itemFailure; //~--- methods ------------------------------------------------------------- /** * Execute. 
* * @throws MojoExecutionException the mojo execution exception */ @SuppressWarnings({ "rawtypes", "unchecked" }) @Override public void execute() throws MojoExecutionException { Get.configurationService() .setDBBuildMode(); final int statedSequence = Get.identifierService() .getConceptSequenceForUuids( TermAux.EL_PLUS_PLUS_STATED_ASSEMBLAGE.getPrimordialUuid()); final long loadTime = System.currentTimeMillis(); // Load IsaacMetadataAuxiliary first, otherwise, we have issues.... final AtomicBoolean hasMetadata = new AtomicBoolean(false); Set<File> mergedFiles; try { mergedFiles = new HashSet<>(); if (this.ibdfFiles != null) { for (final File f: this.ibdfFiles) { mergedFiles.add(f.getCanonicalFile()); } } if (this.ibdfFileFolder != null) { if (!this.ibdfFileFolder.isDirectory()) { throw new MojoExecutionException("If ibdfFileFolder is provided, it must point to a folder"); } for (final File f: this.ibdfFileFolder.listFiles()) { if (!f.isFile()) { getLog().info("The file " + f.getAbsolutePath() + " is not a file - ignoring."); } else if (!f.getName() .toLowerCase() .endsWith(".ibdf")) { getLog().info("The file " + f.getAbsolutePath() + " does not match the expected type of ibdf - ignoring."); } else { mergedFiles.add(f); } } } } catch (final IOException e1) { throw new MojoExecutionException("Problem reading ibdf files", e1); } final File[] temp = mergedFiles.toArray(new File[mergedFiles.size()]); Arrays.sort(temp, (o1, o2) -> { if (o1.getName() .equals("IsaacMetadataAuxiliary.ibdf")) { hasMetadata.set(true); return -1; } else if (o2.getName() .equals("IsaacMetadataAuxiliary.ibdf")) { hasMetadata.set(true); return 1; } else { return ((o1.length() - o2.length()) > 0 ? 1 : ((o1.length() - o2.length()) < 0 ? -1 : 0)); } }); if (!hasMetadata.get()) { getLog().warn("No Metadata IBDF file found! 
This probably isn't good...."); } if (temp.length == 0) { throw new MojoExecutionException("Failed to find any ibdf files to load"); } getLog().info("Identified " + temp.length + " ibdf files"); final Set<Integer> deferredActionNids = new HashSet<>(); try { for (final File f: temp) { getLog().info("Loading termstore from " + f.getCanonicalPath() + (this.activeOnly ? " active items only" : "")); final BinaryDataReaderQueueService reader = Get.binaryDataQueueReader(f.toPath()); final BlockingQueue<OchreExternalizable> queue = reader.getQueue(); while (!queue.isEmpty() ||!reader.isFinished()) { final OchreExternalizable object = queue.poll(500, TimeUnit.MILLISECONDS); if (object != null) { this.itemCount++; try { if (null != object.getOchreObjectType()) { switch (object.getOchreObjectType()) { case CONCEPT: if (!this.activeOnly || isActive((ObjectChronology) object)) { Get.conceptService() .writeConcept(((ConceptChronology) object)); this.conceptCount++; } else { this.skippedItems.add(((ObjectChronology) object).getNid()); } break; case SEMEME: SememeChronology sc = (SememeChronology) object; if (sc.getAssemblageSequence() == statedSequence) { final SememeSequenceSet sequences = Get.sememeService() .getSememeSequencesForComponentFromAssemblage( sc.getReferencedComponentNid(), statedSequence); if (!sequences.isEmpty()) { final List<LogicalExpression> listToMerge = new ArrayList<>(); listToMerge.add(getLatestLogicalExpression(sc)); getLog().info("\nDuplicate: " + sc); sequences.stream() .forEach( (sememeSequence) -> listToMerge.add( getLatestLogicalExpression(Get.sememeService() .getSememe(sememeSequence)))); getLog().info("Duplicates: " + listToMerge); if (listToMerge.size() > 2) { throw new UnsupportedOperationException("Can't merge list of size: " + listToMerge.size() + "\n" + listToMerge); } final IsomorphicResults isomorphicResults = listToMerge.get(0) .findIsomorphisms( listToMerge.get(1)); getLog().info("Isomorphic results: " + isomorphicResults); final 
SememeChronology existingChronology = Get.sememeService() .getSememe(sequences.findFirst() .getAsInt()); final ConceptProxy moduleProxy = new ConceptProxy("SOLOR overlay module", "9ecc154c-e490-5cf8-805d-d2865d62aef3"); final ConceptProxy pathProxy = new ConceptProxy("development path", "1f200ca6-960e-11e5-8994-feff819cdc9f"); final ConceptProxy userProxy = new ConceptProxy("user", "f7495b58-6630-3499-a44e-2052b5fcf06c"); final int stampSequence = Get.stampService() .getStampSequence(State.ACTIVE, loadTime, userProxy.getConceptSequence(), moduleProxy.getConceptSequence(), pathProxy.getConceptSequence()); final MutableLogicGraphSememe newVersion = (MutableLogicGraphSememe) existingChronology.createMutableVersion( MutableLogicGraphSememe.class, stampSequence); newVersion.setGraphData(isomorphicResults.getMergedExpression() .getData(DataTarget.INTERNAL)); // TODO mess - this isn't merging properly - how should we merge!? // for (UUID uuid : sc.getUuidList()) // { // Get.identifierService().addUuidForNid(uuid, newVersion.getNid()); // } sc = existingChronology; } } if (!this.sememeTypesToSkip.contains(sc.getSememeType()) && (!this.activeOnly || (isActive(sc) &&!this.skippedItems.contains(sc.getReferencedComponentNid())))) { Get.sememeService() .writeSememe(sc); if (sc.getSememeType() == SememeType.LOGIC_GRAPH) { deferredActionNids.add(sc.getNid()); } this.sememeCount++; } else { this.skippedItems.add(sc.getNid()); } break; case STAMP_ALIAS: Get.commitService() .addAlias(((StampAlias) object).getStampSequence(), ((StampAlias) object).getStampAlias(), null); this.stampAliasCount++; break; case STAMP_COMMENT: Get.commitService() .setComment(((StampComment) object).getStampSequence(), ((StampComment) object).getComment()); this.stampCommentCount++; break; default: throw new UnsupportedOperationException("Unknown ochre object type: " + object); } } } catch (final Exception e) { this.itemFailure++; getLog().error("Failure at " + this.conceptCount + " concepts, " + 
this.sememeCount + " sememes, " + this.stampAliasCount + " stampAlias, " + this.stampCommentCount + " stampComments", e); final Map<String, Object> args = new HashMap<>(); args.put(JsonWriter.PRETTY_PRINT, true); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final JsonWriter json = new JsonWriter(baos, args); UUID primordial = null; if (object instanceof ObjectChronology) { primordial = ((ObjectChronology) object).getPrimordialUuid(); } json.write(object); getLog().error("Failed on " + ((primordial == null) ? ": " : "object with primoridial UUID " + primordial.toString() + ": ") + baos.toString()); json.close(); } if (this.itemCount % 50000 == 0) { getLog().info("Read " + this.itemCount + " entries, " + "Loaded " + this.conceptCount + " concepts, " + this.sememeCount + " sememes, " + this.stampAliasCount + " stampAlias, " + this.stampCommentCount + " stampComment"); } } } ; if (this.skippedItems.size() > 0) { this.skippedAny = true; } getLog().info("Loaded " + this.conceptCount + " concepts, " + this.sememeCount + " sememes, " + this.stampAliasCount + " stampAlias, " + this.stampCommentCount + " stampComment" + ((this.skippedItems.size() > 0) ? ", skipped for inactive " + this.skippedItems.size() : "") + ((this.itemFailure > 0) ? 
" Failures " + this.itemFailure : "") + " from file " + f.getName()); this.conceptCount = 0; this.sememeCount = 0; this.stampAliasCount = 0; this.stampCommentCount = 0; this.skippedItems.clear(); } getLog().info("Completing processing on " + deferredActionNids.size() + " defered items"); for (final int nid: deferredActionNids) { if (ObjectChronologyType.SEMEME.equals(Get.identifierService() .getChronologyTypeForNid(nid))) { final SememeChronology sc = Get.sememeService() .getSememe(nid); if (sc.getSememeType() == SememeType.LOGIC_GRAPH) { Get.taxonomyService() .updateTaxonomy(sc); } else { throw new UnsupportedOperationException("Unexpected nid in deferred set: " + nid); } } else { throw new UnsupportedOperationException("Unexpected nid in deferred set: " + nid); } } if (this.skippedAny) { // Loading with activeOnly set to true causes a number of gaps in the concept / sememe providers Get.identifierService() .clearUnusedIds(); } } catch (final Exception ex) { getLog().info("Loaded " + this.conceptCount + " concepts, " + this.sememeCount + " sememes, " + this.stampAliasCount + " stampAlias, " + this.stampCommentCount + " stampComments" + ((this.skippedItems.size() > 0) ? ", skipped for inactive " + this.skippedItems.size() : "")); throw new MojoExecutionException(ex.getLocalizedMessage(), ex); } } /** * Sets the ibdf files. * * @param files the new ibdf files */ public void setibdfFiles(File[] files) { this.ibdfFiles = files; } /** * Skip sememe types. * * @param types the types */ public void skipSememeTypes(Collection<SememeType> types) { this.sememeTypesToSkip.addAll(types); } //~--- get methods --------------------------------------------------------- /** * Checks if active. 
* * @param object the object * @return true, if active */ private boolean isActive(ObjectChronology<?> object) { if (object.getVersionList() .size() != 1) { throw new RuntimeException("Didn't expect version list of size " + object.getVersionList()); } else { return ((StampedVersion) object.getVersionList() .get(0)).getState() == State.ACTIVE; } } //~--- set methods --------------------------------------------------------- /** * Sets the active only. * * @param activeOnly the new active only */ public void setActiveOnly(boolean activeOnly) { this.activeOnly = activeOnly; } //~--- get methods --------------------------------------------------------- /** * Gets the latest logical expression. * * @param sc the sc * @return the latest logical expression */ private static LogicalExpression getLatestLogicalExpression(SememeChronology sc) { final SememeChronology<? extends LogicGraphSememe> lgsc = sc; LogicGraphSememe latestVersion = null; for (final LogicGraphSememe version: lgsc.getVersionList()) { if (latestVersion == null) { latestVersion = version; } else if (latestVersion.getTime() < version.getTime()) { latestVersion = version; } } return (latestVersion != null) ? latestVersion.getLogicalExpression() : null; } }