/*
* Licensed under the Apache License, Version 2.0 (the "License");
*
* You may not use this file except in compliance with the License.
*
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Contributions from 2013-2017 were performed either by US government
* employees, or under US Veterans Health Administration contracts.
*
* US Veterans Health Administration contributions by government employees
* are work of the U.S. Government and are not subject to copyright
* protection in the United States. Portions contributed by government
* employees are USGovWork (17USC §105). Not subject to copyright.
*
* Contribution by contractors to the US Veterans Health Administration
* during this period are contractually contributed under the
* Apache License, Version 2.0.
*
* See: https://www.usa.gov/government-works
*
* Contributions prior to 2013:
*
* Copyright (C) International Health Terminology Standards Development Organisation.
* Licensed under the Apache License, Version 2.0.
*
*/
package sh.isaac.convert.rf2.mojo;
//~--- JDK imports ------------------------------------------------------------
import java.io.File;
import java.io.FileInputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.TreeMap;
import java.util.UUID;
import java.util.stream.Stream;
//~--- non-JDK imports --------------------------------------------------------
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.plugins.annotations.LifecyclePhase;
import org.apache.maven.plugins.annotations.Mojo;
import org.apache.maven.plugins.annotations.Parameter;
import sh.isaac.MetaData;
import sh.isaac.api.Get;
import sh.isaac.api.LanguageCode;
import sh.isaac.api.MavenConceptProxy;
import sh.isaac.api.State;
import sh.isaac.api.bootstrap.TermAux;
import sh.isaac.api.component.concept.ConceptChronology;
import sh.isaac.api.component.concept.ConceptSpecification;
import sh.isaac.api.component.concept.ConceptVersion;
import sh.isaac.api.component.sememe.SememeChronology;
import sh.isaac.api.component.sememe.version.DescriptionSememe;
import sh.isaac.api.component.sememe.version.DynamicSememe;
import sh.isaac.api.logic.LogicalExpression;
import sh.isaac.api.logic.LogicalExpressionBuilder;
import sh.isaac.api.logic.assertions.Assertion;
import sh.isaac.api.util.UuidT3Generator;
import sh.isaac.converters.sharedUtils.ComponentReference;
import sh.isaac.converters.sharedUtils.ConsoleUtil;
import sh.isaac.converters.sharedUtils.ConverterBaseMojo;
import sh.isaac.converters.sharedUtils.IBDFCreationUtility;
import sh.isaac.converters.sharedUtils.IBDFCreationUtility.DescriptionType;
import sh.isaac.converters.sharedUtils.sql.ColumnDefinition;
import sh.isaac.converters.sharedUtils.sql.DataType;
import sh.isaac.converters.sharedUtils.sql.H2DatabaseHandle;
import sh.isaac.converters.sharedUtils.sql.TableDefinition;
import sh.isaac.converters.sharedUtils.stats.ConverterUUID;
import sh.isaac.utility.Frills;
import sh.isaac.utility.LanguageMap;
import static sh.isaac.api.logic.LogicalExpressionBuilder.And;
import static sh.isaac.api.logic.LogicalExpressionBuilder.ConceptAssertion;
import static sh.isaac.api.logic.LogicalExpressionBuilder.NecessarySet;
import static sh.isaac.api.logic.LogicalExpressionBuilder.SomeRole;
import static sh.isaac.api.logic.LogicalExpressionBuilder.SufficientSet;
//~--- classes ----------------------------------------------------------------
/**
* Loader code to convert RF2 format files into the ISAAC format.
*/
@Mojo(
name = "convert-RF2-to-ibdf",
defaultPhase = LifecyclePhase.PROCESS_SOURCES
)
public class RF2Mojo
extends ConverterBaseMojo {
/**
* Parser for the {@code yyyyMMdd} strings used for the release date and the EFFECTIVETIME columns.
* Note that {@link SimpleDateFormat} is not thread-safe; this shared instance must only be used from one thread at a time.
*/
protected static final SimpleDateFormat DATE_PARSER = new SimpleDateFormat("yyyyMMdd");
//~--- fields --------------------------------------------------------------
/** Passed to the {@link IBDFCreationUtility} constructor in {@link #execute()}. */
private final boolean outputJson = false; // Set to true to produce a json dump file
/** Names of the description and text definition tables found by {@code loadDatabase}. */
private final ArrayList<String> DESCRIPTIONS = new ArrayList<>();
/** Names of the language refset tables found by {@code loadDatabase}. */
private final ArrayList<String> LANGUAGES = new ArrayList<>();
/** Table definitions keyed by table name (the data file name without ".txt", hyphens replaced by underscores). */
private final HashMap<String, TableDefinition> tables_ = new HashMap<>();
/**
* The concept definition status cache. The map is from a concept UUID to a treemap that has the time and the status value.
* It is filled by {@code transformConcepts} and read by {@code transformRelationships}.
* TODO the time is not sufficient to define time points. You need time, path, and module. Could shrink the size of this if necessary by using con sequence ids....
*/
private final HashMap<UUID, TreeMap<Long, UUID>> conceptDefinitionStatusCache = new HashMap<>();
/**
* The concepts with no stated relationships. This cache is to work around a data problem where stated rels are missing from SCT.
* Every concept is added by {@code transformConcepts}; entries are removed by {@code transformRelationships} once a graph is written.
*/
private final HashSet<UUID> consWithNoStatedRel = new HashSet<>();
/** Relationship types that {@code transformRelationships} emits as a bare role instead of wrapping them in a role group. */
private final HashSet<UUID> neverRoleGroupSet = new HashSet<>();
/** The input type, resolved from {@code converterOutputArtifactClassifier} in {@link #execute()}; data files are filtered by its name. */
private InputType inputType = null;
/** Module concept handed to the {@link IBDFCreationUtility}; defaults to {@code MetaData.SNOMED_CT_CORE_MODULES}. */
@Parameter(required = false)
private ConceptSpecification moduleUUID = MetaData.SNOMED_CT_CORE_MODULES;
/** Handle to the H2 database that the RF2 files are loaded into; created in {@code loadDatabase}. */
private H2DatabaseHandle db;
/** The name of content released and the version of the release. */
@Parameter(required = true)
String contentNameVersion;
/** The release date string, parsed with {@link #DATE_PARSER} (yyyyMMdd). */
@Parameter(required = true)
String rf2ReleaseDate;
/** Names of the concept, inferred relationship and stated relationship tables, assigned by {@code loadDatabase}. */
private String CONCEPT, RELATIONSHIP, STATED_RELATIONSHIP;
/** Maven parameter for an IBDF folder; it is not read anywhere in this class. */
@Parameter(
required = false,
defaultValue = "${project.build.directory}/generated-resources/ibdf"
)
private File ibdfFolder;
//~--- initializers --------------------------------------------------------
// Instance initializer: seeds the set of relationship types that are never placed in a role group.
{
this.neverRoleGroupSet.add(TermAux.PART_OF.getPrimordialUuid());
this.neverRoleGroupSet.add(TermAux.LATERALITY.getPrimordialUuid());
this.neverRoleGroupSet.add(TermAux.HAS_ACTIVE_INGREDIENT.getPrimordialUuid());
this.neverRoleGroupSet.add(TermAux.HAS_DOSE_FORM.getPrimordialUuid());
}
//~--- methods -------------------------------------------------------------
/**
 * Runs the conversion: loads the RF2 data files into the H2 database, creates the RF2 metadata
 * concept, converts concepts, descriptions and relationship graphs (stated first, then inferred),
 * and finally writes the UUID debug file, the load statistics and the console log.
 *
 * <p>The H2 database is shut down even when the conversion fails, so a failed run does not leave
 * the database open.
 *
 * @throws MojoExecutionException if any step of the conversion fails; the original failure is the cause
 */
@Override
public void execute()
        throws MojoExecutionException {
    // Set just before the regular shutdown call so the finally block never shuts down twice.
    boolean dbShutdownAttempted = false;

    try {
        super.execute();
        this.inputType = InputType.get(this.converterOutputArtifactClassifier);

        final long defaultTime = RF2Mojo.DATE_PARSER.parse(this.rf2ReleaseDate)
                                                    .getTime();

        super.importUtil = new IBDFCreationUtility(Optional.empty(),
                                                   Optional.of(this.moduleUUID),
                                                   this.outputDirectory,
                                                   this.converterOutputArtifactId,
                                                   this.converterOutputArtifactVersion,
                                                   this.converterOutputArtifactClassifier,
                                                   this.outputJson,
                                                   defaultTime);
        loadDatabase(this.inputFileLocation);

        final ComponentReference rf2Metadata =
            ComponentReference.fromConcept(super.importUtil.createConcept("RF2 Metadata " + this.contentNameVersion,
                                                                          true));

        super.importUtil.addParent(rf2Metadata, MetaData.SOLOR_CONTENT_METADATA.getPrimordialUuid());
        super.importUtil.loadTerminologyMetadataAttributes(rf2Metadata,
                                                           this.converterSourceArtifactVersion,
                                                           Optional.ofNullable(this.rf2ReleaseDate),
                                                           this.converterOutputArtifactVersion,
                                                           Optional.of(this.converterOutputArtifactClassifier),
                                                           this.converterVersion);

        // process content
        transformConcepts();
        transformDescriptions();

        // stated first, so we can know what doesn't get stated graphs
        transformRelationships(true);
        ConsoleUtil.println("Noticed " + this.consWithNoStatedRel.size() + " concepts with no stated relationships");
        transformRelationships(false);

        // can clear this cache now
        ConsoleUtil.println("After copying inferred rels, still " + this.consWithNoStatedRel.size() +
                            " concepts with no stated relationships");
        this.consWithNoStatedRel.clear();
        ConsoleUtil.println("Dumping UUID Debug File");
        ConverterUUID.dump(this.outputDirectory, this.converterOutputArtifactClassifier + "-RF2UUID");
        ConsoleUtil.println("Load Statistics");

        for (final String s: super.importUtil.getLoadStats()
                                             .getSummary()) {
            ConsoleUtil.println(s);
        }

        // shutdown
        super.importUtil.shutdown();
        dbShutdownAttempted = true;
        this.db.shutdown();
        ConsoleUtil.println("Finished converting " + this.contentNameVersion + "-" +
                            this.converterOutputArtifactClassifier);
        ConsoleUtil.writeOutputToFile(new File(this.outputDirectory,
                                               this.converterOutputArtifactClassifier + "-ConsoleOutput.txt").toPath());
    } catch (final Exception e) {
        throw new MojoExecutionException("Failure during conversion", e);
    } finally {
        // Failure path: release the database if loadDatabase got far enough to create the handle.
        if (!dbShutdownAttempted && (this.db != null)) {
            try {
                this.db.shutdown();
            } catch (final Exception shutdownFailure) {
                // Report only; the conversion failure being thrown is the one the caller needs to see.
                ConsoleUtil.printErrorln("Failed to shut down the database after a conversion failure: " +
                                         shutdownFailure);
            }
        }
    }
}
/**
 * Developer entry point that runs the converter outside of Maven against hard-coded paths.
 *
 * <p>{@link #execute()} parses {@code rf2ReleaseDate} and uses {@code contentNameVersion}; Maven
 * normally injects both, so here they must be supplied on the command line.
 *
 * @param args {@code args[0]} is the content name and version, {@code args[1]} is the release date as yyyyMMdd
 * @throws MojoExecutionException if the arguments are missing or the conversion fails
 */
public static void main(String[] args)
        throws MojoExecutionException {
    if ((args == null) || (args.length < 2)) {
        throw new MojoExecutionException("Usage: RF2Mojo <contentNameVersion> <rf2ReleaseDate as yyyyMMdd>");
    }

    final RF2Mojo mojo = new RF2Mojo();

    mojo.outputDirectory                   = new File("../rf2-ibdf/sct/target");
    mojo.inputFileLocation                 = new File("../rf2-ibdf/sct/target/generated-resources/src/");
    mojo.converterVersion                  = "foo";
    mojo.converterOutputArtifactVersion    = "bar";
    mojo.converterOutputArtifactClassifier = "Full";
    mojo.converterSourceArtifactVersion    = "bar";

    // Without these two values execute() fails while parsing the release date.
    mojo.contentNameVersion = args[0];
    mojo.rf2ReleaseDate     = args[1];
    mojo.execute();
}
/**
 * Builds the table definition for one RF2 data file from its header row.
 *
 * <p>Column types are chosen by column name: {@code id} and {@code *Id} columns become LONG when the
 * sample value parses as a long and a 36 character STRING (UUID) otherwise, {@code active} becomes
 * BOOLEAN, the effective time columns become 8 character STRINGs, and everything else is an
 * unbounded STRING.
 *
 * @param tableName the table name
 * @param header the column names, in file order
 * @param sampleDataRow the first data row, used to sniff identifier columns; may be {@code null} or
 *        shorter than the header when the file has no usable data, in which case identifier columns
 *        are treated as strings
 * @return the table definition
 */
private TableDefinition createTableDefinition(String tableName, String[] header, String[] sampleDataRow) {
    final TableDefinition td = new TableDefinition(tableName);

    for (int i = 0; i < header.length; i++) {
        final String   column = header[i];
        final DataType dataType;

        if (column.equals("id") || column.endsWith("Id")) {
            // See if this looks like a UUID or a long. No sample data means it doesn't matter: use a string.
            final String sample = ((sampleDataRow != null) && (i < sampleDataRow.length)) ? sampleDataRow[i]
                                                                                          : null;

            if (parsesAsLong(sample)) {
                dataType = new DataType(DataType.SUPPORTED_DATA_TYPE.LONG, null, false);
            } else {
                // UUID
                dataType = new DataType(DataType.SUPPORTED_DATA_TYPE.STRING, 36, false);
            }
        } else if (column.equals("active")) {
            dataType = new DataType(DataType.SUPPORTED_DATA_TYPE.BOOLEAN, null, false);
        } else if (column.equals("effectiveTime") ||
                   column.equals("sourceEffectiveTime") ||
                   column.equals("targetEffectiveTime")) {
            dataType = new DataType(DataType.SUPPORTED_DATA_TYPE.STRING, 8, false);
        } else {
            dataType = new DataType(DataType.SUPPORTED_DATA_TYPE.STRING, null, true);
            ConsoleUtil.println("Treating " + column + " as a string");
        }

        td.addColumn(new ColumnDefinition(column, dataType));
    }

    return td;
}

/**
 * Tells whether a sample value can be read with {@link Long#parseLong(String)}.
 *
 * @param value the sample value, may be {@code null}
 * @return {@code true} if the value is non-null and parses as a long
 */
private static boolean parsesAsLong(String value) {
    if (value == null) {
        return false;
    }

    try {
        Long.parseLong(value);
        return true;
    } catch (final NumberFormatException notANumber) {
        return false;
    }
}
/**
 * Creates or opens the H2 database and registers every RF2 data file found under the content directory.
 *
 * <p>Only {@code .txt} files whose upper-cased path contains the name of the current input type are
 * considered. For each file the table name is derived from the file name, the table is classified
 * (concept, description, language refset, relationship, stated relationship) and its definition is
 * recorded. When the database was newly created the table is also created, loaded and, if it has
 * more than 10000 rows, indexed on {@code conceptId}, {@code referencedComponentId} and {@code sourceId}.
 *
 * @param contentDirectory the directory that is searched recursively for RF2 data files
 * @throws Exception if walking the directory, reading a file or a database operation fails, or if no table was loaded
 */
private void loadDatabase(File contentDirectory)
        throws Exception {
    final long time = System.currentTimeMillis();

    this.db = new H2DatabaseHandle();

    final boolean createdNew = this.db.createOrOpenDatabase(new File(this.outputDirectory,
                                                                     this.contentNameVersion + "-" +
                                                                     this.converterOutputArtifactClassifier));
    int tableCount = 0;

    // Files.walk holds open directory handles until the stream is closed.
    try (Stream<Path> walk = Files.walk(contentDirectory.toPath())) {
        final Iterable<Path> dataFiles = walk.filter(p -> p.toString().endsWith(".txt") &&
                                                          p.toString().toUpperCase().contains(
                                                              this.inputType.name()))::iterator;

        for (final Path path: dataFiles) {
            // One of the data files we want to load
            ConsoleUtil.println("Loading " + path);

            String tableName = path.getFileName()
                                   .toString();

            tableName = tableName.substring(0, tableName.length() - 4);  // remove ".txt"
            tableName = tableName.replaceAll("-", "_");                   // hyphens cause sql issues

            final String lowerName = tableName.toLowerCase();

            if (lowerName.startsWith("sct2_concept_")) {
                this.CONCEPT = tableName;
            } else if (lowerName.startsWith("sct2_description_") || lowerName.startsWith("sct2_textdefinition_")) {
                this.DESCRIPTIONS.add(tableName);
            } else if (lowerName.startsWith("der2_crefset_") && lowerName.contains("language")) {
                this.LANGUAGES.add(tableName);
            } else if (lowerName.startsWith("sct2_identifier_")) {
                // identifier files get a table, but nothing else in this class reads them
            } else if (lowerName.startsWith("sct2_relationship_")) {
                this.RELATIONSHIP = tableName;
            } else if (lowerName.startsWith("sct2_statedrelationship_")) {
                this.STATED_RELATIONSHIP = tableName;
            }

            try (RF2FileReader fileReader = new RF2FileReader(new FileInputStream(path.toFile()))) {
                final TableDefinition td = createTableDefinition(tableName,
                                                                 fileReader.getHeader(),
                                                                 fileReader.peekNextRow());

                this.tables_.put(tableName, td);

                if (!createdNew) {
                    // Only need to process this far to read the metadata about the DB
                    continue;
                }

                this.db.createTable(td);
                tableCount++;

                final int rowCount = this.db.loadDataIntoTable(td, fileReader);

                // don't bother indexing small tables
                if (rowCount > 10000) {
                    final HashSet<String> colsToIndex = new HashSet<>();

                    colsToIndex.add("conceptId");
                    colsToIndex.add("referencedComponentId");
                    colsToIndex.add("sourceId");

                    for (final String s: fileReader.getHeader()) {
                        if (colsToIndex.contains(s)) {
                            try (Statement statement = this.db.getConnection().createStatement()) {
                                ConsoleUtil.println("Indexing " + tableName + " on " + s);

                                if (s.equals("referencedComponentId")) {
                                    statement.execute("CREATE INDEX " + tableName + "_" + s + "_index ON " + tableName +
                                                      " (" + s + ", refsetId)");
                                } else {
                                    if (td.getColDataType("id") != null) {
                                        statement.execute("CREATE INDEX " + tableName + "_" + s + "_index ON " +
                                                          tableName + " (" + s + ", id)");
                                    } else {
                                        statement.execute("CREATE INDEX " + tableName + "_" + s + "_index ON " +
                                                          tableName + " (" + s + ")");
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    ConsoleUtil.println("Processing DB loaded " + tableCount + " tables in " +
                        ((System.currentTimeMillis() - time) / 1000) + " seconds");

    // NOTE(review): tableCount is only incremented for a newly created database, so reopening an
    // existing database always ends up here with 0 and throws. Confirm whether that is intended.
    if (tableCount == 0) {
        throw new RuntimeException("Failed to find tables in directory: " + contentDirectory.getAbsolutePath());
    }
}
/**
 * Converts every row of the concept table into a concept version.
 *
 * <p>Rows are read ordered by id. Each concept id is added to {@code consWithNoStatedRel}, and its
 * definition status is recorded per effective time in {@code conceptDefinitionStatusCache}. When the
 * id column is numeric, an SCTID annotation is added the first time each concept id is seen.
 *
 * @throws SQLException if reading the concept table fails
 * @throws ParseException if an effective time is not in yyyyMMdd format
 */
private void transformConcepts()
        throws SQLException, ParseException {
    ConsoleUtil.println("Converting concepts");

    final TableDefinition td       = this.tables_.get(this.CONCEPT);
    int                   conCount = 0;
    UUID                  lastId   = null;

    // Close the statement and result set whether or not the conversion succeeds.
    try (PreparedStatement ps = this.db.getConnection()
                                       .prepareStatement("Select * from " + this.CONCEPT + " order by id");
         ResultSet rs = ps.executeQuery()) {
        while (rs.next()) {
            conCount++;

            Long       sctID = null;
            final UUID id;

            if (td.getColDataType("ID")
                  .isLong()) {
                sctID = rs.getLong("ID");
                id    = UuidT3Generator.fromSNOMED(sctID);
            } else {
                id = UUID.fromString(rs.getString("ID"));
            }

            this.consWithNoStatedRel.add(id);

            final long    time   = DATE_PARSER.parse(rs.getString("EFFECTIVETIME"))
                                              .getTime();
            final boolean active = rs.getBoolean("ACTIVE");
            final UUID moduleId = (td.getColDataType("MODULEID")
                                     .isLong() ? UuidT3Generator.fromSNOMED(rs.getLong("MODULEID"))
                                               : UUID.fromString(rs.getString("MODULEID")));
            final UUID definitionStatusId = (td.getColDataType("DEFINITIONSTATUSID")
                                               .isLong() ? UuidT3Generator.fromSNOMED(rs.getLong("DEFINITIONSTATUSID"))
                                                         : UUID.fromString(rs.getString("DEFINITIONSTATUSID")));
            final TreeMap<Long, UUID> conDefStatus = this.conceptDefinitionStatusCache.computeIfAbsent(id,
                                                         key -> new TreeMap<>());
            final UUID oldValue = conDefStatus.put(time, definitionStatusId);

            if ((oldValue != null) && !oldValue.equals(definitionStatusId)) {
                throw new RuntimeException("Unexpected - multiple definition status values at the same time: " + sctID +
                                           " " + id + " " + definitionStatusId);
            }

            final ConceptChronology<? extends ConceptVersion<?>> con = super.importUtil.createConcept(id,
                                                                           time,
                                                                           active ? State.ACTIVE
                                                                                  : State.INACTIVE,
                                                                           moduleId);

            // Rows are ordered by id, so comparing with the previous row detects the first version of a concept.
            if ((sctID != null) && !id.equals(lastId)) {
                lastId = id;
                super.importUtil.addStaticStringAnnotation(ComponentReference.fromConcept(con),
                                                           sctID + "",
                                                           MetaData.SCTID.getPrimordialUuid(),
                                                           State.ACTIVE);
            }

            if (conCount % 1000 == 0) {
                ConsoleUtil.showProgress();
            }

            if (conCount % 75000 == 0) {
                ConsoleUtil.println("Processed " + conCount + " concepts...");
            }
        }
    }

    ConsoleUtil.println("Converted " + conCount + " concepts");
}
/**
 * Converts every description table, together with the acceptabilities from its language refset table.
 *
 * <p>A description table is paired with the language table whose name has the same fourth
 * underscore-separated segment (the language). Descriptions are read ordered by concept id and id;
 * for each description row the language table is queried by {@code referencedComponentId} and one
 * acceptability is written per matching row. When the id column is numeric, an SCTID annotation is
 * added the first time each description id is seen.
 *
 * @throws SQLException if reading a description or language table fails
 * @throws ParseException if an effective time is not in yyyyMMdd format
 * @throws MojoExecutionException if no language table exists for the language of a description table
 */
private void transformDescriptions()
        throws SQLException, ParseException, MojoExecutionException {
    ConsoleUtil.println("Converting descriptions");

    for (final String descriptionTableName: this.DESCRIPTIONS) {
        final TableDefinition descriptionTable  = this.tables_.get(descriptionTableName);
        final String          lang              = descriptionTableName.split("_")[3];
        String                languageTableName = null;

        for (final String s: this.LANGUAGES) {
            if (s.split("_")[3]
                 .equals(lang)) {
                languageTableName = s;
                break;
            }
        }

        if (languageTableName == null) {
            throw new MojoExecutionException("Failed to find the language table for the language: " + lang);
        }

        final TableDefinition acceptabilityTable = this.tables_.get(languageTableName);

        ConsoleUtil.println("Processing " + descriptionTable.getTableName() + ", " +
                            acceptabilityTable.getTableName());

        int  descCount            = 0;
        int  accCount             = 0;
        int  noAcceptabilityCount = 0;
        UUID lastId               = null;

        // Both statements and the description result set are closed when this table is finished or fails.
        try (PreparedStatement ps = this.db.getConnection()
                                           .prepareStatement("Select * from " + descriptionTableName +
                                                             " order by conceptId, id");
             PreparedStatement ps2 = this.db.getConnection()
                                            .prepareStatement("Select * from " + languageTableName +
                                                              " where referencedComponentId = ? ");
             ResultSet descRS = ps.executeQuery()) {
            while (descRS.next()) {
                descCount++;

                Long       sctID = null;
                final UUID id;

                if (descriptionTable.getColDataType("ID")
                                    .isLong()) {
                    sctID = descRS.getLong("ID");
                    id    = UuidT3Generator.fromSNOMED(sctID);
                } else {
                    id = UUID.fromString(descRS.getString("ID"));
                }

                final long    time   = DATE_PARSER.parse(descRS.getString("EFFECTIVETIME"))
                                                  .getTime();
                final boolean active = descRS.getBoolean("ACTIVE");
                final UUID moduleId = (descriptionTable.getColDataType("MODULEID")
                                                       .isLong() ? UuidT3Generator.fromSNOMED(descRS.getLong("MODULEID"))
                                                                 : UUID.fromString(descRS.getString("MODULEID")));
                final UUID conceptId = (descriptionTable.getColDataType("CONCEPTID")
                                                        .isLong() ? UuidT3Generator.fromSNOMED(descRS.getLong("CONCEPTID"))
                                                                  : UUID.fromString(descRS.getString("CONCEPTID")));
                final String languageCode = descRS.getString("LANGUAGECODE");
                final UUID typeId = (descriptionTable.getColDataType("TYPEID")
                                                     .isLong() ? UuidT3Generator.fromSNOMED(descRS.getLong("TYPEID"))
                                                               : UUID.fromString(descRS.getString("TYPEID")));
                final String term = descRS.getString("TERM");
                final UUID caseSigId = (descriptionTable.getColDataType("CASESIGNIFICANCEID")
                                                        .isLong() ? UuidT3Generator.fromSNOMED(
                                                            descRS.getLong("CASESIGNIFICANCEID"))
                                                                  : UUID.fromString(descRS.getString("CASESIGNIFICANCEID")));
                final SememeChronology<DescriptionSememe<?>> desc =
                    super.importUtil.addDescription(ComponentReference.fromConcept(conceptId),
                                                    id,
                                                    term,
                                                    DescriptionType.convert(typeId),
                                                    null,
                                                    null,
                                                    caseSigId,
                                                    LanguageMap.getConceptForLanguageCode(
                                                        LanguageCode.getLangCode(languageCode))
                                                               .getPrimordialUuid(),
                                                    moduleId,
                                                    null,
                                                    active ? State.ACTIVE
                                                           : State.INACTIVE,
                                                    time);

                // add SCTID if this is the first sighting
                if ((sctID != null) && !id.equals(lastId)) {
                    lastId = id;
                    super.importUtil.addStaticStringAnnotation(ComponentReference.fromChronology(desc),
                                                               sctID + "",
                                                               MetaData.SCTID.getPrimordialUuid(),
                                                               State.ACTIVE);
                }

                ps2.clearParameters();

                if (acceptabilityTable.getColDataType("referencedComponentId")
                                      .isLong()) {
                    if (sctID == null) {
                        throw new RuntimeException("type mismatch!");
                    }

                    ps2.setLong(1, sctID);
                } else {
                    ps2.setString(1, id.toString());
                }

                boolean foundAcceptability = false;

                try (ResultSet langRS = ps2.executeQuery()) {
                    while (langRS.next()) {
                        accCount++;
                        foundAcceptability = true;

                        final UUID    acceptID     = UUID.fromString(langRS.getString("id"));
                        final long    acceptTime   = DATE_PARSER.parse(langRS.getString("EFFECTIVETIME"))
                                                                .getTime();
                        final boolean acceptActive = langRS.getBoolean("ACTIVE");
                        final UUID acceptModuleId = (acceptabilityTable.getColDataType("MODULEID")
                                                                       .isLong() ? UuidT3Generator.fromSNOMED(
                                                                           langRS.getLong("MODULEID"))
                                                                                 : UUID.fromString(langRS.getString("MODULEID")));
                        final UUID refsetId = (acceptabilityTable.getColDataType("refsetID")
                                                                 .isLong() ? UuidT3Generator.fromSNOMED(
                                                                     langRS.getLong("refsetID"))
                                                                           : UUID.fromString(langRS.getString("refsetID")));
                        final UUID acceptabilityId = (acceptabilityTable.getColDataType("acceptabilityId")
                                                                        .isLong() ? UuidT3Generator.fromSNOMED(
                                                                            langRS.getLong("acceptabilityId"))
                                                                                  : UUID.fromString(langRS.getString("acceptabilityId")));
                        final boolean preferred;

                        if (MetaData.ACCEPTABLE.getPrimordialUuid()
                                               .equals(acceptabilityId)) {
                            preferred = false;
                        } else if (MetaData.PREFERRED.getPrimordialUuid()
                                                     .equals(acceptabilityId)) {
                            preferred = true;
                        } else {
                            throw new RuntimeException("Unexpected acceptibility: " + acceptabilityId);
                        }

                        super.importUtil.addDescriptionAcceptibility(ComponentReference.fromChronology(desc),
                                                                     acceptID,
                                                                     refsetId,
                                                                     preferred,
                                                                     acceptActive ? State.ACTIVE
                                                                                  : State.INACTIVE,
                                                                     acceptTime,
                                                                     acceptModuleId);
                    }
                }

                if (!foundAcceptability) {
                    // Only counted; reported in the summary line for this table.
                    noAcceptabilityCount++;
                }

                if (descCount % 1000 == 0) {
                    ConsoleUtil.showProgress();
                }

                if (descCount % 75000 == 0) {
                    ConsoleUtil.println("Processed " + descCount + " descriptions with " + accCount + " acceptabilities...");
                }
            }
        }

        ConsoleUtil.println("Converted " + descCount + " descriptions.  Descriptions with no acceptability: " +
                            noAcceptabilityCount);
    }
}
/**
* Builds one logical expression per source concept from the stated or inferred relationship table
* and writes it as a relationship graph.
*
* Only the latest version of each currently active relationship is used. Relationships whose
* characteristic type is neither stated nor inferred are written as associations instead. The
* expression is a sufficient set or a necessary set depending on the concept's latest definition
* status. When processing inferred relationships, a concept that is still listed in
* consWithNoStatedRel also gets the inferred expression written as its stated graph.
*
* @param stated true to read the stated relationship table and write stated graphs, false to read the
* (inferred) relationship table and write inferred graphs
* @throws SQLException the SQL exception
*/
private void transformRelationships(boolean stated)
throws SQLException {
// TODO can ConsoleUtil use standard logging API instead?
ConsoleUtil.println("Converting " + (stated ? "stated"
: "inferred") + " relationships into graphs");
final String table = (stated ? this.STATED_RELATIONSHIP
: this.RELATIONSHIP);
final TableDefinition td = this.tables_.get(table);
int graphCount = 0;
final Iterator<ArrayList<RelBatch>> rels = getRelationships(table, td);
// id of the previously processed relationship, used to add the SCTID annotation only once per association
UUID lastId = null;
while (rels.hasNext()) {
// each Rel here will be for the same sourceId.
graphCount++;
final ArrayList<RelBatch> conRels = rels.next();
// newest effective time among the rels that go into the graph; used as the graph's time
long newestRelTime = 0;
final LogicalExpressionBuilder leb = Get.logicalExpressionBuilderService()
.getLogicalExpressionBuilder();
// assertions that go directly under the top-level And
final ArrayList<Assertion> assertions = new ArrayList<>();
// assertions of non-zero relationship groups, keyed by the trimmed group value
final HashMap<String, ArrayList<Assertion>> groupedAssertions = new HashMap<>();
// Each member of a RelBatch contains the same relBatch ID (so different versions of the same relBatch)
for (final RelBatch rb: conRels) {
if (!rb.isActiveNow()) {
// TODO handle historical relationships
} else {
// TODO handle history - only loading latest for now.
final Rel r = rb.getRels()
.last();
// sanity check: the stated table must not hold inferred rels, and the other table must not hold stated rels
if ((stated && r.characteristicTypeId.equals(MetaData.INFERRED.getPrimordialUuid())) ||
(!stated && r.characteristicTypeId.equals(MetaData.STATED.getPrimordialUuid()))) {
throw new RuntimeException("Unexpected - table type and characteristic type do not match!");
}
if (r.characteristicTypeId.equals(MetaData.INFERRED.getPrimordialUuid()) ||
r.characteristicTypeId.equals(MetaData.STATED.getPrimordialUuid())) {
if (r.effectiveTime > newestRelTime) {
newestRelTime = r.effectiveTime;
}
// group "0" means the relationship is not part of a relationship group
if (r.relGroup.trim()
.equals("0")) {
// Don't just check primordial, IS_A has multiple UUIDs
if (Arrays.stream(MetaData.IS_A.getUuids())
.anyMatch(uuid -> uuid.equals(r.typeId))) {
assertions.add(ConceptAssertion(Get.identifierService()
.getConceptSequenceForUuids(r.destinationId),
leb));
} else {
// TODO [graph] ask Keith about the never group stuff.
// TODO [graph] handle modifier?
// TODO [graph] handle sctid
// TODO [graph] handle id
// TODO [graph] maintain actual group numbers?
if (this.neverRoleGroupSet.contains(r.typeId)) {
// types in neverRoleGroupSet are added as a bare role
assertions.add(SomeRole(Get.identifierService()
.getConceptSequenceForUuids(r.typeId),
ConceptAssertion(Get.identifierService()
.getConceptSequenceForUuids(r.destinationId),
leb)));
} else {
// every other ungrouped role is wrapped in a role group of its own
assertions.add(SomeRole(MetaData.ROLE_GROUP.getConceptSequence(),
And(SomeRole(Get.identifierService()
.getConceptSequenceForUuids(r.typeId),
ConceptAssertion(Get.identifierService()
.getConceptSequenceForUuids(r.destinationId),
leb)))));
}
}
} else {
// collect the role under its group; the groups are turned into role groups after this loop
ArrayList<Assertion> groupAssertions = groupedAssertions.get(r.relGroup.trim());
if (groupAssertions == null) {
groupAssertions = new ArrayList<>();
groupedAssertions.put(r.relGroup.trim(), groupAssertions);
}
groupAssertions.add(SomeRole(Get.identifierService()
.getConceptSequenceForUuids(r.typeId),
ConceptAssertion(Get.identifierService()
.getConceptSequenceForUuids(r.destinationId),
leb)));
}
} else {
// kick it over into an association bucket
// TODO should I toss these when processing inferred?
final SememeChronology<DynamicSememe<?>> assn =
super.importUtil.addAssociation(ComponentReference.fromConcept(r.sourceId),
r.id,
r.destinationId,
r.typeId,
r.isActive ? State.ACTIVE
: State.INACTIVE,
r.effectiveTime,
r.moduleId);
// TODO put on modifier, group
if ((r.sctID != null) &&!r.id.equals(lastId)) {
super.importUtil.addStaticStringAnnotation(ComponentReference.fromChronology(assn,
() -> "Association"),
r.sctID + "",
MetaData.SCTID.getPrimordialUuid(),
State.ACTIVE);
}
}
lastId = r.id;
}
}
// handle relationship groups
for (final ArrayList<Assertion> groupAssertions: groupedAssertions.values()) {
assertions.add(SomeRole(MetaData.ROLE_GROUP.getConceptSequence(),
And(groupAssertions.toArray(new Assertion[groupAssertions.size()]))));
}
if (assertions.size() > 0) {
// null means no definition status could be determined; no graph is written in that case
Boolean defined = null;
/**
* conDefStatus is a sorted set of time, definition status uuid. The datastructure is limited in that it is possible
* for a component to have one status in a combination of branch and module, and to have another status for
* a different combination of branch and module... Sorting just by time may cause alternative status values
* at the same time to be overwritten. TODO modify data structures to handle status which may differ by
* branch and module.
*/
final TreeMap<Long, UUID> conDefStatus = this.conceptDefinitionStatusCache.get(conRels.get(0)
.getSourceId());
if (conDefStatus == null) {
// Try Frills - in the case of US Extension, we should have SCT loaded, pull from that.
// Can definition status vary between stated and inferred? Just read stated for now.
final Optional<Boolean> sctDefined =
Frills.isConceptFullyDefined(ComponentReference.fromConcept(conRels.get(0)
.getSourceId())
.getNid(),
true);
if (sctDefined.isPresent()) {
defined = sctDefined.get();
} else {
// nothing known about this concept: report it, using the first rel's source, type and destination text
final RelBatch relBatch = conRels.get(0);
final Rel rel = relBatch.getRels()
.first();
final StringBuilder builder = new StringBuilder();
builder.append(Get.conceptDescriptionText(Get.identifierService()
.getConceptSequenceForUuids(rel.sourceId)));
builder.append("|");
builder.append(Get.conceptDescriptionText(Get.identifierService()
.getConceptSequenceForUuids(rel.typeId)));
builder.append("|");
builder.append(Get.conceptDescriptionText(Get.identifierService()
.getConceptSequenceForUuids(rel.destinationId)));
ConsoleUtil.printErrorln("No definition status found for: " + conRels.get(0) + "\n" +
builder.toString());
}
} else {
// the entry with the greatest time decides
if (conDefStatus.lastEntry()
.getValue()
.equals(TermAux.SUFFICIENT_CONCEPT_DEFINITION.getPrimordialUuid())) {
defined = true;
} else if (conDefStatus.lastEntry()
.getValue()
.equals(
TermAux.NECESSARY_BUT_NOT_SUFFICIENT_CONCEPT_DEFINITION.getPrimordialUuid())) {
defined = false;
} else {
throw new RuntimeException("Unexpected concept definition status: " + conDefStatus.lastEntry());
}
}
if (defined != null) {
// The return values are not used here; the expression is obtained from leb.build() below.
if (defined) {
SufficientSet(And(assertions.toArray(new Assertion[assertions.size()])));
} else {
NecessarySet(And(assertions.toArray(new Assertion[assertions.size()])));
}
final LogicalExpression le = leb.build();
if (le.isMeaningful()) {
if (newestRelTime == 0) {
throw new RuntimeException("Time sort failure!");
}
// TODO [graph] what if the modules are different across the graph rels?
super.importUtil.addRelationshipGraph(ComponentReference.fromConcept(conRels.get(0)
.getSourceId()),
le,
stated,
newestRelTime,
conRels.get(0)
.getRels()
.first().moduleId);
if (!stated && this.consWithNoStatedRel.contains(conRels.get(0).getSourceId())) {
// substitute inferred expression, as early SNOMED stated expressions were lost.
super.importUtil.addRelationshipGraph(ComponentReference.fromConcept(conRels.get(0)
.getSourceId()),
le,
true,
newestRelTime,
conRels.get(0)
.getRels()
.first().moduleId);
}
// this concept now has a graph, so it no longer counts as missing one
this.consWithNoStatedRel.remove(conRels.get(0)
.getSourceId());
} else {
ConsoleUtil.printErrorln("expression not meaningful?");
}
}
} else {
// TODO [graph] ask Keith about these cases where no associations get generated.
}
if (graphCount % 1000 == 0) {
ConsoleUtil.showProgress();
}
if (graphCount % 75000 == 0) {
ConsoleUtil.println("Processed " + graphCount + " relationship graphs...");
}
}
ConsoleUtil.println("Created " + graphCount + " graphs");
}
//~--- set methods ---------------------------------------------------------
/**
 * Replaces the module concept, which otherwise defaults to {@code MetaData.SNOMED_CT_CORE_MODULES}.
 * The stored value is handed to the {@code IBDFCreationUtility} when {@link #execute()} runs.
 *
 * @param conceptProxy the module concept to use instead of the default
 */
public void setModuleUUID(MavenConceptProxy conceptProxy) {
    moduleUUID = conceptProxy;
}
//~--- get methods ---------------------------------------------------------
/**
 * Reports the UUID namespace of this converter.
 *
 * @return always {@code ConverterUUID.NAMESPACE.SNOMED}
 */
@Override
protected ConverterUUID.NAMESPACE getNamespace() {
    final ConverterUUID.NAMESPACE snomedNamespace = ConverterUUID.NAMESPACE.SNOMED;

    return snomedNamespace;
}
/**
 * This will return batches of relationships, each item the iterator returns will be all of the relationships
 * for a particular source concept, while each RelBatch within the list will be all versions of a particular relationship.
 *
 * <p>The rows are read lazily, ordered by source id and relationship id. The underlying statement and
 * result set are closed once the iterator is exhausted or a read fails; a caller that abandons the
 * iterator early leaves them open. Read failures ({@link SQLException}, {@link ParseException}) are
 * rethrown from the iterator as {@link RuntimeException}.
 *
 * @param table the name of the relationship table to read
 * @param td the definition of that table, used when a row is turned into a {@code Rel}
 * @return an iterator over the relationships grouped by source concept; {@code next()} throws
 *         {@link NoSuchElementException} when there is nothing left
 * @throws SQLException if the query cannot be prepared or executed
 */
private Iterator<ArrayList<RelBatch>> getRelationships(String table, TableDefinition td)
        throws SQLException {
    final PreparedStatement ps = this.db.getConnection()
                                        .prepareStatement("Select * from " + table + " order by sourceid, id");
    final ResultSet rs;

    try {
        rs = ps.executeQuery();
    } catch (final SQLException e) {
        // Nobody else will ever see this statement, so release it before reporting the failure.
        try {
            ps.close();
        } catch (final SQLException closeFailure) {
            e.addSuppressed(closeFailure);
        }

        throw e;
    }

    return new Iterator<ArrayList<RelBatch>>() {
        /** All versions read so far of the relationship currently being collected. */
        private RelBatch relBatchWorking = null;

        /** The completed batches of the source concept currently being collected. */
        private ArrayList<RelBatch> conceptRelsWorking = new ArrayList<>();

        /** The next item to hand out, or null if it has not been read yet. */
        private ArrayList<RelBatch> conceptRelsNextReady = null;

        /** Set once the data has run out, so the closed result set is never touched again. */
        private boolean exhausted = false;

        @Override
        public boolean hasNext() {
            if ((this.conceptRelsNextReady == null) && !this.exhausted) {
                read();

                if (this.conceptRelsNextReady == null) {
                    this.exhausted = true;
                    closeQuietly();
                }
            }

            return this.conceptRelsNextReady != null;
        }

        @Override
        public ArrayList<RelBatch> next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }

            final ArrayList<RelBatch> temp = this.conceptRelsNextReady;

            this.conceptRelsNextReady = null;
            return temp;
        }

        /** Reads rows until the batches of one source concept are complete, or the data runs out. */
        private void read() {
            try {
                while ((this.conceptRelsNextReady == null) && rs.next()) {
                    final Rel r = new Rel(rs, td);

                    if (this.relBatchWorking == null) {
                        this.relBatchWorking = new RelBatch(r);
                    } else if (this.relBatchWorking.getBatchId()
                                                   .equals(r.id)) {
                        this.relBatchWorking.addRel(r);
                    } else {
                        // different batchId than previous - need a new RelBatch.  Move last relBatch into conceptRels.
                        if ((this.conceptRelsWorking.size() > 0) &&
                                !this.conceptRelsWorking.get(0).getSourceId().equals(this.relBatchWorking.getSourceId())) {
                            this.conceptRelsNextReady = this.conceptRelsWorking;
                            this.conceptRelsWorking   = new ArrayList<>();
                        }

                        this.conceptRelsWorking.add(this.relBatchWorking);

                        // Put this relBatch into a new batch.
                        this.relBatchWorking = new RelBatch(r);
                    }
                }
            } catch (SQLException | ParseException e) {
                this.exhausted = true;
                closeQuietly();
                throw new RuntimeException(e);
            }

            if (this.conceptRelsNextReady != null) {
                return;
            }

            // Out of rows: the collected batches may belong to a different concept than the last relBatch.
            if ((this.conceptRelsWorking.size() > 0) &&
                    !this.conceptRelsWorking.get(0).getSourceId().equals(this.relBatchWorking.getSourceId())) {
                this.conceptRelsNextReady = this.conceptRelsWorking;
                this.conceptRelsWorking   = new ArrayList<>();
                return;
            }

            // If we get here, the only thing left is the last relBatch.
            if (this.relBatchWorking != null) {
                this.conceptRelsWorking.add(this.relBatchWorking);
                this.conceptRelsNextReady = this.conceptRelsWorking;
                this.relBatchWorking      = null;
                this.conceptRelsWorking   = new ArrayList<>();
            }
        }

        /** Closes the result set and the statement; close failures are deliberately ignored. */
        private void closeQuietly() {
            try {
                rs.close();
            } catch (final SQLException ignored) {
                // nothing useful can be done about a failed close
            }

            try {
                ps.close();
            } catch (final SQLException ignored) {
                // nothing useful can be done about a failed close
            }
        }
    };
}
}