/** * Copyright Notice * * This is a work of the U.S. Government and is not subject to copyright * protection in the United States. Foreign copyrights may apply. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package sh.isaac.convert.loinc.techPreview; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; import java.util.Enumeration; import java.util.HashMap; import java.util.Optional; import java.util.UUID; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import org.antlr.v4.runtime.tree.ParseTree; import org.apache.commons.lang3.StringUtils; import org.apache.maven.plugin.MojoExecutionException; import org.apache.maven.plugins.annotations.LifecyclePhase; import org.apache.maven.plugins.annotations.Mojo; import org.apache.maven.plugins.annotations.Parameter; import se.liu.imt.mi.snomedct.expression.tools.ExpressionSyntaxError; import se.liu.imt.mi.snomedct.expression.tools.SNOMEDCTParserUtil; import sh.isaac.MetaData; import sh.isaac.api.Get; import sh.isaac.api.State; import sh.isaac.api.logic.LogicalExpression; import sh.isaac.api.logic.LogicalExpressionBuilder; import sh.isaac.convert.loinc.LOINCReader; import sh.isaac.convert.loinc.LoincCsvFileReader; import sh.isaac.convert.loinc.techPreview.propertyTypes.PT_Annotations; import sh.isaac.convert.loinc.techPreview.propertyTypes.PT_Descriptions; import sh.isaac.convert.loinc.techPreview.propertyTypes.PT_Refsets; import sh.isaac.converters.sharedUtils.ComponentReference; import sh.isaac.converters.sharedUtils.ConsoleUtil; import sh.isaac.converters.sharedUtils.ConverterBaseMojo; import sh.isaac.converters.sharedUtils.IBDFCreationUtility; import sh.isaac.converters.sharedUtils.propertyTypes.PropertyType; import sh.isaac.converters.sharedUtils.propertyTypes.ValuePropertyPair; import sh.isaac.converters.sharedUtils.stats.ConverterUUID; import sh.isaac.provider.logic.ISAACVisitor; /** * * Loader code to convert Loinc into the ISAAC datastore. * * Paths are typically controlled by maven, however, the main() method has paths configured so that they match what * maven does for test purposes. */ @Mojo( name = "convert-loinc-tech-preview-to-ibdf", defaultPhase = LifecyclePhase.PROCESS_SOURCES ) public class LoincTPLoaderMojo extends ConverterBaseMojo { /** * The Constant NECESSARY_SCTID. */ private static final String NECESSARY_SCTID = "900000000000074008"; /** * The Constant SUFFICIENT_SCTID. */ private static final String SUFFICIENT_SCTID = "900000000000073002"; /** * we also read a native loinc input file - get that version too. */ @Parameter( required = true, defaultValue = "${loinc-src-data.version}" ) protected String converterSourceLoincArtifactVersion; /** * Builds the UUID. * * @param uniqueIdentifier the unique identifier * @return the uuid */ private UUID buildUUID(String uniqueIdentifier) { return ConverterUUID.createNamespaceUUIDFromString(uniqueIdentifier, true); } /** * Execute. * * @throws MojoExecutionException the mojo execution exception */ @SuppressWarnings("resource") @Override public void execute() throws MojoExecutionException { ConsoleUtil.println("LOINC Tech Preview Processing Begins " + new Date().toString()); super.execute(); ConsoleUtil.println("Processing LOINC"); LOINCReader loincData = null; File tpZipFile = null; int expLineNumber = 1; BufferedWriter loincExpressionDebug = null; try { if (!this.inputFileLocation.isDirectory()) { throw new MojoExecutionException( "LoincDataFiles must point to a directory containing the required loinc data files"); } for (final File f : this.inputFileLocation.listFiles()) { if (f.getName().toLowerCase().equals("loinc.csv")) { loincData = new LoincCsvFileReader(f, false); } if (f.isFile() && f.getName().toLowerCase().endsWith(".zip")) { if (f.getName().toLowerCase().contains("technologypreview")) { if (tpZipFile != null) { throw new RuntimeException("Found multiple zip files in " + this.inputFileLocation.getAbsolutePath()); } tpZipFile = f; } else { final ZipFile zf = new ZipFile(f); final Enumeration<? extends ZipEntry> zipEntries = zf.entries(); while (zipEntries.hasMoreElements()) { final ZipEntry ze = zipEntries.nextElement(); // see {@link SupportedConverterTypes} if (f.getName().toLowerCase().contains("text")) { if (ze.getName().toLowerCase().endsWith("loinc.csv")) { ConsoleUtil.println("Using the data file " + f.getAbsolutePath() + " - " + ze.getName()); loincData = new LoincCsvFileReader(zf.getInputStream(ze)); ((LoincCsvFileReader) loincData).readReleaseNotes(f.getParentFile(), true); } } } } } } if (loincData == null) { throw new MojoExecutionException("Could not find the loinc data file in " + this.inputFileLocation.getAbsolutePath()); } if (tpZipFile == null) { throw new RuntimeException("Couldn't find the tech preview zip file in " + this.inputFileLocation.getAbsolutePath()); } loincExpressionDebug = new BufferedWriter(new FileWriter(new File(this.outputDirectory, "ExpressionDebug.log"))); final SimpleDateFormat dateReader = new SimpleDateFormat("MMMMMMMMMMMMM yyyy");//Parse things like "June 2014" final Date releaseDate = dateReader.parse(loincData.getReleaseDate()); this.importUtil = new IBDFCreationUtility(Optional.empty(), Optional.of(MetaData.LOINC_SOLOR_MODULES), this.outputDirectory, this.converterOutputArtifactId, this.converterOutputArtifactClassifier, this.converterOutputArtifactVersion, false, releaseDate.getTime()); ConsoleUtil.println("Loading Metadata"); // Set up a meta-data root concept final ComponentReference metadata = ComponentReference.fromConcept( this.importUtil.createConcept( "LOINC Tech Preview Metadata" + IBDFCreationUtility.METADATA_SEMANTIC_TAG, true, MetaData.SOLOR_CONTENT_METADATA.getPrimordialUuid())); this.importUtil.loadTerminologyMetadataAttributes(metadata, this.converterSourceArtifactVersion, Optional.of(loincData.getReleaseDate()), this.converterOutputArtifactVersion, Optional.ofNullable( this.converterOutputArtifactClassifier), this.converterVersion); this.importUtil.addStaticStringAnnotation(metadata, this.converterSourceLoincArtifactVersion, MetaData.SOURCE_ARTIFACT_VERSION.getPrimordialUuid(), State.ACTIVE); final PT_Refsets refsets = new PT_Refsets(); final PT_Annotations annotations = new PT_Annotations(new ArrayList<>()); final PT_Descriptions descTypes = new PT_Descriptions(); this.importUtil.loadMetaDataItems(Arrays.asList((new PropertyType[] { refsets, annotations, descTypes })), metadata.getPrimordialUuid()); // TODO do I need any other attrs right now? ConsoleUtil.println("Reading data file into memory."); int conCounter = 0; final HashMap<String, String[]> loincNumToData = new HashMap<>(); String[] line = loincData.readLine(); while (line != null) { if (line.length > 0) { loincNumToData.put(line[loincData.getFieldMap().get("LOINC_NUM")], line); } line = loincData.readLine(); if (loincNumToData.size() % 1000 == 0) { ConsoleUtil.showProgress(); } if (loincNumToData.size() % 10000 == 0) { ConsoleUtil.println("Read " + loincNumToData.size() + " lines"); } } loincData.close(); ConsoleUtil.println("Read " + loincNumToData.size() + " data lines from file"); /* * Columns in this data file are: * id - A UUID for this row * effectiveTime * active - 1 for active * moduleId * refsetId * referencedComponentId * mapTarget - LOINC_NUM * Expression - the goods * definitionStatusId * correlationId * contentOriginId */ loincExpressionDebug.write("line number,expression id,converted expression\n"); ConsoleUtil.println("Processing Expressions / Creating Concepts"); final LoincExpressionReader ler = new LoincExpressionReader(tpZipFile); String[] expressionLine = ler.readLine(); while (expressionLine != null) { try { if (expressionLine.length > 0) { final String[] loincConceptData = loincNumToData.get(expressionLine[ler.getPositionForColumn("mapTarget")]); if (loincConceptData == null) { ConsoleUtil.printErrorln("Skipping line " + expLineNumber + " because I can't find loincNum " + expressionLine[ler.getPositionForColumn("mapTarget")]); } final boolean active = expressionLine[ler.getPositionForColumn("active")].equals("1"); if (!active) { ConsoleUtil.printErrorln("Skipping line " + expLineNumber + " because it is inactive"); } if (active && (loincConceptData != null)) { ParseTree parseTree; final String definitionSctid = expressionLine[ler.getPositionForColumn("definitionStatusId")]; switch (definitionSctid) { case SUFFICIENT_SCTID : parseTree = SNOMEDCTParserUtil.parseExpression( expressionLine[ler.getPositionForColumn("Expression")]); break; case NECESSARY_SCTID : // See <<< black magic from http://ihtsdo.org/fileadmin/user_upload/doc/download/doc_CompositionalGrammarSpecificationAndGuide_Current-en-US_INT_20150708.pdf?ok parseTree = SNOMEDCTParserUtil.parseExpression( "<<< " + expressionLine[ler.getPositionForColumn("Expression")]); break; default : throw new RuntimeException("Unexpected definition status: " + definitionSctid + " on line " + expLineNumber); } final LogicalExpressionBuilder defBuilder = Get.logicalExpressionBuilderService() .getLogicalExpressionBuilder(); final ISAACVisitor visitor = new ISAACVisitor(defBuilder); visitor.visit(parseTree); final LogicalExpression expression = defBuilder.build(); final UUID expressionId = UUID.fromString(expressionLine[ler.getPositionForColumn("id")]); loincExpressionDebug.write(expLineNumber + "," + expressionId + "," + expression.toString() + "\n"); // Build up a concept with the attributes we want, and the expression from the tech preview final String loincNum = loincConceptData[loincData.getPositionForColumn("LOINC_NUM")]; final ComponentReference concept = ComponentReference.fromConcept(this.importUtil.createConcept(buildUUID(loincNum))); conCounter++; this.importUtil.addRelationshipGraph(concept, expressionId, expression, true, null, null); this.importUtil.addRefsetMembership(concept, PT_Refsets.Refsets.ALL.getProperty().getUUID(), State.ACTIVE, null); // add descriptions final ArrayList<ValuePropertyPair> descriptions = new ArrayList<>(); for (final String property : descTypes.getPropertyNames()) { final String data = loincConceptData[loincData.getPositionForColumn(property)]; if (!StringUtils.isBlank(data)) { descriptions.add(new ValuePropertyPair(data, descTypes.getProperty(property))); } } this.importUtil.addDescriptions(concept, descriptions); // add attributes for (final String property : annotations.getPropertyNames()) { final String data = loincConceptData[loincData.getPositionForColumn(property)]; if (!StringUtils.isBlank(data)) { this.importUtil.addStringAnnotation(concept, data, annotations.getProperty(property).getUUID(), State.ACTIVE); } } } } } catch (final IOException | RuntimeException | ExpressionSyntaxError e) { getLog().error("Failed with expression line number at " + expLineNumber + " " + e + " skipping line"); } expressionLine = ler.readLine(); expLineNumber++; } loincExpressionDebug.close(); ConsoleUtil.println("Created " + conCounter + " concepts total"); ConsoleUtil.println("Data Load Summary:"); for (final String s : this.importUtil.getLoadStats().getSummary()) { ConsoleUtil.println(" " + s); } ConsoleUtil.println("Finished"); } catch (final Exception ex) { throw new MojoExecutionException("Failed with expression line number at " + expLineNumber, ex); } finally { try { if (this.importUtil != null) { this.importUtil.shutdown(); } if (loincData != null) { loincData.close(); } if (loincExpressionDebug != null) { loincExpressionDebug.close(); } } catch (final IOException e) { throw new RuntimeException("Failure", e); } } } @Override protected ConverterUUID.NAMESPACE getNamespace() { return ConverterUUID.NAMESPACE.LOINC; } }