/**
* Copyright 2014 IHTSDO
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ihtsdo.otf.snomed.loader;
import static org.ihtsdo.otf.snomed.loader.RF2ImportHelper.descMap;
import static org.ihtsdo.otf.snomed.loader.RF2ImportHelper.vMap;
import static org.ihtsdo.otf.snomed.loader.RF2ImportHelper.getVertex;
import static org.ihtsdo.otf.snomed.loader.RF2ImportHelper.processCaseSinificance;
import static org.ihtsdo.otf.snomed.loader.RF2ImportHelper.processModule;
import static org.ihtsdo.otf.snomed.loader.RF2ImportHelper.processType;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.zip.GZIPInputStream;
import org.apache.commons.lang3.StringUtils;
import org.ihtsdo.otf.snomed.domain.DescriptionType;
import org.ihtsdo.otf.snomed.domain.Properties;
import org.ihtsdo.otf.snomed.domain.Relationship;
import org.ihtsdo.otf.snomed.domain.Types;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.thinkaurelius.titan.core.TitanException;
import com.thinkaurelius.titan.core.TitanGraph;
import com.tinkerpop.blueprints.Direction;
import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.Vertex;
/**
 * Audits RF2 description snapshot data previously loaded into a Titan graph:
 * re-adds description vertices that are missing required edges (module, case
 * significance, type, concept link) and back-fills missing concept titles
 * from FSN description terms.
 */
public class Rf2SnapshotAuditor {
// SLF4J logger; all progress and diagnostics go through this.
private static final Logger LOGGER = LoggerFactory.getLogger(Rf2SnapshotAuditor.class);
// User id stamped as "createdBy" on vertices added during the audit.
private static final String SNAPSHOT_USER = "system";
// Graph under audit; validated non-null in the constructor.
private TitanGraph g;
// Number of rows processed between transaction commits; see commit(int).
private int bufferSize = 1000;
// Optional description sub-type name; when set, matching rows also get a
// sub-type relationship existence check (see audit()).
private String subType;
// When true, rows whose description id already has a vertex are skipped.
boolean isReload = false;
/**
 * Sets the description sub-type name used during {@link #audit(String)} to
 * decide whether a sub-type relationship edge must already exist.
 *
 * @param subType the subType to set (compared case-insensitively to the
 *        mapped description type name)
 */
public void setSubType(String subType) {
    this.subType = subType;
}
/**
 * Creates an auditor bound to the given graph instance.
 *
 * @param g the Titan graph to audit; must not be {@code null}
 * @throws IllegalArgumentException if {@code g} is {@code null}
 */
public Rf2SnapshotAuditor(TitanGraph g) {
    // Validate before doing anything else (was logging first, with a leftover
    // auto-generated TODO comment).
    if (g == null) {
        throw new IllegalArgumentException("Graph instance is required for snapshot loading. Can not continue");
    }
    LOGGER.info("Initializing graph {}", g);
    this.g = g;
}
/**
 * Streams an RF2 description snapshot file (tab separated, optionally
 * gzipped) row by row and audits each description against the graph: rows
 * whose concept lacks a title are (re)processed via
 * {@link #auditDescription(Rf2Description)}; rows matching the configured
 * {@code subType} with no existing sub-type relationship are added via
 * {@link #processDescription(Rf2Description)}. The header row (row 0), empty
 * lines and rows without exactly 9 columns are skipped. Work is committed in
 * batches of {@code bufferSize} rows via {@link #commit(int)}.
 *
 * @param file path to the snapshot file; must not be blank
 * @throws RuntimeException wrapping any I/O or processing failure; the
 *         running transaction is rolled back first
 */
public void audit(String file) {
    LOGGER.debug("Starting to audit file {}", file);
    long start = System.currentTimeMillis();
    long totalRow = 0;
    //need to change implementation from super csv to java io as RF2 description has buggy quotes
    BufferedReader reader = null;
    try {
        if (StringUtils.isBlank(file)) {
            throw new IllegalArgumentException("Please check file supplied.");
        } else if (file.endsWith(".gz")) {
            reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file)), "utf-8"));
        } else {
            // was new FileReader(file), which used the platform default
            // charset; read plain files as UTF-8 too, consistent with the
            // .gz branch above
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
        }
        LOGGER.debug("Starting to load file {}", file);
        String line;
        beginTx();
        int row = -1;
        DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyyMMdd");
        while ((line = reader.readLine()) != null) {
            row++;
            LOGGER.debug("Processing rowNo={} ", row);
            if (StringUtils.isEmpty(line)) {
                LOGGER.debug("rowNo={} , {} is empty skipping", row, line);
                continue;
            }
            String[] columns = StringUtils.splitByWholeSeparator(line, "\t");
            if (columns != null && columns.length == 9 && row != 0) {
                if (isReload) {
                    // Reload mode: skip rows whose description vertex already exists.
                    Vertex v = getVertex(g, columns[0]);
                    if (v != null) {
                        LOGGER.debug("Not Processing line={}, rowNo={} as record already loaded", line, row);
                        continue;
                    }
                }
                LOGGER.debug("Processing rowNo={} , {}", row, line);
                Rf2Description desc = new Rf2Description();
                //id	effectiveTime	active	moduleId	conceptId	languageCode	typeId	term	caseSignificanceId
                desc.setId(columns[0]);
                desc.setEffectiveTime(fmt.parseDateTime(columns[1]));
                desc.setActive(columns[2]);
                desc.setModuleId(columns[3]);
                desc.setConceptId(columns[4]);
                desc.setLanguageCode(columns[5]);
                desc.setTypeId(columns[6]);
                desc.setTerm(columns[7]);
                desc.setCaseSignificanceId(columns[8]);
                if (!isConceptTitleExist(desc)) {
                    auditDescription(desc);
                } else if (!StringUtils.isEmpty(subType)
                        && subType.equalsIgnoreCase(descMap.get(desc.getTypeId()).toString())
                        && !isSubTypeRelationExist(desc)) {
                    LOGGER.debug("Processing row {} of subType {}", row, subType);
                    processDescription(desc);
                } else {
                    LOGGER.debug("Not processing row {} of description id {}", row, desc.getId());
                }
            } else {
                LOGGER.debug("rowNo={}, {} does not have required columns, skipping", row, line);
                continue;
            }
            commit(row);
        }
        LOGGER.info("Commiting remaining data");
        g.commit();//remaining operation commit
        totalRow = row;
    } catch (IOException e) {
        g.rollback();
        // Log with the throwable (was printStackTrace) and preserve the cause.
        LOGGER.error("Can not process file", e);
        throw new RuntimeException("Can not process file", e);
    } catch (Exception e) {
        LOGGER.error("Transaction rolledback", e);
        g.rollback();
        throw new RuntimeException(e);
    } finally {
        if (reader != null) {
            try {
                LOGGER.info("Closing IO resources");
                reader.close();
            } catch (IOException e) {
                LOGGER.error("Error closing bean reader", e);
            }
        }
    }
    LOGGER.info("Total row {} processed in {} minute ", totalRow, ((System.currentTimeMillis() - start) / 60000));
}
/**
 * Commits the running transaction every {@code bufferSize} rows and opens a
 * fresh one via {@link #beginTx()}. On a {@link TitanException} the commit is
 * retried once; if the retry also fails the retry exception is rethrown.
 *
 * @param rowNumber the current (0-based) row number being processed
 */
private void commit(int rowNumber) {
    if (rowNumber % bufferSize == 0) {
        LOGGER.info("Committing running transaction");
        try {
            g.commit();
            beginTx();
            LOGGER.info("Total concept processed {}", rowNumber);
        } catch (TitanException e) {
            LOGGER.error("Error commiting transaction, retrying {}, {}", rowNumber, e);
            try {
                g.commit();//retry once
                beginTx();
                LOGGER.info("Total concept processed {}", rowNumber);
            } catch (TitanException e2) {
                // Was logging (e, rowNumber) against a "{}, {}" message — args
                // swapped and the wrong exception; log the retry failure e2.
                LOGGER.error("Error commiting transaction during retry {}, {}", rowNumber, e2);
                throw e2;
            }
        }
    }
}
/**
 * Starts a new transaction after a short pause and refreshes the shared
 * vertex cache, since cached vertices are stale once the previous
 * transaction commits.
 */
private void beginTx() {
    LOGGER.info("Starting a new transaction");
    try {
        // Brief pause between commit and the next transaction.
        Thread.sleep(2000);
        LOGGER.info("Sleep done");
    } catch (InterruptedException e) {
        // Restore the interrupt flag instead of swallowing it (was an
        // auto-generated printStackTrace stub).
        Thread.currentThread().interrupt();
        LOGGER.warn("Interrupted while waiting to start a new transaction", e);
    }
    // NOTE(review): the TransactionBuilder returned here is discarded, so
    // batch loading is never actually enabled on the transaction opened
    // below — confirm intent against the Titan API.
    g.buildTransaction().enableBatchLoading();
    g.newTransaction();
    vMap = new HashMap<String, Vertex>();//refresh map as vertex are stale after commit
}
/**
 * Creates a new description vertex for {@code desc} and wires its outgoing
 * module, case-significance and type edges plus an incoming edge from the
 * owning concept labelled with the mapped description type. When the
 * description is an FSN its term is also copied onto the concept vertex as
 * the concept title. If the concept vertex cannot be found only an error is
 * logged; the description vertex is still created.
 */
private void processDescription(Rf2Description desc) {
    long start = System.currentTimeMillis();
    LOGGER.debug("Processing description {}", desc.getId());
    // Description vertex with its scalar properties.
    Vertex vD = g.addVertexWithLabel(Types.description.toString());
    vD.setProperty(Properties.sctid.toString(), desc.getId());
    vD.setProperty(Properties.effectiveTime.toString(), desc.getEffectiveTime().getMillis());
    vD.setProperty(Properties.status.toString(), desc.getActive());
    vD.setProperty(Properties.created.toString(), new DateTime().getMillis());
    vD.setProperty(Properties.createdBy.toString(), SNAPSHOT_USER);
    vD.setProperty(Properties.languageCode.toString(), desc.getLanguageCode());
    vD.setProperty(Properties.title.toString(), desc.getTerm());
    //add module
    Vertex vM = processModule(g, desc.getModuleId());
    vD.addEdge(Relationship.hasModule.toString(), vM);
    //case significance
    Vertex vCs = processCaseSinificance(g, desc.getCaseSignificanceId());
    vD.addEdge(Relationship.hasCaseSignificance.toString(), vCs);
    //type
    Vertex vT = processType(g, desc.getTypeId());
    vD.addEdge(Relationship.hasType.toString(), vT);
    //concept
    Vertex vC = getVertex(g, desc.getConceptId());
    LOGGER.trace("Concept vertex {}", vC);
    if (vC != null) {
        // FSN terms double as the concept's display title.
        if (DescriptionType.fsn.equals(descMap.get(desc.getTypeId()))) {
            LOGGER.debug("Adding FSN as concept title {}", desc.getTerm());
            vC.setProperty(Properties.title.toString(), desc.getTerm());
        }
        // Edge label is the mapped type name (e.g. fsn/synonym);
        // NPEs if typeId is not in descMap — callers only pass mapped types.
        String name = descMap.get(desc.getTypeId()).toString();
        LOGGER.trace("Concept vertex {}, typeId {} and resulted edge label name {}", vC, desc.getTypeId(), name);
        Edge e = vC.addEdge(name, vD);
        e.setProperty(Properties.title.toString(), desc.getTerm());
    } else {
        LOGGER.error("Could not find concept for id {}", desc.getConceptId());
    }
    LOGGER.trace("processDescription total time {} sec ", (System.currentTimeMillis() - start)/1000);
}
/**
 * Audits a single description row. A missing description vertex is created
 * via {@link #processDescription(Rf2Description)}. An existing vertex is
 * checked for its required edges (outgoing module, case significance and
 * type; incoming fsn/synonym edge from the concept) and, if any is missing,
 * the vertex is removed and recreated. A complete description still gets its
 * concept checked for a missing title, which is back-filled from the FSN
 * term together with the concept-to-description edge.
 */
private void auditDescription(Rf2Description desc) {
    long start = System.currentTimeMillis();
    LOGGER.debug("auditDescription description {}", desc.getId());
    Vertex existingVD = getVertex(g, desc.getId());
    if (existingVD == null) {
        LOGGER.debug("description {} does not exist adding", desc.getId());
        //add this description
        processDescription(desc);
    } else {
        LOGGER.debug("description {} exist auditing further", desc.getId());
        // Presence checks for the three mandatory outgoing edges...
        boolean hasModule = existingVD.getEdges(Direction.OUT, Relationship.hasModule.toString()).iterator().hasNext();
        boolean hasCaseSignificance = existingVD.getEdges(Direction.OUT, Relationship.hasCaseSignificance.toString()).iterator().hasNext();
        boolean hasType = existingVD.getEdges(Direction.OUT, Relationship.hasType.toString()).iterator().hasNext();
        // ...and the incoming edge from the concept labelled by the mapped type.
        boolean fsnOrSynnonym = existingVD.getEdges(Direction.IN, descMap.get(desc.getTypeId()).toString()).iterator().hasNext();
        if (!(hasModule && hasCaseSignificance && hasType && fsnOrSynnonym)) {
            LOGGER.debug("Description {} does not have full data. "
                    + "Reprocessing", desc.getId());
            //remove this description from db and add again
            g.removeVertex(existingVD);
            //add again
            processDescription(desc);
        } else {
            LOGGER.debug("auditDescription description {} has required data. "
                    + "Auditing concept for title ", desc.getId());
            //verify if concept vertex has title
            Vertex vC = getVertex(g, desc.getConceptId());
            if (vC != null) {
                String title = vC.getProperty(Properties.title.toString());
                // Only an FSN can supply a missing concept title.
                if (StringUtils.isBlank(title) && DescriptionType.fsn.equals(descMap.get(desc.getTypeId()))) {
                    LOGGER.debug("concept {} title does not exist adding it ", desc.getConceptId());
                    vC.addEdge(descMap.get(desc.getTypeId()).toString(), existingVD);
                    vC.setProperty(Properties.title.toString(), desc.getTerm());
                } else {
                    LOGGER.debug("concept {} title exist or it is a {} ", desc.getConceptId(), descMap.get(desc.getTypeId()).toString());
                }
            }
        }
    }
    LOGGER.trace("auditDescription total time {} sec ", (System.currentTimeMillis() - start)/1000);
}
/**
 * Checks whether the concept referenced by an FSN description already has a
 * non-blank title. Non-FSN descriptions always report {@code true} so the
 * caller skips the title back-fill for them.
 *
 * @param desc the description row being audited
 * @return {@code true} if the concept title exists, the description is not
 *         an FSN, or the concept vertex was not found with an unmapped type
 */
private boolean isConceptTitleExist(Rf2Description desc) {
    LOGGER.trace("isConceptTitleExist {}", desc.getConceptId());
    boolean isExist = false;
    long start = System.currentTimeMillis();
    // Null-safe comparison (descMap.get may return null for an unknown
    // typeId); was descMap.get(...).equals(fsn), which could NPE and was
    // inconsistent with the fsn.equals(...) form used elsewhere in this class.
    if (DescriptionType.fsn.equals(descMap.get(desc.getTypeId()))) {
        //verify if concept vertex has title
        Vertex vC = getVertex(g, desc.getConceptId());
        if (vC != null) {
            String title = vC.getProperty(Properties.title.toString());
            if (StringUtils.isBlank(title)) {
                isExist = false;
            } else {
                LOGGER.debug("Concept {} title {} exist", desc.getConceptId(), title);
                isExist = true;
            }
            LOGGER.debug("Title for concept {} exist? - {}", desc.getConceptId(), isExist);
        }
    } else {
        isExist = true;
        LOGGER.debug("Not a fsn skip hence return {} ", isExist);
    }
    LOGGER.trace("isConceptTitleExist total time {} sec ", (System.currentTimeMillis() - start)/1000);
    return isExist;
}
/**
 * Checks whether the concept already has an outgoing edge of this
 * description's sub-type whose title matches the description term
 * (case-insensitively). FSN descriptions always report {@code true} because
 * they are handled as the concept title instead.
 *
 * @param desc the description row being audited
 * @return {@code true} if a matching sub-type edge exists or the description
 *         is an FSN
 */
private boolean isSubTypeRelationExist(Rf2Description desc) {
    boolean isExist = false;
    // Hoist the lookup; fsn.equals(type) is null-safe for an unmapped typeId,
    // consistent with the equals order used elsewhere in this class (the old
    // descMap.get(...).equals(fsn) form could NPE).
    DescriptionType type = descMap.get(desc.getTypeId());
    if (!DescriptionType.fsn.equals(type)) {
        //verify concept's existing sub-type edges for a matching term
        Vertex vC = getVertex(g, desc.getConceptId());
        if (vC != null) {
            Iterable<Edge> es = vC.getEdges(Direction.OUT, type.toString());
            for (Edge e : es) {
                String title = e.getProperty(Properties.title.toString());
                if (!StringUtils.isBlank(title) && title.equalsIgnoreCase(desc.getTerm())) {
                    isExist = true;
                    break;
                }
            }
        }
    } else {
        isExist = true;
        LOGGER.debug("is a fsn skip, return {} ", isExist);//fsn is being processed as part of concept title. So skip
    }
    LOGGER.debug("description id {} exist ? = {} ", desc.getId(), isExist);
    return isExist;
}
/**
 * Sets the number of rows per commit batch.
 *
 * @param bufferSize the bufferSize to set; must be positive
 * @throws IllegalArgumentException if {@code bufferSize} is not positive
 */
public void setBufferSize(int bufferSize) {
    // A non-positive value would cause divide-by-zero in commit()
    // (rowNumber % bufferSize).
    if (bufferSize <= 0) {
        throw new IllegalArgumentException("bufferSize must be positive but was " + bufferSize);
    }
    this.bufferSize = bufferSize;
}
/**
 * Enables or disables reload mode. When enabled, rows whose description id
 * already has a vertex in the graph are skipped during {@link #audit(String)}.
 *
 * @param isReload the isReload to set
 */
public void setReload(boolean isReload) {
    this.isReload = isReload;
}
}