/******************************************************************************* * Copyright 2012 University of Southern California * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * This code was developed by the Information Integration Group as part * of the Karma project at the Information Sciences Institute of the * University of Southern California. For more information, publications, * and related projects, please see: http://www.isi.edu/integration ******************************************************************************/ package edu.isi.karma.controller.command.alignment; import java.util.ArrayList; import java.util.List; import org.jgrapht.graph.DirectedWeightedMultigraph; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import edu.isi.karma.controller.command.Command; import edu.isi.karma.controller.command.CommandException; import edu.isi.karma.controller.update.ErrorUpdate; import edu.isi.karma.controller.update.SVGAlignmentUpdate_ForceKarmaLayout; import edu.isi.karma.controller.update.SemanticTypesUpdate; import edu.isi.karma.controller.update.UpdateContainer; import edu.isi.karma.modeling.alignment.Alignment; import edu.isi.karma.modeling.alignment.AlignmentManager; import edu.isi.karma.modeling.ontology.OntologyManager; import edu.isi.karma.modeling.semantictypes.CRFColumnModel; import edu.isi.karma.modeling.semantictypes.SemanticTypeTrainingThread; import edu.isi.karma.modeling.semantictypes.crfmodelhandler.CRFModelHandler; import edu.isi.karma.rep.HNode; import edu.isi.karma.rep.Worksheet; import edu.isi.karma.rep.alignment.ColumnNode; import edu.isi.karma.rep.alignment.Label; import edu.isi.karma.rep.alignment.Link; import edu.isi.karma.rep.alignment.LinkKeyInfo; import edu.isi.karma.rep.alignment.Node; import edu.isi.karma.rep.alignment.SemanticType; import edu.isi.karma.rep.alignment.SemanticType.ClientJsonKeys; import edu.isi.karma.rep.alignment.SynonymSemanticTypes; import edu.isi.karma.view.VWorkspace; public class SetSemanticTypeCommand extends Command { private final String hNodeId; private final String vWorksheetId; private final boolean trainAndShowUpdates; private final String rdfLiteralType; private CRFColumnModel oldColumnModel; private SynonymSemanticTypes oldSynonymTypes; private JSONArray typesArr; private SynonymSemanticTypes newSynonymTypes; private final boolean isPartOfKey; private Alignment oldAlignment; private DirectedWeightedMultigraph<Node, Link> oldGraph; // private Link newLink; private SemanticType oldType; private SemanticType newType; private final Logger logger = LoggerFactory.getLogger(this.getClass().getSimpleName()); protected SetSemanticTypeCommand(String id, String vWorksheetId, String hNodeId, boolean isPartOfKey, JSONArray typesArr, boolean trainAndShowUpdates, String rdfLiteralType) { super(id); this.hNodeId = hNodeId; this.vWorksheetId = vWorksheetId; this.isPartOfKey = isPartOfKey; this.trainAndShowUpdates = trainAndShowUpdates; this.typesArr = typesArr; this.rdfLiteralType = rdfLiteralType; addTag(CommandTag.Modeling); } @Override public String getCommandName() { return this.getClass().getSimpleName(); } @Override public String getTitle() { return "Set Semantic Type"; } @Override public String getDescription() { return ""; } @Override public CommandType getCommandType() { return CommandType.undoable; } @SuppressWarnings("unchecked") @Override public UpdateContainer doIt(VWorkspace vWorkspace) throws CommandException { /*** Get the Alignment for this worksheet ***/ Worksheet worksheet = vWorkspace.getViewFactory().getVWorksheet(vWorksheetId).getWorksheet(); OntologyManager ontMgr = vWorkspace.getWorkspace().getOntologyManager(); String alignmentId = AlignmentManager.Instance().constructAlignmentId(vWorkspace.getWorkspace().getId(), vWorksheetId); Alignment alignment = AlignmentManager.Instance().getAlignment(alignmentId); if (alignment == null) { alignment = new Alignment(ontMgr); AlignmentManager.Instance().addAlignmentToMap(alignmentId, alignment); } // Save the original alignment for undo oldAlignment = alignment.getAlignmentClone(); oldGraph = (DirectedWeightedMultigraph<Node, Link>)alignment.getGraph().clone(); /*** Add the appropriate nodes and links in alignment graph ***/ List<SemanticType> typesList = new ArrayList<SemanticType>(); for (int i = 0; i < typesArr.length(); i++) { try { JSONObject type = typesArr.getJSONObject(i); String domainValue = type.getString(ClientJsonKeys.Domain.name()); String fullTypeValue = type.getString(ClientJsonKeys.FullType.name()); // System.out.println("FULL TYPE:" + type.getString(ClientJsonKeys.FullType.name())); // System.out.println("Domain: " + type.getString(ClientJsonKeys.Domain.name())); // Look if the domain value exists. If it exists, then it is a domain of a data property. If not // then the value in FullType has the the value which indicates if a new class instance is needed // or an existing class instance should be used (this is the case when just the class is chosen as a sem type). // Label domainName = null; Node newDomainNode = null; boolean domainNodeAlreadyExistsInGraph = false; boolean domainValueExists = false; if (!domainValue.equals("")) { domainValueExists = true; // Check if the new domain is an existing instance newDomainNode = alignment.getNodeById(domainValue); if (newDomainNode != null) domainNodeAlreadyExistsInGraph = true; } if (type.getBoolean(ClientJsonKeys.isPrimary.name())) { // Check if a semantic type already exists for the column ColumnNode existingColumnNode = alignment.getColumnNodeByHNodeId(hNodeId); boolean columnNodeAlreadyExisted = false; Link oldIncomingLinkToColumnNode = null; Node oldDomainNode = null; if (existingColumnNode != null) { columnNodeAlreadyExisted = true; oldIncomingLinkToColumnNode = alignment.getCurrentIncomingLinksToNode(existingColumnNode.getId()).iterator().next(); oldDomainNode = oldIncomingLinkToColumnNode.getSource(); if (!rdfLiteralType.equals(existingColumnNode.getRdfLiteralType())) existingColumnNode.setRdfLiteralType(rdfLiteralType); } // Add a class link if the domain is null if (!domainValueExists) { Node classNode = alignment.getNodeById(fullTypeValue); LinkKeyInfo keyInfo = isPartOfKey ? LinkKeyInfo.PartOfKey : LinkKeyInfo.None; if (columnNodeAlreadyExisted && classNode != null) { alignment.removeLink(oldIncomingLinkToColumnNode.getId()); alignment.addClassInstanceLink(classNode, existingColumnNode, keyInfo); alignment.removeNode(oldDomainNode.getId()); // newLink = clsLink; } else { ColumnNode newColumnNode = getColumnNode(alignment, vWorkspace.getRepFactory().getHNode(hNodeId)); if (classNode == null) { Label classLabel = ontMgr.getUriLabel(fullTypeValue); if (classLabel == null) { logger.error("URI/ID does not exist in the ontology or model: " + fullTypeValue); continue; } classNode = alignment.addInternalNode(classLabel); } alignment.addClassInstanceLink(classNode, newColumnNode, keyInfo); // newLink = clsLink; } // Update the alignment alignment.align(); // Create the semantic type object newType = new SemanticType(hNodeId, classNode.getLabel(), null, SemanticType.Origin.User, 1.0,isPartOfKey); } // Add a property link if both type (property) and domain (class) is present else { Label propertyLabel = ontMgr.getUriLabel(fullTypeValue); if (propertyLabel == null) { logger.error("URI/ID does not exist in the ontology or model: " + fullTypeValue); continue; } // When only the link changes between the class node and the internal node (domain) if (columnNodeAlreadyExisted && domainNodeAlreadyExistsInGraph && (oldDomainNode == newDomainNode)) { alignment.removeLink(oldIncomingLinkToColumnNode.getId()); alignment.addDataPropertyLink(newDomainNode, existingColumnNode, propertyLabel, isPartOfKey); // newLink = propLink; } // When there was an existing semantic type and the new domain is a new node in the graph and columnNode already existed else if (columnNodeAlreadyExisted && !domainNodeAlreadyExistsInGraph) { alignment.removeLink(oldIncomingLinkToColumnNode.getId()); alignment.removeNode(oldDomainNode.getId()); Label domainLabel = ontMgr.getUriLabel(domainValue); newDomainNode = alignment.addInternalNode(domainLabel); alignment.addDataPropertyLink(newDomainNode, existingColumnNode, propertyLabel, isPartOfKey); // newLink = propLink; } // When the new domain node already existed in the graph else if (columnNodeAlreadyExisted && domainNodeAlreadyExistsInGraph) { alignment.removeLink(oldIncomingLinkToColumnNode.getId()); alignment.addDataPropertyLink(newDomainNode, existingColumnNode, propertyLabel, isPartOfKey); alignment.removeNode(oldDomainNode.getId()); } // For all other cases where the columnNode did not exist yet else { if (!domainNodeAlreadyExistsInGraph) { // System.out.println("Domain Value: " + domainValue); Label domainLabel = ontMgr.getUriLabel(domainValue); // System.out.println("Domain Label" + domainLabel); // Check for the case when domainLabel is null and // the node id has a integer in the end if (domainLabel == null) { int len = domainValue.length(); if ((len > 1) && Character.isDigit(domainValue.charAt(len-1))) { String newDomainValue = domainValue.substring(0, len-1); domainLabel = ontMgr.getUriLabel(newDomainValue); } // If still node is not found if (domainLabel == null) { logger.error("No graph node found for the node: " + domainValue); return new UpdateContainer(new ErrorUpdate("" + "Error occured while setting semantic type!")); } } newDomainNode = alignment.addInternalNode(domainLabel); } ColumnNode newColumnNode = getColumnNode(alignment, vWorkspace.getRepFactory().getHNode(hNodeId)); alignment.addDataPropertyLink(newDomainNode, newColumnNode, propertyLabel, isPartOfKey); // newLink = propLink; } // Update the alignment alignment.align(); // Create the semantic type object newType = new SemanticType(hNodeId, propertyLabel, newDomainNode.getLabel(), SemanticType.Origin.User, 1.0,isPartOfKey); } } else { // Synonym semantic type Label propertyLabel = ontMgr.getUriLabel(fullTypeValue); if (propertyLabel == null) { logger.error("URI/ID does not exist in the ontology or model: " + fullTypeValue); continue; } newDomainNode = alignment.getNodeById(domainValue); Label domainLabel = null; if (newDomainNode == null) { domainLabel = ontMgr.getUriLabel(domainValue); if (domainLabel == null) { logger.error("URI/ID does not exist in the ontology or model: " + domainValue); continue; } } else { domainLabel = newDomainNode.getLabel(); } SemanticType synType = new SemanticType(hNodeId, propertyLabel, domainLabel, SemanticType.Origin.User, 1.0,isPartOfKey); typesList.add(synType); } } catch (JSONException e) { logger.error("JSON Exception occured", e); } } UpdateContainer c = new UpdateContainer(); CRFModelHandler crfModelHandler = vWorkspace.getWorkspace().getCrfModelHandler(); // Save the old SemanticType object and CRF Model for undo oldType = worksheet.getSemanticTypes().getSemanticTypeForHNodeId(hNodeId); oldColumnModel = worksheet.getCrfModel().getModelByHNodeId(hNodeId); oldSynonymTypes = worksheet.getSemanticTypes().getSynonymTypesForHNodeId(hNodeId); if (newType != null) { // Update the SemanticTypes data structure for the worksheet worksheet.getSemanticTypes().addType(newType); // Update the synonym semanticTypes newSynonymTypes = new SynonymSemanticTypes(typesList); worksheet.getSemanticTypes().addSynonymTypesForHNodeId(newType.getHNodeId(), newSynonymTypes); } // Identify the outliers if the semantic type exists in the crfmodel // List<String> existingLabels = new ArrayList<String>(); // crfModelHandler.getLabels(existingLabels); // if (existingLabels.contains(newType.getCrfModelLabelString())) { // identifyOutliers(worksheet, vWorkspace, crfModelHandler, newType); // c.add(new TagsUpdate()); // } if(trainAndShowUpdates) { c.add(new SemanticTypesUpdate(worksheet, vWorksheetId, alignment)); try { // Add the visualization update c.add(new SVGAlignmentUpdate_ForceKarmaLayout(vWorkspace.getViewFactory().getVWorksheet(vWorksheetId), alignment)); } catch (Exception e) { logger.error("Error occured while setting the semantic type!", e); return new UpdateContainer(new ErrorUpdate( "Error occured while setting the semantic type!")); } // Train the semantic type in a separate thread Thread t = new Thread(new SemanticTypeTrainingThread(crfModelHandler, worksheet, newType)); t.start(); return c; } // Just do the alignment, no training and update JSON required. // AlignToOntology align = new AlignToOntology(worksheet, vWorkspace, vWorksheetId); // align.align(false); return c; } // private void identifyOutliers(Worksheet worksheet, // VWorkspace vWorkspace, CRFModelHandler crfModelHandler, SemanticType type) { // Tag outlierTag = vWorkspace.getWorkspace().getTagsContainer().getTag(TagName.Outlier); // Map<ColumnFeature, Collection<String>> features = new HashMap<ColumnFeature, Collection<String>>(); // // // Get the HNodePath // List<HNodePath> allPaths = worksheet.getHeaders().getAllPaths(); // for (HNodePath currentPath:allPaths) { // if (currentPath.getLeaf().getId().equals(hNodeId)) { //// List<String> columnNamesList = new ArrayList<String>(); //// columnNamesList.add(currentPath.getLeaf().getColumnName()); //// features.put(ColumnFeature.ColumnHeaderName, columnNamesList); // String typeString = newType.isClass() ? newType.getType().getUri() : newType.getDomain().getUri() + "|" + newType.getType().getUri(); // SemanticTypeUtil.identifyOutliers(worksheet, typeString, currentPath, outlierTag, features, crfModelHandler); // break; // } // } // // } private ColumnNode getColumnNode(Alignment alignment, HNode hNode) { String columnName = hNode.getColumnName(); ColumnNode columnNode = alignment.getColumnNodeByHNodeId(hNodeId); if (columnNode == null) { columnNode = alignment.addColumnNode(hNodeId, columnName, rdfLiteralType); } else { // Remove old column node if it exists alignment.removeNode(columnNode.getId()); columnNode = alignment.addColumnNode(hNodeId, columnName, rdfLiteralType); } return columnNode; } @Override public UpdateContainer undoIt(VWorkspace vWorkspace) { UpdateContainer c = new UpdateContainer(); Worksheet worksheet = vWorkspace.getViewFactory().getVWorksheet(vWorksheetId).getWorksheet(); if (oldType == null) { worksheet.getSemanticTypes().unassignColumnSemanticType(newType.getHNodeId()); } else { worksheet.getSemanticTypes().addType(oldType); worksheet.getSemanticTypes().addSynonymTypesForHNodeId(newType.getHNodeId(), oldSynonymTypes); } worksheet.getCrfModel().addColumnModel(newType.getHNodeId(), oldColumnModel); // Replace the current alignment with the old alignment String alignmentId = AlignmentManager.Instance().constructAlignmentId(vWorkspace.getWorkspace().getId(), vWorksheetId); AlignmentManager.Instance().addAlignmentToMap(alignmentId, oldAlignment); oldAlignment.setGraph(oldGraph); // System.out.println("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"); // GraphUtil.printGraph(oldAlignment.getGraph()); // GraphUtil.printGraph(oldAlignment.getSteinerTree()); // Get the alignment update if any try { c.add(new SemanticTypesUpdate(worksheet, vWorksheetId, oldAlignment)); c.add(new SVGAlignmentUpdate_ForceKarmaLayout(vWorkspace.getViewFactory().getVWorksheet(vWorksheetId), oldAlignment)); } catch (Exception e) { logger.error("Error occured while unsetting the semantic type!", e); return new UpdateContainer(new ErrorUpdate( "Error occured while unsetting the semantic type!")); } return c; } }