package com.cyc.tool.kbtaxonomy.builder; /* * #%L * KBTaxonomyGeneral * %% * Copyright (C) 2015 Cycorp, Inc * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ ; import com.cyc.library.json.JSONBuilder; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonElement; import com.google.gson.JsonParser; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.zip.GZIPInputStream; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.semanticweb.owlapi.model.OWLOntologyCreationException; /** * An implementation of Taxonomy that defines a taxonomy from JSON input. * */ public class TaxonomyFromJson implements Taxonomy { final static Set<String> badLabels = new HashSet<>(); static int line = 0; final Gson gson = new GsonBuilder().setPrettyPrinting().create(); Set<KBConcept> rootConcepts; /** * TaxonomyFromJson constructor. * */ public TaxonomyFromJson() { this(0); } /** * TaxonomyFromJson constructor * * @param limitNAnswers */ public TaxonomyFromJson(int limitNAnswers) { setRootConcepts(); } /** * Create a taxonomy from a JSON file * * @param JSONFileName a plain JSON file or zipped with gz or gzip extension * @throws Exception */ public TaxonomyFromJson(String JSONFileName) throws Exception { String content = null; File JSONFile = new File(JSONFileName); InputStream stream = getClass().getClassLoader().getResourceAsStream(JSONFileName); if (stream != null) { // Got the resource content = IOUtils.toString(stream); } else { //It isn't available as a resource - look for normal file if (!JSONFile.exists()) { throw new FileNotFoundException(JSONFileName + " does not exist"); } else { System.out.println("The Stream for " + JSONFileName + " is " + stream); if (JSONFileName.endsWith(".gz") || JSONFileName.endsWith(".gzip")) { try (InputStream gzipped = new GZIPInputStream(new FileInputStream(JSONFile))) { content = IOUtils.toString(gzipped); } catch (IOException e) { throw new RuntimeException("Failed to read gzipped file as stream " + JSONFileName); } } } } if (content == null) { throw new RuntimeException("Could not get JSON in " + JSONFileName + " as resource or file"); } rootConcepts = new HashSet<>(); List roots = new Gson().fromJson(content, List.class); assert roots.size() == 1 : "There should only be one root node in a loaded taxonomy; this one has " + roots.size(); for (Object root : roots) { addDependent(root, 0); } System.out.println("LOADED"); } private static String quote(String s) { return "\"" + s + "\""; } static String printList(List<Object> parents) { String res = ""; for (Object o : parents) { res += o.toString().substring(0, 100) + ", "; } return res; } @Override public Set<KBConcept> getConcepts() { return KBConcept.getAllConcepts(); } /** * @return the gson */ public Gson getGson() { return gson; } @Override public int getLinkCount() { return TaxonomicLink.getAllLinks().size(); } @Override public int getNodeCount() { return getConcepts().size(); } /** * @return the rootConcepts */ public Set<KBConcept> getRootConcepts() { return rootConcepts; } /** * @param rootConcepts the rootConcepts to set */ public void setRootConcepts(Set<KBConcept> rootConcepts) { this.rootConcepts = rootConcepts; } /** * * @return D3JSON output */ public String toD3JSON() { List<String> rootJasons = new ArrayList<>(); for (KBConcept c : getRootConcepts()) { System.out.println("Making D3 Graph JSON for " + c); rootJasons.add(c.toD3JSON()); } return flattenJSON(rootJasons, IsArray.NOTARRAY); } /** * * @return Gephi output */ public String toGephi() { StringBuilder sb = new StringBuilder(); for (KBConcept con : getRootConcepts()) { sb.append(toGephi(con, 0)); } return sb.toString(); } /** * * @param concept * @param depth * @return Gephi output */ public String toGephi(KBConcept concept, int depth) { assert depth < 100 : "to Gephi reached depth limit 100"; //Terminate on leaf nodes... they don't make edges if (concept.getChildSpecLinks().isEmpty()) { return ""; } StringBuilder sb = new StringBuilder(concept.getConceptCycL()); for (KBLink link : concept.getChildSpecLinks()) { sb.append(",").append(quote(link.getFrom().getName())); //Double weight for Genls vs Isa if (link instanceof GeneralisationLink) { sb.append(",").append(quote(link.getFrom().getName())); } //triple weight for owl:sameAs vs Isa if (link instanceof EquivalenceLink) { sb.append(",").append(quote(link.getFrom().getName())); sb.append(",").append(quote(link.getFrom().getName())); } } sb.append("\n"); for (KBLink link : concept.getChildSpecLinks()) { sb.append(toGephi(link.getFrom(), depth + 1)); } return sb.toString(); } /** * * @return JSON output */ public String toJSON() { // return gson.toJson(this); List<String> rootJasons = new ArrayList<>(); for (KBConcept c : getRootConcepts()) { System.out.println("Making JSON for " + c); rootJasons.add(c.toJSON()); } return flattenJSON(rootJasons, IsArray.ARRAY); } private void addLink(EdgeType ltype, KBConcept fromSpecific, KBConcept toGeneral) { if (ltype.getJsonName().equals("specializations")) { GeneralisationLink.create(fromSpecific, toGeneral); } else { NonTaxonomicLink.create(fromSpecific, toGeneral, ltype); } } private String flattenJSON(List<String> jsonRoots, IsArray at) { System.out.println("Trying to flatton " + jsonRoots.size() + " roots " + at); String flatJson = at == IsArray.ARRAY ? JSONBuilder.array(jsonRoots) : jsonRoots.get(0); try { JsonElement je = new JsonParser().parse(flatJson); String formattedJson = getGson().toJson(je); return formattedJson; } catch (com.google.gson.JsonSyntaxException e) { System.out.println(e + " in " + flatJson); } return flatJson; } private void setRootConcepts() { setRootConcepts(TaxonomicLink.getRoots()); } final KBConcept addDependent(Object jsonObject, int depth) throws OWLOntologyCreationException { System.out.println("Current call: " + jsonObject.toString().substring(0, 100)); assert depth < 100 : "100 is too deep for a taxonomy"; String cycConceptTerm; String conceptUri; String openCycConstant; KBConcept thisConcept = null; assert (jsonObject instanceof Map) : "Got non map jsonObject " + jsonObject; Map<String, Object> jmap = (Map<String, Object>) jsonObject; String conceptType = (String) jmap.get("type"); String printSeq = "[PS:" + jmap.get("printSequence").toString() + "]"; String depthS = printSeq + ": [N" + getNodeCount() + "L" + getLinkCount() + ":" + depth + "]"; assert (jmap.containsKey("kbTaxonomyCycConceptTerm")) : "Something is broken. got concept with no cycTerm"; cycConceptTerm = jmap.get("kbTaxonomyCycConceptTerm").toString(); try { conceptUri = jmap.get("hlid").toString(); openCycConstant = jmap.get("openCycConstant").toString(); } catch (NullPointerException e) { conceptUri = ""; openCycConstant = ""; } if (conceptType.equals("nonCycTeamConcept")) { assert (depth != 0) : " Seems wrong for a top level concept to be a NonCyc one " + cycConceptTerm; //It's a detectible "NonCyc concept" --- to do: add icon assert (jmap.containsKey("nonCycConceptName")) : "Found NonCyc Concept with no name " + cycConceptTerm; String nonCycName = jmap.get("nonCycConceptName").toString(); assert (jmap.containsKey("nonCycTeamConceptID")) : "Found NonCyc Concept with no numeric ID " + cycConceptTerm; // int nonCycID = (int) Math.round((Double) jmap.get("nonCycTeamConceptID")); List<Double> nonCycIDdoubles = (List<Double>) jmap.get("nonCycTeamConceptID"); List<Integer> nonCycID = new ArrayList<>(); nonCycIDdoubles.forEach(d -> { nonCycID.add(d.intValue()); }); try { List<String> nonCycWNIDs = (List<String>) jmap.get("NonCycConceptWNID"); // String nonCycWNIDs = jmap.get("NonCycConceptWNID").toString().replaceAll("\\[", "").replaceAll("\\]", ""); thisConcept = NonCycConcept.create(cycConceptTerm, nonCycID, nonCycName, conceptUri, nonCycWNIDs); } catch (Exception e) { System.out.println("No wnid info for conept: " + nonCycName + "/" + nonCycID); } assert (!jmap.containsKey("specializations")) : "NonCyc Concepts shouldn't have specialisations"; return thisConcept; } if (conceptType.equals("kbTaxonomyConcept")) { //It's a "taxonmy concept" --- so create an OpenCycConceptNode for it if (!jmap.containsKey("nlLabels")) { System.out.println("WARN: Cyc term: [" + cycConceptTerm + "] HAS NO LABEL"); Set<String> fakeLabel = new HashSet<>(); fakeLabel.add("[[[" + cycConceptTerm + "]]]"); // System.out.println((++line) + "CRE_CY" + cycConceptTerm + " NO NL LABEL USING "+fakeLabel); thisConcept = OpenCycConcept.create(cycConceptTerm, fakeLabel, conceptUri); } else { assert (jmap.containsKey("nlLabels")) : "Found Cyc Concept with no NL Labels " + cycConceptTerm; List labels = (List) jmap.get("nlLabels"); List<String> lstrings = new ArrayList<>(); for (Object l : labels) { lstrings.add(l.toString()); } // System.out.println((++line) + "CRE_CY" + cycConceptTerm + "," + lstrings); thisConcept = OpenCycConcept.create(cycConceptTerm, new HashSet<>(lstrings), conceptUri); } if (depth == 0) { getRootConcepts().add(thisConcept); } Set<String> possibleEdgeTypes = EdgeType.getPossibleEdgeTypes(); for (String possibleEdgeType : possibleEdgeTypes) { if (jmap.containsKey(possibleEdgeType)) { List specs = (List) jmap.get(possibleEdgeType); for (Object spec : specs) { KBConcept child = addDependent(spec, depth + 1); if (null != child) { EdgeType theEdgeType = new EdgeType(possibleEdgeType); addLink(theEdgeType, child, thisConcept); } } } } return thisConcept; } assert false : "should always be a AU or Cyc node " + conceptType; return null; } int getRootCount() { return getRootConcepts().size(); } String getRootNames() { return StringUtils.join(getRootConcepts(), ";\n"); } private enum IsArray { ARRAY, NOTARRAY } enum NodeType { TAXONOMY, CONCEPT, POORLYCONNECTED } }