/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package arq; import static org.apache.jena.atlas.logging.LogCtl.setCmdLogging; import java.net.URL; import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.io.FileInputStream; import org.apache.jena.rdf.model.*; import org.apache.jena.sparql.util.Closure; /** * A program which read two RDF models and provides a basic triple level diff * * <p> * This program will read two RDF models, in a variety of languages, and compare * them providing a basic triple level diff output. Since blank nodes are a * complicating factor diffs for blank node containing portions of the graph are * reported in terms of sub-graphs rather than individual triples. * </p> * <p> * Input can be read either from a URL or from a file. The program writes its * results to the standard output stream and sets its exit code to 0 if the * models are equal, to 1 if they are not and to -1 if it encounters an error. * </p> * * <p> * </p> * * <pre> * java jena.rdfdiff model1 model2 lang1? lang2? base1? base2? * </pre> */ public class rdfdiff extends java.lang.Object { static { setCmdLogging(); } /** * @param args * the command line arguments */ public static void main(String... args) { if (args.length < 2 || args.length > 6) { usage(); System.exit(-1); } String in1 = args[0]; String in2 = args[1]; String lang1 = "RDF/XML"; if (args.length >= 3) { lang1 = args[2]; } String lang2 = "N-TRIPLE"; if (args.length >= 4) { lang2 = args[3]; } String base1 = null; if (args.length >= 5) { base1 = args[4]; } String base2 = base1; if (args.length >= 6) { base2 = args[5]; } System.out.println(in1 + " " + in2 + " " + lang1 + " " + lang2 + " " + base1 + " " + base2); try { Model m1 = ModelFactory.createDefaultModel(); Model m2 = ModelFactory.createDefaultModel(); read(m1, in1, lang1, base1); read(m2, in2, lang2, base2); if (m1.isIsomorphicWith(m2)) { System.out.println("models are equal"); System.out.println(); System.exit(0); } else { System.out.println("models are unequal"); System.out.println(); if (m1.size() != m2.size()) { System.out.println(String.format("< %,d triples", m1.size())); System.out.println(String.format("> %,d triples", m2.size())); } // Calculate differences Map<AnonId, Model> m1SubGraphs = new HashMap<>(); StmtIterator iter = m1.listStatements(); while (iter.hasNext()) { Statement stmt = iter.next(); if (stmt.asTriple().isConcrete()) { if (!m2.contains(stmt)) { System.out.print("< "); System.out.println(stmt.toString()); } } else { // Handle blank nodes via sub-graphs addToSubGraph(stmt, m1SubGraphs); } } Map<AnonId, Model> m2SubGraphs = new HashMap<>(); iter = m2.listStatements(); while (iter.hasNext()) { Statement stmt = iter.next(); if (stmt.asTriple().isConcrete()) { if (!m1.contains(stmt)) { System.out.print("> "); System.out.println(stmt.toString()); } } else { // Handle blank nodes via sub-graphs addToSubGraph(stmt, m2SubGraphs); } } // Compute sub-graph differences // Reduce to sets Set<Model> m1SubGraphSet = new TreeSet<>(new ModelReferenceComparator()); m1SubGraphSet.addAll(m1SubGraphs.values()); Set<Model> m2SubGraphSet = new TreeSet<>(new ModelReferenceComparator()); m2SubGraphSet.addAll(m2SubGraphs.values()); if (m1SubGraphSet.size() != m2SubGraphSet.size()) { System.out.println("< " + m1SubGraphs.size() + " sub-graphs"); System.out.println("> " + m2SubGraphs.size() + " sub-graphs"); } if (m1SubGraphSet.size() > 0) { diffSubGraphs(m1SubGraphSet, m2SubGraphSet, "< "); } if (m2SubGraphSet.size() > 0) { diffSubGraphs(m2SubGraphSet, m1SubGraphSet, "> "); } System.exit(1); } } catch (Exception e) { System.err.println("Unhandled exception:"); System.err.println(" " + e.toString()); System.exit(-1); } } private static void diffSubGraphs(Set<Model> m1SubGraphSet, Set<Model> m2SubGraphSet, String prefix) { for (Model subGraph : m1SubGraphSet) { // Find candidate matches List<Model> candidates = new ArrayList<>(); for (Model subGraphCandidate : m2SubGraphSet) { if (subGraph.size() == subGraphCandidate.size()) { candidates.add(subGraph); } } if (candidates.size() == 0) { // No match printNonMatchingSubGraph(prefix, subGraph); } else if (candidates.size() == 1) { // Precisely 1 candidate if (!subGraph.isIsomorphicWith(candidates.get(0))) { printNonMatchingSubGraph(prefix, subGraph); } else { m2SubGraphSet.remove(candidates.get(0)); } } else { // Multiple candidates boolean matched = false; for (Model subGraphCandidate : candidates) { if (subGraph.isIsomorphicWith(subGraphCandidate)) { // Found a match matched = true; m2SubGraphSet.remove(subGraphCandidate); break; } } if (!matched) { // Didn't find a match printNonMatchingSubGraph(prefix, subGraph); } } } } private static void printNonMatchingSubGraph(String prefix, Model subGraph) { StmtIterator sIter = subGraph.listStatements(); while (sIter.hasNext()) { System.out.print(prefix); System.out.println(sIter.next().toString()); } } private static void addToSubGraph(Statement stmt, Map<AnonId, Model> subGraphs) { Set<AnonId> ids = new HashSet<>(); addToIdList(stmt, ids); // Here we take a copy of the IDs Model subGraph = null; for (AnonId id : ids) { if (!subGraphs.containsKey(id)) { subGraph = Closure.closure(stmt); subGraph.add(stmt); break; } } // May already have built the sub-graph that includes this statement if (subGraph == null) return; // Find any further IDs that occur in the sub-graph StmtIterator sIter = subGraph.listStatements(); while (sIter.hasNext()) { addToIdList(sIter.next(), ids); } // Associate the sub-graph with all mentioned blank node IDs for (AnonId id : ids) { if (subGraphs.containsKey(id)) throw new IllegalStateException(String.format("ID %s occurs in multiple sub-graphs", id)); subGraphs.put(id, subGraph); } } private static void addToIdList(Statement stmt, Set<AnonId> ids) { if (stmt.getSubject().isAnon()) { ids.add(stmt.getSubject().getId()); } if (stmt.getObject().isAnon()) { ids.add(stmt.getObject().asResource().getId()); } } protected static void usage() { System.err.println("usage:"); System.err.println(" java jena.rdfdiff source1 source2 [lang1 [lang2 [base1 [base2]]]]"); System.err.println(); System.err.println(" source1 and source2 can be URL's or filenames"); System.err.println(" lang1 and lang2 can take values:"); System.err.println(" RDF/XML"); System.err.println(" N-TRIPLE"); System.err.println(" N3"); System.err.println(" lang1 defaults to RDF/XML, lang2 to N-TRIPLE"); System.err.println(" base1 and base2 are URIs"); System.err.println(" base1 defaults to null"); System.err.println(" base2 defaults to base1"); System.err.println(" If no base URIs are specified Jena determines the base URI based on the input source"); System.err.println(); } protected static void read(Model model, String in, String lang, String base) throws java.io.FileNotFoundException { try { URL url = new URL(in); model.read(in, base, lang); } catch (java.net.MalformedURLException e) { model.read(new FileInputStream(in), base, lang); } } private static class ModelReferenceComparator implements Comparator<Model> { @Override public int compare(Model o1, Model o2) { if (o1 == o2) return 0; int h1 = System.identityHashCode(o1); int h2 = System.identityHashCode(o2); if (h1 == h2) return 0; return h1 < h2 ? -1 : 1; } } }