// rdfdiff.java example
// Source tree: jena-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package arq;

import static org.apache.jena.atlas.logging.LogCtl.setCmdLogging;

import java.net.URL;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.io.FileInputStream;

import org.apache.jena.rdf.model.*;
import org.apache.jena.sparql.util.Closure;

/**
 * A program which reads two RDF models and provides a basic triple level diff.
 *
 * <p>
 * This program will read two RDF models, in a variety of languages, and compare
 * them providing a basic triple level diff output. Since blank nodes are a
 * complicating factor, diffs for blank node containing portions of the graph are
 * reported in terms of sub-graphs rather than individual triples.
 * </p>
 * <p>
 * Input can be read either from a URL or from a file. The program writes its
 * results to the standard output stream and sets its exit code to 0 if the
 * models are equal, to 1 if they are not and to -1 if it encounters an error.
 * </p>
 * <p>
 * Output lines prefixed with {@code "< "} describe content found only in the
 * first model; lines prefixed with {@code "> "} describe content found only in
 * the second model.
 * </p>
 *
 * <pre>
 * java jena.rdfdiff model1 model2 lang1? lang2? base1? base2?
 * </pre>
 */
public class rdfdiff {

    static {
        setCmdLogging();
    }

    /**
     * Reads both models, compares them and prints the differences.
     *
     * <p>
     * Always terminates the JVM: exit code 0 when the models are isomorphic, 1
     * when they differ, -1 on a usage error or any exception.
     * </p>
     *
     * @param args
     *            the command line arguments:
     *            {@code source1 source2 [lang1 [lang2 [base1 [base2]]]]}
     */
    public static void main(String... args) {
        if (args.length < 2 || args.length > 6) {
            usage();
            System.exit(-1);
        }

        String in1 = args[0];
        String in2 = args[1];
        String lang1 = args.length >= 3 ? args[2] : "RDF/XML";
        String lang2 = args.length >= 4 ? args[3] : "N-TRIPLE";
        String base1 = args.length >= 5 ? args[4] : null;
        // The second base URI falls back to the first one when not given
        String base2 = args.length >= 6 ? args[5] : base1;

        System.out.println(in1 + " " + in2 + " " + lang1 + " " + lang2 + " " + base1 + " " + base2);
        try {
            Model m1 = ModelFactory.createDefaultModel();
            Model m2 = ModelFactory.createDefaultModel();

            read(m1, in1, lang1, base1);
            read(m2, in2, lang2, base2);

            if (m1.isIsomorphicWith(m2)) {
                System.out.println("models are equal");
                System.out.println();
                System.exit(0);
            } else {
                System.out.println("models are unequal");
                System.out.println();
                reportDifferences(m1, m2);
                System.exit(1);
            }
        } catch (Exception e) {
            System.err.println("Unhandled exception:");
            System.err.println("    " + e.toString());
            System.exit(-1);
        }
    }

    /**
     * Prints the differences between two models that are known not to be
     * isomorphic: first the triple counts (if they differ), then the
     * blank-node-free statements unique to each model, then the blank node
     * sub-graphs unique to each model.
     */
    private static void reportDifferences(Model m1, Model m2) {
        if (m1.size() != m2.size()) {
            System.out.println(String.format("< %,d triples", m1.size()));
            System.out.println(String.format("> %,d triples", m2.size()));
        }

        // Statements without blank nodes are diffed directly; the rest are grouped
        Map<AnonId, Model> m1SubGraphs = diffGroundStatements(m1, m2, "< ");
        Map<AnonId, Model> m2SubGraphs = diffGroundStatements(m2, m1, "> ");

        // Several IDs map to the same sub-graph, so reduce the map values to the
        // distinct sub-graph instances before comparing
        Set<Model> m1SubGraphSet = identitySet(m1SubGraphs.values());
        Set<Model> m2SubGraphSet = identitySet(m2SubGraphs.values());

        if (m1SubGraphSet.size() != m2SubGraphSet.size()) {
            // Report the number of distinct sub-graphs (the same quantity the
            // condition compares), not the number of blank node IDs
            System.out.println("< " + m1SubGraphSet.size() + " sub-graphs");
            System.out.println("> " + m2SubGraphSet.size() + " sub-graphs");
        }

        // The first pass removes every matched pair from both sets, so the second
        // pass only sees (and prints) sub-graphs of m2 that found no partner
        diffSubGraphs(m1SubGraphSet, m2SubGraphSet, "< ");
        diffSubGraphs(m2SubGraphSet, m1SubGraphSet, "> ");
    }

    /**
     * Walks all statements of {@code source}. Statements without blank nodes
     * that are missing from {@code other} are printed with {@code prefix};
     * statements mentioning a blank node are collected into sub-graphs.
     *
     * @return the collected sub-graphs, keyed by every blank node ID they mention
     */
    private static Map<AnonId, Model> diffGroundStatements(Model source, Model other, String prefix) {
        Map<AnonId, Model> subGraphs = new HashMap<>();
        StmtIterator iter = source.listStatements();
        while (iter.hasNext()) {
            Statement stmt = iter.next();
            if (hasBlankNode(stmt)) {
                // Blank node labels are not comparable across models, so these
                // statements are compared later as whole sub-graphs
                addToSubGraph(stmt, subGraphs);
            } else if (!other.contains(stmt)) {
                System.out.print(prefix);
                System.out.println(stmt.toString());
            }
        }
        return subGraphs;
    }

    /**
     * Tells whether the subject or the object of the statement is a blank node.
     * This is tested explicitly with {@code isAnon()} rather than via
     * {@code Triple.isConcrete()}, the same test {@link #addToIdList} relies on.
     */
    private static boolean hasBlankNode(Statement stmt) {
        return stmt.getSubject().isAnon() || stmt.getObject().isAnon();
    }

    /**
     * Creates a set that distinguishes models by reference only, pre-filled with
     * the given models. Reference identity cannot produce false duplicates,
     * unlike ordering by identity hash code, which may collide.
     */
    private static Set<Model> identitySet(java.util.Collection<Model> models) {
        Set<Model> set = java.util.Collections.newSetFromMap(new java.util.IdentityHashMap<Model, Boolean>());
        set.addAll(models);
        return set;
    }

    /**
     * Prints (using {@code prefix}) every sub-graph of the first set that has no
     * isomorphic counterpart in the second set.
     *
     * <p>
     * Both sets are modified: each matched pair is removed from both of them so
     * that a sub-graph can be used as a counterpart only once.
     * </p>
     *
     * @param m1SubGraphSet
     *            sub-graphs to look for
     * @param m2SubGraphSet
     *            sub-graphs to match against
     * @param prefix
     *            text printed in front of every statement of an unmatched sub-graph
     */
    private static void diffSubGraphs(Set<Model> m1SubGraphSet, Set<Model> m2SubGraphSet, String prefix) {
        java.util.Iterator<Model> iter = m1SubGraphSet.iterator();
        while (iter.hasNext()) {
            Model subGraph = iter.next();
            Model match = findIsomorphic(subGraph, m2SubGraphSet);
            if (match == null) {
                printNonMatchingSubGraph(prefix, subGraph);
            } else {
                // Consume the pair on both sides
                iter.remove();
                m2SubGraphSet.remove(match);
            }
        }
    }

    /**
     * Returns the first candidate that is isomorphic with {@code subGraph}, or
     * {@code null} if there is none. The size comparison is a cheap pre-filter
     * before the isomorphism test.
     */
    private static Model findIsomorphic(Model subGraph, Set<Model> candidates) {
        for (Model candidate : candidates) {
            if (subGraph.size() == candidate.size() && subGraph.isIsomorphicWith(candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /** Prints every statement of the sub-graph on its own line, preceded by {@code prefix}. */
    private static void printNonMatchingSubGraph(String prefix, Model subGraph) {
        StmtIterator sIter = subGraph.listStatements();
        while (sIter.hasNext()) {
            System.out.print(prefix);
            System.out.println(sIter.next().toString());
        }
    }

    /**
     * Adds a statement that mentions at least one blank node to the sub-graph
     * it belongs to, creating or merging sub-graphs as required.
     *
     * <p>
     * Statements end up in the same sub-graph when they are connected through
     * shared blank nodes. After the call every blank node ID of the affected
     * sub-graph maps to that single sub-graph instance, independent of the order
     * in which statements arrive.
     * </p>
     *
     * @param stmt
     *            the statement to add
     * @param subGraphs
     *            sub-graphs built so far, keyed by every blank node ID they mention
     */
    private static void addToSubGraph(Statement stmt, Map<AnonId, Model> subGraphs) {
        Set<AnonId> ids = new HashSet<>();
        addToIdList(stmt, ids);

        // A statement has at most two blank nodes (subject and object), so it can
        // touch at most two distinct existing sub-graphs
        Model target = null;
        Model other = null;
        for (AnonId id : ids) {
            Model existing = subGraphs.get(id);
            if (existing == null || existing == target) {
                continue;
            }
            if (target == null) {
                target = existing;
            } else {
                other = existing;
            }
        }

        if (target == null) {
            target = ModelFactory.createDefaultModel();
        } else if (other != null) {
            // The statement links two sub-graphs: fold the smaller into the larger
            if (other.size() > target.size()) {
                Model swap = target;
                target = other;
                other = swap;
            }
            StmtIterator absorbed = other.listStatements();
            while (absorbed.hasNext()) {
                Statement moved = absorbed.next();
                target.add(moved);
                // Remember the absorbed IDs so they get re-pointed below
                addToIdList(moved, ids);
            }
        }

        target.add(stmt);
        for (AnonId id : ids) {
            subGraphs.put(id, target);
        }
    }

    /** Adds the IDs of the statement's blank node subject and/or object to {@code ids}. */
    private static void addToIdList(Statement stmt, Set<AnonId> ids) {
        if (stmt.getSubject().isAnon()) {
            ids.add(stmt.getSubject().getId());
        }
        if (stmt.getObject().isAnon()) {
            ids.add(stmt.getObject().asResource().getId());
        }
    }

    /** Prints the command line usage to the standard error stream. */
    protected static void usage() {
        System.err.println("usage:");
        System.err.println("    java jena.rdfdiff source1 source2 [lang1 [lang2 [base1 [base2]]]]");
        System.err.println();
        System.err.println("    source1 and source2 can be URL's or filenames");
        System.err.println("    lang1 and lang2 can take values:");
        System.err.println("      RDF/XML");
        System.err.println("      N-TRIPLE");
        System.err.println("      N3");
        System.err.println("    lang1 defaults to RDF/XML, lang2 to N-TRIPLE");
        System.err.println("    base1 and base2 are URIs");
        System.err.println("    base1 defaults to null");
        System.err.println("    base2 defaults to base1");
        System.err.println("    If no base URIs are specified Jena determines the base URI based on the input source");

        System.err.println();
    }

    /**
     * Reads RDF into the model from a URL or, if {@code in} is not a well-formed
     * URL, from the file of that name.
     *
     * @param model
     *            the model to read into
     * @param in
     *            a URL or a file name
     * @param lang
     *            the language of the input, passed through to the model's reader
     * @param base
     *            the base URI, may be {@code null}
     * @throws java.io.FileNotFoundException
     *             if {@code in} is treated as a file name and the file cannot be opened
     * @throws java.io.UncheckedIOException
     *             if the file was read but closing it failed
     */
    protected static void read(Model model, String in, String lang, String base) throws java.io.FileNotFoundException {
        try {
            // Constructed only to check that 'in' parses as a URL
            new URL(in);
            model.read(in, base, lang);
        } catch (java.net.MalformedURLException notAUrl) {
            // Not a URL: treat it as a file name and make sure the stream is closed
            try (FileInputStream input = new FileInputStream(in)) {
                model.read(input, base, lang);
            } catch (java.io.FileNotFoundException e) {
                throw e;
            } catch (java.io.IOException e) {
                // Only close() can get here; keep the declared throws clause intact
                throw new java.io.UncheckedIOException("Failed to close input file " + in, e);
            }
        }
    }
}