/* * Copyright (c) 2009, MediaEvent Services GmbH & Co. KG * http://mediaeventservices.com * * This file is part of Marbles. * * Marbles is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Marbles is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Marbles. If not, see <http://www.gnu.org/licenses/>. * */ package de.fuberlin.wiwiss.marbles; import java.io.OutputStream; import java.io.Writer; import java.util.HashMap; import org.openrdf.model.Graph; import org.openrdf.model.Resource; import org.openrdf.model.Statement; import org.openrdf.model.URI; import org.openrdf.model.Value; import org.openrdf.model.impl.GraphImpl; import org.openrdf.model.vocabulary.OWL; import org.openrdf.repository.RepositoryConnection; import org.openrdf.rio.RDFHandlerException; import org.openrdf.rio.rdfxml.RDFXMLWriter; /** * An {@link RDFXMLWriter} that skips duplicate statements. * <code>RDFXMLWriterUnique</code> checks for exactly duplicated statements. * Furthermore, it accepts only statements for one resource of an <code>owl:sameAs</code> * alias group, thereby assuming that all predicates, including <code>owl:sameAs</code>, * are mirrored among all group members (this is the expected behavior of SameAsInferencer). * * Note: The filtering algorithm requires <code>owl:sameAs</code> statements to be passed before any filtering can take place. * * @author Christian Becker */ public class RDFXMLWriterUnique extends RDFXMLWriter { Graph statementsWritten; HashMap<Value, Boolean> acceptedResources; RepositoryConnection conn; /** * Creates a new <code>RDFXMLWriterUnique</code> that will write to the supplied <code>OutputStream</code>. * * @param out The OutputStream to write the RDF/XML document to. */ public RDFXMLWriterUnique(OutputStream out) { super(out); statementsWritten = new GraphImpl(); acceptedResources = new HashMap<Value, Boolean>(); } /** * Creates a new RDFXMLWriterUnique that will write to the supplied Writer. * * @param writer The Writer to write the RDF/XML document to. */ public RDFXMLWriterUnique(Writer writer) { super(writer); statementsWritten = new GraphImpl(); acceptedResources = new HashMap<Value, Boolean>(); } @Override public void handleStatement(Statement st) throws RDFHandlerException { /* Check for duplicate statement - Sesame RDBMS provides these despite usage of the <code>DISTINCT</code> keyword */ if (statementsWritten.contains(st)) return; Boolean isAccepted = acceptedResources.get(st.getSubject()); if (isAccepted != null && !isAccepted) return; /* Handle <code>owl:sameAs</code> statement - accepting this resource, but not its alias */ if (st.getPredicate().equals(OWL.SAMEAS)) { acceptedResources.put(st.getSubject(), true); acceptedResources.put(st.getObject(), false); } statementsWritten.add(st); super.handleStatement(st); } }