/******************************************************************************* * Copyright (c) 2008 Cambridge Semantics Incorporated. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * Cambridge Semantics Incorporated *******************************************************************************/ package org.openanzo.client.cli; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.io.Writer; import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Set; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.openanzo.client.AnzoClient; import org.openanzo.client.ClientGraph; import org.openanzo.exceptions.AnzoException; import org.openanzo.exceptions.AnzoRuntimeException; import org.openanzo.ontologies.openanzo.AnzoFactory; import org.openanzo.ontologies.openanzo.NamedGraph; import org.openanzo.rdf.Constants; import org.openanzo.rdf.IAnzoGraph; import org.openanzo.rdf.RDFFormat; import org.openanzo.rdf.Resource; import org.openanzo.rdf.Statement; import org.openanzo.rdf.URI; import org.openanzo.rdf.utils.Pair; import org.openanzo.rdf.utils.UriGenerator; import org.openanzo.rdf.vocabulary.RDF; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Gets graphs from the anzo repository and prints them in serialized rdf formats. * * @author Joe Betz <jpbetz@cambridgesemantics.com> * */ class GetCommand extends RdfIOCommand { private static final Logger log = LoggerFactory.getLogger(GetCommand.class); private static final Option REVISION = new Option("r", "revision", true, "Gets a specific revision of the graph. Returns an error if the graph is non-revisioned."); private static final Option OUTPUT_FORMAT = new Option("o", "output-format", true, "Overide the default RDF format associated with the RDF output(s)"); private static final Option METADATA = new Option("m", "metadata", false, "include metadata graph (TRIX and TRIG only)"); private static final Option OUTPUT_FILE = new Option("f", "output-file", true, "write the get results to a file"); private static final Option EXPAND_DATASET = new Option("e", "expand-dataset", false, "Expand the given dataset according to the SPARQL dataset definition."); private static final Option EXPORT_DATASET = new Option("E", "export-dataset", false, "Export the given dataset for backup or sharing."); private static final int DEFAULT_BATCH_SIZE = 1000; static { REVISION.setArgName("int"); OUTPUT_FORMAT.setArgName("format"); OUTPUT_FILE.setArgName("file"); } public String getName() { return "get"; } public Options getOptions() { Options options = new Options(); options.addOption(REVISION); options.addOption(OUTPUT_FORMAT); options.addOption(METADATA); options.addOption(OUTPUT_FILE); options.addOption(EXPAND_DATASET); options.addOption(EXPORT_DATASET); return options; } public int invoke(CommandLine cl, CommandContext context, AnzoClient client) throws AnzoException { log.debug("LOG: INVOKING GET COMMAND!"); if (cl.getArgs().length < 1) { throw new InvalidArgumentException("Argument required"); } boolean expandDataset = isFlagSet(cl, EXPAND_DATASET); boolean exportDataset = isFlagSet(cl, EXPORT_DATASET); RDFFormat outputFormatOverride = getFormatOption(cl, OUTPUT_FORMAT); Pair<File, RDFFormat> output = getFileOption(cl, OUTPUT_FILE, outputFormatOverride, false); Long revision = null; if (cl.hasOption(REVISION.getOpt())) { String revisionStr = cl.getOptionValue(REVISION.getOpt()); try { revision = Long.valueOf(revisionStr); } catch (NumberFormatException e) { throw new InvalidArgumentException("Illegal revision option value: " + revisionStr + ". Revision must be a number."); } if (revision < 0) { throw new InvalidArgumentException("Illegal revision option value: " + revisionStr + ". Revision must be non-negative."); } } Writer out = null; RDFFormat outputFormat = null; if (output == null) { try { out = new OutputStreamWriter(System.out, Constants.byteEncoding); } catch (UnsupportedEncodingException uee) { throw new RuntimeException("This exception should never occur since UTF-8 is always supported"); } if (outputFormatOverride == null) { outputFormat = RDFFormat.forFileName("." + CommandLineInterface.DEFAULT_RDF_FORMAT); } else { outputFormat = outputFormatOverride; } } else { try { out = new OutputStreamWriter(new FileOutputStream(output.first), Constants.byteEncoding); } catch (FileNotFoundException e) { throw new InvalidArgumentException("Output file not found:" + output.first.getAbsolutePath()); } catch (IOException e) { throw new InvalidArgumentException("Error writing to output file:" + output.first.getAbsolutePath() + " [" + e.getMessage() + "]"); } outputFormat = output.second; } boolean getMetadata = false; if (isFlagSet(cl, METADATA)) { if (outputFormatOverride != null && !outputFormatOverride.supportsNamedGraphs()) { throw new CommandException("Metadata may only be retrieved for formats supporting named graphs: TRIX, TRIG"); } getMetadata = true; } String[] args = cl.getArgs(); List<URI> uris = getURIArguments(args, 0, args.length, context); if (uris.size() > 1 && outputFormatOverride != null && !outputFormatOverride.supportsNamedGraphs()) { throw new CommandException("multiple named graph inputs not supported for format: " + outputFormatOverride.name()); } if (expandDataset || exportDataset) { if (uris.size() != 1) { throw new CommandException("Must provide exactly one named graph URI when " + EXPAND_DATASET.getLongOpt() + " option is set."); } if (getMetadata && expandDataset) { throw new CommandException(METADATA.getLongOpt() + " option and " + EXPAND_DATASET.getLongOpt() + " option may not both be set."); } if (expandDataset && exportDataset) { throw new CommandException(EXPAND_DATASET.getLongOpt() + " option and " + EXPORT_DATASET.getLongOpt() + " option may not both be set"); } return getDataset(context, client, out, outputFormat, uris.get(0), exportDataset, getMetadata); } else { return get(context, client, getMetadata, out, outputFormat, uris, revision); } } private int get(CommandContext context, AnzoClient client, boolean getMetadata, Writer out, RDFFormat outputFormat, List<URI> uris, Long revision) throws AnzoException { int result = 1; boolean owns = false; try { owns = !client.isConnected(); if (owns) { client.connect(); printOnConnectionSuccess(context); } HashSet<Statement> store = new HashSet<Statement>(); boolean foundAll = true; for (URI uri : uris) { IAnzoGraph graph = null; if (revision != null) { graph = client.getNamedGraphRevision(uri, revision); // catch this? ExceptionConstants.DATASOURCE.NAMEDGRAPH.NON_REVISIONED_GRAPH } else { graph = client.getCurrentNamedGraphRevision(uri); } if (graph != null) { store.addAll(graph.getStatements()); if (getMetadata) { store.addAll(graph.getMetadataGraph().getStatements()); } } else { context.writeError("Graph does not exist or hidden due to access control: " + uri); foundAll = false; } } context.outputRdf(store, outputFormat, out); if (foundAll) { result = 0; } } finally { try { if (owns) client.close(); } catch (AnzoRuntimeException e) { log.error("Error closing connection", e); } } return result; } private int getDataset(CommandContext context, AnzoClient client, Writer out, RDFFormat outputFormat, URI namedGraph, boolean export, boolean getMetadata) throws AnzoException { int result = 1; try { client.connect(); ClientGraph graph = client.getServerGraph(namedGraph); Set<Statement> results = new HashSet<Statement>(); Collection<Statement> find = graph.find(null, RDF.TYPE, org.openanzo.ontologies.openanzo.Dataset.TYPE); if (find.size() != 1) { throw new CommandException("Requested graph is not a valid Dataset. Datasets must contain exactly one item of type " + org.openanzo.ontologies.openanzo.Dataset.TYPE); } Resource subject = find.iterator().next().getSubject(); if (export) { results.addAll(graph.getStatements()); if (getMetadata) results.addAll(getEditableMetadataStatements(graph.getMetadataGraph().getStatements())); } org.openanzo.ontologies.openanzo.Dataset ds = AnzoFactory.getDataset(subject, graph); Set<NamedGraph> ngs = new HashSet<NamedGraph>(); ngs.addAll(ds.getNamedGraph()); ngs.addAll(ds.getDefaultNamedGraph()); processGraphs(ngs, results, client, getMetadata, context, out, outputFormat); Set<NamedGraph> dgs = new HashSet<NamedGraph>(); dgs.addAll(ds.getDefaultGraph()); if (export) processGraphs(dgs, results, client, getMetadata, context, out, outputFormat); else { dgs.addAll(ds.getDefaultNamedGraph()); // only need to reprocess the default namedGraphs if the expand option is on URI[] namedGraphUris = new URI[DEFAULT_BATCH_SIZE]; int count = 0; for (NamedGraph dg : dgs) { namedGraphUris[count++] = getUri(dg); if (count >= DEFAULT_BATCH_SIZE) { for (Statement stmt : client.serverFind(null, null, null, namedGraphUris)) { results.add(new Statement(stmt.getSubject(), stmt.getPredicate(), stmt.getObject())); } context.outputRdf(results, outputFormat, out); results.clear(); count = 0; } } if (count > 0) { // process the remaining dgs.size() % DEFAULT_BATCH_SIZE graphs namedGraphUris = resizeURIArray(namedGraphUris, count); for (Statement stmt : client.serverFind(null, null, null, namedGraphUris)) { results.add(new Statement(stmt.getSubject(), stmt.getPredicate(), stmt.getObject())); } context.outputRdf(results, outputFormat, out); } } result = 0; } finally { try { client.close(); } catch (AnzoRuntimeException e) { log.error("Error closing connection", e); } } return result; } private void processGraphs(Set<NamedGraph> ngs, Set<Statement> results, AnzoClient client, boolean getMetadata, CommandContext context, Writer out, RDFFormat outputFormat) throws AnzoException { URI[] namedGraphUris = new URI[DEFAULT_BATCH_SIZE]; int count = 0; for (NamedGraph ng : ngs) { namedGraphUris[count++] = getUri(ng); if (count >= DEFAULT_BATCH_SIZE) { writeGraphStatements(namedGraphUris, results, client, getMetadata, context, out, outputFormat); count = 0; } } if (count > 0) { namedGraphUris = resizeURIArray(namedGraphUris, count); // adds the graph statements for the remaining ngs.size() % DEFAULT_BATCH_SIZE graphs writeGraphStatements(namedGraphUris, results, client, getMetadata, context, out, outputFormat); } } private void writeGraphStatements(URI[] namedGraphUris, Set<Statement> results, AnzoClient client, boolean getMetadata, CommandContext context, Writer out, RDFFormat outputFormat) throws AnzoException { results.addAll(client.serverFind(null, null, null, namedGraphUris)); if (getMetadata) { for (int i = 0; i < namedGraphUris.length; i++) { namedGraphUris[i] = UriGenerator.generateMetadataGraphUri(namedGraphUris[i]); } results.addAll(getEditableMetadataStatements(client.serverFind(null, null, null, namedGraphUris))); } context.outputRdf(results, outputFormat, out); results.clear(); } private Collection<Statement> getEditableMetadataStatements(Collection<Statement> metaStmts) { Collection<Statement> editableStmts = new HashSet<Statement>(); for (Statement metaStmt : metaStmts) { if (!RdfUploadCommand.hasReservedPredicate(metaStmt)) editableStmts.add(metaStmt); } return editableStmts; } private URI getUri(NamedGraph ng) throws AnzoException { Resource res = ng.resource(); if (!(res instanceof URI)) { throw new CommandException("Requested graph is not a valid Dataset. Referenced named graph is not a URI " + res); } return (URI) res; } private URI[] resizeURIArray(URI[] oldArray, int newSize) { URI[] newArray = new URI[newSize]; for (int i = 0; i < newSize; i++) { newArray[i] = oldArray[i]; } return newArray; } public void printHelp(IConsole consoleWriter) { String header = "Retrieves named graphs from the server."; String syntax = "anzo get [options] [NAMED-GRAPH-URI...]"; String footer = "RDF format options are: " + CommandLineInterface.getRDFFormatOptionsString(); Options options = getOptions(); CommandLineInterface.appendGlobalOptions(options); consoleWriter.printHelp( syntax, header, options, footer); } }