/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.hadoop.rdf.io;
import java.io.IOException;
/**
 * RDF IO related constants: configuration keys and their default values.
 * <p>
 * This class only holds constants; it is {@code final} and has a private
 * constructor so it can be neither sub-classed nor instantiated.
 * </p>
 */
public final class RdfIOConstants {

    /**
     * Private constructor prevents instantiation
     */
    private RdfIOConstants() {
    }

    /**
     * Configuration key used to set whether bad tuples are ignored. Ignoring
     * bad tuples is the default behaviour; when this key is explicitly set to
     * {@code false} bad tuples will result in an {@link IOException} being
     * thrown by the relevant record readers.
     */
    public static final String INPUT_IGNORE_BAD_TUPLES = "rdf.io.input.ignore-bad-tuples";

    /**
     * Configuration key used to set the batch size used for RDF output formats
     * that take a batched writing approach. The default value is given by the
     * constant {@link #DEFAULT_OUTPUT_BATCH_SIZE}.
     */
    public static final String OUTPUT_BATCH_SIZE = "rdf.io.output.batch-size";

    /**
     * Default batch size for batched output formats
     */
    public static final long DEFAULT_OUTPUT_BATCH_SIZE = 10000;

    /**
     * Configuration key used to control behaviour with regards to how blank
     * nodes are handled.
     * <p>
     * The default behaviour is that blank nodes are file scoped which is what
     * the RDF specifications require.
     * </p>
     * <p>
     * However in the case of a multi-stage pipeline this behaviour can cause
     * blank nodes to diverge over several jobs and introduce spurious blank
     * nodes over time. This is described in <a
     * href="https://issues.apache.org/jira/browse/JENA-820">JENA-820</a> and
     * enabling this flag for jobs in your pipeline allows you to work around
     * this problem.
     * </p>
     * <p>
     * <strong>Warning:</strong> You should only enable this flag for jobs that
     * take in RDF output originating from previous jobs since our normal blank
     * node allocation policy ensures that blank nodes will be file scoped and
     * unique over all files (barring unfortunate hashing collisions). If you
     * enable this for jobs that take in RDF originating from other sources you
     * may incorrectly conflate blank nodes that are supposed to be distinct
     * and separate nodes.
     * </p>
     */
    public static final String GLOBAL_BNODE_IDENTITY = "rdf.io.input.bnodes.global-identity";
}