LangNTuple.java example

Explorer
jena-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.riot.lang;

import static org.apache.jena.riot.tokens.TokenType.STRING2 ;

import java.util.Iterator ;

import org.apache.jena.graph.Node ;
import org.apache.jena.riot.system.ParserProfile ;
import org.apache.jena.riot.system.StreamRDF ;
import org.apache.jena.riot.tokens.Token ;
import org.apache.jena.riot.tokens.TokenType ;
import org.apache.jena.riot.tokens.Tokenizer ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;

/** N-Quads, N-triples parser framework, with both push and pull interfaces.
 * 
 * <ul>
 * <li>The {@link #parse} method processes the whole stream of tokens, 
 *   sending each to a {@link org.apache.jena.atlas.lib.Sink} object.</li>
 * <li>The {@code Iterator<X>} interface yields tuples (triples or quads) one-by-one.</li>
 *  </ul> 
 * 
 * Normally, bad terms cause the parser to stop (i.e. they are treated as errors).
 * In addition, the NTuples subsystem allows triples/quads with "bad" terms
 * to be skipped.
 * 
 * Checking can be switched off completely. If the data is known to be correct,
 * no checking can be a large performance gain. <i>Caveat emptor</i>.
 */

public abstract class LangNTuple<X> extends LangBase implements Iterator<X>
{
    // Not referenced within this class; kept for use by future diagnostics.
    private static final Logger log = LoggerFactory.getLogger(LangNTuple.class) ;
    
    /**
     * Backing field for {@link #getSkipOnBadTerm()} / {@link #setSkipOnBadTerm(boolean)}.
     * Defaults to {@code false}. It is not consulted anywhere in this class;
     * presumably concrete subclasses read it when deciding whether to emit a tuple.
     */
    protected boolean skipOnBadTerm = false ;
    
    /**
     * Create the parser framework; all three arguments are passed straight
     * through to the superclass constructor.
     *
     * @param tokens  source of tokens to parse
     * @param profile parser profile (this class uses it for strict-mode checks and warnings)
     * @param dest    destination stream handed to the superclass
     */
    protected LangNTuple(Tokenizer tokens,
                         ParserProfile profile,
                         StreamRDF dest)
    { 
        super(tokens, profile, dest) ;
    }

    /**
     * Report whether there is more input, as answered by the superclass
     * {@code moreTokens()}.
     * Assumes no syntax errors: remaining tokens do not guarantee that a
     * complete, well-formed tuple can be read.
     */
    @Override
    public final boolean hasNext()
    {
        return super.moreTokens() ;
    }
    
    /**
     * Return the result of {@link #parseOne()}.
     * NOTE(review): {@link #hasNext()} is not checked first, so behaviour at the
     * end of input is whatever {@code parseOne()} does there rather than the
     * {@code NoSuchElementException} the {@link Iterator} contract describes.
     * Per the {@code parseOne()} contract the result may also be {@code null}.
     */
    @Override
    public final X next()
    {
        return parseOne() ;
    }
    
    /**
     * Removal is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public final void remove()
    {
        throw new UnsupportedOperationException() ;
    }

    /** Parse one tuple - return object to be sent to the sink or null for none */ 
    protected abstract X parseOne() ;
    
    /**
     * Note a tuple not being output: issues a warning {@code "Skip: " + printForm}
     * through the profile's handler.
     *
     * @param object    the skipped tuple (not used by this implementation)
     * @param printForm text describing the tuple, appended to the warning message
     * @param line      line number passed on to the warning
     * @param col       column number passed on to the warning
     */
    protected void skipOne(X object, String printForm, long line, long col)
    {
        profile.getHandler().warning("Skip: "+printForm, line, col) ;
    }

    /** Convert a token to a {@link Node}; the conversion is supplied by the concrete parser. */
    protected abstract Node tokenAsNode(Token token) ;

    /**
     * Accept the token if it is an IRI or a blank node; otherwise report it
     * through the inherited {@code exception(...)} helper (presumably raising
     * a parse error - defined outside this class).
     */
    protected final void checkIRIOrBNode(Token token)
    {
        if ( token.hasType(TokenType.IRI) ) return ;
        if ( token.hasType(TokenType.BNODE) ) return ; 
        exception(token, "Expected BNode or IRI: Got: %s", token) ;
    }

    /**
     * Accept the token only if it is an IRI; otherwise report it through the
     * inherited {@code exception(...)} helper.
     */
    protected final void checkIRI(Token token)
    {
        if ( token.hasType(TokenType.IRI) ) return ;
        exception(token, "Expected IRI: Got: %s", token) ;
    }

    /**
     * Check that the token is acceptable as an RDF term.
     * <ul>
     * <li>IRI, BNODE and STRING2 tokens are always accepted.</li>
     * <li>LITERAL_DT and LITERAL_LANG tokens are accepted, except that in strict
     *     mode their first sub-token must be a STRING2.</li>
     * <li>STRING1 tokens are accepted unless the profile is in strict mode.</li>
     * <li>Any other token type is reported as an illegal object.</li>
     * </ul>
     * Rejections go through the inherited {@code exception(...)} helper.
     */
    protected final void checkRDFTerm(Token token)
    {
        switch(token.getType())
        {
            case IRI:
            case BNODE:
            case STRING2:
                return ;
            case LITERAL_DT:
            case LITERAL_LANG:
                // Both literal forms are checked identically: in strict mode
                // sub-token 1 must itself be a STRING2.
                if ( profile.isStrictMode() && ! token.getSubToken1().hasType(STRING2) )
                    exception(token, "Illegal single quoted string: %s", token) ;
                return ;
            case STRING1:
                // Tolerated only when the profile is not strict.
                if ( profile.isStrictMode() )
                    exception(token, "Illegal single quoted string: %s", token) ;
                return ;
            default:
                exception(token, "Illegal object: %s", token) ;
        }
    }

    /** SkipOnBadTerm - do not output tuples with bad RDF terms */ 
    public boolean  getSkipOnBadTerm()                      { return skipOnBadTerm ; }
    /** SkipOnBadTerm - do not output tuples with bad RDF terms */ 
    public void     setSkipOnBadTerm(boolean skipOnBadTerm) { this.skipOnBadTerm = skipOnBadTerm ; }
}