TupleSet.java example

Explorer
jena-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.n3 ;

import java.io.* ;
import java.util.* ;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Iterator over a line-oriented tuple source: each element is the list of
 * {@link TupleItem}s found on one input line.
 *
 * <p>Items recognised on a line:</p>
 * <ul>
 *   <li>{@code <...>} - a URI item;</li>
 *   <li>{@code "..."} - a string item, optionally followed by {@code ^^<datatype>};</li>
 *   <li>a token starting with {@code _} - an anonymous-node item;</li>
 *   <li>any other token - an unquoted item.</li>
 * </ul>
 * <p>A {@code #} (outside a quoted item) starts a comment running to the end of
 * the line, and a {@code .} at the start of an item position ends the tuple.
 * Lines that yield no items (blank or comment-only) are skipped.</p>
 */
class TupleSet implements Iterator<List<TupleItem>>
{
    /** Source of input lines. */
    BufferedReader in ;
    /** The most recently read line, or null once input is exhausted. */
    public String line = null ;
    /** Incremented once per read attempt, including the attempt that hits end of input. */
    public int lineNumber = 0 ;

    /** Character that starts a comment running to the end of the line. */
    static final char COMMENTCHAR = '#' ;
    /** Tuple read ahead by {@link #hasNext()} and not yet handed out by {@link #next()}. */
    List<TupleItem> current = null ;
    /** Set once the reader has reported end of input (or failed). */
    boolean finished = false ;

    protected static Logger logger = LoggerFactory.getLogger( TupleSet.class );

    /**
     * Creates a new TupleSet reading from the given reader.
     *
     * @param r the input; wrapped in a {@link BufferedReader} unless it already is one
     */
    public TupleSet(Reader r)
    {
        if ( ! ( r instanceof BufferedReader ) )
            in = new BufferedReader(r) ;
        else
            in = (BufferedReader)r;
    }

    /**
     * Reads ahead (if necessary) to determine whether another tuple is available.
     *
     * <p>Note that a malformed line also makes this method return {@code false}
     * for that call, without marking the input as finished; a later call will
     * carry on with the following line.</p>
     *
     * @return true if a tuple is ready to be returned by {@link #next()}
     */
    @Override
    public boolean hasNext()
    {
        if ( finished ) return false ;

        if ( current == null )
            current = tuple() ;
        return current != null ;
    }

    /**
     * Returns the next tuple.
     *
     * @return the items of the next line, or {@code null} when {@link #hasNext()}
     *         is false. Returning null (rather than throwing
     *         {@link NoSuchElementException}) is kept for compatibility with
     *         existing callers.
     */
    @Override
    public List<TupleItem> next()
    {
        if ( hasNext() )
        {
            List<TupleItem> x = current ;
            current = null ;
            return x ;
        }
        else
            return null ;
    }


    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove()
    {
        throw new java.lang.UnsupportedOperationException("TupleSet.remove") ;
    }

    /**
     * Reads lines until one yields a tuple, and parses it.
     *
     * @return the parsed items; {@code null} at end of input (in which case
     *         {@code finished} is set) or when the line is malformed (in which
     *         case the problem is logged and {@code finished} is left unset).
     *         A line whose first item position holds a {@code .} produces an
     *         empty list.
     */
    private List<TupleItem> tuple()
    {
        // Loop, rather than recurse, over blank and comment-only lines so that a
        // long run of them cannot exhaust the stack.
        for (;;)
        {
            lineNumber ++ ;
            try {
                line = in.readLine() ;
            } catch (IOException e) {
                // Treat a read failure as end of input. Leaving the previous
                // value in 'line' would cause the same line to be parsed again.
                logger.error( "TupleSet: I/O error reading line " + lineNumber, e ) ;
                line = null ;
            }

            if ( line == null )
            {
                finished = true ;
                return null ;
            }

            List<TupleItem> tuple = new ArrayList<>() ;
            int i = 0 ;     // Start of the item currently being examined
            int j = 0 ;     // Scan position; after each item it is advanced past the item
            boolean errorFound = false ;

         tupleLoop:
            for (;;)
            {
                // Move to beginning of next item.
                i = skipwhitespace(line, j) ;

                if ( i < 0 )
                    break ;

                int iStart = -2 ;       // Points to the beginning of the item as found
                int jStart = -2 ;       // Points to the item without quotes
                int iFinish = -2 ;      // Points after the end of the item as found
                int jFinish = -2 ;      // Points after the end of the item without quotes
                int dtStart = -2 ;      // Points to start of datatype (after < quote)
                int dtFinish = -2 ;     // Points to end of datatype
                int type = TupleItem.UNKNOWN;

                switch (line.charAt(i))
                {
                    case COMMENTCHAR:
                        // Rest of the line is a comment.
                        break tupleLoop ;
                    case '<':
                        type = TupleItem.URI ;
                        iStart = i ;
                        jStart = i+1 ;
                        int newPosn = parseURI(i, line) ;
                        if (newPosn < 0)
                        {
                            errorFound = true;
                            break tupleLoop;
                        }
                        j = newPosn ;           // Index of the closing '>'

                        iFinish = j+1 ;
                        jFinish = j ;
                        break ;
                    case '"':
                        type = TupleItem.STRING ;
                        iStart = i ;
                        jStart = i+1 ;
                        boolean inEscape = false ;
                        for ( j = i+1 ; j < line.length() ; j++ )
                        {
                            char ch = line.charAt(j) ;
                            if ( inEscape )
                            {
                                // ToDo: escape
                                // The escaped character is skipped, so \" does not end the string.
                                inEscape = false ;
                                continue ;
                            }
                            // Not an escape
                            if ( ch == '"'  )
                                break ;

                            if ( ch == '\\' )
                                inEscape = true ;
                            if ( ch == '\n' || ch == '\r' )
                            {
                                errorFound = true ;
                                break tupleLoop;
                            }
                        }

                        // Malformed: no closing quote on this line.
                        if ( j == line.length() )
                        {
                            errorFound = true ;
                            break tupleLoop;
                        }

                        iFinish = j+1 ;
                        jFinish = j ;
                        // RDF literals may be followed by their type: "..."^^<datatype>

                        if ( j < line.length()-3
                             && line.charAt(j+1) == '^'
                             && line.charAt(j+2) == '^'
                             && line.charAt(j+3) == '<' )
                        {
                            dtFinish = parseURI(j+3, line) ;
                            dtStart = j+4 ;
                            if (dtFinish < 0)
                            {
                                errorFound = true;
                                break tupleLoop;
                            }
                            // Leave j on the closing '>' (as the URI case does): the
                            // shared j++ below then steps just past it instead of
                            // also swallowing the character that follows.
                            j = dtFinish ;
                        }

                        break ;
                    case '_':
                        type = TupleItem.ANON ;
                        iStart = i ;
                        for ( j = i+1 ; j < line.length() ; j++ )
                        {
                            char ch = line.charAt(j) ;
                            if ( ch == ' '  || ch == '\t' || ch == '.' )
                                break ;
                            if ( ! Character.isLetterOrDigit(ch) && ! (ch == '_') && ! (ch == ':') )
                            {
                                errorFound = true ;
                                break tupleLoop ;
                            }
                        }
                        iFinish = j ;
                        jStart = iStart ;
                        jFinish = iFinish ;
                        break ;
                    case '.':
                    case '\n':
                    case '\r':
                        // End of the tuple: returned as it stands, even if empty.
                        return tuple ;
                    default:
                        type = TupleItem.UNQUOTED ;
                        iStart = i ;
                        jStart = i ;
                        // The token runs up to the next space, tab or '.', or to the
                        // end of the line; reaching the end of the line is not an error.
                        for ( j = i+1 ; j < line.length() ; j++ )
                        {
                            char ch = line.charAt(j) ;
                            if ( ch == ' '  || ch == '\t' || ch == '.' )
                                break ;
                        }
                        iFinish = j ;
                        jFinish = j ;
                        break ;
                }
                String item = line.substring(jStart, jFinish) ;      // Without quotes
                String literal = line.substring(iStart, iFinish) ;   // As found in the line
                String dt = null ;
                if ( dtStart > 0 )
                    dt = line.substring(dtStart, dtFinish) ;

                tuple.add(new TupleItem(item, literal, type, dt)) ;
                j++ ;
                // End of item.
            }
            //End of this line.
            if ( errorFound )
            {
                logger.error( "Error in TupleSet.tuple: " + line );

                // Second log line: carets under the start of the offending item
                // and near the position the scan had reached.
                StringBuilder marker = new StringBuilder() ;
                int k = 0 ;
                for ( ; k < i ; k++ ) marker.append(' ') ;
                marker.append('^') ;
                for ( ; k < j-1 ; k++ ) marker.append(' ') ;
                marker.append('^') ;
                logger.error( marker.toString() ) ;
                return null ;
            }

            if ( ! tuple.isEmpty() )
                return tuple ;
            // Nothing found on this line (blank or comment only): read the next one.
        }
    }

    /**
     * Skips horizontal whitespace (space and tab).
     *
     * @param s the string to scan
     * @param i the index to start from
     * @return the index of the first character at or after {@code i} that is
     *         neither a space nor a tab, or -1 if there is none
     */
    private int skipwhitespace(String s, int i)
    {
        for ( ; i < s.length() ; i++ )
        {
            char ch = s.charAt(i) ;
            // Horizontal whitespace
            if ( ch != ' ' && ch != '\t' )
                return i ;
        }
        return -1 ;
    }

    /**
     * Finds the end of a {@code <...>} URI reference.
     *
     * @param i    index of the opening {@code <}; scanning starts at {@code i+1}
     * @param line the line being parsed
     * @return the index of the closing {@code >}; -1 if a newline or carriage
     *         return is met first; -2 if the line ends before a {@code >} is found
     */
    private int parseURI(int i, String line)
    {
        int j;
        for (j = i + 1; j < line.length(); j++)
        {
            char ch = line.charAt(j);
            if (ch == '>')
                break;
            if (ch == '\n' || ch == '\r')
                return -1;
        }
        // Malformed
        if (j == line.length())
            return -2;
        return j ;
    }
}