/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.n3 ;
import java.io.* ;
import java.util.* ;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
class TupleSet implements Iterator<List<TupleItem>>
{
BufferedReader in ;
public String line = null ;
public int lineNumber = 0 ;
static final char COMMENTCHAR = '#' ;
List<TupleItem> current = null ;
boolean finished = false ;
protected static Logger logger = LoggerFactory.getLogger( TupleSet.class );
/** Creates new TupleSet */
public TupleSet(Reader r)
{
if ( ! ( r instanceof BufferedReader ) )
in = new BufferedReader(r) ;
else
in = (BufferedReader)r;
}
@Override
public boolean hasNext()
{
if ( finished ) return false ;
if ( current == null )
current = tuple() ;
return current != null ;
}
@Override
public List<TupleItem> next()
{
if ( hasNext() )
{
List<TupleItem> x = current ;
current = null ;
return x ;
}
else
return null ;
}
@Override
public void remove()
{
throw new java.lang.UnsupportedOperationException("TupleSet.remove") ;
}
private List<TupleItem> tuple()
{
try {
lineNumber ++ ;
line = in.readLine() ;
} catch (IOException e) {}
if ( line == null )
{
finished = true ;
return null ;
}
//System.out.println("Line: "+line) ;
List<TupleItem> tuple = new ArrayList<>() ;
int i = 0 ;
int j = 0 ;
boolean errorFound = false ;
tupleLoop:
for (;;)
{
// Move to beginning of next item.
i = skipwhitespace(line, j) ;
if ( i < 0 )
break ;
int iStart = -2 ; // Points to the beginning of the item as found
int jStart = -2 ; // Points to the item without quotes
int iFinish = -2 ; // Points after the end of the item as found
int jFinish = -2 ; // Points after the end of the item without quotes
int dtStart = -2 ; // Points to start of datatype (after < quote)
int dtFinish = -2 ; // Points to end of datatype
int type = TupleItem.UNKNOWN;
switch (line.charAt(i))
{
case COMMENTCHAR:
break tupleLoop ;
case '<':
type = TupleItem.URI ;
iStart = i ;
jStart = i+1 ;
int newPosn = parseURI(i, line) ;
if (newPosn < 0)
{
errorFound = true;
break tupleLoop;
}
j = newPosn ;
iFinish = j+1 ;
jFinish = j ;
break ;
case '"':
type = TupleItem.STRING ;
iStart = i ;
jStart = i+1 ;
boolean inEscape = false ;
for ( j = i+1 ; j < line.length() ; j++ )
{
char ch = line.charAt(j) ;
if ( inEscape )
{
// ToDo: escape
inEscape = false ;
continue ;
}
// Not an escape
if ( ch == '"' )
break ;
if ( ch == '\\' )
inEscape = true ;
if ( ch == '\n' || ch == '\r' )
{
errorFound = true ;
break tupleLoop;
}
}
// Malformed
if ( j == line.length() )
{
errorFound = true ;
break tupleLoop;
}
iFinish = j+1 ;
jFinish = j ;
// RDF literals may be followed by their type.
if ( j < line.length()-3
&& line.charAt(j+1) == '^'
&& line.charAt(j+2) == '^'
&& line.charAt(j+3) == '<' )
{
dtFinish = parseURI(j+3, line) ;
dtStart = j+4 ;
if (dtFinish < 0)
{
errorFound = true;
break tupleLoop;
}
j = dtFinish+1 ;
//String dt = line.substring(dtStart, dtFinish) ;
//System.out.println("I see a datatype:"+dt) ;
}
break ;
case '_':
type = TupleItem.ANON ;
iStart = i ;
for ( j = i+1 ; j < line.length() ; j++ )
{
char ch = line.charAt(j) ;
if ( ch == ' ' || ch == '\t' || ch == '.' )
break ;
if ( ! Character.isLetterOrDigit(ch) && ! (ch == '_') && ! (ch == ':') )
{
errorFound = true ;
break tupleLoop ;
}
}
iFinish = j ;
jStart = iStart ;
jFinish = iFinish ;
break ;
case '.':
case '\n':
case '\r':
return tuple ;
default:
type = TupleItem.UNQUOTED ;
iStart = i ;
jStart = i ;
for ( j = i+1 ; j < line.length() ; j++ )
{
char ch = line.charAt(j) ;
if ( ch == ' ' || ch == '\t' || ch == '.' )
break ;
//if ( ! Character.isLetterOrDigit(line.charAt(i)) )
//{
// errorFound = true ;
// break tupleLoop;
//}
}
// Malformed
if ( j == line.length()+1 )
{
errorFound = true ;
break tupleLoop;
}
iFinish = j ;
jFinish = j ;
break ;
}
String item = line.substring(jStart, jFinish) ;
String literal = line.substring(iStart, iFinish) ;
String dt = null ;
if ( dtStart > 0 )
dt = line.substring(dtStart, dtFinish) ;
tuple.add(new TupleItem(item, literal, type, dt)) ;
j++ ;
// End of item.
}
//End of this line.
if ( errorFound )
{
logger.error( "Error in TupleSet.tuple: " + line );
String s = "" ;
int k = 0 ;
for ( ; k < i ; k++ ) s = s+" " ;
s = s+"^" ;
for ( ; k < j-1 ; k++ ) s=s+" " ;
s = s+"^" ;
logger.error( s ) ;
return null ;
}
if ( tuple.size() == 0 )
{
// Nothing found : loop by tail recursion
return tuple() ;
}
return tuple ;
}
private int skipwhitespace(String s, int i)
{
for ( ; i < s.length() ; i++ )
{
char ch = s.charAt(i) ;
// Horizonal whitespace
if ( ch != ' ' && ch != '\t' )
return i ;
}
return -1 ;
}
private int parseURI(int i, String line)
{
int j;
for (j = i + 1; j < line.length(); j++)
{
char ch = line.charAt(j);
if (ch == '>')
break;
if (ch == '\n' || ch == '\r')
return -1;
}
// Malformed
if (j == line.length())
return -2;
return j ;
}
}