RdfTurtleReader.java example

Explorer

restlet-framework-java-master
- build
  - tools
    - checkstyle
      - contrib
        JavadocCheckDefault.java
        bcel
        src
        checkstyle
        com
        puppycrawl
        tools
        checkstyle
        bcel
        AbstractCheckVisitor.java
        ClassFileSetCheck.java
        EmptyClassFileVisitor.java
        EmptyDeepVisitor.java
        EmptyGenericVisitor.java
        IDeepVisitor.java
        IObjectSetVisitor.java
        JavaClassWalker.java
        ReferenceVisitor.java
        VisitorSet.java
        checks
        AbstractReferenceCheck.java
        HiddenInheritedFieldCheck.java
        HiddenStaticMethodCheck.java
        UnreadFieldCheck.java
        UnreadVariableCheck.java
        UnusedMethodCheck.java
        classfile
        FieldDefinition.java
        FieldOrMethodDefinition.java
        JavaClassDefinition.java
        MethodDefinition.java
        ReferenceDAO.java
        Utils.java
        generic
        FieldOrMethodReference.java
        FieldReference.java
        GETFIELDReference.java
        GETSTATICReference.java
        InvokeReference.java
        PUTFIELDReference.java
        PUTSTATICReference.java
        Utils.java
        testinputs
        com
        puppycrawl
        tools
        checkstyle
        bcel
        checks
        InheritLibrary.java
        SubClass.java
        SuperClass.java
        tests
        com
        puppycrawl
        tools
        checkstyle
        bcel
        BcelCheckTestCase.java
        checks
        HiddenInheritedFieldTest.java
        HiddenStaticMethodTest.java
        examples
        XInclude
        NamespacesSAXParserFactoryImpl.java
        checks
        com
        mycompany
        checks
        LimitImplementationFiles.java
        MethodLimitCheck.java
        puppycrawl
        tools
        checkstyle
        checks
        xpath
        Attribute.java
        AttributeAxisIterator.java
        DocumentNavigator.java
        NodeIterator.java
        XPathCheck.java
        filters
        com
        mycompany
        filters
        FilesFilter.java
        listeners
        com
        mycompany
        listeners
        CommonsLoggingListener.java
        MailLogger.java
        VerboseListener.java
        usage
        src
        checkstyle
        com
        puppycrawl
        tools
        checkstyle
        checks
        usage
        AbstractUsageCheck.java
        OneMethodPrivateFieldCheck.java
        UnusedLocalVariableCheck.java
        UnusedParameterCheck.java
        UnusedPrivateFieldCheck.java
        UnusedPrivateMethodCheck.java
        transmogrify
        ASTManager.java
        ASTUtil.java
        AnonymousInnerClass.java
        ArrayDef.java
        ArrayLengthMember.java
        BaseScope.java
        BlockDef.java
        ClassDef.java
        ClassImportException.java
        ClassManager.java
        DefaultConstructor.java
        DefaultScope.java
        Definition.java
        DefinitionTraverser.java
        DotIterator.java
        ExternalClass.java
        ExternalConstructor.java
        ExternalDefinition.java
        ExternalMethod.java
        ExternalPackage.java
        ExternalSignature.java
        ExternalVariable.java
        IClass.java
        IDefinition.java
        IMethod.java
        IPackage.java
        ISignature.java
        IVariable.java
        InterfaceConstructor.java
        LabelDef.java
        LiteralResolver.java
        MethodDef.java
        MethodSignature.java
        MethodSpecificityComparator.java
        NullClass.java
        Occurrence.java
        PackageDef.java
        PrimitiveClasses.java
        QueryEngine.java
        Reference.java
        ReferenceCounter.java
        ReferenceThreshold.java
        ReferenceTool.java
        Resolver.java
        Scope.java
        ScopeIndex.java
        Span.java
        SymTabAST.java
        SymTabASTFactory.java
        SymTabASTIterator.java
        SymbolTable.java
        SymbolTableException.java
        TableMaker.java
        Typed.java
        UnknownClass.java
        VariableDef.java
        testinputs
        com
        puppycrawl
        tools
        checkstyle
        usage
        InputEmptyFile.java
        InputInnerUsedMethod.java
        InputOneMethodPrivateField.java
        InputUnusedField.java
        InputUnusedLocal.java
        InputUnusedMethod.java
        InputUnusedParameter.java
        tests
        com
        puppycrawl
        tools
        checkstyle
        checks
        usage
        AllTests.java
        OneMethodPrivateFieldCheckTest.java
        UnusedLocalVariableCheckTest.java
        UnusedParameterCheckTest.java
        UnusedPrivateFieldCheckTest.java
        UnusedPrivateMethodCheckTest.java
- modules

/**
 * Copyright 2005-2014 Restlet
 * 
 * The contents of this file are subject to the terms of one of the following
 * open source licenses: Apache 2.0 or EPL 1.0 (the "Licenses"). You can
 * select the license that you prefer but you may not use this file except in
 * compliance with one of these Licenses.
 * 
 * You can obtain a copy of the Apache 2.0 license at
 * http://www.opensource.org/licenses/apache-2.0
 * 
 * You can obtain a copy of the EPL 1.0 license at
 * http://www.opensource.org/licenses/eclipse-1.0
 * 
 * See the Licenses for the specific language governing permissions and
 * limitations under the Licenses.
 * 
 * Alternatively, you can obtain a royalty free commercial license with less
 * limitations, transferable or non-transferable, directly at
 * http://restlet.com/products/restlet-framework
 * 
 * Restlet is a registered trademark of Restlet S.A.S.
 */

package org.restlet.ext.rdf.internal.turtle;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.restlet.data.Reference;
import org.restlet.ext.rdf.Graph;
import org.restlet.ext.rdf.GraphHandler;
import org.restlet.ext.rdf.Literal;
import org.restlet.ext.rdf.internal.RdfConstants;
import org.restlet.ext.rdf.internal.ntriples.RdfNTriplesReader;
import org.restlet.representation.Representation;

/**
 * Handler of RDF content according to the RDF Turtle notation.
 * 
 * @author Thierry Boileau
 */
public class RdfTurtleReader extends RdfNTriplesReader {

    /**
     * Increment used to identify inner blank nodes. It is read and then
     * incremented each time {@link #newBlankNodeId()} is called.
     */
    private int blankNodeId = 0;

    /**
     * The current context object. It is created by the constructor and passed
     * to the tokens and directives built while parsing.
     */
    private Context context;

    /**
     * Constructor.
     * 
     * @param rdfRepresentation
     *            The representation to read.
     * @param graphHandler
     *            The graph handler invoked during the parsing.
     * @throws IOException
     */
    public RdfTurtleReader(Representation rdfN3Representation,
            GraphHandler graphHandler) throws IOException {
        super(rdfN3Representation, graphHandler);
        this.context = new Context();
        context.getKeywords().addAll(
                Arrays.asList("a", "is", "of", "this", "has"));
    }

    /**
     * Loops over the given list of lexical units and generates the adequate
     * calls to link* methods.
     * <p>
     * The units are consumed by a small state machine whose state is the
     * position inside the current triple (1 = subject, 2 = predicate, 3 =
     * object):
     * <ul>
     * <li>at the subject position, "," reuses the current subject and
     * predicate (the next unit is an object) and ";" reuses the current
     * subject (the next unit is a predicate);</li>
     * <li>at the predicate position, "is" requests that subject and object be
     * swapped, "has" is skipped, "a" stands for
     * {@link RdfConstants#PREDICATE_TYPE}, and the path operators "!" and "^"
     * consume the following unit as a predicate and link the current subject
     * with a new blank node, which becomes the current subject;</li>
     * <li>at the object position, "of" is skipped, any other unit completes
     * the triple and triggers a call to
     * {@link #link(Object, Reference, Object)}.</li>
     * </ul>
     * A path operator that is the last unit of the list has no predicate to
     * consume: a warning is logged and the generation stops instead of failing
     * with an {@link IndexOutOfBoundsException}.
     * 
     * @see GraphHandler#link(Graph, Reference, Reference)
     * @see GraphHandler#link(Reference, Reference, Literal)
     * @see GraphHandler#link(Reference, Reference, Reference)
     * @param lexicalUnits
     *            The list of lexical units used to generate the links.
     */
    protected void generateLinks(List<LexicalUnit> lexicalUnits) {
        Object currentSubject = null;
        Reference currentPredicate = null;
        Object currentObject = null;
        // Position inside the current triple: 1 = subject, 2 = predicate,
        // 3 = object.
        int nbTokens = 0;
        boolean swapSubjectObject = false;

        for (int i = 0; i < lexicalUnits.size(); i++) {
            LexicalUnit lexicalUnit = lexicalUnits.get(i);

            nbTokens++;
            switch (nbTokens) {
            case 1:
                if (",".equals(lexicalUnit.getValue())) {
                    // Same subject and predicate: the next unit is an object.
                    nbTokens++;
                } else if (!";".equals(lexicalUnit.getValue())) {
                    currentSubject = lexicalUnit.resolve();
                }
                // ";" keeps the current subject: the next unit is a predicate.
                break;
            case 2:
                if ("is".equalsIgnoreCase(lexicalUnit.getValue())) {
                    // The real predicate follows.
                    nbTokens--;
                    swapSubjectObject = true;
                } else if ("has".equalsIgnoreCase(lexicalUnit.getValue())) {
                    // The real predicate follows.
                    nbTokens--;
                } else if ("a".equalsIgnoreCase(lexicalUnit.getValue())) {
                    currentPredicate = RdfConstants.PREDICATE_TYPE;
                } else if ("!".equalsIgnoreCase(lexicalUnit.getValue())) {
                    if (i + 1 >= lexicalUnits.size()) {
                        warnMissingPathPredicate("!");
                        return;
                    }
                    // Forward path: subject --predicate--> new blank node,
                    // which becomes the subject of what follows.
                    currentObject = new BlankNodeToken(newBlankNodeId())
                            .resolve();
                    currentPredicate = getPredicate(lexicalUnits.get(++i));
                    this.link(currentSubject, currentPredicate, currentObject);
                    currentSubject = currentObject;
                    nbTokens = 1;
                } else if ("^".equalsIgnoreCase(lexicalUnit.getValue())) {
                    if (i + 1 >= lexicalUnits.size()) {
                        warnMissingPathPredicate("^");
                        return;
                    }
                    // Backward path: new blank node --predicate--> subject;
                    // the blank node becomes the subject of what follows.
                    currentObject = currentSubject;
                    currentPredicate = getPredicate(lexicalUnits.get(++i));
                    currentSubject = new BlankNodeToken(newBlankNodeId())
                            .resolve();
                    this.link(currentSubject, currentPredicate, currentObject);
                    nbTokens = 1;
                } else {
                    currentPredicate = getPredicate(lexicalUnit);
                }
                break;
            case 3:
                if ("of".equalsIgnoreCase(lexicalUnit.getValue())) {
                    // The real object follows.
                    nbTokens--;
                } else {
                    if (swapSubjectObject) {
                        this.link(lexicalUnit.resolve(), currentPredicate,
                                currentSubject);
                    } else {
                        currentObject = lexicalUnit.resolve();
                        this.link(currentSubject, currentPredicate,
                                currentObject);
                    }
                    // The triple is complete, start a new one.
                    nbTokens = 0;
                    swapSubjectObject = false;
                }
                break;
            default:
                break;
            }
        }
    }

    /**
     * Logs a warning about a path operator that ends a statement without
     * being followed by the predicate it requires.
     * 
     * @param operator
     *            The dangling path operator ("!" or "^").
     */
    private void warnMissingPathPredicate(String operator) {
        org.restlet.Context.getCurrentLogger().warning(
                "The RDF Turtle document contains a path operator \""
                        + operator
                        + "\" which is not followed by a predicate.");
        org.restlet.Context.getCurrentLogger().warning(getParsingMessage());
    }

    /**
     * Gives access to the context object currently used by this reader.
     * 
     * @return The current context.
     */
    protected Context getContext() {
        return this.context;
    }

    /**
     * Resolves the given lexical unit and converts the result to a predicate
     * reference.
     * 
     * @param lexicalUnit
     *            The lexical unit to get as a predicate.
     * @return The resolved value itself when it is a {@link Reference}, a new
     *         {@link Reference} built from it when it is a String, null
     *         otherwise.
     */
    private Reference getPredicate(LexicalUnit lexicalUnit) {
        final Object resolved = lexicalUnit.resolve();

        if (resolved instanceof Reference) {
            return (Reference) resolved;
        }

        if (resolved instanceof String) {
            return new Reference((String) resolved);
        }

        return null;
    }

    /**
     * Indicates whether the given character ends a token: either a white
     * space, or one of the characters ^ ! = &lt; " [ ] ( ) . ; , @
     * 
     * @param c
     *            The given character to check.
     * @return true if the given character is a delimiter.
     */
    @Override
    protected boolean isDelimiter(int c) {
        if (isWhiteSpace(c)) {
            return true;
        }

        switch (c) {
        case '^':
        case '!':
        case '=':
        case '<':
        case '"':
        case '[':
        case ']':
        case '(':
        case ')':
        case '.':
        case ';':
        case ',':
        case '@':
            return true;
        default:
            return false;
        }
    }

    /**
     * Callback method used when a link is parsed or written. Dispatches to the
     * graph handler's link method matching the runtime types of the source
     * (Reference or Graph) and of the target (Reference or Literal). A target
     * of any other type is reported with a warning; a source of any other type
     * is ignored.
     * 
     * @param source
     *            The source or subject of the link.
     * @param typeRef
     *            The type reference of the link.
     * @param target
     *            The target or object of the link.
     */
    protected void link(Object source, Reference typeRef, Object target) {
        if (source instanceof Reference) {
            final Reference subject = (Reference) source;

            if (target instanceof Reference) {
                getGraphHandler().link(subject, typeRef, (Reference) target);
            } else if (target instanceof Literal) {
                getGraphHandler().link(subject, typeRef, (Literal) target);
            } else {
                warnUnsupportedTarget(target);
            }
        } else if (source instanceof Graph) {
            final Graph subject = (Graph) source;

            if (target instanceof Reference) {
                getGraphHandler().link(subject, typeRef, (Reference) target);
            } else if (target instanceof Literal) {
                getGraphHandler().link(subject, typeRef, (Literal) target);
            } else {
                warnUnsupportedTarget(target);
            }
        }
    }

    /**
     * Logs two warnings: one naming the unsupported target, one giving the
     * current parsing message.
     * 
     * @param target
     *            The target that is neither a Reference nor a Literal.
     */
    private void warnUnsupportedTarget(Object target) {
        org.restlet.Context
                .getCurrentLogger()
                .warning(
                        "The RDF Turtle document contains an object which is neither a Reference nor a literal: "
                                + target);
        org.restlet.Context.getCurrentLogger().warning(getParsingMessage());
    }

    /**
     * Builds the identifier of a new blank node from the prefix "#_bn" and
     * the current value of the blank node counter, then increments the
     * counter.
     * 
     * @return The identifier of a new blank node.
     */
    protected String newBlankNodeId() {
        final int id = this.blankNodeId;
        this.blankNodeId = id + 1;
        return "#_bn" + id;
    }

    /**
     * Parses the current representation. After skipping white spaces, the
     * current character selects the handler: '@' starts a directive, '#' a
     * comment, '.' is stepped over and anything else starts a statement. The
     * loop runs at least once and ends when the end of file is reached.
     * 
     * @throws IOException
     *             Propagated from the underlying reading operations.
     */
    @Override
    public void parse() throws IOException {
        // Init the reading.
        step();

        boolean endReached;
        do {
            consumeWhiteSpaces();
            final int current = getChar();

            if (current == '@') {
                parseDirective(this.context);
            } else if (current == '#') {
                parseComment();
            } else if (current == '.') {
                step();
            } else {
                parseStatement(this.context);
            }

            endReached = isEndOfFile(getChar());
        } while (!endReached);
    }

    /**
     * Parse the given blank node: reads lexical units and appends them to the
     * blank node until a ']' or the end of file is reached. A '.' is stepped
     * over without producing a unit.
     * 
     * @param blankNode
     *            The blank node to parse.
     * @throws IOException
     *             Propagated from the underlying reading operations.
     */
    protected void parseBlankNode(BlankNodeToken blankNode) throws IOException {
        step();

        do {
            consumeWhiteSpaces();
            final int current = getChar();
            // Unit read during this iteration, if any.
            LexicalUnit unit = null;

            if (current == '(') {
                unit = new ListToken(this, this.context);
            } else if (current == '<') {
                stepBack();
                unit = new UriToken(this, this.context);
            } else if (current == '_') {
                unit = new BlankNodeToken(this.parseToken());
            } else if (current == '"') {
                unit = new StringToken(this, this.context);
            } else if (current == '[') {
                unit = new BlankNodeToken(this, this.context);
            } else if (current == '.') {
                step();
            } else if (current != ']' && !isEndOfFile(current)) {
                unit = new Token(this, this.context);
            }

            if (unit != null) {
                blankNode.getLexicalUnits().add(unit);
            }
        } while (!isEndOfFile(getChar()) && getChar() != ']');

        if (getChar() == ']') {
            // Set the cursor at the right of the blank node token.
            step();
        }
    }

    /**
     * Parse the current directive and update the context according to the kind
     * of directive:
     * <ul>
     * <li>"base": a relative URI is added as a segment to the current base, an
     * absolute one replaces it;</li>
     * <li>"prefix": the prefix and its URI are registered in the context;</li>
     * <li>"keywords": the keywords of the context are replaced by the
     * comma-separated list read up to the next '.';</li>
     * <li>any other directive is reported with a warning.</li>
     * </ul>
     * In every case, the rest of the statement is then consumed.
     * 
     * @param context
     *            The context to update.
     * @throws IOException
     *             Propagated from the underlying reading operations.
     */
    protected void parseDirective(Context context) throws IOException {
        // Remove the leading '@' character.
        step();
        discard();

        final String directive = parseToken();

        if ("base".equalsIgnoreCase(directive)) {
            consumeWhiteSpaces();
            final String baseUri = parseUri();
            final Reference baseRef = new Reference(baseUri);

            if (baseRef.isRelative()) {
                context.getBase().addSegment(baseUri);
            } else {
                context.setBase(baseRef);
            }
        } else if ("prefix".equalsIgnoreCase(directive)) {
            consumeWhiteSpaces();
            final String prefix = parseToken();
            consumeWhiteSpaces();
            final String uri = parseUri();
            context.getPrefixes().put(prefix, uri);
        } else if ("keywords".equalsIgnoreCase(directive)) {
            consumeWhiteSpaces();

            // Read up to the end of the directive.
            int current = step();
            while (!isEndOfFile(current) && current != '.') {
                current = step();
            }

            final String[] keywords = getCurrentToken().split(",");
            context.getKeywords().clear();
            for (int k = 0; k < keywords.length; k++) {
                context.getKeywords().add(keywords[k].trim());
            }
        } else {
            org.restlet.Context.getCurrentLogger().warning(
                    "@" + directive + " directive is not supported.");
        }

        consumeStatement();
    }

    /**
     * Parse the given list token: reads lexical units and appends them to the
     * list token until a ')' or the end of file is reached.
     * 
     * @param listToken
     *            The list token to parse.
     * @throws IOException
     *             Propagated from the underlying reading operations.
     */
    protected void parseList(ListToken listToken) throws IOException {
        step();

        do {
            consumeWhiteSpaces();
            final int current = getChar();
            // Unit read during this iteration, if any.
            LexicalUnit unit = null;

            if (current == '(') {
                unit = new ListToken(this, this.context);
            } else if (current == '<') {
                stepBack();
                unit = new UriToken(this, this.context);
            } else if (current == '_') {
                unit = new BlankNodeToken(parseToken());
            } else if (current == '"') {
                unit = new StringToken(this, this.context);
            } else if (current == '[') {
                unit = new BlankNodeToken(this, this.context);
            } else if (current != ')' && !isEndOfFile(current)) {
                unit = new Token(this, this.context);
            }

            if (unit != null) {
                listToken.getLexicalUnits().add(unit);
            }
        } while (!isEndOfFile(getChar()) && getChar() != ')');

        if (getChar() == ')') {
            // Set the cursor at the right of the list token.
            step();
        }
    }

    /**
     * Reads the current statement until its end ('.' or end of file),
     * collecting its lexical units, then generates the corresponding links
     * with {@link #generateLinks(List)}. Comments met inside the statement are
     * parsed without producing a unit.
     * 
     * @param context
     *            The current context.
     * @throws IOException
     *             Propagated from the underlying reading operations.
     */
    protected void parseStatement(Context context) throws IOException {
        final List<LexicalUnit> units = new ArrayList<LexicalUnit>();

        do {
            consumeWhiteSpaces();
            final int current = getChar();

            if (current == '(') {
                units.add(new ListToken(this, context));
            } else if (current == '<') {
                stepBack();
                units.add(new UriToken(this, context));
            } else if (current == '_') {
                units.add(new BlankNodeToken(parseToken()));
            } else if (current == '"') {
                units.add(new StringToken(this, context));
            } else if (current == '[') {
                units.add(new BlankNodeToken(this, context));
            } else if (current == '!' || current == '^') {
                // Path operators are kept as plain tokens.
                units.add(new Token(current == '!' ? "!" : "^"));
                step();
                discard();
            } else if (current == '@') {
                // Remove the leading '@' character.
                step();
                discard();
                units.add(new Token(this, context));
                discard();
            } else if (current == ';' || current == ',') {
                // Separators are kept as plain tokens.
                step();
                discard();
                units.add(new Token(current == ';' ? ";" : ","));
            } else if (current == '#') {
                parseComment();
            } else if (current != '.' && !isEndOfFile(current)) {
                units.add(new Token(this, context));
            }
        } while (!isEndOfFile(getChar()) && getChar() != '.');

        // Generate the links
        generateLinks(units);
    }

    /**
     * Parse the given String token. The two characters following the current
     * one decide whether the string is multi-line (delimited by three quotes)
     * or single-line. The content found before the closing delimiter becomes
     * the value of the token. A language ('@') or datatype ('^^') suffix found
     * right after the closing delimiter is then read and set on the token.
     * <p>
     * If the end of file is reached before the closing delimiter of a
     * multi-line string, the value of the token is left unset.
     * 
     * @param stringToken
     *            The String token to parse.
     * @throws IOException
     *             Propagated from the underlying reading operations.
     */
    protected void parseString(StringToken stringToken) throws IOException {
        // Answer the question : is it multi lines or not?
        // That is to say, is it delimited by 3 quotes or not?
        int c1 = step();
        int c2 = step();

        if ((c1 == c2) && (c1 == '"')) {
            stringToken.setMultiLines(true);
            // Move to the first character of the content and forget the
            // opening delimiter.
            step();
            discard();
            int cpt = 0; // Number of consecutive '"' characters.
            int c = getChar();
            while (!isEndOfFile(c)) {
                if (c == '"') {
                    cpt++;
                } else {
                    cpt = 0;
                }
                if (cpt == 3) {
                    // End of the string reached: step back over the first two
                    // quotes of the delimiter before reading the value, then
                    // skip the whole delimiter.
                    stepBack(2);
                    stringToken.setValue(getCurrentToken());
                    step(3);
                    discard();
                    break;
                }
                c = step();
            }
        } else {
            stringToken.setMultiLines(false);
            // Undo the second look-ahead step so that the cursor is back on
            // the first character after the opening quote.
            stepBack(1);
            discard();
            int c = getChar();
            while (!isEndOfFile(c) && (c != '"')) {
                c = step();
            }
            stringToken.setValue(getCurrentToken());
            // Skip the closing quote.
            step();
            discard();
        }

        // Parse the type and language of literals
        int c = getChar();
        if (c == '@') {
            stringToken.setLanguage(parseToken());
        } else if (c == '^') {
            c = step();
            if (c == '^') {
                stringToken.setType(parseToken());
            } else {
                // A single '^' is not a datatype marker: restore the cursor.
                stepBack();
            }
        }

    }

    /**
     * Parses the given token: steps forward at least once, stops on the first
     * delimiter or at the end of file, then stores the current token text as
     * the value of the token.
     * 
     * @param token
     *            The token to parse.
     * @throws IOException
     *             Propagated from the underlying reading operations.
     */
    protected void parseToken(Token token) throws IOException {
        int current = step();
        while (!isEndOfFile(current) && !isDelimiter(current)) {
            current = step();
        }

        token.setValue(getCurrentToken());
    }

    /**
     * Parses the given URI token: reads a URI with {@code parseUri()} and
     * stores it as the value of the token.
     * 
     * @param uriToken
     *            The URI token to parse.
     * @throws IOException
     *             Propagated from the underlying reading operations.
     */
    protected void parseUri(UriToken uriToken) throws IOException {
        final String uri = parseUri();
        uriToken.setValue(uri);
    }
}