XMLReader.java example

Explorer

lemma-master
- core
  - src
    - main
      - java
        org
        fourthline
        lemma
        Constants.java
        anchor
        AnchorAddress.java
        CitationAnchor.java
        Scheme.java
        pipeline
        Context.java
        Pipeline.java
        javadoc
        XHTMLTemplateJavadocPipeline.java
        processor
        AbstractJavadocProcessor.java
        AbstractProcessor.java
        Processor.java
        ProcessorOptions.java
        xhtml
        JavadocCitationProcessor.java
        TocProcessor.java
        XRefProcessor.java
        reader
        AbstractReader.java
        Reader.java
        content
        LineRange.java
        filter
        CleanupFilter.java
        ContentFilter.java
        FragmentFilter.java
        handler
        ContentFileHandler.java
        printer
        CalloutContentPrinter.java
        ContentPrinter.java
        JavaContentPrinter.java
        PlainContentPrinter.java
        XMLContentPrinter.java
        javacode
        JavacodeRawReader.java
        JavacodeReader.java
        LineRangeParser.java
        javadoc
        AbstractJavadocReader.java
        JavadocReader.java
        text
        PlaintextReader.java
        xml
        XMLReader.java
    - test
      - java
        example
        advanced
        Callouts.java
        HelloWorldCallouts.java
        Includes.java
        LinkingCitations.java
        NoXRefProcessing.java
        citeplaintext
        CitingPlaintext.java
        citexml
        CitingXML.java
        helloworld
        AnchorThis.java
        CustomizingAnchorOptions.java
        GenericTest.java
        HelloWorld.java
        HelloWorldTest.java
        package-info.java
        misc
        CitationTests.java
        ReaderSelectionTests.java
        util
        DocletTest.java
- maven-plugin
  - src
    - main
      - java
        org
        fourthline
        lemma
        maven
        LemmaMojo.java
        LemmaReport.java

/*
 * Copyright (C) 2011 4th Line GmbH, Switzerland
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package org.fourthline.lemma.reader.xml;

import org.fourthline.lemma.Constants;
import org.fourthline.lemma.anchor.CitationAnchor;
import org.fourthline.lemma.pipeline.Context;
import org.fourthline.lemma.reader.AbstractReader;
import org.fourthline.lemma.reader.content.filter.CleanupFilter;
import org.fourthline.lemma.reader.content.filter.ContentFilter;
import org.fourthline.lemma.reader.content.filter.FragmentFilter;
import org.fourthline.lemma.reader.content.handler.ContentFileHandler;
import org.fourthline.lemma.reader.content.printer.ContentPrinter;
import org.fourthline.lemma.reader.content.printer.XMLContentPrinter;
import org.seamless.xhtml.XHTML;
import org.seamless.xhtml.XHTMLElement;

import java.io.File;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/**
 * A reader for XML files that treats their content as plain text lines.
 * <p>
 * Because the content is never parsed, citations can come from any XML, XHTML,
 * or HTML file. The only thing this reader interprets are the fragment labels
 * embedded in XML comments.
 * </p>
 * <p>
 * The cited content is wrapped in a {@code <pre class="prettyprint"/>} tag, so
 * XML code blocks can be detected in the output document, e.g. by a
 * Javascript syntax highlighter.
 * </p>
 *
 * @author Christian Bauer
 */
public class XMLReader extends AbstractReader {

    private final Logger log = Logger.getLogger(XMLReader.class.getName());

    /**
     * Key under which {@link #read} looks up the {@code File[]} of source
     * directories in the given context.
     */
    public static final String CONTEXT_SOURCE_DIRECTORIES = "XMLReader.sourceDirectories";

    /**
     * Matches a line that ends with an XML comment wrapping the label expression
     * from {@code Constants.PATTERN_FRAGMENT_LABEL}; tabs and spaces around the
     * comment and around the label are tolerated. Group 1 is the text in front
     * of the comment.
     */
    public static final Pattern PATTERN_FRAGMENT_LABEL = Pattern.compile(
            "(.*?)[\\t ]*<!--[\\t ]*" + Constants.PATTERN_FRAGMENT_LABEL + "[\\t ]*-->$"
    );

    /** Loads the raw content lines of a cited file. */
    protected final ContentFileHandler handler;

    /** Writes the filtered content lines into the output element. */
    protected final ContentPrinter printer;

    /** Filters applied to the content, in array order, before printing. */
    protected final ContentFilter[] filters;

    /**
     * Sets up the default collaborators: a file handler, an XML content printer,
     * and a fragment filter followed by a cleanup filter, both working with
     * {@link #PATTERN_FRAGMENT_LABEL}.
     */
    public XMLReader() {
        this.handler = new ContentFileHandler();
        this.printer = new XMLContentPrinter();

        ContentFilter fragmentFilter = new FragmentFilter(PATTERN_FRAGMENT_LABEL);
        ContentFilter cleanupFilter = new CleanupFilter(PATTERN_FRAGMENT_LABEL);
        this.filters = new ContentFilter[]{fragmentFilter, cleanupFilter};
    }

    /**
     * Builds a new document for the given citation.
     * <p>
     * The cited file is resolved from the citation's address path against the
     * source directories stored in the context. The document root is the wrapper
     * element carrying the citation's output classes and, if identifier generation
     * is enabled for the context, its output identifier. Title, file path and the
     * filtered file content are then added to that root.
     * </p>
     *
     * @param citation the citation anchor that addresses the file to include
     * @param context  the context providing {@link #CONTEXT_SOURCE_DIRECTORIES}
     * @return the newly created document
     */
    public XHTML read(CitationAnchor citation, Context context) {

        File[] searchDirectories = (File[]) context.get(CONTEXT_SOURCE_DIRECTORIES);
        File citedFile = resolveFile(citation.getAddress().getPath(), searchDirectories);
        log.fine("Including and parsing XHTML file: " + citedFile);

        XHTML document = getParser().createDocument();

        XHTMLElement wrapper = document
                .createRoot(getXPath(), Constants.WRAPPER_ELEMENT)
                .setAttribute(XHTML.ATTR.CLASS, citation.getOutputClasses());

        if (isGenerateId(context)) {
            wrapper.setAttribute(XHTML.ATTR.id, citation.getOutputIdentifier());
        }

        appendTitle(wrapper, citation.getTitle());
        addFilePath(wrapper, citation, citedFile);
        appendContent(wrapper, citedFile, citation);

        return document;
    }

    /**
     * Loads the content of the file through the handler, passes it through every
     * configured filter in order, and prints the result below the parent element
     * using the {@code "prettyprint"} class.
     *
     * @param parent   the element that receives the printed content
     * @param file     the file whose content is cited
     * @param citation the citation anchor handed to each filter and to the printer
     */
    protected void appendContent(XHTMLElement parent, File file, CitationAnchor citation) {

        String[] lines = handler.getContent(file, null);

        // Each filter consumes the output of the previous one.
        for (int i = 0; i < filters.length; i++) {
            lines = filters[i].filter(lines, citation);
        }

        printer.print(lines, citation, parent, "prettyprint");
    }

}