/*
 * Copyright (C) 2011 4th Line GmbH, Switzerland
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package org.fourthline.lemma.reader.text;

import org.fourthline.lemma.Constants;
import org.fourthline.lemma.anchor.CitationAnchor;
import org.fourthline.lemma.pipeline.Context;
import org.fourthline.lemma.reader.AbstractReader;
import org.fourthline.lemma.reader.content.filter.CleanupFilter;
import org.fourthline.lemma.reader.content.filter.ContentFilter;
import org.fourthline.lemma.reader.content.filter.FragmentFilter;
import org.fourthline.lemma.reader.content.handler.ContentFileHandler;
import org.fourthline.lemma.reader.content.printer.ContentPrinter;
import org.fourthline.lemma.reader.content.printer.PlainContentPrinter;
import org.seamless.xhtml.Option;
import org.seamless.xhtml.XHTML;
import org.seamless.xhtml.XHTMLElement;

import java.io.File;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/**
 * Reads any text file, uses "#" as a comment marker.
 * <p>
 * This reader can process any text file; its only job is to detect
 * fragment labels in plain text comments - which are all characters
 * following a hash "#" character.
 * </p>
 *
 * @author Christian Bauer
 */
public class PlaintextReader extends AbstractReader {

    /**
     * Context key under which {@link #read(CitationAnchor, Context)} looks up
     * the <code>File[]</code> of source directories.
     */
    public static final String CONTEXT_SOURCE_DIRECTORIES = "PlaintextReader.sourceDirectories";

    private static final Logger log = Logger.getLogger(PlaintextReader.class.getName());

    /**
     * Matches a line that ends with a "#" comment holding a fragment label:
     * any leading text (captured, reluctant), optional tabs/spaces, the hash,
     * optional tabs/spaces, {@link Constants#PATTERN_FRAGMENT_LABEL}, and
     * optional trailing tabs/spaces up to the end of the input.
     */
    public static final Pattern PATTERN_FRAGMENT_LABEL =
        Pattern.compile("(.*?)[\\t ]*#[\\t ]*" + Constants.PATTERN_FRAGMENT_LABEL + "[\\t ]*$");

    /** Loads the lines of the addressed file. */
    protected final ContentFileHandler handler;

    /** Writes the filtered lines into the output element. */
    protected final ContentPrinter printer;

    /** Filters applied to the file content, in array order, before printing. */
    protected final ContentFilter[] filters;

    /**
     * Creates a reader with a {@link ContentFileHandler}, a
     * {@link PlainContentPrinter}, and a filter chain of a
     * {@link FragmentFilter} followed by a {@link CleanupFilter}, both
     * configured with {@link #PATTERN_FRAGMENT_LABEL}.
     */
    public PlaintextReader() {
        handler = new ContentFileHandler();
        printer = new PlainContentPrinter();
        filters = new ContentFilter[]{
            new FragmentFilter(PATTERN_FRAGMENT_LABEL),
            new CleanupFilter(PATTERN_FRAGMENT_LABEL)
        };
    }

    /**
     * Builds an XHTML document wrapping the content of the plain text file
     * addressed by the given citation.
     * <p>
     * The file is resolved from the path of the citation's address against the
     * directories stored in the context under
     * {@link #CONTEXT_SOURCE_DIRECTORIES}. The root element carries the
     * citation's output classes and, if identifier generation is enabled for
     * the context, its output identifier. Title, file path and the filtered
     * file content are then appended to the root, in that order.
     * </p>
     *
     * @param citation the anchor that addresses the file and supplies title,
     *                 output classes, identifier and options
     * @param context  the pipeline context; its
     *                 {@link #CONTEXT_SOURCE_DIRECTORIES} entry is cast to
     *                 <code>File[]</code>
     * @return the newly created document
     */
    public XHTML read(CitationAnchor citation, Context context) {

        File[] sourceDirectories = (File[]) context.get(CONTEXT_SOURCE_DIRECTORIES);
        File addressedFile = resolveFile(citation.getAddress().getPath(), sourceDirectories);

        // This reader handles plain text, not XHTML; say so in the log.
        log.fine("Including and parsing plain text file: " + addressedFile);

        XHTML xhtml = getParser().createDocument();

        XHTMLElement root =
            xhtml.createRoot(getXPath(), Constants.WRAPPER_ELEMENT)
                .setAttribute(XHTML.ATTR.CLASS, citation.getOutputClasses());

        if (isGenerateId(context)) {
            root.setAttribute(XHTML.ATTR.id, citation.getOutputIdentifier());
        }

        appendTitle(root, citation.getTitle());
        addFilePath(root, citation, addressedFile);
        appendContent(root, addressedFile, citation);

        return xhtml;
    }

    /**
     * Loads the file's content, runs it through every configured filter and
     * prints the result into the parent element.
     * <p>
     * When the citation has a {@link CitationAnchor.OptionKey#PRETTY} option
     * whose first value parses as <code>true</code>, the content is printed
     * with the additional "prettyprint" argument.
     * </p>
     *
     * @param parent   the element that receives the printed content
     * @param file     the file whose content is loaded
     * @param citation the anchor passed to the filters and the printer
     */
    protected void appendContent(XHTMLElement parent, File file, CitationAnchor citation) {

        // NOTE(review): the meaning of the null second argument is defined by
        // ContentFileHandler and is not visible here - confirm.
        String[] content = handler.getContent(file, null);

        // Each filter receives the output of the previous one.
        for (ContentFilter filter : filters) {
            content = filter.filter(content, citation);
        }

        Option prettyOption = citation.getOption(CitationAnchor.OptionKey.PRETTY);
        if (prettyOption != null && Boolean.valueOf(prettyOption.getFirstValue())) {
            printer.print(content, citation, parent, "prettyprint");
        } else {
            printer.print(content, citation, parent);
        }
    }
}