/* * Copyright 2013, 2014 Deutsche Nationalbibliothek * * Licensed under the Apache License, Version 2.0 the "License"; * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.culturegraph.mf.formeta.parser; import org.culturegraph.mf.commons.StringUtil; import org.culturegraph.mf.framework.FormatException; /** * A parser for the formeta metadata serialisation format. * * @author Christoph Böhme * */ public final class FormetaParser { public static final int SNIPPET_SIZE = 20; public static final String SNIPPET_ELLIPSIS = "\u2026"; public static final String POS_MARKER_LEFT = ">"; public static final String POS_MARKER_RIGHT = "<"; private static final int BUFFER_SIZE = 1024 * 1024; private char[] buffer = new char[BUFFER_SIZE]; private final StructureParserContext structureParserContext = new StructureParserContext(); public void setEmitter(final Emitter emitter) { structureParserContext.setEmitter(emitter); } public Emitter getEmitter() { return structureParserContext.getEmitter(); } public void parse(final String data) { assert structureParserContext.getEmitter() != null: "No emitter set"; // According to http://stackoverflow.com/a/11876086 it is faster to copy // a string into a char array then to use charAt(): buffer = StringUtil.copyToBuffer(data, buffer); final int bufferLen = data.length(); structureParserContext.reset(); StructureParserState state = StructureParserState.ITEM_NAME; int i = 0; try { for (; i < bufferLen; ++i) { state = state.processChar(buffer[i], structureParserContext); } } catch (final FormatException e) { final String errorMsg = "Parsing error at position " + (i + 1) + ": " + getErrorSnippet(data, i) + ", " + e.getMessage(); throw new FormatException(errorMsg, e); } try { state.endOfInput(structureParserContext); } catch (final FormatException e) { throw new FormatException("Parsing error: " + e.getMessage(), e); } } /** * Extracts a text snippet from the record for showing the position at * which an error occurred. The exact position additionally highlighted * with {@link POS_MARKER_LEFT} and {@link POS_MARKER_RIGHT}. * * @param record the record currently being parsed * @param pos the position at which the error occurred * @return a text snippet. */ private static String getErrorSnippet(final String record, final int pos) { final StringBuilder snippet = new StringBuilder(); final int start = pos - SNIPPET_SIZE / 2; if (start < 0) { snippet.append(record.substring(0, pos)); } else { snippet.append(SNIPPET_ELLIPSIS); snippet.append(record.substring(start, pos)); } snippet.append(POS_MARKER_LEFT); snippet.append(record.charAt(pos)); snippet.append(POS_MARKER_RIGHT); if (pos + 1 < record.length()) { final int end = pos + SNIPPET_SIZE / 2; if (end > record.length()) { snippet.append(record.substring(pos + 1)); } else { snippet.append(record.substring(pos + 1, end)); snippet.append(SNIPPET_ELLIPSIS); } } return snippet.toString(); } }