/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.avro.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

/** Parser for files containing test cases consisting of
 * <code>&lt;String,String&gt;</code> pairs, where the first string is
 * the input to the test case, and the second string is the expected
 * output of the test case.
 *
 * <p> A test-case file is a sequence of <a
 * href="https://en.wikipedia.org/wiki/Here_document">here documents</a>
 * ("heredocs"), very similar in syntax to Unix Shell heredocs.
 * Heredocs labeled "INPUT" indicate the start of a new case, and
 * these INPUT heredocs are the inputs of test cases.  Following an
 * "INPUT" heredoc can come zero or more "expected-output" heredocs.
 * Each of these expected-output heredocs defines what we call a
 * <dfn>subcase</dfn>.  The assumption here is that for each
 * interesting test input, there are often multiple different tests
 * one could run, each with different expected outputs.
 *
 * <p> Consumers of this class call the {@link #find} method to find
 * all subcases marked with a given label.  For example, imagine the
 * following test-case file:
 * <blockquote> <pre>
 *    &lt;&lt;INPUT 0
 *    &lt;&lt;VALUE 0
 *    &lt;&lt;PPRINT 0
 *    &lt;&lt;INPUT 1+1
 *    &lt;&lt;VALUE 2
 *    &lt;&lt;PPRINT 1 + 1
 *    &lt;&lt;SEXP
 *    (+ 1 1)
 *    SEXP
 * </pre> </blockquote>
 * Calling {@link #find} on the label "VALUE" will return two test
 * cases, the pair <code>&lt;"0","0"&gt;</code> and
 * <code>&lt;"1+1","2"&gt;</code>.  Calling it on the label "PPRINT"
 * will return <code>&lt;"0","0"&gt;</code> and <code>&lt;"1+1","1 +
 * 1"&gt;</code>.  Notice that there need not be a subcase for every
 * INPUT.  In the case of "SEXP", for example, {@link #find} will
 * return only the single pair <code>&lt;"1+1","(+ 1 1)"&gt;</code>.
 *
 * <p> There are two forms of heredocs, single-line and multi-line.
 * The examples above (except "SEXP") are single-line heredocs.  The
 * general syntax for these is:
 * <blockquote> <pre>
 *    ^&lt;&lt;([a-zA-Z][_a-zA-Z0-9]*) (.*)$
 * </pre> </blockquote>
 * The first group in this regex is the label of the heredoc, and the
 * second group is the text of the heredoc.  A single space separates
 * the two groups and is not part of the heredoc (subsequent spaces
 * <em>will</em> be included in the heredoc).  A "line terminator" as
 * defined by the Java language (i.e., CR, LF, or CR followed by LF)
 * terminates a single-line heredoc but is not included in the text
 * of the heredoc.
 *
 * <p> As the name implies, multi-line heredocs are spread across
 * multiple lines, as in this example:
 * <blockquote> <pre>
 *    &lt;&lt;INPUT
 *    1
 *    +1 +
 *    1
 *    INPUT
 *    &lt;&lt;VALUE 3
 *    &lt;&lt;PPRINT 1 + 1 + 1
 * </pre> </blockquote>
 * In this case, the input to the test case is spread across multiple
 * lines (the line terminators in these documents are preserved as
 * part of the document text).  Multi-line heredocs can be used for
 * both the inputs of test cases and the expected outputs of them.
 *
 * <p> The syntax of multi-line heredocs obeys the following pseudo-regex:
 * <blockquote> <pre>
 *    ^&lt;&lt;([a-zA-Z][_a-zA-Z0-9]*)$(.*)$^\1$
 * </pre> </blockquote>
 * That is, as illustrated by the example, a multi-line heredoc named
 * "LABEL" consists of the text <code>&lt;&lt;LABEL</code> on a line by
 * itself, followed by the text of the heredoc, followed by the text
 * <code>LABEL</code> on a line by itself (if LABEL starts a line but
 * is not the <em>only</em> text on that line, then that entire line
 * is part of the heredoc, and the heredoc is not terminated by that
 * line).
 *
 * <p>In multi-line heredocs, neither the line terminator that
 * terminates the start of the document, nor the one just before the
 * label that ends the heredoc, are part of the text of the heredoc.
 * Thus, for example, the text of the multi-line input from above
 * would be exactly <code>"1\n+1 +\n1"</code>.  If you want a new
 * line at the end of a multi-line heredoc, put a blank line before
 * the label ending the heredoc.
 *
 * <p>Also in multi-line heredocs, line-terminators within the heredoc
 * are normalized to line-feeds ('\n').  Thus, for example, when a
 * test file written on a Windows machine is parsed on any machine,
 * the Windows-style line terminators within heredocs will be
 * translated to Unix-style line terminators, no matter what platform
 * the tests are run on.
 *
 * <p> Note that lines between heredocs are ignored, and can be used
 * to provide spacing between and/or commentary on the test cases.
 */
public class CaseFinder {

  /** Label of the heredocs that start a new test case. */
  private static final String NEW_CASE_NAME = "INPUT";

  /** Syntax of a heredoc label. */
  private static final String LABEL_REGEX = "[a-zA-Z][_a-zA-Z0-9]*";

  /** Matches the <code>&lt;&lt;LABEL</code> prefix of a line; group 1
   * is the complete (longest possible) label. */
  private static final Pattern LABEL_PREFIX_PATTERN =
    Pattern.compile("<<(" + LABEL_REGEX + ")");

  /** Matches a whole heredoc start line; group 1 is the label and
   * group 2 is whatever follows it on the line. */
  private static final Pattern START_LINE_PATTERN =
    Pattern.compile("^<<(" + LABEL_REGEX + ")(.*)$");

  /** Scan test-case file <code>in</code> looking for test subcases
   * marked with <code>label</code>.  Any such cases are appended
   * (in order) to the <code>cases</code> parameter as
   * <code>{input, expectedOutput}</code> arrays.  If
   * <code>label</code> equals the string <code>"INPUT"</code>, then
   * the appended pairs are
   * &lt;<i>input</i>, <code>null</code>&gt; for every heredoc named
   * INPUT found in the stream.
   *
   * <p>A line only counts as the start of a heredoc named
   * <code>label</code> when its <em>complete</em> label equals
   * <code>label</code>; searching for "VALUE" does not match a line
   * starting with <code>&lt;&lt;VALUES</code>.
   *
   * <p>The file is scanned line by line and heredocs carrying other
   * labels are not parsed, so a line inside such a heredoc that itself
   * looks like the start of an INPUT or <code>label</code> heredoc is
   * treated as one.
   *
   * <p>Once reading has begun, <code>in</code> is closed before this
   * method returns, whether it returns normally or throws.
   *
   * @param in     reader positioned at the start of the test-case file
   * @param label  label of the subcases to collect
   * @param cases  list the discovered cases are appended to
   * @return the <code>cases</code> list that was passed in
   * @throws IllegalArgumentException if <code>label</code> is not a
   *         syntactically valid heredoc label
   * @throws IOException if reading fails or a heredoc is malformed
   */
  public static List<Object[]> find(BufferedReader in, String label,
                                    List<Object[]> cases)
    throws IOException
  {
    if (! Pattern.matches(LABEL_REGEX, label)) {
      throw new IllegalArgumentException("Bad case subcase label: " + label);
    }

    final boolean collectInputs = label.equals(NEW_CASE_NAME);
    try {
      String line = in.readLine();
      while (true) {
        // Find next new case
        while (line != null && ! startsHereDoc(line, NEW_CASE_NAME)) {
          line = in.readLine();
        }
        if (line == null) {
          break;
        }
        String input = processHereDoc(in, line);

        if (collectInputs) {
          cases.add(new Object[] { input, null });
          line = in.readLine();
          continue;
        }

        // Check to see if there's a subcase named "label" for that case
        do {
          line = in.readLine();
        } while (line != null
                 && ! startsHereDoc(line, NEW_CASE_NAME)
                 && ! startsHereDoc(line, label));

        // No such subcase: either EOF (outer loop ends on its next
        // pass) or the next case has begun (outer loop picks it up).
        if (line == null || startsHereDoc(line, NEW_CASE_NAME)) {
          continue;
        }

        String expectedOutput = processHereDoc(in, line);
        cases.add(new Object[] { input, expectedOutput });
      }
      return cases;
    } finally {
      // Close on failure as well, so a malformed file does not leak
      // the reader.
      in.close();
    }
  }

  /** Returns true if <code>line</code> begins with
   * <code>&lt;&lt;</code> followed by a label that is exactly
   * <code>label</code> (not merely a label having <code>label</code>
   * as a prefix). */
  private static boolean startsHereDoc(String line, String label) {
    Matcher m = LABEL_PREFIX_PATTERN.matcher(line);
    return m.lookingAt() && m.group(1).equals(label);
  }

  /** Reads and returns the content of a heredoc whose start line,
   * <code>docStart</code>, has just been read from <code>in</code>.
   * For a single-line heredoc the text is taken from
   * <code>docStart</code> itself and nothing more is read.  For a
   * multi-line heredoc, lines are read up to and including the line
   * consisting solely of the label; the lines before it are joined
   * with line-feeds ('\n'), with no line-feed after the last one.
   *
   * @throws IllegalArgumentException if <code>docStart</code> is not
   *         the start of a heredoc
   * @throws IOException if a single-line heredoc lacks the space
   *         after its label, or if end-of-file is reached before a
   *         multi-line heredoc is terminated
   */
  private static String processHereDoc(BufferedReader in, String docStart)
    throws IOException
  {
    Matcher m = START_LINE_PATTERN.matcher(docStart);
    if (! m.matches()) {
      throw new IllegalArgumentException("Wasn't given the start of a heredoc (\""+docStart+"\")");
    }
    String docName = m.group(1);

    // Determine if this is a single-line heredoc, and process if it is
    String singleLineText = m.group(2);
    if (singleLineText.length() != 0) {
      if (! singleLineText.startsWith(" ")) {
        throw new IOException("Single-line heredoc missing initial space (\""+docStart+"\")");
      }
      return singleLineText.substring(1);
    }

    // Process multi-line heredocs
    StringBuilder result = new StringBuilder();
    boolean first = true;
    for (String line = in.readLine(); ; line = in.readLine()) {
      if (line == null) {
        throw new IOException("Here document (" + docName
                              + ") terminated by end-of-file.");
      }
      if (line.equals(docName)) {
        return result.toString();
      }
      // Separator goes between lines only, so the final line of the
      // heredoc carries no trailing line-feed.
      if (! first) {
        result.append('\n');
      }
      result.append(line);
      first = false;
    }
  }
}