Lexicals.java example

Explorer

pljava-master
- pljava
  - src
    - main
      - java
        org
        postgresql
        pljava
        internal
        AclId.java
        Backend.java
        ELogFormatter.java
        ELogHandler.java
        ErrorData.java
        ExecutionPlan.java
        HeapTupleHeader.java
        InstallHelper.java
        JavaWrapper.java
        LargeObject.java
        ObjectPoolImpl.java
        Oid.java
        PgSavepoint.java
        Portal.java
        Relation.java
        ResultSetPicker.java
        SPI.java
        SPIException.java
        ServerException.java
        Session.java
        SubXactListener.java
        TransactionalMap.java
        TriggerData.java
        Tuple.java
        TupleDesc.java
        TupleTable.java
        XactListener.java
        package-info.java
        jdbc
        AbstractResultSet.java
        AbstractResultSetMetaData.java
        BlobValue.java
        BuiltinFunctions.java
        ClobValue.java
        Invocation.java
        ObjectResultSet.java
        ReadOnlyResultSet.java
        ResultSetBase.java
        ResultSetField.java
        SPIConnection.java
        SPIDatabaseMetaData.java
        SPIDriver.java
        SPIParameterMetaData.java
        SPIPreparedStatement.java
        SPIResultSet.java
        SPIResultSetMetaData.java
        SPIStatement.java
        SQLInputFromChunk.java
        SQLInputFromTuple.java
        SQLOutputToChunk.java
        SQLOutputToTuple.java
        SQLUtils.java
        SingleRowReader.java
        SingleRowResultSet.java
        SingleRowWriter.java
        StatementClosedException.java
        SyntheticResultSet.java
        SyntheticResultSetMetaData.java
        TriggerResultSet.java
        TypeOid.java
        UnsupportedFeatureException.java
        package-info.java
        management
        Commands.java
        DDRExecutor.java
        SQLDeploymentDescriptor.java
        package-info.java
        sqlj
        EntryStreamHandler.java
        Loader.java
        package-info.java
- pljava-ant
  - src
    - main
      - java
        org
        postgresql
        pljava
        tasks
        JarLoaderTask.java
        PLJavaTask.java
        package-info.java
- pljava-api
  - src
    - main
      - java
        org
        postgresql
        pljava
        ObjectPool.java
        PooledObject.java
        ResultSetHandle.java
        ResultSetProvider.java
        SavepointListener.java
        Session.java
        SessionManager.java
        TransactionListener.java
        TriggerData.java
        TriggerException.java
        annotation
        BaseUDT.java
        Function.java
        MappedUDT.java
        SQLAction.java
        SQLActions.java
        SQLType.java
        Trigger.java
        package-info.java
        package-info.java
        sqlgen
        DDRProcessor.java
        DDRWriter.java
        Lexicals.java
        TriggerNamer.java
        package-info.java
    - test
      - java
        LexicalsTest.java
- pljava-examples
  - src
    - main
      - java
        org
        postgresql
        pljava
        example
        AnyTest.java
        BinaryColumnTest.java
        HugeResultSet.java
        LoggerTest.java
        MetaDataBooleans.java
        MetaDataInts.java
        MetaDataStrings.java
        MetaDataTest.java
        Parameters.java
        RandomInts.java
        ResultSetTest.java
        SPIActions.java
        Security.java
        SetOfRecordTest.java
        Threads.java
        Triggers.java
        TupleReturn.java
        Users.java
        UsingProperties.java
        UsingPropertiesAsResultSet.java
        UsingPropertiesAsScalarSet.java
        annotation
        ComplexScalar.java
        ComplexTuple.java
        ConditionalDDR.java
        Enumeration.java
        IntWithMod.java
        PGF1010962.java
        Point.java
        ThreadTest.java
        Triggers.java
        UDTScalarIOTest.java
        UnicodeRoundTripTest.java
        UsingProperties.java
        UsingPropertiesAsScalarSet.java
        VarlenaUDTTest.java
        package-info.java
- pljava-packaging
  - src
    - main
      - java
        JarX.java
- src
  - java
    - test
      - org
        postgresql
        pljava
        test
        CommandReader.java
        Environment.java
        Path.java
        TableBuilder.java
        TestPLJava.java
        Tester.java

/*
 * Copyright (c) 2015-2016 Tada AB and other contributors, as listed below.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the The BSD 3-Clause License
 * which accompanies this distribution, and is available at
 * http://opensource.org/licenses/BSD-3-Clause
 *
 * Contributors:
 *   Chapman Flack
 */
package org.postgresql.pljava.sqlgen;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.annotation.processing.Messager;
import javax.tools.Diagnostic.Kind;

/**
 * A few useful SQL lexical definitions supplied as {@link Pattern} objects.
 *
 * The idea is not to go overboard and reimplement an SQL lexer, but to
 * capture in one place the rules for those bits of SQL snippets that are
 * likely to be human-supplied in annotations and need to be checked for
 * correctness when emitted into deployment descriptors. Identifiers, for a
 * start.
 *
 * Supplied in the API module so they are available to {@code javac} to
 * compile and generate DDR when the rest of PL/Java is not necessarily
 * present. Of course backend code such as {@code SQLDeploymentDescriptor}
 * can also refer to these.
 */
public abstract class Lexicals
{
	/** Allowed as the first character of a regular identifier by ISO.
	 */
	public static final Pattern ISO_REGULAR_IDENTIFIER_START = Pattern.compile(
		"[\\p{Lu}\\p{Ll}\\p{Lt}\\p{Lm}\\p{Lo}\\p{Nl}]"
	);

	/** Allowed as any non-first character of a regular identifier by ISO.
	 */
	public static final Pattern ISO_REGULAR_IDENTIFIER_PART =
	Pattern.compile(String.format(
		"[\\xb7\\p{Mn}\\p{Mc}\\p{Nd}\\p{Pc}\\p{Cf}%1$s]",
		ISO_REGULAR_IDENTIFIER_START.pattern()
	));

	/** A complete regular identifier as allowed by ISO.
	 */
	public static final Pattern ISO_REGULAR_IDENTIFIER =
	Pattern.compile(String.format(
		"%1$s%2$s{0,127}+",
		ISO_REGULAR_IDENTIFIER_START.pattern(),
		ISO_REGULAR_IDENTIFIER_PART.pattern()
	));

	/** A complete ISO regular identifier in a single capturing group.
	 */
	public static final Pattern ISO_REGULAR_IDENTIFIER_CAPTURING =
	Pattern.compile(String.format(
		"(%1$s)", ISO_REGULAR_IDENTIFIER.pattern()
	));

	/** A complete delimited identifier as allowed by ISO. As it happens, this
	 * is also the form PostgreSQL uses for elements of a LIST_QUOTE-typed GUC.
	 */
	public static final Pattern ISO_DELIMITED_IDENTIFIER = Pattern.compile(
		"\"(?:[^\"]|\"\"){1,128}+\""
	);

	/** An ISO delimited identifier with a single capturing group that captures
	 * the content (which still needs to have "" replaced with " throughout).
	 * The capturing group is named {@code xd}.
	 */
	public static final Pattern ISO_DELIMITED_IDENTIFIER_CAPTURING =
	Pattern.compile(
		// A constant regex: nothing to interpolate, so no String.format here.
		"\"(?<xd>(?:[^\"]|\"\"){1,128}+)\""
	);

	/** The escape-specifier part of a Unicode delimited identifier or string.
	 * The escape character itself is in the capturing group named {@code uec}.
	 * The group can be absent, in which case \ should be used as the uec.
	 */
	public static final Pattern ISO_UNICODE_ESCAPE_SPECIFIER =
	Pattern.compile(
		"(?:\\p{IsWhite_Space}*+[Uu][Ee][Ss][Cc][Aa][Pp][Ee]"+
		"\\p{IsWhite_Space}*+'(?<uec>[^0-9A-Fa-f+'\"\\p{IsWhite_Space}])')?+"
	);

	/** A Unicode delimited identifier. The body is in capturing group
	 * {@code xui} and the escape character in group {@code uec}. The body
	 * still needs to have "" replaced with ", and {@code Unicode escape value}s
	 * decoded and replaced, and then it has to be verified to be no longer
	 * than 128 codepoints.
	 */
	public static final Pattern ISO_UNICODE_IDENTIFIER =
	Pattern.compile(String.format(
		"[Uu]&\"(?<xui>(?:[^\"]|\"\")++)\"%1$s",
		ISO_UNICODE_ESCAPE_SPECIFIER.pattern()
	));

	/** A compilable pattern to match a {@code Unicode escape value}.
	 * A match has exactly one of three named capturing groups. If {@code cev},
	 * substitute the {@code uec} itself. If {@code u4d} or {@code u6d},
	 * substitute the codepoint represented by the hex digits. As written, the
	 * pattern only matches when one of those three alternatives follows the
	 * {@code uec}; an escape character followed by anything else is simply
	 * not matched (and so is not reported as ill-formed by this pattern).
	 *<p>
	 * Make a Pattern from this by supplying the right {@code uec}, so:
	 * {@code Pattern.compile(String.format(ISO_UNICODE_REPLACER,
	 *   Pattern.quote(uec)));}
	 */
	public static final String ISO_UNICODE_REPLACER =
		"%1$s(?:(?<cev>%1$s)|(?<u4d>[0-9A-Fa-f]{4})|\\+(?<u6d>[0-9A-Fa-f]{6}))";

	/** Allowed as the first character of a regular identifier by PostgreSQL
	 * (PG 7.4 -).
	 */
	public static final Pattern PG_REGULAR_IDENTIFIER_START = Pattern.compile(
		"[A-Za-z\\P{ASCII}_]" // hasn't seen a change since PG 7.4
	);

	/** Allowed as any non-first character of a regular identifier by PostgreSQL
	 * (PG 7.4 -).
	 */
	public static final Pattern PG_REGULAR_IDENTIFIER_PART =
	Pattern.compile(String.format(
		"[0-9$%1$s]", PG_REGULAR_IDENTIFIER_START.pattern()
	));

	/** A complete regular identifier as allowed by PostgreSQL (PG 7.4 -).
	 */
	public static final Pattern PG_REGULAR_IDENTIFIER =
	Pattern.compile(String.format(
		"%1$s%2$s*+",
		PG_REGULAR_IDENTIFIER_START.pattern(),
		PG_REGULAR_IDENTIFIER_PART.pattern()
	));

	/** A complete PostgreSQL regular identifier in a single capturing group.
	 */
	public static final Pattern PG_REGULAR_IDENTIFIER_CAPTURING =
	Pattern.compile(String.format(
		"(%1$s)", PG_REGULAR_IDENTIFIER.pattern()
	));

	/** A regular identifier that satisfies both ISO and PostgreSQL rules.
	 * Each character is checked against the ISO class with a lookahead and
	 * then consumed by the PostgreSQL class. No length limit is applied here.
	 */
	public static final Pattern ISO_AND_PG_REGULAR_IDENTIFIER =
	Pattern.compile(String.format(
		"(?:(?=%1$s)%2$s)(?:(?=%3$s)%4$s)*+",
		ISO_REGULAR_IDENTIFIER_START.pattern(),
		PG_REGULAR_IDENTIFIER_START.pattern(),
		ISO_REGULAR_IDENTIFIER_PART.pattern(),
		PG_REGULAR_IDENTIFIER_PART.pattern()
	));

	/** A regular identifier that satisfies both ISO and PostgreSQL rules,
	 * in a single capturing group named {@code i}.
	 */
	public static final Pattern ISO_AND_PG_REGULAR_IDENTIFIER_CAPTURING =
	Pattern.compile(
		String.format( "(?<i>%1$s)", ISO_AND_PG_REGULAR_IDENTIFIER.pattern())
	);

	/** Pattern that matches any identifier valid by both ISO and PG rules,
	 * with the presence of named capturing groups indicating which kind it is:
	 * {@code i} for a regular identifier, {@code xd} for a delimited identifier
	 * (still needing "" replaced with "), or {@code xui} (with or without an
	 * explicit {@code uec}) for a Unicode identifier (still needing "" to " and
	 * decoding of {@code Unicode escape value}s).
	 */
	public static final Pattern ISO_AND_PG_IDENTIFIER_CAPTURING =
	Pattern.compile(String.format(
		"%1$s|(?:%2$s)|(?:%3$s)",
		ISO_AND_PG_REGULAR_IDENTIFIER_CAPTURING.pattern(),
		ISO_DELIMITED_IDENTIFIER_CAPTURING.pattern(),
		ISO_UNICODE_IDENTIFIER.pattern()
	));

	/** An identifier by ISO SQL, PostgreSQL, <em>and</em> Java (not SQL at all)
	 * rules. (Not called {@code REGULAR} because Java allows no other form of
	 * identifier.) This restrictive form is the safest for identifiers being
	 * generated into a deployment descriptor file that an old version of
	 * PL/Java might load, because through 1.4.3 PL/Java used the Java
	 * identifier rules to recognize identifiers in deployment descriptors.
	 */
	public static final Pattern ISO_PG_JAVA_IDENTIFIER =
	Pattern.compile(String.format(
		"(?:(?=%1$s)(?=\\p{%5$sStart})%2$s)(?:(?=%3$s)(?=\\p{%5$sPart})%4$s)*+",
		ISO_REGULAR_IDENTIFIER_START.pattern(),
		PG_REGULAR_IDENTIFIER_START.pattern(),
		ISO_REGULAR_IDENTIFIER_PART.pattern(),
		PG_REGULAR_IDENTIFIER_PART.pattern(),
		"javaJavaIdentifier"
	));

	/**
	 * Return an Identifier, given a {@code Matcher} that has matched an
	 * ISO_AND_PG_IDENTIFIER_CAPTURING. Will determine from the matching named
	 * groups which type of identifier it was, process the matched sequence
	 * appropriately, and return it.
	 *<p>
	 * The groups are consulted in the order {@code i}, {@code xd},
	 * {@code xui}; for {@code xui} the optional {@code uec} group supplies the
	 * escape character, defaulting to a backslash.
	 * @param m A {@code Matcher} known to have matched an identifier.
	 * @return Identifier made from the recovered string, or {@code null} if
	 * none of the groups {@code i}, {@code xd}, {@code xui} participated in
	 * the match.
	 * @throws IllegalArgumentException if a Unicode escape value denotes
	 * something that is not a valid code point, or if the recovered string is
	 * rejected by {@link Identifier#from Identifier.from}.
	 */
	public static Identifier identifierFrom(Matcher m)
	{
		String s = m.group("i");
		if ( null != s )
			return Identifier.from(s, false);

		s = m.group("xd");
		if ( null != s )
			return Identifier.from(s.replace("\"\"", "\""), true);

		s = m.group("xui");
		if ( null == s )
			return null; // no identifier group participated in the match

		String uec = m.group("uec");
		if ( null == uec )
			uec = "\\"; // the default escape character when UESCAPE is absent

		return Identifier.from(
			decodeUnicodeEscapes(s.replace("\"\"", "\""), uec), true);
	}

	/**
	 * Replace every {@code Unicode escape value} in the body of a Unicode
	 * delimited identifier with the character it denotes.
	 * @param body Identifier body, with "" already replaced by ".
	 * @param uec The escape character in effect (a single code point).
	 * @return The body with escapes decoded. An escape character that is not
	 * followed by a well-formed escape is left in place unchanged.
	 * @throws IllegalArgumentException if an escape denotes a value that is
	 * not a valid Unicode code point.
	 */
	private static String decodeUnicodeEscapes(String body, String uec)
	{
		int uecp = uec.codePointAt(0);
		Matcher replacer =
			Pattern.compile(
				String.format(ISO_UNICODE_REPLACER, Pattern.quote(uec)))
				.matcher(body);
		StringBuffer sb = new StringBuffer();
		while ( replacer.find() )
		{
			// Copy the text preceding the escape; the escape's replacement is
			// appended by code point below, so nothing needs '$' quoting.
			replacer.appendReplacement(sb, "");
			String uev = replacer.group("u4d");
			if ( null == uev )
				uev = replacer.group("u6d");
			// Neither hex group present means the 'cev' (doubled uec) case.
			int cp = null != uev ? Integer.parseInt(uev, 16) : uecp;
			// Six hex digits can express values beyond U+10FFFF.
			if ( ! Character.isValidCodePoint(cp) )
				throw new IllegalArgumentException(String.format(
					"Unicode escape value %1$s is not a valid code point",
					replacer.group()));
			sb.appendCodePoint(cp);
		}
		return replacer.appendTail(sb).toString();
	}

	/**
	 * Class representing a SQL identifier. These have wild and wooly behavior
	 * depending on whether they were represented in the source in quoted form
	 * or not. Quoted ones are case-sensitive,
	 * and {@link #equals(Object) equals} will only recognize exact matches.
	 * Non-quoted ones match case-insensitively; just to make this interesting,
	 * ISO SQL has one set of case-folding rules, while PostgreSQL has another.
	 * Also, a non-quoted identifier can match a quoted one, if the quoted one's
	 * exact spelling matches the non-quoted one's case-folded form.
	 *<p>
	 * For even more fun, the PostgreSQL rules depend on the server encoding.
	 * For any multibyte encoding, <em>only</em> the 26 ASCII uppercase letters
	 * are folded to lower, leaving all other characters alone. In single-byte
	 * encodings, more letters can be touched. But this code has to run in a
	 * javac annotation processor without knowledge of any particular database's
	 * server encoding. The recommended encoding, UTF-8, is multibyte, so the
	 * PostgreSQL rule will be taken to be: only the 26 ASCII letters, always.
	 *<p>
	 * Case folding is done with {@link Locale#ROOT}, so results do not depend
	 * on the default locale of the JVM running the annotation processor.
	 */
	public static class Identifier
	{
		protected final String m_nonFolded;

		/**
		 * Whether this Identifier case-folds.
		 * @return true if this Identifier was non-quoted in the source,
		 * false if it was quoted.
		 */
		public boolean folds()
		{
			return false;
		}

		/**
		 * This Identifier's original spelling.
		 * @return The spelling as seen in the source, with no case folding.
		 */
		public String nonFolded()
		{
			return m_nonFolded;
		}

		/**
		 * This Identifier as PostgreSQL would case-fold it (or the same as
		 * nonFolded if this was quoted and does not fold).
		 * @return The spelling with ASCII letters (only) folded to lowercase,
		 * if this Identifier folds.
		 */
		public String pgFolded()
		{
			return m_nonFolded;
		}

		/**
		 * This Identifier as ISO SQL would case-fold it (or the same as
		 * nonFolded if this was quoted and does not fold).
		 * @return The spelling with lowercase and titlecase letters folded to
		 * (possibly length-changing) uppercase equivalents,
		 * if this Identifier folds.
		 */
		public String isoFolded()
		{
			return m_nonFolded;
		}

		/**
		 * Create an Identifier given its original, non-folded spelling,
		 * and whether it represents a quoted identifier.
		 * @param s The exact, internal, non-folded spelling of the identifier
		 * (unwrapped from any quoting in its external form).
		 * @param quoted Pass {@code true} if this was parsed from any quoted
		 * external form, false if non-quoted.
		 * @return A corresponding Identifier
		 * @throws IllegalArgumentException if {@code quoted} is {@code false}
		 * but {@code s} cannot be a non-quoted identifier, or {@code s} is
		 * empty or longer than the ISO SQL maximum 128 codepoints.
		 */
		public static Identifier from(String s, boolean quoted)
		{
			// "Foldable" here means: spelled so that some non-quoted
			// identifier could conceivably match it.
			boolean foldable =
				ISO_AND_PG_REGULAR_IDENTIFIER.matcher(s).matches();
			if ( ! quoted )
			{
				if ( ! foldable )
					throw new IllegalArgumentException(String.format(
						"impossible for \"%1$s\" to be a non-quoted identifier",
						s));
				return new Folding(s);
			}
			if ( foldable )
				return new Foldable(s);
			return new Identifier(s);
		}

		/**
		 * @return The original, non-folded spelling.
		 */
		@Override
		public String toString()
		{
			return m_nonFolded;
		}

		/**
		 * For a quoted identifier that could not match any non-quoted one,
		 * the hash code of its non-folded spelling is good enough. In other
		 * cases, the code must be derived more carefully.
		 */
		@Override
		public int hashCode()
		{
			return m_nonFolded.hashCode();
		}

		/**
		 * Equivalent to {@link #equals(Object,Messager) equals(other, null)}.
		 */
		@Override
		public boolean equals(Object other)
		{
			return equals(other, null);
		}

		/**
		 * For use in an annotation processor, a version of {@code equals} that
		 * can take a {@link Messager} and use it to emit warnings. It will
		 * emit a warning whenever it compares two Identifiers that are equal
		 * by one or the other of PostgreSQL's or ISO SQL's rules but not both.
		 * @param other Object to compare to
		 * @param msgr a Messager to use for warnings; if {@code null}, no
		 * warnings will be generated.
		 * @return true if two quoted Identifiers match exactly, or two
		 * non-quoted ones match in either the PostgreSQL or ISO SQL folded
		 * form, or a quoted one exactly matches either folded form of a
		 * non-quoted one.
		 */
		public boolean equals(Object other, Messager msgr)
		{
			if ( ! (other instanceof Identifier) )
				return false;
			Identifier oi = (Identifier)other;
			// Let the folding operand decide, so the comparison is symmetric;
			// hand the Messager along rather than silently dropping it.
			if ( oi.folds() )
				return oi.equals(this, msgr);
			return m_nonFolded.equals(oi.nonFolded());
		}

		/**
		 * Construct an Identifier from its non-folded spelling.
		 * @param nonFolded The exact spelling.
		 * @throws IllegalArgumentException if the spelling is empty or longer
		 * than 128 codepoints.
		 */
		protected Identifier(String nonFolded)
		{
			m_nonFolded = nonFolded;
			int cpc = nonFolded.codePointCount(0, nonFolded.length());
			if ( 0 == cpc || cpc > 128 )
				throw new IllegalArgumentException(String.format(
					"identifier empty or longer than 128 codepoints: \"%s\"",
					nonFolded));
		}

		/**
		 * Class representing an Identifier that was quoted, therefore does
		 * not case-fold, but satisfies {@code ISO_AND_PG_REGULAR_IDENTIFIER}
		 * and so could conceivably be matched by a non-quoted identifier.
		 */
		static class Foldable extends Identifier
		{
			private final int m_hashCode;

			protected Foldable(String nonFolded)
			{
				this(nonFolded, isoFold(nonFolded));
			}

			/**
			 * @param nonFolded The exact spelling.
			 * @param isoFolded The ISO-folded spelling, used here only to
			 * derive the hash code.
			 */
			protected Foldable(String nonFolded, String isoFolded)
			{
				super(nonFolded);
				m_hashCode = isoFolded.hashCode();
			}

			/**
			 * For any identifier that case-folds, or even could be matched by
			 * another identifier that case-folds, the hash code is tricky.
			 * Hash codes are required to be equal for any instances that are
			 * equal (but not required to be different for instances that are
			 * unequal). In this case, the hash codes need to be equal whenever
			 * the PostgreSQL <em>or</em> ISO SQL folded forms match.
			 *<p>
			 * This hash code will be derived from the ISO-folded spelling of
			 * the identifier. As long as the PostgreSQL rules only affect the
			 * 26 ASCII letters, all of which are also folded (albeit in the
			 * other direction) by the ISO rules, hash codes will also match for
			 * identifiers equal under PostgreSQL rules.
			 */
			@Override
			public int hashCode()
			{
				return m_hashCode;
			}

			/**
			 * The characters that ISO SQL rules will fold: anything that is
			 * lowercase or titlecase.
			 */
			private static final Pattern s_isoFolded =
				Pattern.compile("[\\p{javaLowerCase}\\p{javaTitleCase}]");

			/**
			 * Case-fold a string by the ISO SQL rules, where any lowercase or
			 * titlecase character gets replaced by its uppercase form (the
			 * generalized, possibly length-changing one, requiring
			 * {@link String#toUpperCase(Locale)} and not
			 * {@link Character#toUpperCase}).
			 *<p>
			 * {@link Locale#ROOT} is used so the result is the same whatever
			 * the JVM's default locale (a Turkish default locale would
			 * otherwise map {@code i} to a dotted capital I).
			 * @param s The non-folded value.
			 * @return The folded value.
			 */
			protected static String isoFold(String s)
			{
				Matcher m = s_isoFolded.matcher(s);
				StringBuffer sb = new StringBuffer();
				while ( m.find() )
					m.appendReplacement(sb, Matcher.quoteReplacement(
						m.group().toUpperCase(Locale.ROOT)));
				return m.appendTail(sb).toString();
			}
		}

		/**
		 * Class representing an Identifier that was not quoted, and therefore
		 * has case-folded forms.
		 */
		static class Folding extends Foldable
		{
			private final String m_pgFolded;
			private final String m_isoFolded;

			protected Folding(String nonFolded)
			{
				this(nonFolded, isoFold(nonFolded));
			}

			/**
			 * @param nonFolded The exact spelling.
			 * @param isoFolded The ISO-folded spelling (computed once by the
			 * caller and shared with the superclass for the hash code).
			 */
			protected Folding(String nonFolded, String isoFolded)
			{
				super(nonFolded, isoFolded);
				m_pgFolded = pgFold(nonFolded);
				m_isoFolded = isoFolded;
			}

			@Override
			public String pgFolded()
			{
				return m_pgFolded;
			}

			@Override
			public String isoFolded()
			{
				return m_isoFolded;
			}

			@Override
			public boolean folds()
			{
				return true;
			}

			/**
			 * Compare by folded forms: equal if either the PostgreSQL-folded
			 * or the ISO-folded spellings agree. When exactly one of the two
			 * agrees, the other Identifier also folds, and a Messager was
			 * supplied, a warning is emitted.
			 */
			@Override
			public boolean equals(Object other, Messager msgr)
			{
				if ( ! (other instanceof Identifier) )
					return false;
				Identifier oi = (Identifier)other;
				boolean eqPG = m_pgFolded.equals(oi.pgFolded());
				boolean eqISO = m_isoFolded.equals(oi.isoFolded());
				if ( eqPG != eqISO  &&  oi.folds()  &&  null != msgr )
				{
					msgr.printMessage(Kind.WARNING, String.format(
						"identifiers \"%1$s\" and \"%2$s\" are equal by ISO " +
						"or PostgreSQL case-insensitivity rules but not both",
						m_nonFolded, oi.nonFolded()));
				}
				return eqPG || eqISO;
			}

			/**
			 * The characters that PostgreSQL rules will fold: only the 26
			 * uppercase ASCII letters.
			 */
			private static final Pattern s_pgFolded = Pattern.compile("[A-Z]");

			/**
			 * Case-fold a string by the PostgreSQL rules (assuming a
			 * multibyte server encoding, where only the 26 uppercase ASCII
			 * letters fold to lowercase).
			 *<p>
			 * {@link Locale#ROOT} is used so that {@code I} always folds to
			 * ASCII {@code i}, never to the dotless i a Turkish default locale
			 * would produce.
			 * @param s The non-folded value.
			 * @return The folded value.
			 */
			private static String pgFold(String s)
			{
				Matcher m = s_pgFolded.matcher(s);
				StringBuffer sb = new StringBuffer();
				while ( m.find() )
					m.appendReplacement(sb, m.group().toLowerCase(Locale.ROOT));
				return m.appendTail(sb).toString();
			}
		}
	}
}