package adql.db; /* * This file is part of ADQLLibrary. * * ADQLLibrary is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * ADQLLibrary is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with ADQLLibrary. If not, see <http://www.gnu.org/licenses/>. * * Copyright 2015 - Astronomisches Rechen Institut (ARI) */ import java.lang.reflect.Constructor; import java.util.regex.Matcher; import java.util.regex.Pattern; import adql.db.DBType.DBDatatype; import adql.parser.ParseException; import adql.query.operand.ADQLOperand; import adql.query.operand.function.ADQLFunction; import adql.query.operand.function.DefaultUDF; import adql.query.operand.function.UserDefinedFunction; /** * <p>Definition of any function that could be used in ADQL queries.</p> * * <p> * A such definition can be built manually thanks to the different constructors of this class, * or by parsing a string function definition form using the static function {@link #parse(String)}. * </p> * * <p> * The syntax of the expression expected by {@link #parse(String)} is the same as the one used to build * the string returned by {@link #toString()}. Here is this syntax: * </p> * <pre>{fctName}([{param1Name} {param1Type}, ...])[ -> {returnType}]</pre> * * <p> * A description of this function may be set thanks to the public class attribute {@link #description}. * </p> * * @author Grégory Mantelet (ARI) * @version 1.4 (08/2015) * * @since 1.3 */ public class FunctionDef implements Comparable<FunctionDef> { /** Regular expression for what should be a function or parameter name - a regular identifier. */ protected final static String regularIdentifierRegExp = "[a-zA-Z]+[0-9a-zA-Z_]*"; /** Rough regular expression for a function return type or a parameter type. * The exact type is not checked here ; just the type name syntax is tested, not its value. * This regular expression allows a type to have exactly one parameter (which is generally the length of a character or binary string. */ protected final static String typeRegExp = "([a-zA-Z_]+[ 0-9a-zA-Z_]*)(\\(\\s*([0-9]+)\\s*\\))?"; /** Rough regular expression for a function parameters' list. */ protected final static String fctParamsRegExp = "\\s*[^,]+\\s*(,\\s*[^,]+\\s*)*"; /** Rough regular expression for a function parameter: a name (see {@link #regularIdentifierRegExp}) and a type (see {@link #typeRegExp}). */ protected final static String fctParamRegExp = "\\s*(" + regularIdentifierRegExp + ")\\s+" + typeRegExp + "\\s*"; /** Rough regular expression for a whole function definition. */ protected final static String fctDefRegExp = "\\s*(" + regularIdentifierRegExp + ")\\s*\\(([a-zA-Z0-9,() \r\n\t]*)\\)(\\s*->\\s*(" + typeRegExp + "))?\\s*"; /** Pattern of a function definition. This object has been compiled with {@link #fctDefRegExp}. */ protected final static Pattern fctPattern = Pattern.compile(fctDefRegExp); /** Pattern of a single parameter definition. This object has been compiled with {@link #fctParamRegExp}. */ protected final static Pattern paramPattern = Pattern.compile(fctParamRegExp); /** Name of the function. */ public final String name; /** Description of this function. */ public String description = null; /** Type of the result returned by this function. */ public final DBType returnType; /** Indicate whether the return type is a string. */ protected final boolean isString; /** Indicate whether the return type is a numeric. */ protected final boolean isNumeric; /** Indicate whether the return type is a geometry. */ protected final boolean isGeometry; /** Indicate whether the return type is an unknown type. * <p><i><u>Note:</u> * If <code>true</code>, {@link #isString}, {@link #isNumeric} * and {@link #isGeometry} are <code>false</code>. Otherwise, * at least one of these attributes is set to <code>true</code>. * </i></p> * @since 1.4 */ protected final boolean isUnknown; /** Total number of parameters. */ public final int nbParams; /** List of all the parameters of this function. */ protected final FunctionParam[] params; /** <p>String representation of this function.</p> * <p>The syntax of this representation is the following <i>(items between brackets are optional)</i>:</p> * <pre>{fctName}([{param1Name} {param1Type}, ...])[ -> {returnType}]</pre> */ private final String serializedForm; /** <p>String representation of this function dedicated to comparison with any function signature.</p> * <p>This form is different from the serialized form on the following points:</p> * <ul> * <li>the function name is always in lower case.</li> * <li>each parameter is represented by a string of 3 characters, one for each kind of type (in the order): numeric, string, geometry. * Each character is either a 0 or 1, so that indicating whether the parameter is of that kind of type.</li> * <li>no return type.</li> * </ul> * <p>So the syntax of this form is the following <i>(items between brackets are optional ; xxx is a string of 3 characters, each being either 0 or 1)</i>:</p> * <pre>{fctName}([xxx, ...])</pre> */ private final String compareForm; /** * <p>Class of the {@link UserDefinedFunction} which must represent the UDF defined by this {@link FunctionDef} in the ADQL tree.</p> * <p>This class MUST have a constructor with a single parameter of type {@link ADQLOperand}[].</p> * <p>If this {@link FunctionDef} is defining an ordinary ADQL function, this attribute must be NULL. It is used only for user defined functions.</p> */ private Class<? extends UserDefinedFunction> udfClass = null; /** * <p>Definition of a function parameter.</p> * * <p>This definition is composed of two items: the name and the type of the parameter.</p> * * @author Grégory Mantelet (ARI) * @version 1.4 (07/2015) * @since 1.3 */ public static final class FunctionParam { /** Parameter name. <i>Ensured not null</i> */ public final String name; /** Parameter type. <i>Ensured not null</i> */ public final DBType type; /** * Create a function parameter. * * @param paramName Name of the parameter to create. <i>MUST NOT be NULL</i> * @param paramType Type of the parameter to create. <i>If NULL, an {@link DBDatatype#UNKNOWN UNKNOWN} type will be created and set instead.</i> */ public FunctionParam(final String paramName, final DBType paramType){ if (paramName == null) throw new NullPointerException("Missing name! The function parameter can not be created."); this.name = paramName; this.type = (paramType == null) ? new DBType(DBDatatype.UNKNOWN) : paramType; } } /** * <p>Create a function definition.</p> * * <p>The created function will have <b>no return type</b> and <b>no parameter</b>.</p> * * @param fctName Name of the function. */ public FunctionDef(final String fctName){ this(fctName, null, null); } /** * <p>Create a function definition.</p> * * <p>The created function will have a return type (if the provided one is not null) and <b>no parameter</b>.</p> * * @param fctName Name of the function. * @param returnType Return type of the function. <i>If NULL, this function will have no return type</i> */ public FunctionDef(final String fctName, final DBType returnType){ this(fctName, returnType, null); } /** * <p>Create a function definition.</p> * * <p>The created function will have <b>no return type</b> and some parameters (except if the given array is NULL or empty).</p> * * @param fctName Name of the function. * @param params Parameters of this function. <i>If NULL or empty, this function will have no parameter.</i> */ public FunctionDef(final String fctName, final FunctionParam[] params){ this(fctName, null, params); } public FunctionDef(final String fctName, final DBType returnType, final FunctionParam[] params){ // Set the name: if (fctName == null) throw new NullPointerException("Missing name! Can not create this function definition."); this.name = fctName; // Set the parameters: this.params = (params == null || params.length == 0) ? null : params; this.nbParams = (params == null) ? 0 : params.length; // Set the return type; this.returnType = (returnType != null) ? returnType : new DBType(DBDatatype.UNKNOWN); isUnknown = this.returnType.isUnknown(); isNumeric = this.returnType.isNumeric(); isString = this.returnType.isString(); isGeometry = this.returnType.isGeometry(); // Serialize in Strings (serializedForm and compareForm) this function definition: StringBuffer bufSer = new StringBuffer(name), bufCmp = new StringBuffer(name.toLowerCase()); bufSer.append('('); for(int i = 0; i < nbParams; i++){ bufSer.append(params[i].name).append(' ').append(params[i].type); bufCmp.append(params[i].type.isNumeric() ? '1' : '0').append(params[i].type.isString() ? '1' : '0').append(params[i].type.isGeometry() ? '1' : '0'); if (i + 1 < nbParams) bufSer.append(", "); } bufSer.append(')'); if (returnType != null) bufSer.append(" -> ").append(returnType); serializedForm = bufSer.toString(); compareForm = bufCmp.toString(); } /** * Tell whether this function returns a numeric. * * @return <i>true</i> if this function returns a numeric, <i>false</i> otherwise. */ public final boolean isNumeric(){ return isNumeric; } /** * Tell whether this function returns a string. * * @return <i>true</i> if this function returns a string, <i>false</i> otherwise. */ public final boolean isString(){ return isString; } /** * Tell whether this function returns a geometry. * * @return <i>true</i> if this function returns a geometry, <i>false</i> otherwise. */ public final boolean isGeometry(){ return isGeometry; } /** * <p>Tell whether this function returns an unknown type.</p> * * <p> * If this function returns <code>true</code>, {@link #isNumeric()}, {@link #isString()} and {@link #isGeometry()} * <b>MUST ALL</b> return <code>false</code>. Otherwise, one of these 3 last functions MUST return <code>true</code>. * </p> * * @return <i>true</i> if this function returns an unknown/unresolved/unsupported type, <i>false</i> otherwise. */ public final boolean isUnknown(){ return isUnknown; } /** * Get the number of parameters required by this function. * * @return Number of required parameters. */ public final int getNbParams(){ return nbParams; } /** * Get the definition of the indParam-th parameter of this function. * * @param indParam Index of the parameter whose the definition must be returned. * * @return Definition of the specified parameter. * * @throws ArrayIndexOutOfBoundsException If the given index is negative or bigger than the number of parameters. */ public final FunctionParam getParam(final int indParam) throws ArrayIndexOutOfBoundsException{ if (indParam < 0 || indParam >= nbParams) throw new ArrayIndexOutOfBoundsException(indParam); else return params[indParam]; } /** * <p>Get the class of the {@link UserDefinedFunction} able to represent the function defined here in an ADQL tree.</p> * * <p><i>Note: * This getter should return always NULL if the function defined here is not a user defined function. * <br/> * However, if this {@link FunctionDef} is defining a user defined function and this function returns NULL, * the library will create on the fly a {@link DefaultUDF} corresponding to this definition when needed. * Indeed this UDF class is useful only if the translation from ADQL (to SQL for instance) of the defined * function has a different signature (e.g. a different name) in the target language (e.g. SQL). * </i></p> * * @return The corresponding {@link UserDefinedFunction}. <i>MAY BE NULL</i> */ public final Class<? extends UserDefinedFunction> getUDFClass(){ return udfClass; } /** * <p>Set the class of the {@link UserDefinedFunction} able to represent the function defined here in an ADQL tree.</p> * * <p><i>Note: * If this {@link FunctionDef} defines an ordinary ADQL function - and not a user defined function - no class should be set here. * <br/> * However, if it defines a user defined function, there is no obligation to set a UDF class. It is useful only if the translation * from ADQL (to SQL for instance) of the function has a different signature (e.g. a different name) in the target language (e.g. SQL). * If the signature is the same, there is no need to set a UDF class ; a {@link DefaultUDF} will be created on the fly by the library * when needed if it turns out that no UDF class is set. * </i></p> * * @param udfClass Class to use to represent in an ADQL tree the User Defined Function defined in this {@link FunctionDef}. * * @throws IllegalArgumentException If the given class does not provide any constructor with a single parameter of type ADQLOperand[]. */ public final < T extends UserDefinedFunction > void setUDFClass(final Class<T> udfClass) throws IllegalArgumentException{ try{ // Ensure that, if a class is provided, it contains a constructor with a single parameter of type ADQLOperand[]: if (udfClass != null){ Constructor<T> constructor = udfClass.getConstructor(ADQLOperand[].class); if (constructor == null) throw new IllegalArgumentException("The given class (" + udfClass.getName() + ") does not provide any constructor with a single parameter of type ADQLOperand[]!"); } // Set the new UDF class: this.udfClass = udfClass; }catch(SecurityException e){ throw new IllegalArgumentException("A security problem occurred while trying to get constructor from the class " + udfClass.getName() + ": " + e.getMessage()); }catch(NoSuchMethodException e){ throw new IllegalArgumentException("The given class (" + udfClass.getName() + ") does not provide any constructor with a single parameter of type ADQLOperand[]!"); } } /** * <p>Let parsing the serialized form of a function definition.</p> * * <p>The expected syntax is <i>(items between brackets are optional)</i>:</p> * <pre>{fctName}([{param1Name} {param1Type}, ...])[ -> {returnType}]</pre> * * <p> * <em>This function must be able to parse functions as defined by TAPRegExt (section 2.3).</em> * Hence, allowed parameter types and return types should be one of the types listed by the UPLOAD section of the TAP recommendation document. * These types are listed in the enumeration object {@link DBDatatype}. * However, other types should be accepted like the common database types...but it should be better to not rely on that * since the conversion of those types to TAP types should not be exactly what is expected (because depending from the used DBMS); * a default interpretation of database types is nevertheless processed by this parser. * </p> * * @param strDefinition Serialized function definition to parse. * * @return The object representation of the given string definition. * * @throws ParseException If the given string has a wrong syntax or uses unknown types. */ public static FunctionDef parse(final String strDefinition) throws ParseException{ if (strDefinition == null) throw new NullPointerException("Missing string definition to build a FunctionDef!"); // Check the global syntax of the function definition: Matcher m = fctPattern.matcher(strDefinition); if (m.matches()){ // Get the function name: String fctName = m.group(1); // Parse and get the return type: DBType returnType = null; if (m.group(3) != null){ returnType = parseType(m.group(5), (m.group(7) == null) ? DBType.NO_LENGTH : Integer.parseInt(m.group(7))); if (returnType == null){ returnType = new DBType(DBDatatype.UNKNOWN); returnType.type.setCustomType(m.group(4)); } } // Get the parameters, if any: String paramsList = m.group(2); FunctionParam[] params = null; if (paramsList != null && paramsList.trim().length() > 0){ // Check the syntax of the parameters' list: if (!paramsList.matches(fctParamsRegExp)) throw new ParseException("Wrong parameters syntax! Expected syntax: \"(<regular_identifier> <type_name> (, <regular_identifier> <type_name>)*)\", where <regular_identifier>=\"[a-zA-Z]+[a-zA-Z0-9_]*\", <type_name> should be one of the types described in the UPLOAD section of the TAP documentation. Examples of good syntax: \"()\", \"(param INTEGER)\", \"(param1 INTEGER, param2 DOUBLE)\""); // Split all the parameter definitions: String[] paramsSplit = paramsList.split(","); params = new FunctionParam[paramsSplit.length]; DBType paramType; // For each parameter definition... for(int i = 0; i < params.length; i++){ m = paramPattern.matcher(paramsSplit[i]); if (m.matches()){ // ...parse and get the parameter type: paramType = parseType(m.group(2), (m.group(4) == null) ? DBType.NO_LENGTH : Integer.parseInt(m.group(4))); // ...build the parameter definition object: if (paramType == null){ paramType = new DBType(DBDatatype.UNKNOWN); paramType.type.setCustomType(m.group(2) + ((m.group(3) == null) ? "" : m.group(3))); } params[i] = new FunctionParam(m.group(1), paramType); }else // note: should never happen because we have already check the syntax of the whole parameters list before parsing each individual parameter. throw new ParseException("Wrong syntax for the " + (i + 1) + "-th parameter: \"" + paramsSplit[i].trim() + "\"! Expected syntax: \"(<regular_identifier> <type_name> (, <regular_identifier> <type_name>)*)\", where <regular_identifier>=\"[a-zA-Z]+[a-zA-Z0-9_]*\", <type_name> should be one of the types described in the UPLOAD section of the TAP documentation. Examples of good syntax: \"()\", \"(param INTEGER)\", \"(param1 INTEGER, param2 DOUBLE)\""); } } // Build the function definition object: return new FunctionDef(fctName, returnType, params); }else throw new ParseException("Wrong function definition syntax! Expected syntax: \"<regular_identifier>(<parameters>?) <return_type>?\", where <regular_identifier>=\"[a-zA-Z]+[a-zA-Z0-9_]*\", <return_type>=\" -> <type_name>\", <parameters>=\"(<regular_identifier> <type_name> (, <regular_identifier> <type_name>)*)\", <type_name> should be one of the types described in the UPLOAD section of the TAP documentation. Examples of good syntax: \"foo()\", \"foo() -> VARCHAR\", \"foo(param INTEGER)\", \"foo(param1 INTEGER, param2 DOUBLE) -> DOUBLE\""); } /** * Parse the given string representation of a datatype. * * @param datatype String representation of a datatype. * <i>Note: This string must not contain the length parameter or any other parameter. * These latter should have been separated from the datatype before calling this function. * It can however contain space(s) in first, last or intern position.</i> * @param length Length of this datatype. * <i>Note: This length will be used only for binary (BINARY and VARBINARY) * and character (CHAR and VARCHAR) types.</i> * * @return The object representation of the specified datatype * or NULL if the specified datatype can not be resolved. */ private static DBType parseType(String datatype, int length){ if (datatype == null) return null; // Remove leading and trailing spaces and replace each inner serie of spaces by just one space: datatype = datatype.trim().replaceAll(" +", " "); try{ // Try to find a corresponding DBType item: DBDatatype dbDatatype = DBDatatype.valueOf(datatype.toUpperCase()); // If there's a match, build the type object representation: length = (length <= 0) ? DBType.NO_LENGTH : length; switch(dbDatatype){ case CHAR: case VARCHAR: case BINARY: case VARBINARY: return new DBType(dbDatatype, length); default: return new DBType(dbDatatype); } }catch(IllegalArgumentException iae){ // If there's no corresponding DBType item, try to find a match among the most used DB types: datatype = datatype.toLowerCase(); if (datatype.equals("bool") || datatype.equals("boolean") || datatype.equals("short") || datatype.equals("int2") || datatype.equals("smallserial") || datatype.equals("serial2")) return new DBType(DBDatatype.SMALLINT); else if (datatype.equals("int") || datatype.equals("int4") || datatype.equals("serial") || datatype.equals("serial4")) return new DBType(DBDatatype.INTEGER); else if (datatype.equals("long") || datatype.equals("number") || datatype.equals("int8") || datatype.equals("bigserial") || datatype.equals("bigserial8")) return new DBType(DBDatatype.BIGINT); else if (datatype.equals("float") || datatype.equals("float4")) return new DBType(DBDatatype.REAL); else if (datatype.equals("numeric") || datatype.equals("float8") || datatype.equals("double precision")) return new DBType(DBDatatype.DOUBLE); else if (datatype.equals("bit") || datatype.equals("byte") || datatype.equals("raw")) return new DBType(DBDatatype.BINARY, length); else if (datatype.equals("unsignedByte") || datatype.equals("bit varying") || datatype.equals("varbit")) return new DBType(DBDatatype.VARBINARY, length); else if (datatype.equals("character")) return new DBType(DBDatatype.CHAR, length); else if (datatype.equals("string") || datatype.equals("varchar2") || datatype.equals("character varying")) return new DBType(DBDatatype.VARCHAR, length); else if (datatype.equals("bytea")) return new DBType(DBDatatype.BLOB); else if (datatype.equals("text")) return new DBType(DBDatatype.CLOB); else if (datatype.equals("date") || datatype.equals("time") || datatype.equals("timetz") || datatype.equals("timestamptz")) return new DBType(DBDatatype.TIMESTAMP); else if (datatype.equals("position")) return new DBType(DBDatatype.POINT); else if (datatype.equals("polygon") || datatype.equals("box") || datatype.equals("circle")) return new DBType(DBDatatype.REGION); else return null; } } @Override public String toString(){ return serializedForm; } @Override public int compareTo(final FunctionDef def){ return compareForm.compareTo(def.compareForm); } /** * <p>Compare this function definition with the given ADQL function item.</p> * * <p> * The comparison is done only on the function name and on rough type of the parameters. * "Rough type" means here that just the kind of type is tested: numeric, string or geometry. * Anyway, the return type is never tested by this function, since such information is usually * not part of a function signature. * </p> * * <p>The notions of "greater" and "less" are defined here according to the three following test steps:</p> * <ol> * <li><b>Name test:</b> if the name of both function are equals, next steps are evaluated, otherwise the standard string comparison (case insensitive) result is returned.</li> * <li><b>Parameters test:</b> parameters are compared individually. Each time parameters (at the same position in both functions) are equals the next parameter can be tested, * and so on until two parameters are different or the end of the parameters' list is reached. * Just the kind of type is used for parameter comparison. Each kind of type is tested in the following order: numeric, string and geometry. * When a kind of type is not equal for both parameters, the function exits with the appropriate value * (1 if the parameter of this function definition is of the kind of type, -1 otherwise).</li> * <li><b>Number of parameters test:</b> in the case where this function definition has N parameters and the given ADQL function has M parameters, * and that the L (= min(N,M)) first parameters have the same type in both functions, the value returns by this function * will be N-M. Thus, if this function definition has more parameters than the given function, a positive value will be * returned. Otherwise a negative value will be returned, or 0 if the number of parameters is the same.</li> * </ol> * * <p><i><b>Note:</b> * If one of the tested types (i.e. parameters types) is unknown, the match should return 0 (i.e. equality). * The notion of "unknown" is different in function of the tested item. A {@link DBType} is unknown if its function * {@link DBType#isUnknown()} returns <code>true</code> ; thus, its other functions such as {@link DBType#isNumeric()} will * return <code>false</code>. On the contrary, an {@link ADQLOperand} does not have any isUnknown() * function. However, when the type of a such is unknown, all its functions isNumeric(), isString() and isGeometry() return * <code>true</code>. * </i></p> * * @param fct ADQL function item to compare with this function definition. * * @return A positive value if this function definition is "greater" than the given {@link ADQLFunction}, * 0 if they are perfectly matching or one of the tested types (i.e. parameters types) is unknown, * or a negative value if this function definition is "less" than the given {@link ADQLFunction}. */ public int compareTo(final ADQLFunction fct){ if (fct == null) throw new NullPointerException("Missing ADQL function with which comparing this function definition!"); // Names comparison: int comp = name.compareToIgnoreCase(fct.getName()); // If equals, compare the parameters' type: if (comp == 0){ for(int i = 0; comp == 0 && i < nbParams && i < fct.getNbParameters(); i++){ // if one of the types is unknown, the comparison should return true: if (params[i].type.isUnknown() || (fct.getParameter(i).isNumeric() && fct.getParameter(i).isString() && fct.getParameter(i).isGeometry())) comp = 0; // otherwise, just compare each kind of type for an exact match: else if (params[i].type.isNumeric() == fct.getParameter(i).isNumeric()){ if (params[i].type.isString() == fct.getParameter(i).isString()){ if (params[i].type.isGeometry() == fct.getParameter(i).isGeometry()) comp = 0; else comp = params[i].type.isGeometry() ? 1 : -1; }else comp = params[i].type.isString() ? 1 : -1; }else comp = params[i].type.isNumeric() ? 1 : -1; } // If the first min(N,M) parameters are of the same type, do the last comparison on the number of parameters: if (comp == 0 && nbParams != fct.getNbParameters()) comp = nbParams - fct.getNbParameters(); } return comp; } }