/* * Copyright (C) 2012-2015 DataStax Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.datastax.driver.core; import java.text.ParseException; import java.text.ParsePosition; import java.text.SimpleDateFormat; import java.util.Date; import java.util.TimeZone; import java.util.concurrent.TimeUnit; /** * Simple utility class used to help parsing CQL values (mainly UDT and collection ones). */ public abstract class ParseUtils { /** * Valid ISO-8601 patterns for CQL timestamp literals. */ private static final String[] iso8601Patterns = new String[]{ "yyyy-MM-dd HH:mm", "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH:mmZ", "yyyy-MM-dd HH:mm:ssZ", "yyyy-MM-dd HH:mm:ss.SSS", "yyyy-MM-dd HH:mm:ss.SSSZ", "yyyy-MM-dd'T'HH:mm", "yyyy-MM-dd'T'HH:mmZ", "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd'T'HH:mm:ssZ", "yyyy-MM-dd'T'HH:mm:ss.SSS", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "yyyy-MM-dd", "yyyy-MM-ddZ" }; /** * Returns the index of the first character in toParse from idx that is not a "space". * * @param toParse the string to skip space on. * @param idx the index to start skipping space from. * @return the index of the first character in toParse from idx that is not a "space. */ public static int skipSpaces(String toParse, int idx) { while (isBlank(toParse.charAt(idx)) && idx < toParse.length()) ++idx; return idx; } /** * Assuming that idx points to the beginning of a CQL value in toParse, returns the * index of the first character after this value. * * @param toParse the string to skip a value form. * @param idx the index to start parsing a value from. * @return the index ending the CQL value starting at {@code idx}. * @throws IllegalArgumentException if idx doesn't point to the start of a valid CQL * value. */ public static int skipCQLValue(String toParse, int idx) { if (idx >= toParse.length()) throw new IllegalArgumentException(); if (isBlank(toParse.charAt(idx))) throw new IllegalArgumentException(); int cbrackets = 0; int sbrackets = 0; int parens = 0; boolean inString = false; do { char c = toParse.charAt(idx); if (inString) { if (c == '\'') { if (idx + 1 < toParse.length() && toParse.charAt(idx + 1) == '\'') { ++idx; // this is an escaped quote, skip it } else { inString = false; if (cbrackets == 0 && sbrackets == 0 && parens == 0) return idx + 1; } } // Skip any other character } else if (c == '\'') { inString = true; } else if (c == '{') { ++cbrackets; } else if (c == '[') { ++sbrackets; } else if (c == '(') { ++parens; } else if (c == '}') { if (cbrackets == 0) return idx; --cbrackets; if (cbrackets == 0 && sbrackets == 0 && parens == 0) return idx + 1; } else if (c == ']') { if (sbrackets == 0) return idx; --sbrackets; if (cbrackets == 0 && sbrackets == 0 && parens == 0) return idx + 1; } else if (c == ')') { if (parens == 0) return idx; --parens; if (cbrackets == 0 && sbrackets == 0 && parens == 0) return idx + 1; } else if (isBlank(c) || !isIdentifierChar(c)) { if (cbrackets == 0 && sbrackets == 0 && parens == 0) return idx; } } while (++idx < toParse.length()); if (inString || cbrackets != 0 || sbrackets != 0 || parens != 0) throw new IllegalArgumentException(); return idx; } /** * Assuming that idx points to the beginning of a CQL identifier in toParse, returns the * index of the first character after this identifier. * * @param toParse the string to skip an identifier from. * @param idx the index to start parsing an identifier from. * @return the index ending the CQL identifier starting at {@code idx}. * @throws IllegalArgumentException if idx doesn't point to the start of a valid CQL * identifier. */ public static int skipCQLId(String toParse, int idx) { if (idx >= toParse.length()) throw new IllegalArgumentException(); char c = toParse.charAt(idx); if (isIdentifierChar(c)) { while (idx < toParse.length() && isIdentifierChar(toParse.charAt(idx))) idx++; return idx; } if (c != '"') throw new IllegalArgumentException(); while (++idx < toParse.length()) { c = toParse.charAt(idx); if (c != '"') continue; if (idx + 1 < toParse.length() && toParse.charAt(idx + 1) == '\"') ++idx; // this is an escaped double quote, skip it else return idx + 1; } throw new IllegalArgumentException(); } /** * Return {@code true} if the given character * is allowed in a CQL identifier, that is, * if it is in the range: {@code [0..9a..zA..Z-+._&]}. * * @param c The character to inspect. * @return {@code true} if the given character * is allowed in a CQL identifier, {@code false} otherwise. */ public static boolean isIdentifierChar(int c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '-' || c == '+' || c == '.' || c == '_' || c == '&'; } /** * Return {@code true} if the given character * is a valid whitespace character in CQL, that is, * if it is a regular space, a tabulation sign, * or a new line sign. * * @param c The character to inspect. * @return {@code true} if the given character * is a valid whitespace character, {@code false} otherwise. */ public static boolean isBlank(int c) { return c == ' ' || c == '\t' || c == '\n'; } /** * Check whether the given string corresponds * to a valid CQL long literal. * Long literals are composed solely by digits, * but can have an optional leading minus sign. * * @param str The string to inspect. * @return {@code true} if the given string corresponds * to a valid CQL integer literal, {@code false} otherwise. */ public static boolean isLongLiteral(String str) { if (str == null || str.isEmpty()) return false; char[] chars = str.toCharArray(); for (int i = 0; i < chars.length; i++) { char c = chars[i]; if ((c < '0' && (i != 0 || c != '-')) || c > '9') return false; } return true; } /** * Return {@code true} if the given string is surrounded * by single quotes, and {@code false} otherwise. * * @param value The string to inspect. * @return {@code true} if the given string is surrounded * by single quotes, and {@code false} otherwise. */ public static boolean isQuoted(String value) { return isQuoted(value, '\''); } /** * Quote the given string; single quotes are escaped. * If the given string is null, this method returns a quoted empty string ({@code ''}). * * @param value The value to quote. * @return The quoted string. */ public static String quote(String value) { return quote(value, '\''); } /** * Unquote the given string if it is quoted; single quotes are unescaped. * If the given string is not quoted, it is returned without any modification. * * @param value The string to unquote. * @return The unquoted string. */ public static String unquote(String value) { return unquote(value, '\''); } /** * Return {@code true} if the given string is surrounded * by double quotes, and {@code false} otherwise. * * @param value The string to inspect. * @return {@code true} if the given string is surrounded * by double quotes, and {@code false} otherwise. */ public static boolean isDoubleQuoted(String value) { return isQuoted(value, '\"'); } /** * Double quote the given string; double quotes are escaped. * If the given string is null, this method returns a quoted empty string ({@code ""}). * * @param value The value to double quote. * @return The double quoted string. */ public static String doubleQuote(String value) { return quote(value, '"'); } /** * Unquote the given string if it is double quoted; double quotes are unescaped. * If the given string is not double quoted, it is returned without any modification. * * @param value The string to un-double quote. * @return The un-double quoted string. */ public static String unDoubleQuote(String value) { return unquote(value, '"'); } /** * Parse the given string as a date, using one of the accepted ISO-8601 date patterns. * <p/> * This method is adapted from Apache Commons {@code DateUtils.parseStrictly()} method (that is used Cassandra side * to parse date strings).. * * @throws ParseException If the given string is not a valid ISO-8601 date. * @see <a href="https://cassandra.apache.org/doc/cql3/CQL-2.2.html#usingtimestamps">'Working with timestamps' section of CQL specification</a> */ public static Date parseDate(String str) throws ParseException { SimpleDateFormat parser = new SimpleDateFormat(); parser.setLenient(false); // set a default timezone for patterns that do not provide one parser.setTimeZone(TimeZone.getTimeZone("UTC")); // Java 6 has very limited support for ISO-8601 time zone formats, // so we need to transform the string first // so that accepted patterns are correctly handled, // such as Z for UTC, or "+00:00" instead of "+0000". // Note: we cannot use the X letter in the pattern // because it has been introduced in Java 7. str = str.replaceAll("(\\+|\\-)(\\d\\d):(\\d\\d)$", "$1$2$3"); str = str.replaceAll("Z$", "+0000"); ParsePosition pos = new ParsePosition(0); for (String parsePattern : iso8601Patterns) { parser.applyPattern(parsePattern); pos.setIndex(0); Date date = parser.parse(str, pos); if (date != null && pos.getIndex() == str.length()) { return date; } } throw new ParseException("Unable to parse the date: " + str, -1); } /** * Parse the given string as a date, using the supplied date pattern. * <p/> * This method is adapted from Apache Commons {@code DateUtils.parseStrictly()} method (that is used Cassandra side * to parse date strings).. * * @throws ParseException If the given string cannot be parsed with the given pattern. * @see <a href="https://cassandra.apache.org/doc/cql3/CQL-2.2.html#usingtimestamps">'Working with timestamps' section of CQL specification</a> */ public static Date parseDate(String str, String pattern) throws ParseException { SimpleDateFormat parser = new SimpleDateFormat(); parser.setLenient(false); // set a default timezone for patterns that do not provide one parser.setTimeZone(TimeZone.getTimeZone("UTC")); // Java 6 has very limited support for ISO-8601 time zone formats, // so we need to transform the string first // so that accepted patterns are correctly handled, // such as Z for UTC, or "+00:00" instead of "+0000". // Note: we cannot use the X letter in the pattern // because it has been introduced in Java 7. str = str.replaceAll("(\\+|\\-)(\\d\\d):(\\d\\d)$", "$1$2$3"); str = str.replaceAll("Z$", "+0000"); ParsePosition pos = new ParsePosition(0); parser.applyPattern(pattern); pos.setIndex(0); Date date = parser.parse(str, pos); if (date != null && pos.getIndex() == str.length()) { return date; } throw new ParseException("Unable to parse the date: " + str, -1); } /** * Parse the given string as a time, using the following time pattern: {@code hh:mm:ss[.fffffffff]}. * <p/> * This method is loosely based on {@code java.sql.Timestamp}. * * @param str The string to parse. * @return A long value representing the number of nanoseconds since midnight. * @throws ParseException if the string cannot be parsed. * @see <a href="https://cassandra.apache.org/doc/cql3/CQL-2.2.html#usingtime">'Working with time' section of CQL specification</a> */ public static long parseTime(String str) throws ParseException { String nanos_s; long hour; long minute; long second; long a_nanos = 0; String formatError = "Timestamp format must be hh:mm:ss[.fffffffff]"; String zeros = "000000000"; if (str == null) throw new IllegalArgumentException(formatError); str = str.trim(); // Parse the time int firstColon = str.indexOf(':'); int secondColon = str.indexOf(':', firstColon + 1); // Convert the time; default missing nanos if (firstColon > 0 && secondColon > 0 && secondColon < str.length() - 1) { int period = str.indexOf('.', secondColon + 1); hour = Integer.parseInt(str.substring(0, firstColon)); if (hour < 0 || hour >= 24) throw new IllegalArgumentException("Hour out of bounds."); minute = Integer.parseInt(str.substring(firstColon + 1, secondColon)); if (minute < 0 || minute >= 60) throw new IllegalArgumentException("Minute out of bounds."); if (period > 0 && period < str.length() - 1) { second = Integer.parseInt(str.substring(secondColon + 1, period)); if (second < 0 || second >= 60) throw new IllegalArgumentException("Second out of bounds."); nanos_s = str.substring(period + 1); if (nanos_s.length() > 9) throw new IllegalArgumentException(formatError); if (!Character.isDigit(nanos_s.charAt(0))) throw new IllegalArgumentException(formatError); nanos_s = nanos_s + zeros.substring(0, 9 - nanos_s.length()); a_nanos = Integer.parseInt(nanos_s); } else if (period > 0) throw new ParseException(formatError, -1); else { second = Integer.parseInt(str.substring(secondColon + 1)); if (second < 0 || second >= 60) throw new ParseException("Second out of bounds.", -1); } } else throw new ParseException(formatError, -1); long rawTime = 0; rawTime += TimeUnit.HOURS.toNanos(hour); rawTime += TimeUnit.MINUTES.toNanos(minute); rawTime += TimeUnit.SECONDS.toNanos(second); rawTime += a_nanos; return rawTime; } /** * Format the given long value as a CQL time literal, using the following time pattern: {@code hh:mm:ss[.fffffffff]}. * * @param value A long value representing the number of nanoseconds since midnight. * @return The formatted value. * @see <a href="https://cassandra.apache.org/doc/cql3/CQL-2.2.html#usingtime">'Working with time' section of CQL specification</a> */ public static String formatTime(long value) { int nano = (int) (value % 1000000000); value -= nano; value /= 1000000000; int seconds = (int) (value % 60); value -= seconds; value /= 60; int minutes = (int) (value % 60); value -= minutes; value /= 60; int hours = (int) (value % 24); value -= hours; value /= 24; assert (value == 0); StringBuilder sb = new StringBuilder(); leftPadZeros(hours, 2, sb); sb.append(":"); leftPadZeros(minutes, 2, sb); sb.append(":"); leftPadZeros(seconds, 2, sb); sb.append("."); leftPadZeros(nano, 9, sb); return sb.toString(); } /** * Return {@code true} if the given string is surrounded * by the quote character given, and {@code false} otherwise. * * @param value The string to inspect. * @return {@code true} if the given string is surrounded * by the quote character, and {@code false} otherwise. */ private static boolean isQuoted(String value, char quoteChar) { return value != null && value.length() > 1 && value.charAt(0) == quoteChar && value.charAt(value.length() - 1) == quoteChar; } /** * @param quoteChar " or ' * @return A quoted empty string. */ private static String emptyQuoted(char quoteChar) { // don't handle non quote characters, this is done so that these are interned and don't create // repeated empty quoted strings. assert quoteChar == '"' || quoteChar == '\''; if (quoteChar == '"') return "\"\""; else return "''"; } /** * Quotes text and escapes any existing quotes in the text. * {@code String.replace()} is a bit too inefficient (see JAVA-67, JAVA-1262). * * @param text The text. * @param quoteChar The character to use as a quote. * @return The text with surrounded in quotes with all existing quotes escaped with (i.e. ' becomes '') */ private static String quote(String text, char quoteChar) { if (text == null || text.isEmpty()) return emptyQuoted(quoteChar); int nbMatch = 0; int start = -1; do { start = text.indexOf(quoteChar, start + 1); if (start != -1) ++nbMatch; } while (start != -1); // no quotes found that need to be escaped, simply surround in quotes and return. if (nbMatch == 0) return quoteChar + text + quoteChar; // 2 for beginning and end quotes. // length for original text // nbMatch for escape characters to add to quotes to be escaped. int newLength = 2 + text.length() + nbMatch; char[] result = new char[newLength]; result[0] = quoteChar; result[newLength - 1] = quoteChar; int newIdx = 1; for (int i = 0; i < text.length(); i++) { char c = text.charAt(i); if (c == quoteChar) { // escape quote with another occurrence. result[newIdx++] = c; result[newIdx++] = c; } else { result[newIdx++] = c; } } return new String(result); } /** * Unquotes text and unescapes non surrounding quotes. * {@code String.replace()} is a bit too inefficient (see JAVA-67, JAVA-1262). * * @param text The text * @param quoteChar The character to use as a quote. * @return The text with surrounding quotes removed and non surrounding quotes unescaped (i.e. '' becomes ') */ private static String unquote(String text, char quoteChar) { if (!isQuoted(text, quoteChar)) return text; if (text.length() == 2) return ""; String search = emptyQuoted(quoteChar); int nbMatch = 0; int start = -1; do { start = text.indexOf(search, start + 2); // ignore the second to last character occurrence, as the last character is a quote. if (start != -1 && start != text.length() - 2) ++nbMatch; } while (start != -1); // no escaped quotes found, simply remove surrounding quotes and return. if (nbMatch == 0) return text.substring(1, text.length() - 1); // length of the new string will be its current length - the number of occurrences. int newLength = text.length() - nbMatch - 2; char[] result = new char[newLength]; int newIdx = 0; // track whenever a quoteChar is encountered and the previous character is not a quoteChar. boolean firstFound = false; for (int i = 1; i < text.length() - 1; i++) { char c = text.charAt(i); if (c == quoteChar) { if (firstFound) { // The previous character was a quoteChar, don't add this to result, this action in // effect removes consecutive quotes. firstFound = false; } else { // found a quoteChar and the previous character was not a quoteChar, include in result. firstFound = true; result[newIdx++] = c; } } else { // non quoteChar encountered, include in result. result[newIdx++] = c; firstFound = false; } } return new String(result); } private static void leftPadZeros(int value, int digits, StringBuilder sb) { sb.append(String.format("%0" + digits + "d", value)); } private ParseUtils() { } }