/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2015 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.core.util;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.commons.collections.BidiMap;
import org.apache.commons.collections.bidimap.DualHashBidiMap;
public class DateDetector {
private static final String LOCALE_en_US = "en_US";
@SuppressWarnings( "serial" )
static final BidiMap DATE_FORMAT_TO_REGEXPS_US = new DualHashBidiMap() {
{
put( "MM-dd-yyyy", "^[0-1]?[0-9]-[0-3]?[0-9]-\\d{4}$" );
put( "dd/MM/yyyy", "^[0-3]?[0-9]/[0-1]?[0-9]/\\d{4}$" );
put( "MM-dd-yy", "^[0-1]?[0-9]-[0-3]?[0-9]-\\d{2}$" );
put( "dd/MM/yy", "^[0-3]?[0-9]/[0-1]?[0-9]/\\d{2}$" );
put( "yyyyMMdd", "^\\d{8}$" );
put( "dd-MM-yy", "^\\d{1,2}-\\d{1,2}-\\d{2}$" );
put( "dd-MM-yyyy", "^\\d{1,2}-\\d{1,2}-\\d{4}$" );
put( "dd.MM.yy", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}$" );
put( "dd.MM.yyyy", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}$" );
put( "MM/dd/yy", "^\\d{1,2}/\\d{1,2}/\\d{2}$" );
put( "MM/dd/yyyy", "^\\d{1,2}/\\d{1,2}/\\d{4}$" );
put( "yyyy-MM-dd", "^\\d{4}-\\d{1,2}-\\d{1,2}$" );
put( "yyyy.MM.dd", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}$" );
put( "yyyy/MM/dd", "^\\d{4}/\\d{1,2}/\\d{1,2}$" );
put( "dd MMM yyyy", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}$" );
put( "dd MMMM yyyy", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}$" );
put( "yyyyMMddHHmm", "^\\d{12}$" );
put( "yyyyMMdd HHmm", "^\\d{8}\\s\\d{4}$" );
put( "dd-MM-yy HH:mm", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}$" );
put( "dd-MM-yyyy HH:mm", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}$" );
put( "dd.MM.yy HH:mm", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}$" );
put( "dd.MM.yyyy HH:mm", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}$" );
put( "MM/dd/yy HH:mm", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}$" );
put( "MM/dd/yyyy HH:mm", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}$" );
put( "yyyy-MM-dd HH:mm", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}$" );
put( "yyyy.MM.dd HH:mm", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}$" );
put( "yyyy/MM/dd HH:mm", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}$" );
put( "dd MMM yyyy HH:mm", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}$" );
put( "dd MMMM yyyy HH:mm", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}$" );
put( "yyyyMMddHHmmss", "^\\d{14}$" );
put( "yyyyMMdd HHmmss", "^\\d{8}\\s\\d{6}$" );
put( "dd-MM-yy HH:mm:ss", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "dd-MM-yyyy HH:mm:ss", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "dd.MM.yy HH:mm:ss", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "dd.MM.yyyy HH:mm:ss", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "MM/dd/yy HH:mm:ss", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "MM/dd/yyyy HH:mm:ss", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "yyyy-MM-dd HH:mm:ss", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "yyyy.MM.dd HH:mm:ss", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "yyyy/MM/dd HH:mm:ss", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "dd MMM yyyy HH:mm:ss", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "dd MMMM yyyy HH:mm:ss", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "dd-MM-yy HH:mm:ss.SSS", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "dd-MM-yyyy HH:mm:ss.SSS", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "dd.MM.yy HH:mm:ss.SSS", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "dd.MM.yyyy HH:mm:ss.SSS", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "MM/dd/yy HH:mm:ss.SSS", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "MM/dd/yyyy HH:mm:ss.SSS", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "yyyy-MM-dd HH:mm:ss.SSS", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "yyyy.MM.dd HH:mm:ss.SSS", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "yyyy/MM/dd HH:mm:ss.SSS", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "dd MMM yyyy HH:mm:ss.SSS", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "dd MMMM yyyy HH:mm:ss.SSS", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
}
};
@SuppressWarnings( "serial" )
static final BidiMap DATE_FORMAT_TO_REGEXPS = new DualHashBidiMap() {
{
put( "MM-dd-yyyy", "^[0-1]?[0-9]-[0-3]?[0-9]-\\d{4}$" );
put( "dd/MM/yyyy", "^[0-3]?[0-9]/[0-1]?[0-9]/\\d{4}$" );
put( "MM-dd-yy", "^[0-1]?[0-9]-[0-3]?[0-9]-\\d{2}$" );
put( "dd/MM/yy", "^[0-3]?[0-9]/[0-1]?[0-9]/\\d{2}$" );
put( "yyyyMMdd", "^\\d{8}$" );
put( "dd-MM-yy", "^\\d{1,2}-\\d{1,2}-\\d{2}$" );
put( "dd-MM-yyyy", "^\\d{1,2}-\\d{1,2}-\\d{4}$" );
put( "dd.MM.yy", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}$" );
put( "dd.MM.yyyy", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}$" );
put( "dd/MM/yy", "^\\d{1,2}/\\d{1,2}/\\d{2}$" );
put( "dd/MM/yyyy", "^\\d{1,2}/\\d{1,2}/\\d{4}$" );
put( "yyyy-MM-dd", "^\\d{4}-\\d{1,2}-\\d{1,2}$" );
put( "yyyy.MM.dd", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}$" );
put( "yyyy/MM/dd", "^\\d{4}/\\d{1,2}/\\d{1,2}$" );
put( "dd MMM yyyy", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}$" );
put( "dd MMMM yyyy", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}$" );
put( "yyyyMMddHHmm", "^\\d{12}$" );
put( "yyyyMMdd HHmm", "^\\d{8}\\s\\d{4}$" );
put( "dd-MM-yy HH:mm", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}$" );
put( "dd-MM-yyyy HH:mm", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}$" );
put( "dd.MM.yy HH:mm", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}$" );
put( "dd.MM.yyyy HH:mm", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}$" );
put( "dd/MM/yy HH:mm", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}$" );
put( "dd/MM/yyyy HH:mm", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}$" );
put( "yyyy-MM-dd HH:mm", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}$" );
put( "yyyy.MM.dd HH:mm", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}$" );
put( "yyyy/MM/dd HH:mm", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}$" );
put( "dd MMM yyyy HH:mm", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}$" );
put( "dd MMMM yyyy HH:mm", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}$" );
put( "yyyyMMddHHmmss", "^\\d{14}$" );
put( "yyyyMMdd HHmmss", "^\\d{8}\\s\\d{6}$" );
put( "dd-MM-yy HH:mm:ss", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "dd-MM-yyyy HH:mm:ss", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "dd.MM.yy HH:mm:ss", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "dd.MM.yyyy HH:mm:ss", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "dd/MM/yy HH:mm:ss", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "dd/MM/yyyy HH:mm:ss", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "yyyy-MM-dd HH:mm:ss", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "yyyy.MM.dd HH:mm:ss", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "yyyy/MM/dd HH:mm:ss", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "dd MMM yyyy HH:mm:ss", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "dd MMMM yyyy HH:mm:ss", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$" );
put( "dd-MM-yy HH:mm:ss.SSS", "^\\d{1,2}-\\d{1,2}-\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "dd-MM-yyyy HH:mm:ss.SSS", "^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "dd.MM.yy HH:mm:ss.SSS", "^\\d{1,2}\\.\\d{1,2}\\.\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "dd.MM.yyyy HH:mm:ss.SSS", "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "dd/MM/yy HH:mm:ss.SSS", "^\\d{1,2}/\\d{1,2}/\\d{2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "dd/MM/yyyy HH:mm:ss.SSS", "^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "yyyy-MM-dd HH:mm:ss.SSS", "^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "yyyy.MM.dd HH:mm:ss.SSS", "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "yyyy/MM/dd HH:mm:ss.SSS", "^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "dd MMM yyyy HH:mm:ss.SSS", "^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
put( "dd MMMM yyyy HH:mm:ss.SSS", "^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$" );
}
};
// util class, hide constructor
private DateDetector() {
};
/**
*
* @param dateFormat - date format for get regexp
* @return regexp for given date format
*/
public static String getRegexpByDateFormat( String dateFormat ) {
return getRegexpByDateFormat( dateFormat, null );
}
/**
*
* @param dateFormat - date format for get regexp by locale
* @return regexp for given date format
*/
public static String getRegexpByDateFormat( String dateFormat, String locale ) {
if ( locale != null && LOCALE_en_US.equalsIgnoreCase( locale ) ) {
return (String) DATE_FORMAT_TO_REGEXPS_US.get( dateFormat );
}
return (String) DATE_FORMAT_TO_REGEXPS.get( dateFormat );
}
/**
*
* @param regex - regexp for parse date format from string
* <br>
* <b>NOTES:</b> if regex could be used for US and EU locale.
* It returns europeans locale. For en_US locale please use
*
* {@link #getDateFormatByRegex( String regex, String locale ) }
*
* @return {@link java.lang.String} string wich represented Date Format
*/
public static String getDateFormatByRegex( String regex ) {
return getDateFormatByRegex( regex, null );
}
/**
*
* @param regex
* - regexp for parse date format from string by locale
* @return {@link java.lang.String} string wich represented Date Format
*/
public static String getDateFormatByRegex( String regex, String locale ) {
if ( locale != null && LOCALE_en_US.equalsIgnoreCase( locale ) ) {
return (String) DATE_FORMAT_TO_REGEXPS_US.getKey( regex );
}
return (String) DATE_FORMAT_TO_REGEXPS.getKey( regex );
}
/**
*
* @param dateString
* date string for parse
* @return {@link java.util.Date} converted from dateString by detected format
* @throws ParseException
* - if we can not detect date format for string or we can not parse date string
*/
public static Date getDateFromString( String dateString ) throws ParseException {
String dateFormat = detectDateFormat( dateString );
if ( dateFormat == null ) {
throw new ParseException( "Unknown date format.", 0 );
}
return getDateFromStringByFormat( dateString, dateFormat );
}
/**
*
* @param dateString
* date string for parse
* @return {@link java.util.Date} converted from dateString by detected format
* @throws ParseException
* - if we can not detect date format for string or we can not parse date string
*/
public static Date getDateFromString( String dateString, String locale ) throws ParseException {
String dateFormat = detectDateFormat( dateString, locale );
if ( dateFormat == null ) {
throw new ParseException( "Unknown date format.", 0 );
}
return getDateFromStringByFormat( dateString, dateFormat );
}
/**
*
* @param dateString
* date string for parse
* @param dateFormat
* format which should be applied for string
* @return {@link java.util.Date} converted from dateString by format
* @throws ParseException
* if we can not parse date string
*/
public static Date getDateFromStringByFormat( String dateString, String dateFormat ) throws ParseException {
if ( dateFormat == null ) {
throw new ParseException( "Unknown date format. Format is null. ", 0 );
}
if ( dateString == null ) {
throw new ParseException( "Unknown date string. Date string is null. ", 0 );
}
SimpleDateFormat simpleDateFormat = new SimpleDateFormat( dateFormat );
simpleDateFormat.setLenient( false ); // Don't automatically convert invalid date.
return simpleDateFormat.parse( dateString );
}
/**
*
* @param dateString
* - date string for detect date format
* @return {@link java.lang.String} string which represented Date Format or null
*
*/
public static String detectDateFormat( String dateString ) {
return detectDateFormat( dateString, null );
}
/**
*
* @param dateString
* - date string for detect date format
* @return {@link java.lang.String} string which represented Date Format or null
*/
public static String detectDateFormat( String dateString, String locale ) {
if ( dateString == null ) {
return null;
}
for ( Object regexp : getDateFormatToRegExps( locale ).values() ) {
if ( dateString.toLowerCase().matches( (String) regexp ) ) {
return (String) getDateFormatToRegExps( locale ).getKey( regexp );
}
}
return null;
}
/**
* Finds a date format that matches the date value given. Will try the desiredKey format before attempting others. The
* first to match is returned.
*
* @param dateString
* the literal value of the date (eg: "01/01/2001")
* @param locale
* the locale in play
* @param desiredKey
* the desired format (should be a valid key to DATE_FORMAT_TO_REGEXPS)
* @return The key to the format that matched or null if none found.
*/
public static String detectDateFormatBiased( String dateString, String locale, String desiredKey ) {
if ( dateString == null ) {
return null;
}
String regex = (String) getDateFormatToRegExps( locale ).get( desiredKey );
if ( regex != null && dateString.toLowerCase().matches( regex ) ) {
return desiredKey;
} else {
return detectDateFormat( dateString, locale );
}
}
public static BidiMap getDateFormatToRegExps( String locale ) {
if ( locale == null || LOCALE_en_US.equalsIgnoreCase( locale ) ) {
return DATE_FORMAT_TO_REGEXPS_US;
} else {
return DATE_FORMAT_TO_REGEXPS;
}
}
/**
*
* @param dateString - string for check
* @param dateFormat - format for check
* @return true if we can parse string by format without exception
*/
public static boolean isValidDate( String dateString, String dateFormat ) {
try {
getDateFromStringByFormat( dateString, dateFormat );
return true;
} catch ( ParseException e ) {
return false;
}
}
/**
* @param dateString - string for check
* @return true if we can parse string without exception
*/
public static boolean isValidDate( String dateString ) {
try {
getDateFromString( dateString );
return true;
} catch ( ParseException e ) {
return false;
}
}
/**
*
* @param dateFormat - format which we will try to apply for string
* @param dateString - string which contains date
* @return true if we found that we know dateFormat and it applied for given string
*/
public static boolean isValidDateFormatToStringDate( String dateFormat, String dateString ) {
String detectedDateFormat = detectDateFormat( dateString );
if ( ( dateFormat != null ) && ( dateFormat.equals( detectedDateFormat ) ) ) {
return true;
}
return false;
}
/**
*
* @param dateFormat - format which we will try to apply for string
* @param dateString - string which contains date
* @param locale - locale for date format
* @return true if we found that we know dateFormat and it applied for given string
*/
public static boolean isValidDateFormatToStringDate( String dateFormat, String dateString, String locale ) {
String detectedDateFormat =
dateFormat != null ? detectDateFormatBiased( dateString, locale, dateFormat ) : detectDateFormat( dateString,
locale );
if ( ( dateFormat != null ) && ( dateFormat.equals( detectedDateFormat ) ) ) {
return true;
}
return false;
}
}