/**
* VMware Continuent Tungsten Replicator
* Copyright (C) 2015 VMware, Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial developer(s): Gilles Rayrat
* Contributor(s):
*/
package com.continuent.tungsten.common.utils;
import java.math.BigInteger;
import java.security.SecureRandom;
import java.util.regex.Pattern;
import junit.framework.TestCase;
/**
* Benchmark of text search to compare full text and regular expression search.
* This is not a proper unit test, it just displays results
*
* @author <a href="mailto:gilles.rayrat@continuent.com">Gilles Rayrat</a>
* @version 1.0
*/
public class FullTextVsRegexTextSearchBenchmark extends TestCase
{
/**
* Test are run 10 times. This puts in evidence weird runs such as where
* some other tasks are eating CPU cycles
*/
private static final int NUMBER_OF_RUNS = 5;
/**
* This is the number of times the search should be run. 1 million simple
* searches take a few seconds
*/
private static final int NUMBER_OF_ITERATIONS = 1000000;
public static final String SELECT = "SELECT";
public static final String SELECT_REGEX = "^SELECT";
public static final String SELECT_CASE_INSENSITIVE_GM_TRICK = "^[sS][eE][lL][eE][cC][tT]";
public static Pattern SELECT_PATTERN_INSENSITIVE = Pattern
.compile(
SELECT_REGEX,
Pattern.CASE_INSENSITIVE);
public static Pattern SELECT_PATTERN_SENSITIVE = Pattern
.compile(SELECT_REGEX);
public static Pattern SELECT_PATTERN_INSENSITIVE_GM_TRICK = Pattern
.compile(SELECT_CASE_INSENSITIVE_GM_TRICK);
/** used to generate random text */
private SecureRandom random = new SecureRandom();
public void testTextSearch()
{
// Simple request
System.out.println("************ " + NUMBER_OF_RUNS
+ " runs with SIMPLE SELECT ***************");
for (int i = 0; i < NUMBER_OF_RUNS; i++)
{
runTextSearchTestWith(SELECT + " * FROM mytable");
}
// With a random string of ~200 characters. We keep the select keyword
// so that the string is always matched
System.out.println("************ " + NUMBER_OF_RUNS
+ " runs with MEDIUM STRING ***************");
for (int i = 0; i < NUMBER_OF_RUNS; i++)
{
String s = new BigInteger(1024, random).toString(32);
runTextSearchTestWith(SELECT + s);
}
// With a random string of ~2000 characters. We keep the select keyword
// so that the string is always matched
System.out.println("************ " + NUMBER_OF_RUNS
+ " runs with LARGE STRING ***************");
for (int i = 0; i < NUMBER_OF_RUNS; i++)
{
String s = new BigInteger(1024 * 10, random).toString(32);
runTextSearchTestWith(SELECT + s);
}
}
public void runTextSearchTestWith(String sql)
{
// comment / uncomment the tests you want to run here:
long before = System.currentTimeMillis();
for (int i = 0; i < NUMBER_OF_ITERATIONS; i++)
{
analyzeRequestFullTextCaseSensitive(sql);
}
System.out.println(System.currentTimeMillis() - before
+ "\t ms for full text case insensitive search");
// before = System.currentTimeMillis();
// for (int i = 0; i < NUMBER_OF_ITERATIONS; i++)
// {
// analyzeRequestFullTextWithToUpper(sql);
// }
// System.out.println(System.currentTimeMillis() - before
// + "\t ms for full text case sensitive/toUpper search");
before = System.currentTimeMillis();
for (int i = 0; i < NUMBER_OF_ITERATIONS; i++)
{
analyzeRequestFullTextWithToUpperOfRelevantPart(sql);
}
System.out.println(System.currentTimeMillis() - before
+ "\t ms for full text case sensitive/toUpper search");
before = System.currentTimeMillis();
for (int i = 0; i < NUMBER_OF_ITERATIONS; i++)
{
analyzeRequestFullTextWithTrim(sql);
}
System.out.println(System.currentTimeMillis() - before
+ "\t ms for full text with trim search");
before = System.currentTimeMillis();
for (int i = 0; i < NUMBER_OF_ITERATIONS; i++)
{
analyzeRequestFullTextWithRegionMatches(sql);
}
System.out.println(System.currentTimeMillis() - before
+ "\t ms for full text with regionMatches search");
before = System.currentTimeMillis();
for (int i = 0; i < NUMBER_OF_ITERATIONS; i++)
{
analyzeRequestFullTextCaseInsensitive(sql);
}
System.out.println(System.currentTimeMillis() - before
+ "\t ms for full text case sensitive/double compare search");
before = System.currentTimeMillis();
for (int i = 0; i < NUMBER_OF_ITERATIONS; i++)
{
analyzeRequestFullTextWithAFewPatterns(sql);
}
System.out.println(System.currentTimeMillis() - before
+ "\t ms for full text case sensitive/a few compare search");
before = System.currentTimeMillis();
for (int i = 0; i < NUMBER_OF_ITERATIONS; i++)
{
analyzeRequestFullTextWithLotsOfPatterns(sql);
}
System.out.println(System.currentTimeMillis() - before
+ "\t ms for full text case sensitive/a lot compare search");
before = System.currentTimeMillis();
for (int i = 0; i < NUMBER_OF_ITERATIONS; i++)
{
analyzeRequestRegexCaseSensitive(sql);
}
System.out.println(System.currentTimeMillis() - before
+ "\t ms for regex case sensitive search");
before = System.currentTimeMillis();
for (int i = 0; i < NUMBER_OF_ITERATIONS; i++)
{
analyzeRequestRegexCaseInsensitive(sql);
}
System.out.println(System.currentTimeMillis() - before
+ "\t ms for regex case insensitive search");
before = System.currentTimeMillis();
for (int i = 0; i < NUMBER_OF_ITERATIONS; i++)
{
analyzeRequestRegexCaseInsensitiveGMtrick(sql);
}
System.out.println(System.currentTimeMillis() - before
+ "\t ms for regex case insensitive/GM trick search");
System.out.println();
}
public boolean analyzeRequestFullTextCaseSensitive(String request)
{
if (request.startsWith(SELECT))
{
return true;
}
return false;
}
public boolean analyzeRequestFullTextWithToUpper(String request)
{
String requestUpperCase = request.toUpperCase();
if (requestUpperCase.startsWith(SELECT))
{
return true;
}
return false;
}
public boolean analyzeRequestFullTextWithToUpperOf15Chars(String request)
{
String requestPrefixUpperCase = request.substring(0,
Math.min(request.length(), 15)).toUpperCase();
if (requestPrefixUpperCase.startsWith(SELECT))
{
return true;
}
return false;
}
public boolean analyzeRequestFullTextWithToUpperOfRelevantPart(
String request)
{
String requestPrefixUpperCase = request.substring(0,
Math.min(request.length(), SELECT.length())).toUpperCase();
if (requestPrefixUpperCase.startsWith(SELECT))
{
return true;
}
return false;
}
public boolean analyzeRequestFullTextWithTrim(String request)
{
String requestUpperCase = request.trim();
if (requestUpperCase.startsWith(SELECT))
{
return true;
}
return false;
}
public boolean analyzeRequestFullTextWithRegionMatches(String request)
{
String requestUpperCase = request.trim();
if (requestUpperCase.regionMatches(true, 0, SELECT, 0, SELECT.length()))
{
return true;
}
return false;
}
public boolean analyzeRequestFullTextCaseInsensitive(String request)
{
if (request.startsWith(SELECT) || request.startsWith(SELECT))
{
return true;
}
return false;
}
public boolean analyzeRequestFullTextWithAFewPatterns(String request)
{
if (request.startsWith("APDFIA") || request.startsWith("EFNEPW")
|| request.startsWith("VFSPUC") || request.startsWith(SELECT))
{
return true;
}
return false;
}
public boolean analyzeRequestFullTextWithLotsOfPatterns(String request)
{
if (request.startsWith("select") || request.startsWith("Select")
|| request.startsWith("sElect") || request.startsWith("seLect")
|| request.startsWith("selEct") || request.startsWith("seleCt")
|| request.startsWith("selecT") || request.startsWith("SElect")
|| request.startsWith("SeLect") || request.startsWith("SelEct")
|| request.startsWith("SeleCt") || request.startsWith("SelecT")
|| request.startsWith("SElect") || request.startsWith("SElEct")
|| request.startsWith("SEleCt") || request.startsWith("SElecT")
|| request.startsWith("SELect") || request.startsWith("SELeCt")
|| request.startsWith("SELecT") || request.startsWith("SELEct")
|| request.startsWith("SELEcT") || request.startsWith("SELECt")
|| request.startsWith("SeLECT") || request.startsWith("SElECT")
|| request.startsWith("SELeCT") || request.startsWith("SELEcT")
|| request.startsWith("SELECt") || request.startsWith("SelECT")
|| request.startsWith("SeLeCT") || request.startsWith("SeLEcT")
|| request.startsWith("SeLECt") || request.startsWith("SelECT")
|| request.startsWith("SelEcT") || request.startsWith("SelECt")
|| request.startsWith("SeleCT") || request.startsWith("SeleCt")
|| request.startsWith("SelecT") || request.startsWith("Select")
|| request.startsWith(SELECT))
{
return true;
}
return false;
}
public boolean analyzeRequestRegexCaseSensitive(String request)
{
if (SELECT_PATTERN_SENSITIVE.matcher(request).find())
{
return true;
}
return false;
}
public boolean analyzeRequestRegexCaseInsensitive(String request)
{
if (SELECT_PATTERN_INSENSITIVE.matcher(request).find())
{
return true;
}
return false;
}
public boolean analyzeRequestRegexCaseInsensitiveGMtrick(String request)
{
if (SELECT_PATTERN_INSENSITIVE_GM_TRICK.matcher(request).find())
{
return true;
}
return false;
}
}