/*******************************************************************************
 * Copyright (c) 2010 Robert "Unlogic" Olofsson (unlogic@unlogic.se).
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser Public License v3
 * which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/lgpl-3.0-standalone.html
 ******************************************************************************/
package se.unlogic.standardutils.dao.script;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Splits a MySQL script into individual statements.
 * <p>
 * The script is scanned for string delimiters ({@code '} and {@code "}), line comment
 * delimiters and multi-line comment delimiters. The spans enclosed by those delimiters
 * are turned into regions, and every {@code ;} that does not start inside such a region
 * is treated as a statement terminator.
 * <p>
 * Known limitations of this implementation:
 * <ul>
 * <li>Line comments ({@code --} and {@code #}) are only recognised when they begin a line.</li>
 * <li>Text following the last valid terminator is not returned.</li>
 * <li>Only {@code '} and {@code "} quoting is tracked; backtick quoting is not.</li>
 * <li>A quote preceded by an odd number of backslashes is treated as escaped. This assumes the
 * default MySQL backslash-escape handling, i.e. that {@code NO_BACKSLASH_ESCAPES} is not enabled.</li>
 * </ul>
 */
public class MySQLScriptUtility implements ScriptUtility {

    private static final String MULTI_LINE_COMMENT_START_SYMBOL = "\\/\\*";

    private static final String MULTI_LINE_COMMENT_END_SYMBOL = "\\*\\/";

    private static final String LINE_COMMENT_START_SYMBOL = "^--|^#";

    private static final String LINE_COMMENT_END_SYMBOL = "\\n";

    private static final String STRING_TYPE1_SYMBOL = "\'";

    private static final String STRING_TYPE2_SYMBOL = "\"";

    private static final String STATEMENT_TERMINATOR = ";";

    /**
     * Splits the supplied script into statements.
     * <p>
     * Each returned element runs from the end of the previous terminator (or the start of the
     * script) up to and including its own terminator, so it may contain leading whitespace and
     * comments. Anything after the last valid terminator is omitted.
     *
     * @param script the script to split
     * @return the statements in the order they appear in the script, never {@code null}
     */
    public List<String> getStatements(String script) {

        List<String> statements = new ArrayList<String>();

        // Collect every delimiter that can open or close a string or comment.
        List<Symbol> allSymbols = new ArrayList<Symbol>();
        allSymbols.addAll(this.findSymbols(STRING_TYPE1_SYMBOL, script));
        allSymbols.addAll(this.findSymbols(STRING_TYPE2_SYMBOL, script));
        allSymbols.addAll(this.findSymbols(LINE_COMMENT_START_SYMBOL, script));
        allSymbols.addAll(this.findSymbols(LINE_COMMENT_END_SYMBOL, script));
        allSymbols.addAll(this.findSymbols(MULTI_LINE_COMMENT_START_SYMBOL, script));
        allSymbols.addAll(this.findSymbols(MULTI_LINE_COMMENT_END_SYMBOL, script));

        List<Region> protectedRegions = this.createRegions(this.getValidSymbols(allSymbols));
        List<Symbol> terminators = this.getValidSymbols(this.findSymbols(STATEMENT_TERMINATOR, script), protectedRegions);

        int startIndex = 0;

        for (Symbol terminator : terminators) {

            String statement = script.substring(startIndex, terminator.getOffsets().getEnd());

            // The statement ends exactly at the terminator's end, which is where the next one starts.
            startIndex += statement.length();

            statements.add(statement);
        }

        return statements;
    }

    /**
     * Searches the sequence for symbols.
     * <p>
     * The regular expression is compiled with {@link Pattern#MULTILINE}. When searching for one
     * of the string delimiters, matches that are escaped by a preceding backslash are skipped so
     * that literals such as {@code 'it\'s'} are seen as a single string.
     *
     * @param regex a regular expression used to find the symbol within the sequence; it is also used as the key of the created symbols
     * @param sequence the haystack in which to search for symbols
     * @return the symbols found, in match order
     */
    private List<Symbol> findSymbols(String regex, String sequence) {

        List<Symbol> symbols = new ArrayList<Symbol>();

        boolean quoteSymbol = regex.equals(STRING_TYPE1_SYMBOL) || regex.equals(STRING_TYPE2_SYMBOL);

        Matcher matcher = Pattern.compile(regex, Pattern.MULTILINE).matcher(sequence);

        while (matcher.find()) {

            if (quoteSymbol && isBackslashEscaped(sequence, matcher.start())) {

                // An escaped quote is string content, not a delimiter.
                continue;
            }

            symbols.add(new Symbol(regex, new Offsets(matcher.start(), matcher.end())));
        }

        return symbols;
    }

    /**
     * Checks whether the character at the given index is preceded by an odd number of
     * consecutive backslashes. An even number means the backslashes escape each other.
     *
     * @param sequence the text being examined
     * @param index the index of the character to check
     * @return {@code true} if the character is escaped by a backslash
     */
    private static boolean isBackslashEscaped(String sequence, int index) {

        int backslashes = 0;

        for (int i = index - 1; i >= 0 && sequence.charAt(i) == '\\'; i--) {

            backslashes++;
        }

        return (backslashes & 1) == 1;
    }

    /**
     * Creates regions between the supplied symbols.
     * <p>
     * Each opening delimiter is paired with the next matching closing delimiter. Delimiters that
     * are still open after the last symbol produce a region with a {@code null} end symbol.
     * Closing comment delimiters without a pending opener are ignored.
     *
     * @param symbols the symbols to pair up, in the order they should be processed
     * @return the regions that were created
     */
    private List<Region> createRegions(List<Symbol> symbols) {

        List<Region> regions = new ArrayList<Region>();

        Symbol openType1String = null;
        Symbol openType2String = null;
        Symbol openLineComment = null;
        Symbol openMultiLineComment = null;

        for (Symbol symbol : symbols) {

            if (hasKey(symbol, STRING_TYPE1_SYMBOL)) {

                // The same symbol both opens and closes a string, so toggle.
                if (openType1String == null) {

                    openType1String = symbol;

                } else {

                    regions.add(new Region(openType1String, symbol));
                    openType1String = null;
                }

            } else if (hasKey(symbol, STRING_TYPE2_SYMBOL)) {

                if (openType2String == null) {

                    openType2String = symbol;

                } else {

                    regions.add(new Region(openType2String, symbol));
                    openType2String = null;
                }

            } else if (hasKey(symbol, LINE_COMMENT_START_SYMBOL)) {

                openLineComment = symbol;

            } else if (hasKey(symbol, LINE_COMMENT_END_SYMBOL)) {

                if (openLineComment != null) {

                    regions.add(new Region(openLineComment, symbol));
                    openLineComment = null;
                }

            } else if (hasKey(symbol, MULTI_LINE_COMMENT_START_SYMBOL)) {

                openMultiLineComment = symbol;

            } else if (hasKey(symbol, MULTI_LINE_COMMENT_END_SYMBOL)) {

                if (openMultiLineComment != null) {

                    regions.add(new Region(openMultiLineComment, symbol));
                    openMultiLineComment = null;
                }
            }
        }

        // Anything still open gets a region without an end symbol.
        if (openType1String != null) {

            regions.add(new Region(openType1String, null));
        }

        if (openType2String != null) {

            regions.add(new Region(openType2String, null));
        }

        if (openLineComment != null) {

            regions.add(new Region(openLineComment, null));
        }

        if (openMultiLineComment != null) {

            regions.add(new Region(openMultiLineComment, null));
        }

        return regions;
    }

    /**
     * Returns valid symbols from a collection of symbols.
     * <p>
     * The supplied list is sorted in place using {@code Symbol.SymbolStartComparator} and then
     * walked once. Outside of any string or comment every symbol is kept. After an opening
     * delimiter, everything is discarded until the matching closing delimiter, which is kept.
     * A closing delimiter must begin at or after the end of its opening delimiter, so the text
     * {@code /*}{@code /} opens a comment without closing it.
     *
     * @param symbols the symbols to filter; this list is sorted as a side effect
     * @return the symbols that are not enclosed by a string or comment
     */
    private List<Symbol> getValidSymbols(List<Symbol> symbols) {

        List<Symbol> validSymbols = new ArrayList<Symbol>();

        Collections.sort(symbols, new Symbol.SymbolStartComparator());

        // Key of the delimiter that closes the currently open string/comment, or null when outside of one.
        String pendingCloser = null;

        // End offset of the delimiter that opened the current string/comment.
        int openerEnd = 0;

        for (Symbol symbol : symbols) {

            if (pendingCloser == null) {

                validSymbols.add(symbol);

                pendingCloser = closerFor(symbol);

                if (pendingCloser != null) {

                    openerEnd = symbol.getOffsets().getEnd();
                }

            } else if (hasKey(symbol, pendingCloser) && symbol.getOffsets().getStart() >= openerEnd) {

                validSymbols.add(symbol);
                pendingCloser = null;
            }
        }

        return validSymbols;
    }

    /**
     * Returns the key of the delimiter that closes what the supplied symbol opens.
     *
     * @param symbol the symbol to inspect
     * @return the key of the closing delimiter, or {@code null} if the symbol does not open a string or comment
     */
    private static String closerFor(Symbol symbol) {

        if (hasKey(symbol, STRING_TYPE1_SYMBOL)) {

            return STRING_TYPE1_SYMBOL;
        }

        if (hasKey(symbol, STRING_TYPE2_SYMBOL)) {

            return STRING_TYPE2_SYMBOL;
        }

        if (hasKey(symbol, LINE_COMMENT_START_SYMBOL)) {

            return LINE_COMMENT_END_SYMBOL;
        }

        if (hasKey(symbol, MULTI_LINE_COMMENT_START_SYMBOL)) {

            return MULTI_LINE_COMMENT_END_SYMBOL;
        }

        return null;
    }

    /**
     * Checks whether the supplied symbol was created for the given key.
     *
     * @param symbol the symbol to inspect
     * @param key the key to compare against
     * @return {@code true} if the key of the symbol equals the given key
     */
    private static boolean hasKey(Symbol symbol, String key) {

        return symbol.getKey().equals(key);
    }

    /**
     * Returns valid symbols from a set of symbols and regions.
     *
     * @param symbols symbols to validate against the supplied regions
     * @param invalidRegions invalid regions; symbols starting within these regions are considered invalid
     * @return the symbols that do not start within any of the invalid regions, in their original order
     */
    private List<Symbol> getValidSymbols(List<Symbol> symbols, List<Region> invalidRegions) {

        List<Symbol> validSymbols = new ArrayList<Symbol>();

        for (Symbol symbol : symbols) {

            if (!startsWithinAny(symbol, invalidRegions)) {

                validSymbols.add(symbol);
            }
        }

        return validSymbols;
    }

    /**
     * Checks whether the supplied symbol starts within any of the given regions. A region
     * without an end is treated as extending indefinitely.
     *
     * @param symbol the symbol to check
     * @param regions the regions to check against
     * @return {@code true} if the start of the symbol lies within at least one region
     */
    private boolean startsWithinAny(Symbol symbol, List<Region> regions) {

        for (Region region : regions) {

            if (symbol.getOffsets().getStart() >= region.getStart() && (region.getEnd() == null || symbol.getOffsets().getStart() < region.getEnd())) {

                return true;
            }
        }

        return false;
    }
}