/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* JFlex 1.4.1 *
* Copyright (C) 1998-2004 Gerwin Klein <lsf@jflex.de> *
* All rights reserved. *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License. See the file *
* COPYRIGHT for more information. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License along *
* with this program; if not, write to the Free Software Foundation, Inc., *
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
package weka.core.parser.JFlex;
/**
* Stores a regular expression from the rules section of a JFlex specification.
*
* This base class has no content other than its type.
*
* @author Gerwin Klein
* @version JFlex 1.4.1, $Revision: 1.1 $, $Date: 2008/05/09 09:14:10 $
*/
public class RegExp {

  /**
   * The type of the regular expression. This field will be
   * filled with values from class sym.java (generated by cup)
   */
  int type;

  /**
   * Create a new regular expression of the specified type.
   *
   * @param type a value from the cup generated class sym.
   *
   * @see sym
   */
  public RegExp(int type) {
    this.type = type;
  }

  /**
   * Returns a String-representation of this regular expression
   * with the specified indentation.
   *
   * @param tab a String that should contain only space characters and
   *            that is inserted in front of the standard String-representation
   *            of this object.
   * @return <code>tab</code> followed by the result of {@link #toString()}
   */
  public String print(String tab) {
    return tab+toString();
  }

  /**
   * Returns a String-representation of this regular expression
   *
   * @return the text <code>"type = "</code> followed by the numeric type
   */
  public String toString() {
    return "type = "+type;
  }

  /**
   * Find out if this regexp is a char class or equivalent to one.
   *
   * Single characters and (negated) character classes qualify directly.
   * An alternative (BAR) qualifies if both of its operands do, and a
   * macro use qualifies if the definition of the macro does. All other
   * types do not qualify.
   *
   * @param macros for macro expansion
   * @return true if the regexp is equivalent to a char class.
   */
  public boolean isCharClass(Macros macros) {
    RegExp1 unary;
    RegExp2 binary;

    switch (type) {
      case sym.CHAR:
      case sym.CHAR_I:
      case sym.CCLASS:
      case sym.CCLASSNOT:
        return true;

      case sym.BAR:
        binary = (RegExp2) this;
        return binary.r1.isCharClass(macros) && binary.r2.isCharClass(macros);

      case sym.MACROUSE:
        unary = (RegExp1) this;
        // content holds the macro name here; the lookup result is used
        // unchecked, so the macro is assumed to be defined (TODO confirm
        // that callers guarantee this)
        return macros.getDefinition((String) unary.content).isCharClass(macros);

      default:
        return false;
    }
  }

  /**
   * The approximate number of NFA states this expression will need (only
   * works correctly after macro expansion and without negation)
   *
   * @param macros macro table for expansion
   * @return the estimated number of states
   * @throws Error if the type of this expression is not handled below
   */
  public int size(Macros macros) {
    RegExp1 unary;
    RegExp2 binary;
    RegExp content;
    int operandSize;

    switch ( type ) {
      case sym.BAR:
        binary = (RegExp2) this;
        return binary.r1.size(macros) + binary.r2.size(macros) + 2;

      case sym.CONCAT:
        binary = (RegExp2) this;
        return binary.r1.size(macros) + binary.r2.size(macros);

      case sym.STAR:
      case sym.PLUS:
        unary = (RegExp1) this;
        content = (RegExp) unary.content;
        return content.size(macros) + 2;

      case sym.QUESTION:
        unary = (RegExp1) this;
        content = (RegExp) unary.content;
        return content.size(macros);

      case sym.BANG:
        unary = (RegExp1) this;
        content = (RegExp) unary.content;
        // Evaluate the operand only once: asking for its size twice would
        // double the recursive work at every nesting level.
        operandSize = content.size(macros);
        // this is only a very rough estimate (worst case 2^n)
        // exact size too complicated (probably requires construction)
        return operandSize * operandSize;

      case sym.TILDE:
        unary = (RegExp1) this;
        content = (RegExp) unary.content;
        // same rough estimate as for sym.BANG, operand again evaluated once
        operandSize = content.size(macros);
        return operandSize * operandSize * 3;

      case sym.STRING:
      case sym.STRING_I:
        unary = (RegExp1) this;
        // content holds the string itself for these types
        return ((String) unary.content).length()+1;

      case sym.CHAR:
      case sym.CHAR_I:
      case sym.CCLASS:
      case sym.CCLASSNOT:
        return 2;

      case sym.MACROUSE:
        unary = (RegExp1) this;
        // size of a macro use is the size of the macro's definition
        return macros.getDefinition((String) unary.content).size(macros);
    }

    throw new Error("unknown regexp type "+type);
  }
}