/* * reserved comment block * DO NOT REMOVE OR ALTER! */ /* * Copyright 1999-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.sun.org.apache.regexp.internal; import java.io.Serializable; /** * A class that holds compiled regular expressions. This is exposed mainly * for use by the recompile utility (which helps you produce precompiled * REProgram objects). You should not otherwise need to work directly with * this class. * * @see RE * @see RECompiler * * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> * @version $Id: REProgram.java,v 1.1.2.1 2005/08/01 00:02:54 jeffsuttor Exp $ */ public class REProgram implements Serializable { static final int OPT_HASBACKREFS = 1; char[] instruction; // The compiled regular expression 'program' int lenInstruction; // The amount of the instruction buffer in use char[] prefix; // Prefix string optimization int flags; // Optimization flags (REProgram.OPT_*) int maxParens = -1; /** * Constructs a program object from a character array * @param instruction Character array with RE opcode instructions in it */ public REProgram(char[] instruction) { this(instruction, instruction.length); } /** * Constructs a program object from a character array * @param parens Count of parens in the program * @param instruction Character array with RE opcode instructions in it */ public REProgram(int parens, char[] instruction) { this(instruction, instruction.length); this.maxParens = parens; } /** * Constructs a program object from a character array * @param instruction Character array with RE opcode instructions in it * @param lenInstruction Amount of instruction array in use */ public REProgram(char[] instruction, int lenInstruction) { setInstructions(instruction, lenInstruction); } /** * Returns a copy of the current regular expression program in a character * array that is exactly the right length to hold the program. If there is * no program compiled yet, getInstructions() will return null. * @return A copy of the current compiled RE program */ public char[] getInstructions() { // Ensure program has been compiled! if (lenInstruction != 0) { // Return copy of program char[] ret = new char[lenInstruction]; System.arraycopy(instruction, 0, ret, 0, lenInstruction); return ret; } return null; } /** * Sets a new regular expression program to run. It is this method which * performs any special compile-time search optimizations. Currently only * two optimizations are in place - one which checks for backreferences * (so that they can be lazily allocated) and another which attempts to * find an prefix anchor string so that substantial amounts of input can * potentially be skipped without running the actual program. * @param instruction Program instruction buffer * @param lenInstruction Length of instruction buffer in use */ public void setInstructions(char[] instruction, int lenInstruction) { // Save reference to instruction array this.instruction = instruction; this.lenInstruction = lenInstruction; // Initialize other program-related variables flags = 0; prefix = null; // Try various compile-time optimizations if there's a program if (instruction != null && lenInstruction != 0) { // If the first node is a branch if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH) { // to the end node int next = instruction[0 + RE.offsetNext]; if (instruction[next + RE.offsetOpcode] == RE.OP_END) { // and the branch starts with an atom if (lenInstruction >= (RE.nodeSize * 2) && instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM) { // then get that atom as an prefix because there's no other choice int lenAtom = instruction[RE.nodeSize + RE.offsetOpdata]; prefix = new char[lenAtom]; System.arraycopy(instruction, RE.nodeSize * 2, prefix, 0, lenAtom); } } } BackrefScanLoop: // Check for backreferences for (int i = 0; i < lenInstruction; i += RE.nodeSize) { switch (instruction[i + RE.offsetOpcode]) { case RE.OP_ANYOF: i += (instruction[i + RE.offsetOpdata] * 2); break; case RE.OP_ATOM: i += instruction[i + RE.offsetOpdata]; break; case RE.OP_BACKREF: flags |= OPT_HASBACKREFS; break BackrefScanLoop; } } } } }