package project.phase2.ll1parsergenerator; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.RandomAccessFile; import java.text.ParseException; import java.util.ArrayList; import java.util.List; import project.phase2.ll1parsergenerator.dfastuff.DFA; import project.phase2.ll1parsergenerator.dfastuff.DFA.TokenMatch; import project.phase2.ll1parsergenerator.dfastuff.NFA; import project.phase2.ll1parsergenerator.dfastuff.Parser; /** * Used for scanning a stream using DFAs. Implements some methods for common * uses. * */ public class DFAScanner { // // CLASS/INSTANCE DATA // /** * The DFA to scan using. */ private DFA mScan; /** * Token buffer (in we need to maintain data from passed in input stream). */ private String mBuffer; /** * Whether the tables should be minimized. */ private boolean mMinimize; // // CTOR // public DFAScanner(boolean minimize) { mMinimize = minimize; } public DFAScanner() { this(true); } // // PUBLIC METHODS // /** * Returns the input buffer. * * @return the input buffer. */ public String getBuffer() { return mBuffer; } /** * Adds a regular expression to the scanner. * * @param regex * the regular expression. */ public void addRegex(String regex) { addRegex(regex, null); } /** * Adds a regular expression with a label to the scanner. * * @param regex * the regular expression. * @param label * the label. */ public void addRegex(String regex, String label) { NFA nfa = Parser.fromString(regex); nfa.setGoalLabels(label); if (mScan == null) { mScan = DFA.fromNFA(nfa); } else { mScan = DFA.fromNFA(nfa.union(mScan)); } if (mMinimize) mScan = mScan.minimize(); } /** * Returns the label of the token starting at the beginning of the stream. * Returns null if there is not a valid match to the scanner. * * @param stream * the stream to search. * @return the label. */ public String labelToken(InputStream stream) throws IOException, ParseException { mScan.reset(); mBuffer = ""; int nextInt; char next; String longestLabel = null; while ((nextInt = stream.read()) != -1) { next = (char) nextInt; mBuffer += next; TokenMatch tm = mScan.test(Character.toString(next)); if (tm.isAccepted()) { longestLabel = tm.getLabel(); } else if (tm.isRejected()) { break; } } if (longestLabel == null) throw new ParseException(mBuffer, mBuffer.length()); return longestLabel; } /** * Finds all occurrences of the specified regex in the given file. Uses * longest matching and does not include overlapping occurrences. * * @param f * the file to search. * @throws IOException */ public List<MatchDescriptor> findAllInFile(File f) throws IOException// throws // IOException { RandomAccessFile raf = null; ArrayList<MatchDescriptor> ret = new ArrayList<MatchDescriptor>(); MatchDescriptor currMatch = null; String currentString; int currPos = 0; int nextInt; char next; try { raf = new RandomAccessFile(f, "r"); while (currPos < raf.length()) { raf.seek(currPos); mScan.reset(); currentString = ""; currMatch = null; while ((nextInt = raf.read()) != -1) { next = (char) nextInt; TokenMatch tm = mScan.test(Character.toString(next)); if (tm.isRejected()) { break; } else { currentString += next; if (tm.isAccepted()) { currMatch = new MatchDescriptor(currentString, (int) raf.getFilePointer() - currentString.length()); } } } if (currMatch != null) { ret.add(currMatch); currPos += currMatch.getString().length(); } else { currPos++; } } } finally { if (raf != null) raf.close(); } return ret; } /** * Finds all occurrences of the specified regex in the given string. Uses * longest matching and does not include overlapping occurrences. * * @param f * the file to search. * @throws IOException */ public List<MatchDescriptor> findAllInString(String f) { ArrayList<MatchDescriptor> ret = new ArrayList<MatchDescriptor>(); MatchDescriptor currMatch = null; String currentString; int currPos = 0, currInnerPos = 0; char next; while (currPos < f.length()) { currInnerPos = currPos; mScan.reset(); currentString = ""; currMatch = null; while (currInnerPos < f.length()) { next = f.charAt(currInnerPos); currInnerPos++; TokenMatch tm = mScan.test(Character.toString(next)); if (tm.isRejected()) { break; } else { currentString += next; if (tm.isAccepted()) { currMatch = new MatchDescriptor(currentString, currInnerPos - currentString.length()); } } } if (currMatch != null) { ret.add(currMatch); currPos += currMatch.getString().length(); } else { currPos++; } } return ret; } // // INNER CLASS // public class MatchDescriptor { // // CLASS/INSTANCE DATA // /** * The string that was matched. */ private String mString; /** * The location that the string was matched at. */ private int mLocation; // // CTOR // public MatchDescriptor(String str, int loc) { mString = str; mLocation = loc; } // // PUBLIC METHODS // /** * Returns the string. * * @return the string. */ public String getString() { return mString; } /** * Returns the location. * * @return the location. */ public int getLocation() { return mLocation; } /** * Returns a string representation of the match. * * @return the string representation. */ public String toString() { return "\"" + mString + "\" at " + mLocation; } } }