/*******************************************************************************
 * Copyright (c) 2015 Red Hat.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     Red Hat - initial implementation
 *******************************************************************************/

package org.eclipse.linuxtools.internal.systemtap.ui.ide.launcher;

import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.eclipse.core.resources.IWorkspaceRoot;
import org.eclipse.core.resources.ResourcesPlugin;
import org.eclipse.core.runtime.IPath;
import org.eclipse.jface.text.IDocument;
import org.eclipse.linuxtools.internal.systemtap.ui.ide.CommentRemover;
import org.eclipse.ui.IEditorPart;
import org.eclipse.ui.IWorkbench;
import org.eclipse.ui.PlatformUI;
import org.eclipse.ui.ide.ResourceUtil;
import org.eclipse.ui.texteditor.ITextEditor;

/**
 * A class that is used for generating regular expressions that capture the output of .stp scripts.
 */
public class SystemTapRegexGenerator {

    public static enum ErrResult {
        IO_EXCEPTION
    }

    /**
     * Finds <code>printf("format", args...)</code> calls. Group 1 is the raw format string.
     * The call must be preceded by a non-word character and must have at least one argument.
     */
    private static final Pattern PRINTF_PATTERN =
            Pattern.compile("(?<=[^\\w])printf\\(\"(.+?)\",.+?\\)"); //$NON-NLS-1$

    /**
     * Finds a single format specifier. Groups: 1 = flag, 2 = width, 3 = precision
     * (including the leading dot), 4 = conversion character.
     * Note: allow optional "long" modifier 'l'. Not captured because it doesn't impact output format.
     * Also, don't support variable width/precision modifiers (*).
     * TODO: Consider %m & %M support.
     */
    private static final Pattern FORMAT_PATTERN =
            Pattern.compile("%([-\\+ \\#0])?(\\d+)?(\\.\\d*)?l?([bcdiopsuxX%])"); //$NON-NLS-1$

    /** Matches the two-character sequence backslash + 'n' as written in the script source. */
    private static final Pattern NEWLINE_ESCAPE = Pattern.compile("\\\\n"); //$NON-NLS-1$

    /** Matches every character that {@link #addRegexEscapes(String)} must escape. */
    private static final Pattern REGEX_SPECIAL_CHARS = Pattern.compile("[\\[\\^$.|?*+(){}]"); //$NON-NLS-1$

    /**
     * Generate a list of regular expressions that will capture the output of a given .stp script.
     * Only output coming from <code>printf</code> statements will be captured. One regex is
     * generated per printed line; lines that contain no format specifier, or that contain a
     * specifier that is not supported, are skipped.
     * @param scriptPath The absolute path of the script to capture the output of.
     * @param maxToFind The maximum number of regexs to create and return.
     * A negative value indicates no limit.
     * @return A list of generated regexs, each paired with the number of capturing groups it has.
     * The list is empty if <code>maxToFind</code> is 0 or if the script contents are unavailable.
     */
    public static List<Entry<String, Integer>> generateFromPrintf(IPath scriptPath, int maxToFind) {
        List<Entry<String, Integer>> regexs = new ArrayList<>();
        if (maxToFind == 0) {
            return regexs;
        }

        String contents = readScriptContents(scriptPath);
        if (contents == null) {
            return regexs;
        }

        // Now actually search the contents for "printf(...)" statements.
        Matcher matcher = PRINTF_PATTERN.matcher(contents);
        while (hasRoom(regexs, maxToFind) && matcher.find()) {
            // Only capture until newlines to preserve the "column" format.
            // Don't try gluing together output from multiple printfs
            // since asynchronous prints would make things messy.
            // A limit of -1 keeps trailing empty segments, so that a segment ending in an
            // escaped backslash always has a successor to be merged with.
            String[] printls = NEWLINE_ESCAPE.split(matcher.group(1), -1);
            for (int i = 0; i < printls.length && hasRoom(regexs, maxToFind); i++) {
                String printl = printls[i];

                // An odd run of trailing backslashes means the "\n" that was split on was really
                // an escaped backslash followed by a plain 'n', so glue the pieces back together.
                if (i + 1 < printls.length && endsWithOddBackslashRun(printl)) {
                    printls[i + 1] = printl.concat("\\n" + printls[i + 1]); //$NON-NLS-1$
                    continue;
                }

                Entry<String, Integer> lineRegex = buildLineRegex(printl);
                if (lineRegex != null) {
                    regexs.add(lineRegex);
                }
            }
        }
        return regexs;
    }

    /**
     * Tells whether more regexs may still be added without exceeding the requested maximum.
     * @param regexs The regexs collected so far.
     * @param maxToFind The requested maximum; a negative value indicates no limit.
     * @return <code>true</code> if another regex may be added.
     */
    private static boolean hasRoom(List<Entry<String, Integer>> regexs, int maxToFind) {
        return maxToFind < 0 || regexs.size() < maxToFind;
    }

    /**
     * Obtains the comment-stripped contents of the script, preferring the contents of an open
     * editor over the saved file.
     * @param scriptPath The absolute path of the script.
     * @return Whatever {@link CommentRemover} returns for the script contents.
     */
    private static String readScriptContents(IPath scriptPath) {
        IWorkbench workbench = PlatformUI.getWorkbench();
        IWorkspaceRoot root = ResourcesPlugin.getWorkspace().getRoot();
        // NOTE(review): getActiveWorkbenchWindow() is documented to return null when there is no
        // active window or when called off the UI thread - confirm callers run on the UI thread.
        IEditorPart editor = ResourceUtil.findEditor(
                workbench.getActiveWorkbenchWindow().getActivePage(),
                root.getFile(scriptPath.makeRelativeTo(root.getLocation())));
        if (editor != null) {
            // If editor of this file is open, take current file contents.
            ITextEditor tEditor = editor.getAdapter(ITextEditor.class);
            if (tEditor != null) {
                IDocument document = tEditor.getDocumentProvider().getDocument(tEditor.getEditorInput());
                if (document != null) {
                    return CommentRemover.exec(document.get());
                }
            }
        }
        // If chosen file is not being edited (in a text editor) or is outside of the workspace,
        // use the saved contents of the file itself.
        return CommentRemover.execWithFile(scriptPath.toString());
    }

    /**
     * Tells whether a string ends with an odd number of consecutive backslashes.
     * @param s The string to inspect.
     * @return <code>true</code> if the trailing run of backslashes has odd length.
     */
    private static boolean endsWithOddBackslashRun(String s) {
        int run = 0;
        for (int i = s.length() - 1; i >= 0 && s.charAt(i) == '\\'; i--) {
            run++;
        }
        return (run & 1) == 1;
    }

    /**
     * Builds the regex for a single printed line of a format string.
     * @param printl One newline-delimited piece of a printf format string.
     * @return The regex paired with its number of capturing groups, or <code>null</code> if the
     * line has no format specifier or uses one that cannot be translated.
     */
    private static Entry<String, Integer> buildLineRegex(String printl) {
        Matcher fmatch = FORMAT_PATTERN.matcher(printl);
        StringBuilder regex = new StringBuilder();
        int lastend = 0;
        int numColumns = 0;

        while (fmatch.find()) {
            char chr = fmatch.group(4).charAt(0);
            char flag = fmatch.group(1) == null ? '\0' : fmatch.group(1).charAt(0);
            String precision = fmatch.group(3) == null ? null : fmatch.group(3).substring(1);
            int width = 0;
            if (fmatch.group(2) != null) {
                try {
                    width = Integer.parseInt(fmatch.group(2));
                } catch (NumberFormatException e) {
                    // The width does not fit in an int; treat the specifier as unsupported.
                    return null;
                }
            }

            String target = captureGroupFor(chr, flag, precision);
            if (target == null) {
                // Invalid or unhandled format specifier. Skip this line.
                return null;
            }
            numColumns++;

            // First, add any non-capturing characters.
            regex.append(addRegexEscapes(printl.substring(lastend, fmatch.start())));
            lastend = fmatch.end();

            // Handle the optional width specifier: the captured value takes at least one
            // character, so at most (width - 1) padding spaces can surround it.
            // Ignore it for %b, which uses the width value in a different way.
            if (chr != 'b' && width > 1) {
                String padding = " {0," + (width - 1) + "}"; //$NON-NLS-1$ //$NON-NLS-2$
                if (flag == '-') {
                    regex.append(target).append(padding);
                } else if (flag != '0' || chr == 's' || chr == 'c') {
                    regex.append(padding).append(target);
                } else {
                    // Zero-padded numbers: the padding digits are part of the captured value.
                    regex.append(target);
                }
            } else {
                regex.append(target);
            }
        }

        if (numColumns == 0) {
            return null;
        }
        // Finally, add the uncaptured remainder of the print statement to the regex.
        regex.append(addRegexEscapes(printl.substring(lastend)));
        return new SimpleEntry<>(regex.toString(), numColumns);
    }

    /**
     * Produces the capturing group that matches the output of one format specifier.
     * @param chr The conversion character of the specifier.
     * @param flag The flag character of the specifier, or <code>'\0'</code> if it has none.
     * @param precision The digits of the precision, possibly empty, or <code>null</code> if
     * the specifier has no precision.
     * @return A regex consisting of exactly one capturing group, or <code>null</code> if the
     * conversion is not supported.
     */
    private static String captureGroupFor(char chr, char flag, String precision) {
        switch (chr) {
        case 'u':
            return "(\\d+)"; //$NON-NLS-1$
        case 'o':
            return flag == '#' ? "(0\\d+)" : "(\\d+)"; //$NON-NLS-1$ //$NON-NLS-2$
        case 'd':
        case 'i':
            if (flag == '+') {
                // A sign is always printed.
                return "([+-]\\d+)"; //$NON-NLS-1$
            } else if (flag == ' ') {
                // A space takes the place of the plus sign.
                return "([ -]\\d+)"; //$NON-NLS-1$
            }
            return "(-?\\d+)"; //$NON-NLS-1$
        case 'p':
            return "(0x[a-f0-9]+)"; //$NON-NLS-1$
        case 'x':
            return flag == '#' ? "(0x[a-f0-9]+)" : "([a-f0-9]+)"; //$NON-NLS-1$ //$NON-NLS-2$
        case 'X':
            return flag == '#' ? "(0X[A-F0-9]+)" : "([A-F0-9]+)"; //$NON-NLS-1$ //$NON-NLS-2$
        case 'b':
            return "(.)"; //$NON-NLS-1$
        case 'c':
            if (flag != '#') {
                return "(.)"; //$NON-NLS-1$
            }
            // Escaped character: a backslash followed by a letter or three digits,
            // a doubled backslash, or any single plain character.
            return "(\\\\(?:[a-z]|[0-9]{3})|\\\\\\\\|.)"; //$NON-NLS-1$
        case 's':
            if (precision == null) {
                return "(.+)"; //$NON-NLS-1$
            }
            // A bare "." precision means a precision of zero.
            return "(.{" + (precision.isEmpty() ? "0" : precision) + "})"; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
        default:
            return null;
        }
    }

    /**
     * This escapes special regex characters in a string. Escapes must be added
     * to the generated regexs to capture printf output that doesn't
     * come from format specifiers (aka literal strings).
     * The escaped characters are <code>[ ^ $ . | ? * + ( ) { }</code>; backslashes are left
     * untouched, so escape sequences written in the script pass into the regex unchanged.
     * @param s The string to add escapes to.
     * @return The same string, after it has been modified with escapes.
     */
    private static String addRegexEscapes(String s) {
        return REGEX_SPECIAL_CHARS.matcher(s).replaceAll("\\\\$0"); //$NON-NLS-1$
    }
}