/** * Copyright (c) 2005-2013 by Appcelerator, Inc. All Rights Reserved. * Licensed under the terms of the Eclipse Public License (EPL). * Please see the license.txt included with this distribution for details. * Any modifications to this file must keep this entire header intact. */ /* * Created on 03/09/2005 */ package org.python.pydev.core.docutils; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import org.python.pydev.core.ObjectsInternPool; import org.python.pydev.core.log.Log; import org.python.pydev.shared_core.string.FastStringBuffer; import org.python.pydev.shared_core.string.StringUtils; /** * This is an extension to the String utils so * @author Fabio * */ public final class PyStringUtils { private PyStringUtils() { } private static final boolean DEBUG = false; /** * <p>Find the index of <tt>character</tt> in a <tt>string</tt>.</p> * * <p>This method is like {@link java.lang.String#indexOf(int)} * but has the additional ability to ignore occurrences of * <tt>character</tt> in Python string literals (e.g. enclosed * by single, double or triple quotes). This is done by employing * a very simple statemachine.</p> * * @param string - the source string, e.g. the <i>haystack</i> * @param character - the character to retrieve the index for * @param ignoreInStringLiteral - if <tt>true</tt>, ignore occurrences * of <tt>character</tt> in Python string literals * @return the position of the character in string.<br> * if <tt>string</tt> is <tt>null</tt> or empty, or * if <tt>(int)character < 0</tt>, returns <tt>-1</tt>. * @note escaped (i.e. <tt>\"</tt>) characters are ignored and * control characters, e.g. line delimiters etc., are treated * normally like every other character. */ public static int indexOf(final String string, final char character, final boolean ignoreInStringLiteral) { if (null == string || (character < 0) || string.length() == 0) { return -1; } int index = string.indexOf(character); if (-1 == index) { return index; } if (ignoreInStringLiteral) { final int len = string.length(); boolean inString = false; char nextc = '\0'; char c = '\0'; int i = -1; try { while (i < len) { i++; c = string.charAt(i); if ((i + 1) < len) { nextc = string.charAt(i + 1); } if ('\\' == c) { // ignore escapes i++; continue; } if (!inString && character == c) { index = i; break; } if ('"' == c || '\'' == c) { if ('"' == nextc || '\'' == nextc) { i++; continue; } else { if (inString) { inString = false; } else { inString = true; } } } } } catch (StringIndexOutOfBoundsException e) { // malformed Python string literals may throw a SIOOBE if (DEBUG) { System.err.print(e.getMessage()); } index = -1; } } return index; } /** * <p>Find the substring in <tt>string</tt> that starts from the first * occurrence of <tt>character</tt>.</p> * * <p>This method is similar to {@link java.lang.String#substring} * but has the additional ability to ignore occurrences of * <tt>character</tt> in Python string literals (e.g. enclosed * by single, double or triple single/double quotes).</p> * * @param string - the source string, e.g. the <i>haystack</i> * @param character - the character that is the starting boundary of the searched substring * @param ignoreInStringLiteral - if <tt>true</tt>, ignore occurrences * of <tt>character</tt> in Python string literals * @return a substring from <tt>string</tt><br>or <tt>null</tt> if * {@link PyStringUtils#indexOf} returns <tt>-1</tt> * @see {@link PyStringUtils#indexOf} */ public static String findSubstring(final String string, final char character, final boolean ignoreInStringLiteral) { String result = null; int index = PyStringUtils.indexOf(string, character, ignoreInStringLiteral); if (index >= 0) { result = string.substring(index + 1); } return result; } /** * Formats a docstring to be shown and adds the indentation passed to all the docstring lines but the 1st one. */ public static String fixWhitespaceColumnsToLeftFromDocstring(String docString, String indentationToAdd) { FastStringBuffer buf = new FastStringBuffer(); List<String> splitted = StringUtils.splitInLines(docString); for (int i = 0; i < splitted.size(); i++) { String initialString = splitted.get(i); if (i == 0) { buf.append(initialString);//first is unchanged } else { String string = StringUtils.leftTrim(initialString); buf.append(indentationToAdd); if (string.length() > 0) { buf.append(string); } else { int length = initialString.length(); if (length > 0) { char c; if (length > 1) { //check 2 chars c = initialString.charAt(length - 2); if (c == '\n' || c == '\r') { buf.append(c); } } c = initialString.charAt(length - 1); if (c == '\n' || c == '\r') { buf.append(c); } } } } } //last line if (buf.length() > 0) { char c = buf.lastChar(); if (c == '\r' || c == '\n') { buf.append(indentationToAdd); } } return buf.toString(); } public static String removeWhitespaceColumnsToLeft(String hoverInfo) { FastStringBuffer buf = new FastStringBuffer(); int firstCharPosition = Integer.MAX_VALUE; List<String> splitted = StringUtils.splitInLines(hoverInfo); for (String line : splitted) { if (line.trim().length() > 0) { int found = PySelection.getFirstCharPosition(line); firstCharPosition = Math.min(found, firstCharPosition); } } if (firstCharPosition != Integer.MAX_VALUE) { for (String line : splitted) { if (line.length() > firstCharPosition) { buf.append(line.substring(firstCharPosition)); } } return buf.toString(); } else { return hoverInfo;//return initial } } public static String removeWhitespaceColumnsToLeftAndApplyIndent(String code, String indent, boolean indentCommentLinesAt0Pos) { FastStringBuffer buf = new FastStringBuffer(); int firstCharPosition = Integer.MAX_VALUE; List<String> splitted = StringUtils.splitInLines(code); for (String line : splitted) { if (indentCommentLinesAt0Pos || !line.startsWith("#")) { if (line.trim().length() > 0) { int found = PySelection.getFirstCharPosition(line); firstCharPosition = Math.min(found, firstCharPosition); } } } if (firstCharPosition != Integer.MAX_VALUE) { for (String line : splitted) { if (indentCommentLinesAt0Pos || !line.startsWith("#")) { buf.append(indent); if (line.length() > firstCharPosition) { buf.append(line.substring(firstCharPosition)); } else { buf.append(line); } } else { buf.append(line); } } return buf.toString(); } else { return code;//return initial } } /** * Splits some string given some char (that char will not appear in the returned strings) * Empty strings are also never added. */ public static void splitWithIntern(String string, char toSplit, Collection<String> addTo) { synchronized (ObjectsInternPool.lock) { int len = string.length(); int last = 0; char c = 0; for (int i = 0; i < len; i++) { c = string.charAt(i); if (c == toSplit) { if (last != i) { addTo.add(ObjectsInternPool.internUnsynched(string.substring(last, i))); } while (c == toSplit && i < len - 1) { i++; c = string.charAt(i); } last = i; } } if (c != toSplit) { if (last == 0 && len > 0) { addTo.add(ObjectsInternPool.internUnsynched(string)); //it is equal to the original (no char to split) } else if (last < len) { addTo.add(ObjectsInternPool.internUnsynched(string.substring(last, len))); } } } } /** * Tests whether each character in the given string is a valid identifier. * * @param str * @return <code>true</code> if the given string is a word */ public static boolean isValidIdentifier(final String str, boolean acceptPoint) { int len = str.length(); if (str == null || len == 0) { return false; } char c = '\0'; boolean lastWasPoint = false; for (int i = 0; i < len; i++) { c = str.charAt(i); if (i == 0) { if (!Character.isJavaIdentifierStart(c)) { return false; } } else { if (!Character.isJavaIdentifierPart(c)) { if (acceptPoint && c == '.') { if (lastWasPoint) { return false; //can't have 2 consecutive dots. } lastWasPoint = true; continue; } return false; } } lastWasPoint = false; } if (c == '.') { //if the last char is a point, don't accept it (i.e.: only accept at middle). return false; } return true; } /** * An array of Python pairs of characters that you will find in any Python code. * * Currently, the set contains: * <ul> * <ol>left and right brackets: [, ]</ol> * <ol>right and right parentheses: (, ) * </ul> */ public static final char[] BRACKETS = { '{', '}', '(', ')', '[', ']' }; public static final char[] CLOSING_BRACKETS = { '}', ')', ']' }; public static boolean hasOpeningBracket(String trimmedLine) { return trimmedLine.indexOf('{') != -1 || trimmedLine.indexOf('(') != -1 || trimmedLine.indexOf('[') != -1; } public static boolean hasClosingBracket(String trimmedLine) { return trimmedLine.indexOf('}') != -1 || trimmedLine.indexOf(')') != -1 || trimmedLine.indexOf(']') != -1; } public static boolean hasUnbalancedClosingPeers(final String line) { Map<Character, Integer> stack = new HashMap<Character, Integer>(); final int len = line.length(); for (int i = 0; i < len; i++) { char c = line.charAt(i); switch (c) { case '(': case '{': case '[': Integer iStack = stack.get(c); if (iStack == null) { iStack = 0; } iStack++; stack.put(c, iStack); break; case ')': case '}': case ']': char peer = StringUtils.getPeer(c); iStack = stack.get(peer); if (iStack == null) { iStack = 0; } iStack--; stack.put(peer, iStack); break; } } for (int i : stack.values()) { if (i < 0) { return true; } } return false; } public static String urlEncodeKeyValuePair(String key, String value) { String result = null; try { result = URLEncoder.encode(key, "UTF-8") + "=" + URLEncoder.encode(value, "UTF-8"); } catch (UnsupportedEncodingException e) { Log.log(e); } return result; } /** * //Python 3.0 can use unicode identifiers. So, the letter construct deals with that... * TOKEN : * Python identifiers * * { * < NAME: <LETTER> ( <LETTER> | <DIGIT>)* > * | * < #LETTER: * [ * "a"-"z", * "A"-"Z", * "_", * "\u0080"-"\uffff" //Anything more than 128 is considered valid (unicode range) * * ] * > * } * @param param * @return */ public static boolean isPythonIdentifier(final String param) { final int len = param.length(); if (len == 0) { return false; } char c = param.charAt(0); if (!Character.isLetter(c) && c != '_' && c <= 128) { return false; } for (int i = 1; i < len; i++) { c = param.charAt(i); if ((!Character.isLetter(c) && !Character.isDigit(c) && c != '_') && (c <= 128)) { return false; } } return true; } public static String getExeAsFileSystemValidPath(String executableOrJar) { return "v1_" + StringUtils.md5(executableOrJar); } }