/*******************************************************************************
 * Copyright (c) 2009, 2015, 2016 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *     Zend Technologies
 *******************************************************************************/
package org.eclipse.php.internal.core.util.text;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.eclipse.dltk.annotations.NonNull;
import org.eclipse.dltk.annotations.Nullable;
import org.eclipse.dltk.core.ISourceRange;
import org.eclipse.dltk.core.SourceRange;
import org.eclipse.jface.text.BadLocationException;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.IRegion;
import org.eclipse.jface.text.Region;
import org.eclipse.php.core.PHPVersion;
import org.eclipse.php.internal.core.documentModel.parser.AbstractPHPLexer;
import org.eclipse.php.internal.core.documentModel.parser.PHPRegionContext;
import org.eclipse.php.internal.core.documentModel.parser.PHPLexerFactory;
import org.eclipse.php.internal.core.documentModel.parser.regions.IPHPScriptRegion;
import org.eclipse.php.internal.core.documentModel.parser.regions.PHPRegionTypes;
import org.eclipse.php.internal.core.documentModel.partitioner.PHPPartitionTypes;
import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegion;
import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegion;
import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegionCollection;
import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegionContainer;

/**
 * Static helpers for scanning PHP source text: extracting the statement that
 * precedes an offset, locating identifier / namespace boundaries, skipping
 * whitespace and finding object (<code>-&gt;</code>) or class
 * (<code>::</code>) operators.
 * <p>
 * The class only has static members and cannot be instantiated.
 * </p>
 */
public class PHPTextSequenceUtilities {

    private static final Pattern FUNCTION_PATTERN = Pattern.compile("function[ \\t\\n\\r]", //$NON-NLS-1$
            Pattern.CASE_INSENSITIVE);
    private static final Pattern CLASS_PATTERN = Pattern.compile("(class|interface)[ \\t\\n\\r]", //$NON-NLS-1$
            Pattern.CASE_INSENSITIVE);

    private static final String LBRACE = "{"; //$NON-NLS-1$
    private static final String RBRACE = "}"; //$NON-NLS-1$
    private static final String LPAREN = "("; //$NON-NLS-1$
    private static final String RPAREN = ")"; //$NON-NLS-1$
    private static final String COMMA = ","; //$NON-NLS-1$
    private static final String LBRACKET = "["; //$NON-NLS-1$
    private static final String RBRACKET = "]"; //$NON-NLS-1$
    private static final String OBJECT_OPERATOR = "->"; //$NON-NLS-1$
    private static final String PAAMAYIM_NEKUDOTAYIM = "::"; //$NON-NLS-1$

    private PHPTextSequenceUtilities() {
    }

    /**
     * This function returns statement text depending on the current offset. It
     * searches backwards (starting from offset - 1) until it finds ';', '{' or
     * '}'.
     *
     * @param offset
     *            The absolute offset in the document
     * @param sdRegion
     *            Structured document region of the offset
     * @param removeComments
     *            Flag determining whether to remove comments in the resulted
     *            text sequence
     *
     * @return text sequence of the statement with leading whitespace
     *         stripped, cannot be null; an empty sequence is returned when
     *         the offset is not inside PHP content or a token lookup fails
     */
    public static @NonNull TextSequence getStatement(int offset, @NonNull IStructuredDocumentRegion sdRegion,
            boolean removeComments) {
        int documentOffset = offset;
        if (documentOffset == sdRegion.getEndOffset()) {
            documentOffset -= 1;
        }
        ITextRegion tRegion = sdRegion.getRegionAtCharacterOffset(documentOffset);

        ITextRegionCollection container = sdRegion;

        if (tRegion instanceof ITextRegionContainer) {
            container = (ITextRegionContainer) tRegion;
            tRegion = container.getRegionAtCharacterOffset(offset);
        }
        // On the closing PHP tag, step back to the region right before it.
        if (tRegion != null && PHPRegionContext.PHP_CLOSE.equals(tRegion.getType())) {
            tRegion = container.getRegionAtCharacterOffset(container.getStartOffset() + tRegion.getStart() - 1);
        }

        // This text region must be of type PhpScriptRegion:
        if (tRegion != null && PHPRegionContext.PHP_CONTENT.equals(tRegion.getType())) {
            IPHPScriptRegion phpScriptRegion = (IPHPScriptRegion) tRegion;

            try {
                // Set default starting position to the beginning of the
                // PhpScriptRegion:
                int startOffset = container.getStartOffset() + phpScriptRegion.getStart();

                // Now, search backwards for the statement start (in this
                // PhpScriptRegion):
                ITextRegion startTokenRegion;
                if (documentOffset == startOffset) {
                    startTokenRegion = phpScriptRegion.getPHPToken(0);
                } else {
                    startTokenRegion = phpScriptRegion.getPHPToken(offset - startOffset - 1);
                }
                // Comments are collected while walking backwards, so the list
                // ends up ordered from the latest comment to the first one.
                List<IRegion> comments = new ArrayList<IRegion>();
                while (true) {
                    // If statement start is at the beginning of the PHP script
                    // region:
                    if (startTokenRegion.getStart() == 0) {
                        break;
                    }
                    String type = startTokenRegion.getType();
                    if (removeComments && PHPPartitionTypes.isPHPCommentState(type)) {
                        comments.add(new Region(phpScriptRegion.getStart() + startTokenRegion.getStart(),
                                startTokenRegion.getLength()));
                    }

                    if (isStatementBoundary(type)) {
                        // Calculate starting position of the statement (it
                        // should go right after this startTokenRegion):
                        startOffset += startTokenRegion.getEnd();
                        break;
                    }

                    startTokenRegion = phpScriptRegion.getPHPToken(startTokenRegion.getStart() - 1);
                }

                TextSequence textSequence = TextSequenceUtilities.createTextSequence(sdRegion, startOffset,
                        offset - startOffset);

                // remove comments
                if (removeComments) {
                    textSequence = removeComments(textSequence, comments);
                }

                // remove spaces from start.
                return textSequence.subTextSequence(readForwardSpaces(textSequence, 0), textSequence.length());

            } catch (BadLocationException ignored) {
                // Deliberate best effort: when a token cannot be looked up,
                // fall through and return the empty text sequence below.
            }
        }

        return TextSequenceUtilities.createTextSequence(sdRegion, 0, 0);
    }

    /**
     * Tells whether the given PHP token type ends the backward search for a
     * statement start, i.e. whether it is '}', '{' or ';'.
     */
    private static boolean isStatementBoundary(String type) {
        return PHPRegionTypes.PHP_CURLY_CLOSE.equals(type) || PHPRegionTypes.PHP_CURLY_OPEN.equals(type)
                || PHPRegionTypes.PHP_SEMICOLON.equals(type);
    }

    /**
     * <p>
     * This function returns statement region depending on the current offset.
     * It searches backwards (starting from offset - 1) until it finds ';', '{'
     * or '}'.
     * </p>
     * <p>
     * <b>Be careful, empty region can be returned (i.e. region's length is 0)
     * when no statement was found. In this case, the offset from the returned
     * region has no special meaning.</b>
     * </p>
     *
     * @param offset
     *            The absolute offset in the document
     * @param sdRegion
     *            Structured document region of the offset
     * @param ignoreStartComments
     *            move start offset to no-comment region
     *
     * @return text sequence region, cannot be null
     */
    public static @NonNull Region getStatementRegion(int offset, @NonNull IStructuredDocumentRegion sdRegion,
            boolean ignoreStartComments) {
        // temporary workaround to fix
        // https://bugs.eclipse.org/bugs/show_bug.cgi?id=472197
        TextSequence textSequence = getStatement(offset, sdRegion, ignoreStartComments);
        return new Region(textSequence.getOriginalOffset(0), textSequence.length());
    }

    /**
     * Cuts the given comment regions out of a text sequence.
     *
     * @param textSequence
     *            sequence to remove the comments from
     * @param comments
     *            comment regions; they must be reverse ordered (i.e. from
     *            latest to first in document)
     * @return the text sequence without the parts covered by the comments
     */
    private static @NonNull TextSequence removeComments(@NonNull TextSequence textSequence,
            List<IRegion> comments) {
        int seqStart = textSequence.getOriginalOffset(0);
        for (IRegion commentStartRegion : comments) {
            int textSequenceLength = textSequence.length();
            if (textSequenceLength == 0) {
                break;
            }
            // Translate the comment region into sequence-relative indexes.
            int start = commentStartRegion.getOffset() - seqStart;
            int end = start + commentStartRegion.getLength();
            if (end <= 0) {
                // no need to handle remaining comments
                break;
            }
            if (start >= textSequenceLength) {
                continue;
            }
            start = Math.max(0, start);
            end = Math.min(textSequenceLength, end);
            textSequence = textSequence.cutTextSequence(start, end);
        }
        return textSequence;
    }

    /**
     * Finds the end of a parenthesized part starting at the given offset: the
     * text must start with '(' and the scan stops at the first ')' found
     * (nested parentheses are not balanced).
     *
     * @param textSequence
     *            text to scan
     * @param offset
     *            index where '(' is expected
     * @param allowToStartWithWhitespaces
     *            whether whitespace may precede '('
     * @return index right after the first ')', or -1 when '(' is not at the
     *         expected position or no ')' follows it
     */
    public static int getMethodEndIndex(@NonNull CharSequence textSequence, int offset,
            boolean allowToStartWithWhitespaces) {
        int length = textSequence.length();
        if (allowToStartWithWhitespaces) {
            while (offset < length && Character.isWhitespace(textSequence.charAt(offset))) {
                ++offset;
            }
        }
        if (offset < length && textSequence.charAt(offset) == '(') {
            ++offset;
        } else {
            return -1;
        }
        while (offset < length && textSequence.charAt(offset) != ')') {
            ++offset;
        }
        if (textSequence.length() > offset && textSequence.charAt(offset) == ')') {
            return offset + 1;
        }
        return -1;
    }

    /**
     * Checks if we are inside a function declaration statement.
     *
     * @param textSequence
     *            statement text to inspect
     * @return the start offset of the 'function' keyword if the sequence ends
     *         inside a function declaration, otherwise -1
     */
    public static int isInFunctionDeclaration(@NonNull TextSequence textSequence) {
        Matcher matcher = FUNCTION_PATTERN.matcher(textSequence);
        // search for the 'function' word.
        while (matcher.find()) {
            // verify char before 'function' word.
            int functionStart = matcher.start();
            if (functionStart != 0 && Character.isJavaIdentifierStart(textSequence.charAt(functionStart - 1))) {
                continue;
            }

            // verify state
            String type = TextSequenceUtilities.getType(textSequence, functionStart + 1);
            if (PHPPartitionTypes.isPHPRegularState(type)) {
                // verify the function is not closed.
                int offset;
                boolean possibleReturnType = false;
                boolean returnType = false;
                for (offset = matcher.end(); offset < textSequence.length(); offset++) {
                    if (textSequence.charAt(offset) == ')') {
                        // verify state
                        type = TextSequenceUtilities.getType(textSequence, offset);
                        if (PHPPartitionTypes.isPHPRegularState(type)) {
                            possibleReturnType = true;
                        }
                    } else if ((possibleReturnType || returnType) && textSequence.charAt(offset) == '{') {
                        break;
                    } else if (possibleReturnType && textSequence.charAt(offset) == ':') {
                        possibleReturnType = false;
                        returnType = true;
                    } else if (possibleReturnType && !Character.isWhitespace(textSequence.charAt(offset))) {
                        break;
                    }
                }
                // The scan reached the end without leaving the declaration.
                if (offset == textSequence.length()) {
                    return functionStart;
                }
            }
        }
        return -1;
    }

    /**
     * Tells whether the "class" / "interface" match starting at the given
     * offset is a real keyword, i.e. is neither the tail of a longer
     * identifier nor preceded by an object/class operator.
     */
    private static boolean isClassOrInterfaceKeyword(@NonNull TextSequence textSequence, int classStartOffset) {
        if (classStartOffset == 0) {
            return true;
        }
        int offset = readBackwardSpaces(textSequence, classStartOffset);
        if (offset == 0) {
            return true;
        }
        // No whitespace before the match and an identifier character right
        // before it: the match is part of another word.
        if (offset == classStartOffset && Character.isJavaIdentifierStart(textSequence.charAt(offset - 1))) {
            return false;
        }
        // https://bugs.eclipse.org/bugs/show_bug.cgi?id=501974
        // Since PHP 5.5, keyword "class" can be used as a class constant name
        // (for class name resolution). Let's keep it simple, exclude current
        // match when there is an object/class operator before keyword "class"
        // or "interface".
        // XXX: handle comments between object/class operators and keyword
        // "class" or "interface".
        assert OBJECT_OPERATOR.length() == 2 && PAAMAYIM_NEKUDOTAYIM.length() == 2;
        if (offset < 2) {
            return true;
        }
        String s = textSequence.subSequence(offset - 2, offset).toString();
        if (OBJECT_OPERATOR.equals(s) || PAAMAYIM_NEKUDOTAYIM.equals(s)) {
            return false;
        }
        return true;
    }

    /**
     * Checks if we are inside a class or interface declaration.
     *
     * @param textSequence
     *            statement text to inspect
     * @return the offset right after the matched "class" / "interface"
     *         keyword (including the whitespace character following it) when
     *         no closing '}' in regular PHP state follows it, otherwise -1
     */
    public static int isInClassDeclaration(@NonNull TextSequence textSequence) {
        Matcher matcher = CLASS_PATTERN.matcher(textSequence);
        // search for the 'class' or 'interface' words.
        while (matcher.find()) {
            // verify char before start.
            int startOffset = matcher.start();
            if (!isClassOrInterfaceKeyword(textSequence, startOffset)) {
                continue;
            }
            // verify state
            String type = TextSequenceUtilities.getType(textSequence, startOffset + 1);
            if (PHPPartitionTypes.isPHPRegularState(type)) {
                int endOffset = matcher.end();
                // verify the class is not closed.
                int offset;
                for (offset = endOffset; offset < textSequence.length(); offset++) {
                    if (textSequence.charAt(offset) == '}') {
                        // verify state
                        type = TextSequenceUtilities.getType(textSequence, offset);
                        if (PHPPartitionTypes.isPHPRegularState(type)) {
                            break;
                        }
                    }
                }
                if (offset == textSequence.length()) {
                    return endOffset;
                }
            }
        }
        return -1;
    }

    /**
     * Scans backwards from the given position over a namespace-qualified name
     * (letters, digits, '_', single backslashes, and whitespace that is not
     * followed, going backwards, by another name part).
     *
     * @param textSequence
     *            text to scan
     * @param startPosition
     *            position to start scanning backwards from
     * @param includeDollar
     *            whether a '$' right before the name belongs to it
     * @return start index (can be &lt; 0); never greater than the given
     *         start position
     */
    public static int readNamespaceStartIndex(@NonNull CharSequence textSequence, int startPosition,
            boolean includeDollar) {
        boolean onBackslash = false;
        boolean onWhitespace = false;

        int oldStartPosition = startPosition;
        startPosition = readBackwardSpaces(textSequence, startPosition);
        while (startPosition > 0) {
            char ch = textSequence.charAt(startPosition - 1);
            if (!Character.isLetterOrDigit(ch) && ch != '_') {
                if (ch == '\\') {
                    // two consecutive backslashes end the name
                    if (onBackslash) {
                        break;
                    }
                    onBackslash = true;
                    onWhitespace = false;
                } else if (Character.isWhitespace(ch)) {
                    onWhitespace = true;
                    onBackslash = false;
                } else {
                    break;
                }
            } else {
                // a name part separated by whitespace is not part of this
                // name
                if (onWhitespace) {
                    break;
                }
                onBackslash = false;
                onWhitespace = false;
            }
            startPosition--;
        }
        if (includeDollar && startPosition > 0 && textSequence.charAt(startPosition - 1) == '$') {
            startPosition--;
        }
        startPosition = startPosition >= 0 ? readForwardSpaces(textSequence, startPosition) : startPosition;
        // FIXME bug 291970 i do not know if this is right or not
        if (startPosition > oldStartPosition) {
            startPosition = oldStartPosition;
        }
        return startPosition;
    }

    /**
     * Scans forwards from the given position over a namespace-qualified name;
     * the forward counterpart of
     * {@link #readNamespaceStartIndex(CharSequence, int, boolean)}.
     *
     * @param textSequence
     *            text to scan
     * @param startPosition
     *            position to start scanning from
     * @param includeDollar
     *            whether a '$' at the start position belongs to the name
     * @return end index of the name, with trailing whitespace excluded
     */
    public static int readNamespaceEndIndex(@NonNull CharSequence textSequence, int startPosition,
            boolean includeDollar) {
        boolean onBackslash = false;
        boolean onWhitespace = false;

        int length = textSequence.length();
        if (includeDollar && startPosition < length && textSequence.charAt(startPosition) == '$') {
            startPosition++;
        }
        while (startPosition < length) {
            char ch = textSequence.charAt(startPosition);
            if (!Character.isLetterOrDigit(ch) && ch != '_') {
                if (ch == '\\') {
                    if (onBackslash) {
                        break;
                    }
                    onBackslash = true;
                    onWhitespace = false;
                } else if (Character.isWhitespace(ch)) {
                    onWhitespace = true;
                    onBackslash = false;
                } else {
                    break;
                }
            } else {
                if (onWhitespace) {
                    break;
                }
                onBackslash = false;
                onWhitespace = false;
            }
            startPosition++;
        }
        return startPosition >= 0 ? readBackwardSpaces(textSequence, startPosition) : startPosition;
    }

    /**
     * Scans backwards from the given position over letters, digits and '_'.
     *
     * @param textSequence
     *            text to scan
     * @param startPosition
     *            position to start scanning backwards from
     * @param includeDollar
     *            whether a '$' right before the identifier belongs to it
     * @return start index (can be &lt; 0)
     */
    public static int readIdentifierStartIndex(@NonNull CharSequence textSequence, int startPosition,
            boolean includeDollar) {
        while (startPosition > 0) {
            char ch = textSequence.charAt(startPosition - 1);
            if (!Character.isLetterOrDigit(ch) && ch != '_') {
                break;
            }
            startPosition--;
        }
        if (includeDollar && startPosition > 0 && textSequence.charAt(startPosition - 1) == '$') {
            startPosition--;
        }
        return startPosition;
    }

    /**
     * Scans forwards from the given position over letters, digits and '_'.
     *
     * @param textSequence
     *            text to scan
     * @param startPosition
     *            position to start scanning from
     * @param includeDollar
     *            whether a '$' at the start position belongs to the
     *            identifier
     * @return index of the first character after the identifier
     */
    public static int readIdentifierEndIndex(@NonNull CharSequence textSequence, int startPosition,
            boolean includeDollar) {
        int length = textSequence.length();
        if (includeDollar && startPosition < length && textSequence.charAt(startPosition) == '$') {
            startPosition++;
        }
        while (startPosition < length) {
            char ch = textSequence.charAt(startPosition);
            if (!Character.isLetterOrDigit(ch) && ch != '_') {
                break;
            }
            startPosition++;
        }
        return startPosition;
    }

    /**
     * Version-aware identifier start lookup: plain identifiers for PHP
     * versions below 5.3, namespace-qualified names otherwise.
     *
     * @return start index (can be &lt; 0)
     */
    public static int readIdentifierStartIndex(@NonNull PHPVersion phpVersion, @NonNull CharSequence textSequence,
            int startPosition, boolean includeDollar) {
        if (phpVersion.isLessThan(PHPVersion.PHP5_3)) {
            return readIdentifierStartIndex(textSequence, startPosition, includeDollar);
        }
        return readNamespaceStartIndex(textSequence, startPosition, includeDollar);
    }

    /**
     * Version-aware identifier end lookup: plain identifiers for PHP versions
     * below 5.3, namespace-qualified names otherwise.
     *
     * @return end index of the identifier
     */
    public static int readIdentifierEndIndex(@NonNull PHPVersion phpVersion, @NonNull CharSequence textSequence,
            int startPosition, boolean includeDollar) {
        if (phpVersion.isLessThan(PHPVersion.PHP5_3)) {
            return readIdentifierEndIndex(textSequence, startPosition, includeDollar);
        }
        return readNamespaceEndIndex(textSequence, startPosition, includeDollar);
    }

    /**
     * Tries to find identifier enclosing given position.
     *
     * @param textSequence
     *            text to scan
     * @param pos
     *            position inside the text
     * @return range of the enclosing identifier, or null when the position is
     *         outside of the text or no range could be determined
     */
    public static @Nullable ISourceRange getEnclosingIdentifier(@NonNull CharSequence textSequence, int pos) {
        if (pos < 0 || pos >= textSequence.length()) {
            return null;
        }

        int start = readIdentifierStartIndex(textSequence, pos, true);
        int end = readIdentifierEndIndex(textSequence, pos, true);

        if (start < 0 || start > end) {
            return null;
        }

        // NOTE(review): 'end' is the index after the identifier, so this
        // length reaches one character past it - confirm callers expect that.
        return new SourceRange(start, end - start + 1);
    }

    /**
     * Skips whitespace backwards.
     *
     * @return the smallest index such that every character between it and
     *         the given start position is whitespace
     */
    public static int readBackwardSpaces(@NonNull CharSequence textSequence, int startPosition) {
        int rv = startPosition;
        for (; rv > 0; rv--) {
            if (!Character.isWhitespace(textSequence.charAt(rv - 1))) {
                break;
            }
        }
        return rv;
    }

    /**
     * Skips whitespace forwards in a document.
     *
     * @return index of the first non-whitespace character at or after the
     *         start position, or the end position when there is none
     * @throws BadLocationException
     *             when a scanned position is not valid in the document
     */
    public static int readForwardSpaces(@NonNull IDocument document, int startPosition, int endPosition)
            throws BadLocationException {
        int rv = startPosition;
        for (; rv < endPosition; rv++) {
            if (!Character.isWhitespace(document.getChar(rv))) {
                break;
            }
        }
        return rv;
    }

    /**
     * Skips whitespace forwards.
     *
     * @return index of the first non-whitespace character at or after the
     *         start position, or the text length when there is none
     */
    public static int readForwardSpaces(@NonNull CharSequence textSequence, int startPosition) {
        int rv = startPosition;
        for (; rv < textSequence.length(); rv++) {
            if (!Character.isWhitespace(textSequence.charAt(rv))) {
                break;
            }
        }
        return rv;
    }

    /**
     * Skips non-whitespace characters forwards.
     *
     * @return index of the first whitespace character at or after the start
     *         position, or the text length when there is none
     */
    public static int readForwardUntilSpaces(@NonNull CharSequence textSequence, int startPosition) {
        int rv = startPosition;
        for (; rv < textSequence.length(); rv++) {
            if (Character.isWhitespace(textSequence.charAt(rv))) {
                break;
            }
        }
        return rv;
    }

    /**
     * Returns the next position on the text where one of the given
     * delimiters starts.
     *
     * @param textSequence
     *            - The input text sequence
     * @param startPosition
     *            - The current position in the text sequence to start from
     * @param delims
     *            - The array of delimiters
     * @return index of the first delimiter at or after the start position, or
     *         the text length when there is none
     */
    public static int readForwardUntilDelim(@NonNull CharSequence textSequence, int startPosition,
            @NonNull char[] delims) {
        int rv = startPosition;
        for (; rv < textSequence.length(); rv++) {
            char c = textSequence.charAt(rv);
            if (isDelimiter(c, delims)) {
                break;
            }
        }
        return rv;
    }

    /** Tells whether the character is one of the given delimiters. */
    private static boolean isDelimiter(char c, @NonNull char[] delims) {
        for (char curr : delims) {
            if (curr == c) {
                return true;
            }
        }
        return false;
    }

    /**
     * Scans backwards from the given position for a trigger operator
     * (<code>-&gt;</code> or <code>::</code>) that is outside of any brackets
     * and quoted strings.
     *
     * @param textSequence
     *            text to scan
     * @param startPosition
     *            position to start scanning backwards from
     * @return index of the first character of the operator, or -1 when none
     *         is found
     */
    public static int getPreviousTriggerIndex(@NonNull CharSequence textSequence, int startPosition) {
        int rv = startPosition;
        int bracketsNum = 0;
        char inStringMode = 0;
        boolean inWhiteSpaceBeforeLiteral = false;
        boolean inLiteral = false;
        for (; rv > 0; rv--) {
            char currChar = textSequence.charAt(rv - 1);
            // A quote toggles string mode; the other quote kind is ignored
            // while inside a string.
            if (currChar == '\'' || currChar == '"') {
                inStringMode = inStringMode == 0 ? currChar : inStringMode == currChar ? 0 : inStringMode;
            }
            if (inStringMode != 0) {
                continue;
            }

            // The next block solves bug #205034:
            // store state for whitespace before literals and if another literal
            // comes before it - return 'not found'
            if (Character.isLetterOrDigit(currChar) || currChar == '$') {
                if (inWhiteSpaceBeforeLiteral && bracketsNum == 0) {
                    return -1;
                }
                inLiteral = true;
            } else {
                if (inLiteral && Character.isWhitespace(currChar)) {
                    inWhiteSpaceBeforeLiteral = true;
                }
                if (!Character.isWhitespace(currChar)) {
                    inWhiteSpaceBeforeLiteral = false;
                }
                inLiteral = false;
            }

            if (!Character.isLetterOrDigit(currChar) && currChar != '_' && currChar != '$'
                    && !Character.isWhitespace(currChar)) {
                switch (currChar) {
                case '(':
                case '[':
                case '{':
                    bracketsNum--;
                    if (bracketsNum < 0) {
                        return -1;
                    }
                    break;
                case ')':
                case ']':
                case '}':
                    bracketsNum++;
                    break;
                case ':':
                    if (bracketsNum == 0 && rv >= 2) {
                        if (textSequence.charAt(rv - 2) == ':') {
                            return rv - 2;
                        } else {
                            return -1;
                        }
                    }
                    break;
                case '>':
                    if (bracketsNum == 0 && rv >= 2) {
                        if (textSequence.charAt(rv - 2) == '-') {
                            return rv - 2;
                        } else {
                            return -1;
                        }
                    }
                    break;
                default:
                    if (bracketsNum == 0) {
                        return -1;
                    }
                }
            }
        }
        return -1;
    }

    /**
     * Scans backwards from the given position over a comma separated list of
     * identifiers.
     *
     * @param textSequence
     *            text to scan
     * @param endPosition
     *            position to start scanning backwards from
     * @return the computed start position of the list; the given end position
     *         is returned when two commas follow each other
     */
    public static int readIdentifierListStartIndex(@NonNull CharSequence textSequence, int endPosition) {
        int startPosition = endPosition;
        int listStartPosition = startPosition;
        boolean beforeWhitespace = false;
        boolean beforeComma = false;
        while (startPosition > 0) {
            final char ch = textSequence.charAt(startPosition - 1);
            if (Character.isLetterOrDigit(ch) || ch == '_') {
                if (beforeWhitespace) {
                    // identifiers delimited by a whitespace are not a list:
                    return --listStartPosition;
                }
                listStartPosition = startPosition;
                beforeComma = false;
            } else if (ch == ',') {
                if (beforeComma) {
                    // only one comma may delimit a list
                    return endPosition;
                }
                beforeComma = true;
                beforeWhitespace = false;
            } else if (Character.isWhitespace(ch) && !beforeComma) {
                beforeWhitespace = true;
            } else {
                return --listStartPosition;
            }
            startPosition--;
        }
        return listStartPosition;
    }

    /**
     * Read string argnames from CharSequence.
     * <p>
     * The text is tokenized with the PHP lexer. For every top-level argument
     * (arguments are separated by commas outside of any brackets) the result
     * holds the text of the argument when it is a single constant string
     * token, and null when the argument contains any other non-whitespace
     * token.
     * </p>
     *
     * TODO Nested parenthesis expression
     *
     * @param phpVersion
     *            PHP version used to create the lexer; the latest version is
     *            used when null
     * @param textSequence
     *            argument list text, optionally surrounded by '(' and ')';
     *            may be null
     * @return the argument names, never null; empty when the text is null or
     *         not longer than two characters
     */
    public static @NonNull String[] getArgNames(@Nullable PHPVersion phpVersion,
            @Nullable CharSequence textSequence) {
        List<String> args = new ArrayList<String>();
        if (textSequence != null && textSequence.length() > 2) {
            if (textSequence.charAt(textSequence.length() - 1) == ')') {
                textSequence = textSequence.subSequence(0, textSequence.length() - 1);
            }
            if (textSequence != null && textSequence.charAt(0) == '(') {
                textSequence = textSequence.subSequence(1, textSequence.length());
            }
            if (textSequence == null) {
                // should never happen (but makes @Nullable control for
                // parameter textSequence happy)
                return args.toArray(new String[args.size()]);
            }
            if (phpVersion == null) {
                phpVersion = PHPVersion.getLatestVersion();
            }
            AbstractPHPLexer lexer = PHPLexerFactory.createLexer(new StringReader(textSequence.toString()),
                    phpVersion);
            lexer.initialize(lexer.getScriptingState());
            String symbol = null;
            int level = 0;
            int argIndex = 0;
            do {
                try {
                    symbol = lexer.getNextToken();
                    if (symbol != null) {
                        // Convert to String before comparing: CharSequence
                        // does not define equals(), so comparing the raw
                        // sub-sequence of a non-String input with a String
                        // constant would never match.
                        String text = textSequence
                                .subSequence(lexer.getTokenStart(), lexer.getTokenStart() + lexer.getLength())
                                .toString();
                        if (symbol.equals(PHPRegionTypes.PHP_TOKEN)) {
                            if (text.equals(LPAREN) || text.equals(LBRACE) || text.equals(LBRACKET)) {
                                level++;
                            } else if (text.equals(RPAREN) || text.equals(RBRACE) || text.equals(RBRACKET)) {
                                level--;
                            } else if (level == 0 && text.equals(COMMA)) {
                                argIndex++;
                            }
                        } else if (level == 0 && symbol.equals(PHPRegionTypes.PHP_CONSTANT_ENCAPSED_STRING)) {
                            if (args.size() < argIndex + 1) {
                                args.add(text);
                            }
                        } else if (level == 0 && !symbol.equals(PHPRegionTypes.WHITESPACE)) {
                            // Any other token makes the current argument a
                            // non-constant expression.
                            if (args.size() < argIndex + 1) {
                                args.add(null);
                            } else {
                                args.set(argIndex, null);
                            }
                        }
                    }
                } catch (IOException e) {
                    // Stop tokenizing and return what was collected so far.
                    symbol = null;
                }
            } while (symbol != null);
        }
        return args.toArray(new String[args.size()]);
    }

    /**
     * Suggests the text to insert after the given statement in order to
     * complete an object (<code>-&gt;</code>) or class (<code>::</code>)
     * operator.
     *
     * @param statement
     *            statement text; it is trimmed before being inspected
     * @return null when the trimmed statement is empty or already ends with
     *         '&gt;' or "::"; "&gt;" when it ends with '-'; ":" when it ends
     *         with a single ':'; otherwise "-&gt;" or "::" depending on the
     *         text before the end of the statement
     */
    public static @Nullable String suggestObjectOperator(@NonNull CharSequence statement) {
        String insert = null;
        statement = statement.toString().trim();
        int statementPosition = statement.length() - 1;
        if (statementPosition < 0) {
            return null;
        }
        int charAt = statement.charAt(statementPosition);
        if (charAt == '>') {
            return null;
        }
        if (charAt == '-') {
            insert = String.valueOf('>');
        } else if (charAt == ':') {
            if (statementPosition > 0 && statement.charAt(statementPosition - 1) == ':') {
                return null;
            }
            insert = String.valueOf(':');
        } else {
            statementPosition = readBackwardSpaces(statement, statementPosition);
            switch (statement.charAt(statementPosition)) {
            case '}':
            case ')':
            case ']':
                insert = OBJECT_OPERATOR;
                break;
            case '>':
            case ':':
                return null;
            default:
                int identStart = readIdentifierStartIndex(statement, statementPosition, true);
                if (identStart < 0) {
                    return null;
                }
                if (statement.charAt(identStart) == '$' || statement.charAt(identStart) == '}') {
                    insert = OBJECT_OPERATOR;
                } else {
                    identStart = readBackwardSpaces(statement, identStart - 1);
                    if (identStart > 1 && statement.charAt(identStart) == '>'
                            && statement.charAt(identStart - 1) == '-') {
                        insert = OBJECT_OPERATOR;
                    } else {
                        insert = PAAMAYIM_NEKUDOTAYIM;
                    }
                }
            }
        }
        return insert;
    }
}