/* * Copyright 2000-2016 JetBrains s.r.o. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.intellij.psi.css; import com.intellij.lang.ParserDefinition; import com.intellij.lexer.Lexer; import com.intellij.openapi.util.text.StringUtil; import com.intellij.psi.tree.IElementType; import com.intellij.psi.tree.TokenSet; import org.jetbrains.annotations.NotNull; public class MinifiedFilesUtil { private MinifiedFilesUtil() { } private static final int MAX_OFFSET = 2048; // this is how far we look through the file private static final int MIN_SIZE = 150; // file should be large enough to be considered as minified (only non-comment text counts) private static final double MAX_UNNEEDED_OFFSET_PERCENTAGE = 0.01; private static final int COUNT_OF_CONSIDERING_CHARACTERS_FROM_END_OF_FILE = 400; /** * Finds out whether the file minified by using common (not language-specific) heuristics. * Can be used for checking of css/less/scss/sass and js files. * * @param fileContent target file content * @param parserDefinition Parser definition of target language * @param noWSRequireAfterTokenSet TokenSet of types that doesn't require whitespaces after them. */ public static boolean isMinified(@NotNull CharSequence fileContent, @NotNull ParserDefinition parserDefinition, @NotNull TokenSet noWSRequireBeforeTokenSet, @NotNull TokenSet noWSRequireAfterTokenSet) { return isMinified(fileContent, parserDefinition, noWSRequireBeforeTokenSet, noWSRequireAfterTokenSet, parserDefinition.getStringLiteralElements()); } /** * Finds out whether the file minified by using common (not language-specific) heuristics. * Can be used for checking of css/less/scss/sass and js files. * * @param fileContent target file content * @param parserDefinition Parser definition of target language * @param noWSRequireAfterTokenSet TokenSet of types that doesn't require whitespaces after them. * @param stringsTokenSet TokenSet of types considered as string elements */ public static boolean isMinified(@NotNull CharSequence fileContent, @NotNull ParserDefinition parserDefinition, @NotNull TokenSet noWSRequireBeforeTokenSet, @NotNull TokenSet noWSRequireAfterTokenSet, @NotNull TokenSet stringsTokenSet) { Lexer lexer = parserDefinition.createLexer(null); lexer.start(fileContent); if (!isMinified(lexer, parserDefinition, noWSRequireBeforeTokenSet, noWSRequireAfterTokenSet, stringsTokenSet)) { return false; } else if (lexer.getTokenType() == null) { // whole file had been considered return true; } int startOffset = fileContent.length() - COUNT_OF_CONSIDERING_CHARACTERS_FROM_END_OF_FILE; if (startOffset <= 0) { return true; } while (lexer.getTokenType() != null && lexer.getTokenStart() < startOffset) lexer.advance(); if (lexer.getTokenType() == null || (fileContent.length() - lexer.getTokenStart() < MIN_SIZE * 2)) { return true; } return isMinified(lexer, parserDefinition, noWSRequireBeforeTokenSet, noWSRequireAfterTokenSet, stringsTokenSet); } protected static boolean isMinified(@NotNull Lexer lexer, @NotNull ParserDefinition parserDefinition, @NotNull TokenSet noWSRequireBeforeTokenSet, @NotNull TokenSet noWSRequireAfterTokenSet, @NotNull TokenSet stringLiteralElements) { int offsetIgnoringComments = 0; int offsetIgnoringCommentsAndStrings = 0; int unneededWhitespaceCount = 0; String lastTokenText = null; IElementType lastTokenType = null; TokenSet whitespaceTokens = parserDefinition.getWhitespaceTokens(); TokenSet commentTokens = parserDefinition.getCommentTokens(); boolean lastWhiteSpaceWasHandled = false; for (IElementType tokenType = lexer.getTokenType(); tokenType != null; lexer.advance(), tokenType = lexer.getTokenType()) { if (commentTokens.contains(tokenType)) { lastTokenType = tokenType; lastTokenText = lexer.getTokenText(); continue; } int tokenLength = lexer.getTokenEnd() - lexer.getTokenStart(); if (isNewLine(lexer, tokenLength) && commentTokens.contains(lastTokenType) && !noWSRequireAfterTokenSet.contains(lastTokenType)) { // do not count new line after line comment token since it's required and it's part of comment continue; } offsetIgnoringComments += tokenLength; if (!stringLiteralElements.contains(tokenType)) { offsetIgnoringCommentsAndStrings += tokenLength; } if (whitespaceTokens.contains(tokenType)) { lastWhiteSpaceWasHandled = false; if (!commentTokens.contains(lastTokenType) && tokenLength > 1) { lexer.advance(); if (lexer.getTokenType() == null) { // it was last token break; } else { return false; } } if (isNewLine(lexer, tokenLength) && StringUtil.equals(lastTokenText, "\n") || tokenLength > 0 && noWSRequireAfterTokenSet.contains(lastTokenType)) { unneededWhitespaceCount++; lastWhiteSpaceWasHandled = true; } } else { if (!lastWhiteSpaceWasHandled && whitespaceTokens.contains(lastTokenType) && StringUtil.isNotEmpty(lastTokenText) && noWSRequireBeforeTokenSet.contains(tokenType)) { unneededWhitespaceCount++; } } if (stringLiteralElements.contains(tokenType)) { lastTokenType = tokenType; lastTokenText = lexer.getTokenText(); continue; } if (offsetIgnoringComments >= MAX_OFFSET) { break; } lastTokenType = tokenType; lastTokenText = lexer.getTokenText(); } return offsetIgnoringComments >= MIN_SIZE && (unneededWhitespaceCount + 0.0d) / offsetIgnoringCommentsAndStrings < MAX_UNNEEDED_OFFSET_PERCENTAGE; } private static boolean isNewLine(@NotNull Lexer lexer, int tokenLength) { return tokenLength == 1 && StringUtil.equals(lexer.getTokenText(), "\n"); } public static boolean isMinified(@NotNull CharSequence fileContent, @NotNull ParserDefinition parserDefinition) { return isMinified(fileContent, parserDefinition, TokenSet.EMPTY, TokenSet.EMPTY); } }