/* LanguageTool, a natural language style checker
* Copyright (C) 2011 Michael Bryant
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool.rules;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.ResourceBundle;
import java.util.regex.Pattern;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedTokenReadings;
/**
* A rule that warns on long sentences. Note that this rule is off by default.
*/
public class LongSentenceRule extends Rule {

  /** Word limit used by {@link #LongSentenceRule(ResourceBundle)}. */
  private static final int DEFAULT_MAX_WORDS = 40;

  /** A token that consists of exactly one of these punctuation characters is not counted as a word. */
  private static final Pattern NON_WORD_REGEX = Pattern.compile("[.?!:;,~’'\"„“»«‚‘›‹()\\[\\]-]");

  /** Value passed as {@code defaultActive} by the shorter constructors: the rule starts switched off. */
  private static final boolean DEFAULT_ACTIVE = false;

  /** Maximum number of words a sentence may contain without triggering a match. */
  private final int maxWords;

  /**
   * @param messages the resource bundle the rule's description and message texts are read from
   * @param maxSentenceLength the maximum sentence length (in words) that does not yet trigger a match
   * @param defaultActive whether the rule is active by default; if {@code false}, the rule is
   *                      switched off by default
   * @throws IllegalArgumentException if {@code maxSentenceLength} is not greater than zero
   * @since 3.7
   */
  public LongSentenceRule(ResourceBundle messages, int maxSentenceLength, boolean defaultActive) {
    super(messages);
    super.setCategory(Categories.STYLE.getCategory(messages));
    if (maxSentenceLength <= 0) {
      throw new IllegalArgumentException("maxSentenceLength must be > 0: " + maxSentenceLength);
    }
    maxWords = maxSentenceLength;
    if (!defaultActive) {
      setDefaultOff();
    }
    setLocQualityIssueType(ITSIssueType.Style);
  }

  /**
   * Creates a rule that is off by default.
   *
   * @param messages the resource bundle the rule's description and message texts are read from
   * @param maxSentenceLength the maximum sentence length that does not yet trigger a match
   * @throws IllegalArgumentException if {@code maxSentenceLength} is not greater than zero
   * @since 2.4
   */
  public LongSentenceRule(ResourceBundle messages, int maxSentenceLength) {
    this(messages, maxSentenceLength, DEFAULT_ACTIVE);
  }

  /**
   * Creates a rule with the default maximum sentence length (40 words) that is off by default.
   *
   * @param messages the resource bundle the rule's description and message texts are read from
   */
  public LongSentenceRule(ResourceBundle messages) {
    this(messages, DEFAULT_MAX_WORDS, DEFAULT_ACTIVE);
  }

  @Override
  public String getDescription() {
    return MessageFormat.format(messages.getString("long_sentence_rule_desc"), maxWords);
  }

  @Override
  public String getId() {
    return "TOO_LONG_SENTENCE";
  }

  /**
   * Counts the words of the sentence and reports a single match if there are more than the
   * configured maximum. Sentence start/end tokens and single punctuation characters are not
   * counted as words.
   *
   * @param sentence the analyzed sentence to check
   * @return an array with one match starting at position 0 and ending at the end of the last
   *         non-whitespace token, or an empty array if the sentence is not too long
   */
  @Override
  public RuleMatch[] match(AnalyzedSentence sentence) throws IOException {
    List<RuleMatch> ruleMatches = new ArrayList<>();
    AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
    if (tokens.length <= maxWords) {   // just a short-circuit: too few tokens to exceed the limit
      return toRuleMatchArray(ruleMatches);
    }
    int numWords = 0;
    for (AnalyzedTokenReadings aToken : tokens) {
      if (isWord(aToken)) {
        numWords++;
        if (numWords > maxWords) {
          break;   // the limit is exceeded, the exact count does not matter anymore
        }
      }
    }
    if (numWords > maxWords) {
      // Use the real end position of the last token. Summing up token lengths would ignore
      // the whitespace between tokens and thus end the match long before the sentence ends.
      AnalyzedTokenReadings lastToken = tokens[tokens.length - 1];
      int endPos = lastToken.getStartPos() + lastToken.getToken().length();
      String msg = MessageFormat.format(messages.getString("long_sentence_rule_msg"), maxWords);
      ruleMatches.add(new RuleMatch(this, 0, endPos, msg));
    }
    return toRuleMatchArray(ruleMatches);
  }

  /** Returns whether the token counts as a word, i.e. is neither a sentence boundary nor punctuation. */
  private static boolean isWord(AnalyzedTokenReadings aToken) {
    return !aToken.isSentenceStart()
        && !aToken.isSentenceEnd()
        && !NON_WORD_REGEX.matcher(aToken.getToken()).matches();
  }

}