/*
* Copyright 2013
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.readability.measure;
import java.util.ArrayList;
import java.util.List;
/**
* Java port of readability measures from the Linux 'style' command ('diction'
* package).
*
*/
// FIXME add unit test
public class ReadabilityMeasures
{
    /** The readability formulas that {@link #getReadabilityScore} can dispatch to. */
    public enum Measures {
        ari,
        coleman_liau,
        flesch,
        fog,
        kincaid,
        lix,
        smog
    }

    /** Exclusive upper bounds of the Lix index for each grade in {@link #LIX_GRADES}. */
    private static final double[] LIX_LIMITS = { 34, 38, 41, 44, 48, 51, 54, 57 };

    /** Grade levels matching {@link #LIX_LIMITS} position by position. */
    private static final int[] LIX_GRADES = { 0, 5, 6, 7, 8, 9, 10, 11 };

    /** Grade reported when the Lix index is not below any entry of {@link #LIX_LIMITS}. */
    private static final int LIX_ABOVE_ALL_GRADES = 99;

    // Not final: it has to be rebuilt whenever the language changes (see setLanguage).
    private WordSyllableCounter syllableCounter;
    private String language;

    /**
     * Creates an instance for the default language (English, "en").
     */
    public ReadabilityMeasures()
    {
        this("en");
    }

    /**
     * Creates an instance for the given language.
     *
     * @param language
     *            language code handed to the syllable counter.
     */
    public ReadabilityMeasures(String language)
    {
        this.language = language;
        this.syllableCounter = new WordSyllableCounter(language);
    }

    /**
     * Computes the score of the requested readability measure.
     *
     * @param measure
     *            the measure to compute.
     * @param words
     *            the tokens of the text; tokens that are not words are ignored.
     * @param nrofSentences
     *            number of sentences.
     * @return the score of the selected measure.
     * @throws IllegalArgumentException
     *             if the measure is not handled.
     */
    public double getReadabilityScore(Measures measure, List<String> words, int nrofSentences)
    {
        switch (measure) {
        case ari:
            return ari(words, nrofSentences);
        case coleman_liau:
            return coleman_liau(words, nrofSentences);
        case flesch:
            return flesch(words, nrofSentences);
        case fog:
            return fog(words, nrofSentences);
        case kincaid:
            return kincaid(words, nrofSentences);
        case lix:
            return lix(words, nrofSentences);
        case smog:
            return smog(words, nrofSentences);
        default:
            throw new IllegalArgumentException("Unknown measure: " + measure.name());
        }
    }

    /**
     * Only non-empty strings that consist entirely of letters or digits are
     * considered words.
     */
    private static boolean isWord(String strWord)
    {
        // Without this check the loop below would accept the empty string,
        // and empty tokens would inflate the word count.
        if (strWord.length() == 0) {
            return false;
        }
        for (int i = 0; i < strWord.length(); i++) {
            if (!Character.isLetterOrDigit(strWord.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns a new list holding only those tokens accepted by
     * {@link #isWord(String)}; the input list is not modified.
     */
    private static List<String> filterWords(List<String> words)
    {
        List<String> newWords = new ArrayList<String>();
        for (String word : words) {
            if (isWord(word)) {
                newWords.add(word);
            }
        }
        return newWords;
    }

    /**
     * Calculate Kincaid Formula (reading grade).
     *
     * @param words
     *            words.
     * @param nrofSentences
     *            number of sentences.
     * @return score.
     */
    public double kincaid(List<String> words, int nrofSentences)
    {
        words = filterWords(words);
        int nrofSyllables = this.syllableCounter.countSyllables(words);
        return kincaid(words.size(), nrofSyllables, nrofSentences);
    }

    private static double kincaid(int nrofWords, int nrofSyllables, int nrofSentences)
    {
        return 11.8 * (((double) nrofSyllables) / nrofWords)
                + 0.39 * (((double) nrofWords) / nrofSentences) - 15.59;
    }

    /**
     * Calculate Automated Readability Index (reading grade).
     *
     * @param words
     *            words.
     * @param nrofSentences
     *            number of sentences.
     * @return score.
     */
    public double ari(List<String> words, int nrofSentences)
    {
        words = filterWords(words);
        int nrofLetters = getNrofLetters(words);
        return ari(nrofLetters, words.size(), nrofSentences);
    }

    private static double ari(int nrofLetters, int nrofWords, int nrofSentences)
    {
        return 4.71 * (((double) nrofLetters) / nrofWords)
                + 0.5 * (((double) nrofWords) / nrofSentences) - 21.43;
    }

    /**
     * Calculate Coleman-Liau formula.
     *
     * @param words
     *            words.
     * @param nrofSentences
     *            number of sentences.
     * @return score.
     */
    public double coleman_liau(List<String> words, int nrofSentences)
    {
        words = filterWords(words);
        int nrofLetters = getNrofLetters(words);
        return coleman_liau(nrofLetters, words.size(), nrofSentences);
    }

    private static double coleman_liau(int nrofLetters, int nrofWords, int nrofSentences)
    {
        // NOTE(review): the sentence term is kept exactly as ported. It appears to
        // differ from the commonly published Coleman-Liau index, which scales
        // sentences per 100 words - confirm against the 'style' sources before
        // changing it.
        // 100.0 keeps the denominator in floating point so it cannot overflow int.
        return 5.89 * (((double) nrofLetters) / nrofWords)
                - 0.3 * (((double) nrofSentences) / (100.0 * nrofWords)) - 15.8;
    }

    /**
     * Calculate Flesch reading ease score.
     *
     * @param words
     *            words.
     * @param nrofSentences
     *            number of sentences.
     * @return score.
     */
    public double flesch(List<String> words, int nrofSentences)
    {
        words = filterWords(words);
        int nrofSyllables = this.syllableCounter.countSyllables(words);
        return flesch(nrofSyllables, words.size(), nrofSentences);
    }

    private static double flesch(int nrofSyllables, int nrofWords, int nrofSentences)
    {
        return 206.835 - 84.6 * (((double) nrofSyllables) / nrofWords)
                - 1.015 * (((double) nrofWords) / nrofSentences);
    }

    /**
     * Calculate FOG index.
     *
     * @param words
     *            words.
     * @param nrofSentences
     *            number of sentences.
     * @return score.
     */
    public double fog(List<String> words, int nrofSentences)
    {
        words = filterWords(words);
        int nrofBigwords = getNrofBigwords(words);
        return fog(words.size(), nrofBigwords, nrofSentences);
    }

    private static double fog(int nrofWords, int nrofBigwords, int nrofSentences)
    {
        return ((((double) nrofWords) / nrofSentences + (100.0 * nrofBigwords) / nrofWords) * 0.4);
    }

    /**
     * Calculate Björnsson's Lix formula and map it to a grade level.
     *
     * @param words
     *            words.
     * @param nrofSentences
     *            number of sentences.
     * @return the grade level derived from the Lix index: 0 if the index is
     *         lower than any grade (below 34), a grade from 5 to 11 for the
     *         intermediate ranges, and 99 if the index is higher than any
     *         grade (57 or above).
     */
    public double lix(List<String> words, int nrofSentences)
    {
        words = filterWords(words);
        int nrofLongWords = getNrofLongwords(words);
        return lix(words.size(), nrofLongWords, nrofSentences);
    }

    private static double lix(int nrofWords, int nrofLongWords, int nrofSentences)
    {
        double idx = ((double) nrofWords) / nrofSentences + 100.0 * nrofLongWords / nrofWords;
        // The first limit the index stays below decides the grade. A NaN index
        // (e.g. no words) fails every comparison and falls through to 99.
        for (int i = 0; i < LIX_LIMITS.length; i++) {
            if (idx < LIX_LIMITS[i]) {
                return LIX_GRADES[i];
            }
        }
        return LIX_ABOVE_ALL_GRADES;
    }

    /**
     * Calculate SMOG-Grading.
     *
     * @param words
     *            words.
     * @param nrofSentences
     *            number of sentences.
     * @return score.
     */
    public double smog(List<String> words, int nrofSentences)
    {
        words = filterWords(words);
        int nrofBigwords = getNrofBigwords(words);
        return smog(nrofBigwords, nrofSentences);
    }

    private static double smog(int nrofBigWords, int nrofSentences)
    {
        return Math.sqrt((((double) nrofBigWords) / ((double) nrofSentences)) * 30.0) + 3.0;
    }

    /**
     * @return the language code currently in use.
     */
    public String getLanguage()
    {
        return language;
    }

    /**
     * Switches to another language.
     *
     * @param language
     *            the new language code.
     */
    public void setLanguage(String language)
    {
        this.language = language;
        // The syllable counter is created per language, so it must be replaced
        // as well; otherwise syllable-based measures would keep using the
        // language given at construction time.
        this.syllableCounter = new WordSyllableCounter(language);
    }

    /**
     * @param words
     *            An iterable over words.
     * @return The summed length of all words.
     */
    private static int getNrofLetters(Iterable<String> words)
    {
        int count = 0;
        for (String word : words) {
            count += word.length();
        }
        return count;
    }

    /**
     * @param words
     *            An iterable over words.
     * @return The number of words with at least 3 syllables.
     */
    private int getNrofBigwords(Iterable<String> words)
    {
        int count = 0;
        for (String word : words) {
            if (this.syllableCounter.countSyllables(word) >= 3) {
                count++;
            }
        }
        return count;
    }

    /**
     * @param words
     *            An iterable over words.
     * @return The number of words with more than 6 letters.
     */
    private static int getNrofLongwords(Iterable<String> words)
    {
        int count = 0;
        for (String word : words) {
            if (word.length() > 6) {
                count++;
            }
        }
        return count;
    }
}