// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.wordnet;
/**
 * String helpers that break camel-case style identifiers into space-separated words.
 */
public class TextUtils {

    /**
     * Inserts a single space in front of every upper-case character that is considered to start
     * a new word. The very first character of the input is never preceded by a space.
     * <p>
     * An upper-case character at position {@code i > 0} starts a new word when either
     * <ul>
     * <li>the previous character is lower-case and the following character is a letter, or</li>
     * <li>the previous character is upper-case and the following character is lower-case.</li>
     * </ul>
     * When the upper-case character is the last one of the input, the "following character" is
     * taken to be a space, so no separator is inserted in front of it.
     *
     * @param input the text to cut; must not be {@code null}
     * @return the input with the word separators inserted, or an empty string for empty input
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public static String cutText(String input) {
        final int length = input.length();
        final StringBuilder result = new StringBuilder();
        if (length == 0) {
            return result.toString();
        }

        char previous = input.charAt(0);
        result.append(previous);

        int index = 1;
        while (index < length) {
            final char current = input.charAt(index);
            if (Character.isUpperCase(current)) {
                // Past the end of the input we pretend a blank follows.
                final char following = (index + 1 < length) ? input.charAt(index + 1) : ' ';
                if (startsNewWord(previous, following)) {
                    result.append(' ');
                }
            }
            result.append(current);
            previous = current;
            index++;
        }
        return result.toString();
    }

    /**
     * Cuts the input with {@link #cutText(String)} and splits the outcome on the single space
     * character.
     *
     * @param input the text to cut and split; must not be {@code null}
     * @return the tokens produced by splitting the cut text on {@code " "}
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public static String[] cutTextAndSplit(String input) {
        final String separated = cutText(input);
        return separated.split(" ");
    }

    /**
     * Tells whether an upper-case character surrounded by the given neighbours begins a new word.
     *
     * @param previous the character right before the upper-case character
     * @param following the character right after it (a space when there is none)
     * @return {@code true} if a separator has to be placed before the upper-case character
     */
    private static boolean startsNewWord(char previous, char following) {
        // lower -> Upper + letter: e.g. the 'W' in "helloWorld".
        final boolean afterLowerCase = Character.isLowerCase(previous) && Character.isLetter(following);
        // Upper -> Upper + lower: e.g. the 'P' in "XMLParser".
        final boolean endOfUpperCaseRun = Character.isUpperCase(previous) && Character.isLowerCase(following);
        return afterLowerCase || endOfUpperCaseRun;
    }
}