/*
 * This file is part of gwap, an open platform for games with a purpose
 *
 * Copyright (C) 2013
 * Project play4science
 * Lehr- und Forschungseinheit für Programmier- und Modellierungssprachen
 * Ludwig-Maximilians-Universität München
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package gwap.mit;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Helps with processing input text.
 *
 * @author Fabian Kneißl
 */
public class TextHelper {

	/**
	 * Matches a single token, which is one of:
	 * <ul>
	 * <li>a word: a Unicode letter ({@code \p{L}}) followed by any number of
	 * letters or combining marks ({@code \p{M}}), so that decomposed accented
	 * characters (e.g. {@code u} + U+0308) stay inside their word;</li>
	 * <li>any other single character that is neither a letter nor whitespace.
	 * {@code \p{Z}} is excluded in addition to {@code \s} because {@code \s}
	 * only covers ASCII whitespace by default and would otherwise let
	 * non-breaking spaces and other Unicode separators through as tokens.</li>
	 * </ul>
	 */
	private static final Pattern TOKEN_PATTERN =
			Pattern.compile("\\p{L}[\\p{L}\\p{M}]*|[^\\p{L}\\s\\p{Z}]");

	/**
	 * Splits the given text into word and single-character tokens.
	 * <p>
	 * Runs of letters (including attached combining marks) form one token
	 * each; every other non-whitespace character (punctuation, digits,
	 * symbols) becomes a token of its own. Whitespace, including Unicode
	 * separators such as the non-breaking space, only delimits tokens and is
	 * never returned.
	 *
	 * @param text the text to tokenize; may be {@code null}
	 * @return a new, modifiable list of the tokens in order of appearance;
	 *         empty if {@code text} is {@code null} or contains no tokens
	 */
	public static List<String> splitIntoTokens(String text) {
		List<String> tokens = new ArrayList<String>();
		if (text == null) {
			// Nothing to tokenize; avoid the NullPointerException from matcher().
			return tokens;
		}
		Matcher matcher = TOKEN_PATTERN.matcher(text);
		while (matcher.find()) {
			tokens.add(matcher.group());
		}
		return tokens;
	}
}