/** ======================================================================== * handytrowel: src/main/java/nlp/WordReader.java * Read line-delimited words from a file with hash comments. * ======================================================================== * Copyright (c) 2014, Asim Ihsan, All rights reserved. * <http://www.asimihsan.com> * https://github.com/asimihsan/handytrowel/blob/master/LICENSE * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * ======================================================================== */ package com.asimihsan.handytrowel.nlp; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; import java.util.LinkedList; import java.util.List; /** * Read line-delimited words from a file with hash comments. * * @author Asim Ihsan */ public class WordReader { private final String resourcePath; private WordReader(String resourcePath) { this.resourcePath = resourcePath; } public static WordReader wordReaderWithResourcePath(String resourcePath) { WordReader reader = new WordReader(resourcePath); return reader; } public List<String> getWords() throws IOException { List<String> words = new LinkedList<>(); try ( InputStream is = getClass().getResourceAsStream(this.resourcePath); InputStreamReader isr = new InputStreamReader(is, StandardCharsets.US_ASCII); BufferedReader br = new BufferedReader(isr); ) { String line = null; while ((line = br.readLine()) != null) { if (line.startsWith("#")) continue; if (line.trim().length() == 0) continue; words.add(line.trim()); } } return words; } }