/*
 * Copyright 2007 T-Rank AS
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package no.trank.openpipe.reader;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import no.trank.openpipe.api.document.Document;
import no.trank.openpipe.api.document.DocumentOperation;

/**
 * A {@link FileDocumentReader} that turns each file into a {@link Document} whose
 * {@link #getBodyField() body field} holds the complete decoded text of the file.
 *
 * @version $Revision$
 */
public class TextFileDocumentReader extends FileDocumentReader {
   private static final Logger log = LoggerFactory.getLogger(TextFileDocumentReader.class);
   private String bodyField = "body";
   private String encoding;

   /**
    * Creates the per-file reader using the currently configured body field and encoding.
    *
    * @return a reader that decodes files with the configured charset, or with the platform default charset
    *         when no encoding has been set.
    *
    * @throws NullPointerException if the body field has been set to <tt>null</tt>.
    * @throws java.nio.charset.IllegalCharsetNameException if the configured encoding name is illegal.
    * @throws java.nio.charset.UnsupportedCharsetException if the configured encoding is not supported.
    */
   @Override
   protected FileDocReader createReader() {
      if (bodyField == null) {
         throw new NullPointerException("bodyField cannot be null");
      }
      final Charset charset;
      if (encoding == null) {
         charset = Charset.defaultCharset();
         log.info("Using default encoding '{}'", charset.name());
      } else {
         charset = Charset.forName(encoding);
      }
      return new TextFileDocReader(bodyField, charset);
   }

   /**
    * Gets the name of the document field that receives the file content.
    *
    * @return the name of the body field. Default is <tt>&quot;body&quot;</tt>.
    */
   public String getBodyField() {
      return bodyField;
   }

   /**
    * Sets the name of the document field that receives the file content.
    *
    * @param bodyField the field name. Must not be <tt>null</tt> when {@link #createReader()} is called.
    */
   public void setBodyField(String bodyField) {
      this.bodyField = bodyField;
   }

   /**
    * Gets the name of the charset used for decoding files.
    *
    * @return the charset name, or <tt>null</tt> if the platform default charset is to be used.
    */
   public String getEncoding() {
      return encoding;
   }

   /**
    * Sets the name of the charset used for decoding files.
    *
    * @param encoding a charset name accepted by {@link Charset#forName(String)}, or <tt>null</tt> to use the
    *                 platform default charset.
    */
   public void setEncoding(String encoding) {
      this.encoding = encoding;
   }

   /**
    * Reads a whole file as text and stores it in a single field of a new document.
    */
   private static class TextFileDocReader implements FileDocReader {
      /**
       * Upper bound for the pre-sized buffer. <tt>File.length()</tt> is a <tt>long</tt>; narrowing it
       * unchecked would wrap to a negative or arbitrary capacity for files of 2 GiB or more.
       */
      private static final int MAX_INITIAL_CAPACITY = Integer.MAX_VALUE - 8;
      private static final int READ_BUFFER_SIZE = 2048;

      private final String bodyField;
      private final Charset charset;

      public TextFileDocReader(String bodyField, Charset charset) {
         this.bodyField = bodyField;
         this.charset = charset;
      }

      /**
       * Reads <tt>file</tt> completely, decoding it with the configured charset.
       *
       * @param file the file to read.
       *
       * @return a new document with the file content set as the value of the body field and the operation set
       *         to {@link DocumentOperation#ADD_VALUE}.
       *
       * @throws RuntimeException if the file cannot be opened or read. The underlying {@link IOException} is
       *         available as the cause.
       */
      @Override
      public Document getDocument(File file) {
         try {
            final Reader reader = new InputStreamReader(new FileInputStream(file), charset);
            try {
               // The byte length is only a capacity hint; the builder grows or stays partly unused as needed.
               final int capacity = (int) Math.min(file.length(), (long) MAX_INITIAL_CAPACITY);
               final StringBuilder sb = new StringBuilder(capacity);
               final char[] cbuf = new char[READ_BUFFER_SIZE];
               int charsRead = reader.read(cbuf);
               while (charsRead > 0) {
                  sb.append(cbuf, 0, charsRead);
                  charsRead = reader.read(cbuf);
               }
               final Document doc = new Document();
               doc.setFieldValue(bodyField, sb.toString());
               doc.setOperation(DocumentOperation.ADD_VALUE);
               return doc;
            } finally {
               try {
                  reader.close();
               } catch (IOException e) {
                  // The content has already been read (or a read failure is propagating), so a failing
                  // close must not change the outcome. It is still recorded for troubleshooting.
                  log.debug("Could not close reader for file '" + file + "'", e);
               }
            }
         } catch (IOException e) {
            // Also covers FileNotFoundException and UnsupportedEncodingException, which are IOExceptions.
            throw new RuntimeException("Could not read file '" + file + "'", e);
         }
      }
   }
}