/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool
          with fuzzy matching, translation memory, keyword search,
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2016 Aaron Madlon-Kay
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.stream.IntStream;

/**
 * This reader tries to detect the correct EOL type for the input stream based
 * on the frequency of EOL chars encountered within a lookahead range. Calling
 * {@link #readLine()} will return lines that include "bad" EOL chars.
 * <p>
 * For example an input that is detected to be CRLF that contains a line
 * "foo\r\r\n" will return "foo\r" for that line. This differs from
 * {@link BufferedReader} in that the latter will treat all EOL chars as
 * starting new lines, so the above example would give "foo" and then "".
 * <p>
 * Detection happens once, in the constructor, on a sample of at most
 * {@code OConsts.READ_AHEAD_LIMIT} chars; the stream is then rewound so that
 * subsequent reads start from the beginning of the input.
 *
 * @author Aaron Madlon-Kay
 *
 */
public class MixedEolHandlingReader extends Reader {

    /** The wrapped reader; all reads are delegated to it. */
    private final BufferedReader in;

    /** The EOL sequence chosen by {@link #init()}: "\r", "\n" or "\r\n". */
    private String detectedEol;

    /** Whether more than one kind of EOL was seen in the lookahead sample. */
    private boolean hasMixedEol;

    /**
     * Wraps the given reader and immediately runs EOL detection on it.
     *
     * @param in
     *            the reader to wrap; it is used directly if it already is a
     *            {@link BufferedReader}, otherwise it is wrapped in one
     * @throws IOException
     *             if marking, reading or resetting the underlying reader fails
     */
    public MixedEolHandlingReader(Reader in) throws IOException {
        this.in = in instanceof BufferedReader ? (BufferedReader) in : new BufferedReader(in);
        init();
    }

    /**
     * Samples the start of the stream, records the EOL statistics and rewinds
     * the stream to where it was.
     *
     * @throws IOException
     *             if the underlying reader fails to mark, read or reset
     */
    private void init() throws IOException {
        in.mark(OConsts.READ_AHEAD_LIMIT);
        char[] buf = new char[OConsts.READ_AHEAD_LIMIT];
        int read = in.read(buf);

        // When the sample fills the whole buffer and ends with '\r', that CR
        // may be the first half of a CRLF whose '\n' lies just past the
        // lookahead window. Counting it as a lone CR would make a pure-CRLF
        // input look mixed, so the ambiguous trailing char is left out.
        int sampleLen = read;
        if (read > 0 && read == buf.length && buf[read - 1] == '\r') {
            sampleLen = read - 1;
        }

        // A read result of -1 (empty stream) yields all-zero counts because
        // the counting loop never runs.
        int[] counts = countEols(buf, sampleLen);
        hasMixedEol = IntStream.of(counts).filter(i -> i > 0).count() > 1;
        detectedEol = decideRepresentativeEol(counts[0], counts[1], counts[2]);
        in.reset();
    }

    /**
     * Counts the occurrences of each EOL kind in the first {@code len} chars
     * of {@code buf}. A '\r' immediately followed by '\n' is counted once as
     * CRLF and not as a separate CR or LF.
     *
     * @param buf
     *            the chars to inspect
     * @param len
     *            the number of leading chars of {@code buf} to inspect; zero
     *            or negative values inspect nothing
     * @return a three-element array holding, in order, the number of lone CRs,
     *         lone LFs and CRLF pairs
     */
    static int[] countEols(char[] buf, int len) {
        int cr = 0;
        int lf = 0;
        int crlf = 0;
        for (int i = 0; i < len; i++) {
            char c = buf[i];
            if (c == '\r') {
                if (i < len - 1 && buf[i + 1] == '\n') {
                    crlf++;
                    // Skip the '\n' that belongs to this CRLF pair.
                    i++;
                } else {
                    cr++;
                }
            } else if (c == '\n') {
                lf++;
            }
        }
        return new int[] { cr, lf, crlf };
    }

    /**
     * Picks the EOL sequence that best represents the given counts.
     * <p>
     * The kind with the strictly highest count wins. When no kind is strictly
     * the most frequent (a tie for first place, or no EOLs at all), CRLF is
     * chosen if at least one CRLF was counted; otherwise the platform line
     * separator is returned.
     *
     * @param cr
     *            number of lone CRs
     * @param lf
     *            number of lone LFs
     * @param crlf
     *            number of CRLF pairs
     * @return "\r", "\n", "\r\n" or {@link System#lineSeparator()}
     */
    static String decideRepresentativeEol(int cr, int lf, int crlf) {
        if (cr > lf && cr > crlf) {
            return "\r";
        }
        if (lf > cr && lf > crlf) {
            return "\n";
        }
        if (crlf > lf && crlf > cr) {
            return "\r\n";
        }
        if (crlf > 0) {
            return "\r\n";
        }
        return System.lineSeparator();
    }

    /**
     * @return the EOL sequence chosen for this input during construction
     */
    public String getDetectedEol() {
        return detectedEol;
    }

    /**
     * @return {@code true} if more than one kind of EOL (lone CR, lone LF,
     *         CRLF) was found in the lookahead sample
     */
    public boolean hasMixedEol() {
        return hasMixedEol;
    }

    /**
     * Delegates directly to the wrapped reader; no EOL handling is applied.
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        return in.read(cbuf, off, len);
    }

    /**
     * Reads one line, where a line ends only at the detected EOL sequence.
     * Any other CR or LF chars are kept as part of the returned text.
     *
     * @return the line without its terminating detected EOL; the remaining
     *         text if the stream ends before an EOL is found; or
     *         {@code null} if the stream is already at its end
     * @throws IOException
     *             if reading from the underlying reader fails
     */
    public String readLine() throws IOException {
        int c = in.read();
        if (c == -1) {
            return null;
        }
        StringBuilder line = new StringBuilder(1024);
        do {
            line.append((char) c);
            if (encounteredEol(line)) {
                return line.substring(0, line.length() - detectedEol.length());
            }
            c = in.read();
        } while (c != -1);
        // End of stream reached without a terminating EOL.
        return line.toString();
    }

    /**
     * Tells whether the given buffer currently ends with the detected EOL.
     *
     * @param sb
     *            the line text accumulated so far
     * @return {@code true} if the tail of {@code sb} equals the detected EOL
     */
    private boolean encounteredEol(StringBuilder sb) {
        int eolLen = detectedEol.length();
        int start = sb.length() - eolLen;
        if (start < 0) {
            return false;
        }
        for (int i = 0; i < eolLen; i++) {
            if (sb.charAt(start + i) != detectedEol.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Closes the wrapped reader.
     */
    @Override
    public void close() throws IOException {
        in.close();
    }
}