/* Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package java.util; import java.io.Closeable; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.StringReader; import java.io.UnsupportedEncodingException; import java.math.BigDecimal; import java.math.BigInteger; import java.nio.CharBuffer; import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; import java.nio.charset.Charset; import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; import java.text.NumberFormat; import java.util.regex.MatchResult; import java.util.regex.Matcher; import java.util.regex.Pattern; import libcore.io.IoUtils; /** * A parser that parses a text string of primitive types and strings with the * help of regular expressions. This class is not as useful as it might seem. * It's very inefficient for communicating between machines; you should use JSON, * protobufs, or even XML for that. Very simple uses might get away with {@link String#split}. * For input from humans, the use of locale-specific regular expressions make it not only * expensive but also somewhat unpredictable. 
 *
 * <p>This class supports localized numbers and various
 * radixes. The input is broken into tokens by the delimiter pattern, which is
 * {@code \\p{javaWhitespace}} by default.
 *
 * <p>Example:
 * <pre>
 * Scanner s = new Scanner("1A true");
 * assertEquals(26, s.nextInt(16));
 * assertEquals(true, s.nextBoolean());
 * </pre>
 *
 * <p>The {@code Scanner} class is not thread-safe.
 */
public final class Scanner implements Closeable, Iterator<String> {

    // Regex alternation of the line terminator sequences recognized by this class.
    private static final String NL = "\n|\r\n|\r|\u0085|\u2028|\u2029";

    // Default delimiting pattern: a run of one or more whitespace characters.
    private static final Pattern DEFAULT_DELIMITER = Pattern.compile("\\p{javaWhitespace}+");

    // Pattern accepted for boolean tokens ("true" or "false", ignoring case).
    private static final Pattern BOOLEAN_PATTERN = Pattern.compile("true|false", Pattern.CASE_INSENSITIVE);

    // Pattern used to recognize a single line terminator.
    private static final Pattern LINE_TERMINATOR = Pattern.compile(NL);

    // Pattern used to recognize a run of one or more line terminators.
    private static final Pattern MULTI_LINE_TERMINATOR = Pattern.compile("(" + NL + ")+");

    // Pattern used to recognize a line: text followed by a line terminator, or
    // non-empty text that is not followed by one.
    private static final Pattern LINE_PATTERN = Pattern.compile(".*(" + NL + ")|.+$");

    // The pattern matches anything, including line terminators (DOTALL flag).
    private static final Pattern ANY_PATTERN = Pattern.compile("(?s).*");

    // Radix used for integer parsing until changed.
    private static final int DEFAULT_RADIX = 10;

    // The input source of scanner.
    private Readable input;

    // Holds the characters read from the input so far; starts at 1024 chars.
    private CharBuffer buffer = CharBuffer.allocate(1024);

    // Pattern currently used to separate tokens.
    private Pattern delimiter = DEFAULT_DELIMITER;

    // Single matcher instance reused (via usePattern/region) for every scan operation.
    private Matcher matcher;

    // Radix used by the no-argument integer methods.
    private int currentRadix = DEFAULT_RADIX;

    // Locale used for number parsing; starts as the default locale.
    private Locale locale = Locale.getDefault();

    // The position where find begins.
    private int findStartIndex = 0;

    // The last find start position.
    private int preStartIndex = findStartIndex;

    // The length of the buffer.
    private int bufferLength = 0;

    // Record the status of this scanner. True if the scanner is closed.
private boolean closed = false; private IOException lastIOException; private boolean matchSuccessful = false; private DecimalFormat decimalFormat; // Records whether the underlying readable has more input. private boolean inputExhausted = false; private Object cachedNextValue = null; private int cachedNextIndex = -1; private Pattern cachedFloatPattern = null; private int cachedIntegerPatternRadix = -1; private Pattern cachedIntegerPattern = null; /** * Creates a {@code Scanner} with the specified {@code File} as input. The default charset * is applied when reading the file. * * @param src * the file to be scanned. * @throws FileNotFoundException * if the specified file does not exist. */ public Scanner(File src) throws FileNotFoundException { this(src, Charset.defaultCharset().name()); } /** * Creates a {@code Scanner} with the specified {@code File} as input. The specified charset * is applied when reading the file. * * @param src * the file to be scanned. * @param charsetName * the name of the encoding type of the file. * @throws FileNotFoundException * if the specified file does not exist. * @throws IllegalArgumentException * if the specified coding does not exist. */ public Scanner(File src, String charsetName) throws FileNotFoundException { if (src == null) { throw new NullPointerException("src == null"); } FileInputStream fis = new FileInputStream(src); if (charsetName == null) { throw new IllegalArgumentException("charsetName == null"); } InputStreamReader streamReader; try { streamReader = new InputStreamReader(fis, charsetName); } catch (UnsupportedEncodingException e) { IoUtils.closeQuietly(fis); throw new IllegalArgumentException(e.getMessage()); } initialize(streamReader); } /** * Creates a {@code Scanner} on the specified string. * * @param src * the string to be scanned. */ public Scanner(String src) { initialize(new StringReader(src)); } /** * Creates a {@code Scanner} on the specified {@code InputStream}. 
The default charset is * applied when decoding the input. * * @param src * the {@code InputStream} to be scanned. */ public Scanner(InputStream src) { this(src, Charset.defaultCharset().name()); } /** * Creates a {@code Scanner} on the specified {@code InputStream}. The specified charset is * applied when decoding the input. * * @param src * the {@code InputStream} to be scanned. * @param charsetName * the encoding type of the {@code InputStream}. * @throws IllegalArgumentException * if the specified character set is not found. */ public Scanner(InputStream src, String charsetName) { if (src == null) { throw new NullPointerException("src == null"); } InputStreamReader streamReader; try { streamReader = new InputStreamReader(src, charsetName); } catch (UnsupportedEncodingException e) { throw new IllegalArgumentException(e.getMessage()); } initialize(streamReader); } /** * Creates a {@code Scanner} with the specified {@code Readable} as input. * * @param src * the {@code Readable} to be scanned. */ public Scanner(Readable src) { if (src == null) { throw new NullPointerException("src == null"); } initialize(src); } /** * Creates a {@code Scanner} with the specified {@code ReadableByteChannel} as * input. The default charset is applied when decoding the input. * * @param src * the {@code ReadableByteChannel} to be scanned. */ public Scanner(ReadableByteChannel src) { this(src, Charset.defaultCharset().name()); } /** * Creates a {@code Scanner} with the specified {@code ReadableByteChannel} as * input. The specified charset is applied when decoding the input. * * @param src * the {@code ReadableByteChannel} to be scanned. * @param charsetName * the encoding type of the content. * @throws IllegalArgumentException * if the specified character set is not found. 
*/ public Scanner(ReadableByteChannel src, String charsetName) { if (src == null) { throw new NullPointerException("src == null"); } if (charsetName == null) { throw new IllegalArgumentException("charsetName == null"); } initialize(Channels.newReader(src, charsetName)); } private void initialize(Readable input) { this.input = input; matcher = delimiter.matcher(""); matcher.useTransparentBounds(true); matcher.useAnchoringBounds(false); } /** * Closes this {@code Scanner} and the underlying input if the input implements * {@code Closeable}. If the {@code Scanner} has been closed, this method will have * no effect. Any scanning operation called after calling this method will throw * an {@code IllegalStateException}. * * @see Closeable */ public void close() { if (closed) { return; } if (input instanceof Closeable) { try { ((Closeable) input).close(); } catch (IOException e) { lastIOException = e; } } closed = true; } /** * Returns the delimiter {@code Pattern} in use by this {@code Scanner}. * * @return the delimiter {@code Pattern} in use by this {@code Scanner}. */ public Pattern delimiter() { return delimiter; } /** * Tries to find the pattern in the input. Delimiters are ignored. If the * pattern is found before line terminator, the matched string will be * returned, and the {@code Scanner} will advance to the end of the matched string. * Otherwise, {@code null} will be returned and the {@code Scanner} will not advance. * When waiting for input, the {@code Scanner} may be blocked. All the * input may be cached if no line terminator exists in the buffer. * * @param pattern * the pattern to find in the input. * @return the matched string or {@code null} if the pattern is not found * before the next line terminator. * @throws IllegalStateException * if the {@code Scanner} is closed. 
*/ public String findInLine(Pattern pattern) { checkOpen(); checkNotNull(pattern); int horizonLineSeparator = 0; matcher.usePattern(MULTI_LINE_TERMINATOR); matcher.region(findStartIndex, bufferLength); boolean findComplete = false; int terminatorLength = 0; while (!findComplete) { if (matcher.find()) { horizonLineSeparator = matcher.start(); terminatorLength = matcher.end() - matcher.start(); findComplete = true; } else { if (!inputExhausted) { readMore(); resetMatcher(); } else { horizonLineSeparator = bufferLength; findComplete = true; } } } matcher.usePattern(pattern); /* * TODO The following 2 statements are used to deal with regex's bug. * java.util.regex.Matcher.region(int start, int end) implementation * does not have any effects when called. They will be removed once the * bug is fixed. */ int oldLimit = buffer.limit(); // Considering the look ahead feature, the line terminator should be involved as RI buffer.limit(horizonLineSeparator + terminatorLength); // ========== To deal with regex bug ==================== // Considering the look ahead feature, the line terminator should be involved as RI matcher.region(findStartIndex, horizonLineSeparator + terminatorLength); if (matcher.find()) { // The scanner advances past the input that matched findStartIndex = matcher.end(); // If the matched pattern is immediately followed by line // terminator. 
if (horizonLineSeparator == matcher.end()) { findStartIndex += terminatorLength; } // the line terminator itself should not be a part of // the match result according to the Spec if (horizonLineSeparator != bufferLength && (horizonLineSeparator + terminatorLength == matcher .end())) { // ========== To deal with regex bug ==================== buffer.limit(oldLimit); // ========== To deal with regex bug ==================== matchSuccessful = false; return null; } matchSuccessful = true; // ========== To deal with regex bug ==================== buffer.limit(oldLimit); // ========== To deal with regex bug ==================== return matcher.group(); } // ========== To deal with regex bug ==================== buffer.limit(oldLimit); // ========== To deal with regex bug ==================== matchSuccessful = false; return null; } /** * Compiles the pattern string and tries to find a substring matching it in the input data. The * delimiter will be ignored. This is the same as invoking * {@code findInLine(Pattern.compile(pattern))}. * * @param pattern * a string used to construct a pattern which is in turn used to * match a substring of the input data. * @return the matched string or {@code null} if the pattern is not found * before the next line terminator. * @throws IllegalStateException * if the {@code Scanner} is closed. * @see #findInLine(Pattern) */ public String findInLine(String pattern) { return findInLine(Pattern.compile(pattern)); } /** * Tries to find the pattern in the input between the current position and the specified * horizon. Delimiters are ignored. If the pattern is found, the matched * string will be returned, and the {@code Scanner} will advance to the end of the * matched string. Otherwise, null will be returned and {@code Scanner} will not * advance. When waiting for input, the {@code Scanner} may be blocked. * <p> * The {@code Scanner}'s search will never go more than {@code horizon} code points from current * position. 
The position of {@code horizon} does have an effect on the result of the * match. For example, when the input is "123" and current position is at zero, * <code>findWithinHorizon(Pattern.compile("\\p{Digit}{3}"), 2)</code> * will return {@code null}, while * <code>findWithinHorizon(Pattern.compile("\\p{Digit}{3}"), 3)</code> * will return {@code "123"}. {@code horizon} is treated as a transparent, * non-anchoring bound. (refer to * {@link Matcher#useTransparentBounds(boolean)} and * {@link Matcher#useAnchoringBounds(boolean)}) * <p> * A {@code horizon} whose value is zero will be ignored and the whole input will be * used for search. In this situation, all the input may be cached. * * @param pattern * the pattern used to scan. * @param horizon * the search limit. * @return the matched string or {@code null} if the pattern is not found * within the specified {@code horizon}. * @throws IllegalStateException * if the {@code Scanner} is closed. * @throws IllegalArgumentException * if {@code horizon} is less than zero. */ public String findWithinHorizon(Pattern pattern, int horizon) { checkOpen(); checkNotNull(pattern); if (horizon < 0) { throw new IllegalArgumentException("horizon < 0"); } matcher.usePattern(pattern); String result = null; int horizonEndIndex = (horizon == 0) ? Integer.MAX_VALUE : findStartIndex + horizon; while (true) { // If horizon > 0, then search up to // min( bufferLength, findStartIndex + horizon). // Otherwise search until readable is exhausted. int findEndIndex = Math.min(horizonEndIndex, bufferLength); // If horizon == 0, consider horizon as always outside buffer. boolean isHorizonInBuffer = (horizonEndIndex <= bufferLength); // First, try to find pattern within buffer. If pattern can not be // found in buffer, then expand the buffer and try again, // util horizonEndIndex is exceeded or no more input left. 
matcher.region(findStartIndex, findEndIndex); if (matcher.find()) { if ((horizon == 0 && !matcher.hitEnd()) || isHorizonInBuffer || inputExhausted) { result = matcher.group(); break; } } else { // Pattern is not found in buffer while horizonEndIndex is // within buffer, or input is exhausted. Under this situation, // it can be judged that find fails. if (isHorizonInBuffer || inputExhausted) { break; } } // Expand buffer and reset matcher if needed. if (!inputExhausted) { readMore(); resetMatcher(); } } if (result != null) { findStartIndex = matcher.end(); matchSuccessful = true; } else { matchSuccessful = false; } return result; } /** * Tries to find the pattern in the input between the current position and the specified * {@code horizon}. Delimiters are ignored. This call is the same as invoking * {@code findWithinHorizon(Pattern.compile(pattern))}. * * @param pattern * the pattern used to scan. * @param horizon * the search limit. * @return the matched string, or {@code null} if the pattern is not found * within the specified horizon. * @throws IllegalStateException * if the {@code Scanner} is closed. * @throws IllegalArgumentException * if {@code horizon} is less than zero. * @see #findWithinHorizon(Pattern, int) */ public String findWithinHorizon(String pattern, int horizon) { return findWithinHorizon(Pattern.compile(pattern), horizon); } /** * Returns whether this {@code Scanner} has one or more tokens remaining to parse. * This method will block if the data is still being read. * * @return {@code true} if this {@code Scanner} has one or more tokens remaining, * otherwise {@code false}. * @throws IllegalStateException * if the {@code Scanner} has been closed. */ public boolean hasNext() { return hasNext(ANY_PATTERN); } /** * Returns whether this {@code Scanner} has one or more tokens remaining to parse * and the next token matches the given pattern. This method will block if the data is * still being read. * * @param pattern * the pattern to check for. 
* @return {@code true} if this {@code Scanner} has more tokens and the next token * matches the pattern, {@code false} otherwise. * @throws IllegalStateException * if the {@code Scanner} has been closed. */ public boolean hasNext(Pattern pattern) { checkOpen(); checkNotNull(pattern); matchSuccessful = false; prepareForScan(); // if the next token exists, set the match region, otherwise return // false if (!setTokenRegion()) { recoverPreviousStatus(); return false; } matcher.usePattern(pattern); boolean hasNext = false; // check whether next token matches the specified pattern if (matcher.matches()) { cachedNextIndex = findStartIndex; matchSuccessful = true; hasNext = true; } recoverPreviousStatus(); return hasNext; } /** * Returns {@code true} if this {@code Scanner} has one or more tokens remaining to parse * and the next token matches a pattern compiled from the given string. This method will * block if the data is still being read. This call is equivalent to * {@code hasNext(Pattern.compile(pattern))}. * * @param pattern * the string specifying the pattern to scan for * @return {@code true} if the specified pattern matches this {@code Scanner}'s * next token, {@code false} otherwise. * @throws IllegalStateException * if the {@code Scanner} has been closed. */ public boolean hasNext(String pattern) { return hasNext(Pattern.compile(pattern)); } /** * Returns whether the next token can be translated into a valid * {@code BigDecimal}. * * @return {@code true} if the next token can be translated into a valid * {@code BigDecimal}, otherwise {@code false.} * @throws IllegalStateException * if the {@code Scanner} has been closed. 
*/
    public boolean hasNextBigDecimal() {
        if (!hasNext(getFloatPattern())) {
            return false;
        }
        String normalized = removeLocaleInfoFromFloat(matcher.group());
        try {
            // Keep the parsed value so nextBigDecimal() does not have to parse again.
            cachedNextValue = new BigDecimal(normalized);
            return true;
        } catch (NumberFormatException e) {
            // The token matched the float pattern but is not convertible.
            matchSuccessful = false;
            return false;
        }
    }

    /**
     * Reports whether the next token can be converted to a {@code BigInteger} using
     * this scanner's current radix. The scanner does not advance.
     *
     * @return {@code true} if the next token can be translated into a valid
     *         {@code BigInteger}, otherwise {@code false}.
     * @throws IllegalStateException
     *             if the {@code Scanner} has been closed.
     */
    public boolean hasNextBigInteger() {
        return hasNextBigInteger(currentRadix);
    }

    /**
     * Reports whether the next token can be converted to a {@code BigInteger} using
     * the given radix. The scanner does not advance.
     *
     * @param radix
     *            the radix used to translate the token into a
     *            {@code BigInteger}.
     * @return {@code true} if the next token can be translated into a valid
     *         {@code BigInteger}, otherwise {@code false}.
     * @throws IllegalStateException
     *             if the {@code Scanner} has been closed.
     */
    public boolean hasNextBigInteger(int radix) {
        if (!hasNext(getIntegerPattern(radix))) {
            return false;
        }
        String digits = removeLocaleInfo(matcher.group(), int.class);
        try {
            // Keep the parsed value so nextBigInteger() does not have to parse again.
            cachedNextValue = new BigInteger(digits, radix);
            return true;
        } catch (NumberFormatException e) {
            matchSuccessful = false;
            return false;
        }
    }

    /**
     * Reports whether the next token is a valid {@code boolean} value, that is
     * "true" or "false" in any letter case. The scanner does not advance.
     *
     * @return {@code true} if the next token can be translated into a valid
     *         {@code boolean} value, otherwise {@code false}.
     * @throws IllegalStateException
     *             if the {@code Scanner} has been closed.
*/
    public boolean hasNextBoolean() {
        return hasNext(BOOLEAN_PATTERN);
    }

    /**
     * Reports whether the next token can be converted to a {@code byte} using this
     * scanner's current radix. The scanner does not advance.
     *
     * @return {@code true} if the next token can be translated into a valid
     *         {@code byte} value, otherwise {@code false}.
     * @throws IllegalStateException
     *             if the {@code Scanner} has been closed.
     */
    public boolean hasNextByte() {
        return hasNextByte(currentRadix);
    }

    /**
     * Reports whether the next token can be converted to a {@code byte} using the
     * given radix. The scanner does not advance.
     *
     * @param radix
     *            the radix used to translate the token into a {@code byte}
     *            value
     * @return {@code true} if the next token can be translated into a valid
     *         {@code byte} value, otherwise {@code false}.
     * @throws IllegalStateException
     *             if the {@code Scanner} has been closed.
     */
    public boolean hasNextByte(int radix) {
        if (!hasNext(getIntegerPattern(radix))) {
            return false;
        }
        String digits = removeLocaleInfo(matcher.group(), int.class);
        try {
            // Keep the parsed value so nextByte() does not have to parse again.
            cachedNextValue = Byte.valueOf(digits, radix);
            return true;
        } catch (NumberFormatException e) {
            // An integer token, but outside the byte range or otherwise unparsable.
            matchSuccessful = false;
            return false;
        }
    }

    /**
     * Reports whether the next token can be converted to a {@code double}.
     * The scanner does not advance.
     *
     * @return {@code true} if the next token can be translated into a valid
     *         {@code double} value, otherwise {@code false}.
     * @throws IllegalStateException
     *             if the {@code Scanner} has been closed.
*/
    public boolean hasNextDouble() {
        if (!hasNext(getFloatPattern())) {
            return false;
        }
        String normalized = removeLocaleInfoFromFloat(matcher.group());
        try {
            // Keep the parsed value so nextDouble() does not have to parse again.
            cachedNextValue = Double.valueOf(normalized);
            return true;
        } catch (NumberFormatException e) {
            matchSuccessful = false;
            return false;
        }
    }

    /**
     * Reports whether the next token can be converted to a {@code float}.
     * The scanner does not advance.
     *
     * @return {@code true} if the next token can be translated into a valid
     *         {@code float} value, otherwise {@code false}.
     * @throws IllegalStateException
     *             if the {@code Scanner} has been closed.
     */
    public boolean hasNextFloat() {
        if (!hasNext(getFloatPattern())) {
            return false;
        }
        String normalized = removeLocaleInfoFromFloat(matcher.group());
        try {
            // Keep the parsed value so nextFloat() does not have to parse again.
            cachedNextValue = Float.valueOf(normalized);
            return true;
        } catch (NumberFormatException e) {
            matchSuccessful = false;
            return false;
        }
    }

    /**
     * Reports whether the next token can be converted to an {@code int} using this
     * scanner's current radix. The scanner does not advance.
     *
     * @return {@code true} if the next token can be translated into a valid
     *         {@code int} value, otherwise {@code false}.
     * @throws IllegalStateException
     *             if the {@code Scanner} has been closed.
     */
    public boolean hasNextInt() {
        return hasNextInt(currentRadix);
    }

    /**
     * Reports whether the next token can be converted to an {@code int} using the
     * given radix. The scanner does not advance.
     *
     * @param radix
     *            the radix used to translate the token into an {@code int}
     *            value.
     * @return {@code true} if the next token in this {@code Scanner}'s input can be
     *         translated into a valid {@code int} value, otherwise
     *         {@code false}.
     * @throws IllegalStateException
     *             if the {@code Scanner} has been closed.
*/ public boolean hasNextInt(int radix) { Pattern integerPattern = getIntegerPattern(radix); boolean isIntValue = false; if (hasNext(integerPattern)) { String intString = matcher.group(); intString = removeLocaleInfo(intString, int.class); try { cachedNextValue = Integer.valueOf(intString, radix); isIntValue = true; } catch (NumberFormatException e) { matchSuccessful = false; } } return isIntValue; } /** * Returns true if there is a line terminator in the input. * This method may block. * * @throws IllegalStateException if this {@code Scanner} is closed. */ public boolean hasNextLine() { prepareForScan(); String result = findWithinHorizon(LINE_PATTERN, 0); recoverPreviousStatus(); return result != null; } /** * Returns whether the next token can be translated into a valid * {@code long} value in the default radix. * * @return {@code true} if the next token can be translated into a valid * {@code long} value, otherwise {@code false}. * @throws IllegalStateException * if the {@code Scanner} has been closed. */ public boolean hasNextLong() { return hasNextLong(currentRadix); } /** * Returns whether the next token can be translated into a valid * {@code long} value in the specified radix. * * @param radix * the radix used to translate the token into a {@code long} * value. * @return {@code true} if the next token can be translated into a valid * {@code long} value, otherwise {@code false}. * @throws IllegalStateException * if the {@code Scanner} has been closed. */ public boolean hasNextLong(int radix) { Pattern integerPattern = getIntegerPattern(radix); boolean isLongValue = false; if (hasNext(integerPattern)) { String intString = matcher.group(); intString = removeLocaleInfo(intString, int.class); try { cachedNextValue = Long.valueOf(intString, radix); isLongValue = true; } catch (NumberFormatException e) { matchSuccessful = false; } } return isLongValue; } /** * Returns whether the next token can be translated into a valid * {@code short} value in the default radix. 
* * @return {@code true} if the next token can be translated into a valid * {@code short} value, otherwise {@code false}. * @throws IllegalStateException * if the {@code Scanner} has been closed. */ public boolean hasNextShort() { return hasNextShort(currentRadix); } /** * Returns whether the next token can be translated into a valid * {@code short} value in the specified radix. * * @param radix * the radix used to translate the token into a {@code short} * value. * @return {@code true} if the next token can be translated into a valid * {@code short} value, otherwise {@code false}. * @throws IllegalStateException * if the {@code Scanner} has been closed. */ public boolean hasNextShort(int radix) { Pattern integerPattern = getIntegerPattern(radix); boolean isShortValue = false; if (hasNext(integerPattern)) { String intString = matcher.group(); intString = removeLocaleInfo(intString, int.class); try { cachedNextValue = Short.valueOf(intString, radix); isShortValue = true; } catch (NumberFormatException e) { matchSuccessful = false; } } return isShortValue; } /** * Returns the last {@code IOException} that was raised while reading from the underlying * input, or {@code null} if none was thrown. */ public IOException ioException() { return lastIOException; } /** * Returns the {@code Locale} of this {@code Scanner}. */ public Locale locale() { return locale; } private void setLocale(Locale locale) { this.locale = locale; this.decimalFormat = null; this.cachedFloatPattern = null; this.cachedIntegerPatternRadix = -1; this.cachedIntegerPattern = null; } /** * Returns the result of the last matching operation. * <p> * The next* and find* methods return the match result in the case of a * successful match. * * @return the match result of the last successful match operation * @throws IllegalStateException * if the match result is not available, of if the last match * was not successful. 
*/ public MatchResult match() { if (!matchSuccessful) { throw new IllegalStateException(); } return matcher.toMatchResult(); } /** * Returns the next token. The token will be both prefixed and suffixed by * the delimiter that is currently being used (or a string that matches the * delimiter pattern). This method will block if input is being read. * * @return the next complete token. * @throws IllegalStateException * if this {@code Scanner} has been closed. * @throws NoSuchElementException * if input has been exhausted. */ public String next() { return next(ANY_PATTERN); } /** * Returns the next token if it matches the specified pattern. The token * will be both prefixed and suffixed by the delimiter that is currently * being used (or a string that matches the delimiter pattern). This method will block * if input is being read. * * @param pattern * the specified pattern to scan. * @return the next token. * @throws IllegalStateException * if this {@code Scanner} has been closed. * @throws NoSuchElementException * if input has been exhausted. * @throws InputMismatchException * if the next token does not match the pattern given. */ public String next(Pattern pattern) { checkOpen(); checkNotNull(pattern); matchSuccessful = false; prepareForScan(); if (!setTokenRegion()) { recoverPreviousStatus(); // if setting match region fails throw new NoSuchElementException(); } matcher.usePattern(pattern); if (!matcher.matches()) { recoverPreviousStatus(); throw new InputMismatchException(); } matchSuccessful = true; return matcher.group(); } /** * Returns the next token if it matches the specified pattern. The token * will be both prefixed and suffixed by the delimiter that is currently * being used (or a string that matches the delimiter pattern). This method will block * if input is being read. Calling this method is equivalent to * {@code next(Pattern.compile(pattern))}. * * @param pattern * the string specifying the pattern to scan for. * @return the next token. 
* @throws IllegalStateException * if this {@code Scanner} has been closed. * @throws NoSuchElementException * if input has been exhausted. * @throws InputMismatchException * if the next token does not match the pattern given. */ public String next(String pattern) { return next(Pattern.compile(pattern)); } /** * Returns the next token as a {@code BigDecimal}. This method will block if input is * being read. If the next token can be translated into a {@code BigDecimal} * the following is done: All {@code Locale}-specific prefixes, group separators, * and {@code Locale}-specific suffixes are removed. Then non-ASCII digits are * mapped into ASCII digits via {@link Character#digit(char, int)}, and a * negative sign (-) is added if the {@code Locale}-specific negative prefix or * suffix was present. Finally the resulting string is passed to * {@code BigDecimal(String) }. * * @return the next token as a {@code BigDecimal}. * @throws IllegalStateException * if this {@code Scanner} has been closed. * @throws NoSuchElementException * if input has been exhausted. * @throws InputMismatchException * if the next token can not be translated into a valid * {@code BigDecimal}. */ public BigDecimal nextBigDecimal() { checkOpen(); Object obj = cachedNextValue; cachedNextValue = null; if (obj instanceof BigDecimal) { findStartIndex = cachedNextIndex; return (BigDecimal) obj; } Pattern floatPattern = getFloatPattern(); String floatString = next(floatPattern); floatString = removeLocaleInfoFromFloat(floatString); BigDecimal bigDecimalValue; try { bigDecimalValue = new BigDecimal(floatString); } catch (NumberFormatException e) { matchSuccessful = false; recoverPreviousStatus(); throw new InputMismatchException(); } return bigDecimalValue; } /** * Returns the next token as a {@code BigInteger} in the current radix. * This method may block for more input. * * @throws IllegalStateException * if this {@code Scanner} has been closed. 
* @throws NoSuchElementException * if input has been exhausted. * @throws InputMismatchException * if the next token can not be translated into a valid * {@code BigInteger}. */ public BigInteger nextBigInteger() { return nextBigInteger(currentRadix); } /** * Returns the next token as a {@code BigInteger} with the specified radix. * This method will block if input is being read. If the next token can be translated * into a {@code BigInteger} the following is done: All {@code Locale}-specific * prefixes, group separators, and {@code Locale}-specific suffixes are removed. * Then non-ASCII digits are mapped into ASCII digits via * {@link Character#digit(char, int)}, and a negative sign (-) is added if the * {@code Locale}-specific negative prefix or suffix was present. Finally the * resulting String is passed to {@link BigInteger#BigInteger(String, int)}} * with the specified radix. * * @param radix * the radix used to translate the token into a * {@code BigInteger}. * @return the next token as a {@code BigInteger} * @throws IllegalStateException * if this {@code Scanner} has been closed. * @throws NoSuchElementException * if input has been exhausted. * @throws InputMismatchException * if the next token can not be translated into a valid * {@code BigInteger}. */ public BigInteger nextBigInteger(int radix) { checkOpen(); Object obj = cachedNextValue; cachedNextValue = null; if (obj instanceof BigInteger) { findStartIndex = cachedNextIndex; return (BigInteger) obj; } Pattern integerPattern = getIntegerPattern(radix); String intString = next(integerPattern); intString = removeLocaleInfo(intString, int.class); BigInteger bigIntegerValue; try { bigIntegerValue = new BigInteger(intString, radix); } catch (NumberFormatException e) { matchSuccessful = false; recoverPreviousStatus(); throw new InputMismatchException(); } return bigIntegerValue; } /** * Returns the next token as a {@code boolean}. This method will block if input is * being read. 
* * @return the next token as a {@code boolean}. * @throws IllegalStateException * if this {@code Scanner} has been closed. * @throws NoSuchElementException * if input has been exhausted. * @throws InputMismatchException * if the next token can not be translated into a valid * {@code boolean} value. */ public boolean nextBoolean() { return Boolean.parseBoolean(next(BOOLEAN_PATTERN)); } /** * Returns the next token as a {@code byte} in the current radix. * This method may block for more input. * * @throws IllegalStateException * if this {@code Scanner} has been closed. * @throws NoSuchElementException * if input has been exhausted. * @throws InputMismatchException * if the next token can not be translated into a valid * {@code byte} value. */ public byte nextByte() { return nextByte(currentRadix); } /** * Returns the next token as a {@code byte} with the specified radix. Will * block if input is being read. If the next token can be translated into a * {@code byte} the following is done: All {@code Locale}-specific prefixes, group * separators, and {@code Locale}-specific suffixes are removed. Then non-ASCII * digits are mapped into ASCII digits via * {@link Character#digit(char, int)}, and a negative sign (-) is added if the * {@code Locale}-specific negative prefix or suffix was present. Finally the * resulting String is passed to {@link Byte#parseByte(String, int)}} with * the specified radix. * * @param radix * the radix used to translate the token into {@code byte} value. * @return the next token as a {@code byte}. * @throws IllegalStateException * if this {@code Scanner} has been closed. * @throws NoSuchElementException * if input has been exhausted. * @throws InputMismatchException * if the next token can not be translated into a valid * {@code byte} value. 
*/
    @SuppressWarnings("boxing")
    public byte nextByte(int radix) {
        checkOpen();
        // Take any cached value out of the cache. It is returned only when it already is a
        // Byte, in which case the find position moves to the cached index.
        Object obj = cachedNextValue;
        cachedNextValue = null;
        if (obj instanceof Byte) {
            findStartIndex = cachedNextIndex;
            return (Byte) obj;
        }
        String intString = removeLocaleInfo(next(getIntegerPattern(radix)), int.class);
        try {
            return Byte.parseByte(intString, radix);
        } catch (NumberFormatException e) {
            // Mark the match as failed and restore the find position saved before the scan.
            matchSuccessful = false;
            recoverPreviousStatus();
            // Keep the parser's explanation instead of throwing a message-less exception.
            throw new InputMismatchException(e.getMessage());
        }
    }

    /**
     * Returns the next token as a {@code double}. This method will block if input is being
     * read. If the next token can be translated into a {@code double} the
     * following is done: All {@code Locale}-specific prefixes, group separators, and
     * {@code Locale}-specific suffixes are removed. Then non-ASCII digits are mapped
     * into ASCII digits via {@link Character#digit(char, int)}, and a negative
     * sign (-) is added if the {@code Locale}-specific negative prefix or suffix was
     * present. Finally the resulting String is passed to
     * {@link Double#parseDouble(String)}. If the token matches the localized
     * NaN or infinity strings, it is also passed to
     * {@link Double#parseDouble(String)}.
     *
     * @return the next token as a {@code double}.
     * @throws IllegalStateException
     *             if this {@code Scanner} has been closed.
     * @throws NoSuchElementException
     *             if input has been exhausted.
     * @throws InputMismatchException
     *             if the next token can not be translated into a valid
     *             {@code double} value.
*/
    @SuppressWarnings("boxing")
    public double nextDouble() {
        checkOpen();
        // Take any cached value out of the cache. It is returned only when it already is a
        // Double, in which case the find position moves to the cached index.
        Object obj = cachedNextValue;
        cachedNextValue = null;
        if (obj instanceof Double) {
            findStartIndex = cachedNextIndex;
            return (Double) obj;
        }
        String floatString = removeLocaleInfoFromFloat(next(getFloatPattern()));
        try {
            return Double.parseDouble(floatString);
        } catch (NumberFormatException e) {
            // Mark the match as failed and restore the find position saved before the scan.
            matchSuccessful = false;
            recoverPreviousStatus();
            // Keep the parser's explanation instead of throwing a message-less exception.
            throw new InputMismatchException(e.getMessage());
        }
    }

    /**
     * Returns the next token as a {@code float}. This method will block if input is being
     * read. If the next token can be translated into a {@code float} the
     * following is done: All {@code Locale}-specific prefixes, group separators, and
     * {@code Locale}-specific suffixes are removed. Then non-ASCII digits are mapped
     * into ASCII digits via {@link Character#digit(char, int)}, and a negative
     * sign (-) is added if the {@code Locale}-specific negative prefix or suffix was
     * present. Finally the resulting String is passed to
     * {@link Float#parseFloat(String)}. If the token matches the localized NaN
     * or infinity strings, it is also passed to
     * {@link Float#parseFloat(String)}.
     *
     * @return the next token as a {@code float}.
     * @throws IllegalStateException
     *             if this {@code Scanner} has been closed.
     * @throws NoSuchElementException
     *             if input has been exhausted.
     * @throws InputMismatchException
     *             if the next token can not be translated into a valid
     *             {@code float} value.
*/
    @SuppressWarnings("boxing")
    public float nextFloat() {
        checkOpen();
        // Take any cached value out of the cache. It is returned only when it already is a
        // Float, in which case the find position moves to the cached index.
        Object obj = cachedNextValue;
        cachedNextValue = null;
        if (obj instanceof Float) {
            findStartIndex = cachedNextIndex;
            return (Float) obj;
        }
        String floatString = removeLocaleInfoFromFloat(next(getFloatPattern()));
        try {
            return Float.parseFloat(floatString);
        } catch (NumberFormatException e) {
            // Mark the match as failed and restore the find position saved before the scan.
            matchSuccessful = false;
            recoverPreviousStatus();
            // Keep the parser's explanation instead of throwing a message-less exception.
            throw new InputMismatchException(e.getMessage());
        }
    }

    /**
     * Returns the next token as an {@code int} in the current radix.
     * This method may block for more input.
     *
     * @throws IllegalStateException
     *             if this {@code Scanner} has been closed.
     * @throws NoSuchElementException
     *             if input has been exhausted.
     * @throws InputMismatchException
     *             if the next token can not be translated into a valid
     *             {@code int} value.
     */
    public int nextInt() {
        return nextInt(currentRadix);
    }

    /**
     * Returns the next token as an {@code int} with the specified radix. This method will
     * block if input is being read. If the next token can be translated into an
     * {@code int} the following is done: All {@code Locale}-specific prefixes, group
     * separators, and {@code Locale}-specific suffixes are removed. Then non-ASCII
     * digits are mapped into ASCII digits via
     * {@link Character#digit(char, int)}, and a negative sign (-) is added if the
     * {@code Locale}-specific negative prefix or suffix was present. Finally the
     * resulting String is passed to {@link Integer#parseInt(String, int)} with
     * the specified radix.
     *
     * @param radix
     *            the radix used to translate the token into an {@code int}
     *            value.
     * @return the next token as an {@code int}.
     * @throws IllegalStateException
     *             if this {@code Scanner} has been closed.
     * @throws NoSuchElementException
     *             if input has been exhausted.
     * @throws InputMismatchException
     *             if the next token can not be translated into a valid
     *             {@code int} value.
*/ @SuppressWarnings("boxing") public int nextInt(int radix) { checkOpen(); Object obj = cachedNextValue; cachedNextValue = null; if (obj instanceof Integer) { findStartIndex = cachedNextIndex; return (Integer) obj; } Pattern integerPattern = getIntegerPattern(radix); String intString = next(integerPattern); intString = removeLocaleInfo(intString, int.class); int intValue; try { intValue = Integer.parseInt(intString, radix); } catch (NumberFormatException e) { matchSuccessful = false; recoverPreviousStatus(); throw new InputMismatchException(); } return intValue; } /** * Returns the skipped input and advances the {@code Scanner} to the beginning of * the next line. The returned result will exclude any line terminator. When * searching, if no line terminator is found, then a large amount of input * will be cached. If no line at all can be found, a {@code NoSuchElementException} * will be thrown. * * @return the skipped line. * @throws IllegalStateException * if the {@code Scanner} is closed. * @throws NoSuchElementException * if no line can be found, e.g. when input is an empty string. */ public String nextLine() { checkOpen(); matcher.usePattern(LINE_PATTERN); matcher.region(findStartIndex, bufferLength); String result; while (true) { if (matcher.find()) { if (inputExhausted || matcher.end() != bufferLength || bufferLength < buffer.capacity()) { matchSuccessful = true; findStartIndex = matcher.end(); result = matcher.group(); break; } } else { if (inputExhausted) { matchSuccessful = false; throw new NoSuchElementException(); } } if (!inputExhausted) { readMore(); resetMatcher(); } } // Find text without line terminator here. if (result != null) { Matcher terminatorMatcher = LINE_TERMINATOR.matcher(result); if (terminatorMatcher.find()) { result = result.substring(0, terminatorMatcher.start()); } } return result; } /** * Returns the next token as a {@code long} in the current radix. * This method may block for more input. 
* * @throws IllegalStateException * if this {@code Scanner} has been closed. * @throws NoSuchElementException * if input has been exhausted. * @throws InputMismatchException * if the next token can not be translated into a valid * {@code long} value. */ public long nextLong() { return nextLong(currentRadix); } /** * Returns the next token as a {@code long} with the specified radix. This method will * block if input is being read. If the next token can be translated into a * {@code long} the following is done: All {@code Locale}-specific prefixes, group * separators, and {@code Locale}-specific suffixes are removed. Then non-ASCII * digits are mapped into ASCII digits via * {@link Character#digit(char, int)}, and a negative sign (-) is added if the * {@code Locale}-specific negative prefix or suffix was present. Finally the * resulting String is passed to {@link Long#parseLong(String, int)}} with * the specified radix. * * @param radix * the radix used to translate the token into a {@code long} * value. * @return the next token as a {@code long}. * @throws IllegalStateException * if this {@code Scanner} has been closed. * @throws NoSuchElementException * if input has been exhausted. * @throws InputMismatchException * if the next token can not be translated into a valid * {@code long} value. */ @SuppressWarnings("boxing") public long nextLong(int radix) { checkOpen(); Object obj = cachedNextValue; cachedNextValue = null; if (obj instanceof Long) { findStartIndex = cachedNextIndex; return (Long) obj; } Pattern integerPattern = getIntegerPattern(radix); String intString = next(integerPattern); intString = removeLocaleInfo(intString, int.class); long longValue; try { longValue = Long.parseLong(intString, radix); } catch (NumberFormatException e) { matchSuccessful = false; recoverPreviousStatus(); throw new InputMismatchException(); } return longValue; } /** * Returns the next token as a {@code short} in the current radix. * This method may block for more input. 
* * @throws IllegalStateException * if this {@code Scanner} has been closed. * @throws NoSuchElementException * if input has been exhausted. * @throws InputMismatchException * if the next token can not be translated into a valid * {@code short} value. */ public short nextShort() { return nextShort(currentRadix); } /** * Returns the next token as a {@code short} with the specified radix. This method will * block if input is being read. If the next token can be translated into a * {@code short} the following is done: All {@code Locale}-specific prefixes, group * separators, and {@code Locale}-specific suffixes are removed. Then non-ASCII * digits are mapped into ASCII digits via * {@link Character#digit(char, int)}, and a negative sign (-) is added if the * {@code Locale}-specific negative prefix or suffix was present. Finally the * resulting String is passed to {@link Short#parseShort(String, int)}} * with the specified radix. * * @param radix * the radix used to translate the token into {@code short} * value. * @return the next token as a {@code short}. * @throws IllegalStateException * if this {@code Scanner} has been closed. * @throws NoSuchElementException * if input has been exhausted. * @throws InputMismatchException * if the next token can not be translated into a valid * {@code short} value. */ @SuppressWarnings("boxing") public short nextShort(int radix) { checkOpen(); Object obj = cachedNextValue; cachedNextValue = null; if (obj instanceof Short) { findStartIndex = cachedNextIndex; return (Short) obj; } Pattern integerPattern = getIntegerPattern(radix); String intString = next(integerPattern); intString = removeLocaleInfo(intString, int.class); short shortValue; try { shortValue = Short.parseShort(intString, radix); } catch (NumberFormatException e) { matchSuccessful = false; recoverPreviousStatus(); throw new InputMismatchException(); } return shortValue; } /** * Return the radix of this {@code Scanner}. 
* * @return the radix of this {@code Scanner} */ public int radix() { return currentRadix; } /** * Tries to use specified pattern to match input starting from the current position. * The delimiter will be ignored. If a match is found, the matched input will be * skipped. If an anchored match of the specified pattern succeeds, the corresponding input * will also be skipped. Otherwise, a {@code NoSuchElementException} will be thrown. * Patterns that can match a lot of input may cause the {@code Scanner} to read * in a large amount of input. * * @param pattern * used to skip over input. * @return the {@code Scanner} itself. * @throws IllegalStateException * if the {@code Scanner} is closed. * @throws NoSuchElementException * if the specified pattern match fails. */ public Scanner skip(Pattern pattern) { checkOpen(); checkNotNull(pattern); matcher.usePattern(pattern); matcher.region(findStartIndex, bufferLength); while (true) { if (matcher.lookingAt()) { boolean matchInBuffer = matcher.end() < bufferLength || (matcher.end() == bufferLength && inputExhausted); if (matchInBuffer) { matchSuccessful = true; findStartIndex = matcher.end(); break; } } else { if (inputExhausted) { matchSuccessful = false; throw new NoSuchElementException(); } } if (!inputExhausted) { readMore(); resetMatcher(); } } return this; } /** * Tries to use the specified string to construct a pattern and then uses * the constructed pattern to match input starting from the current position. The * delimiter will be ignored. This call is the same as invoke * {@code skip(Pattern.compile(pattern))}. * * @param pattern * the string used to construct a pattern which in turn is used to * match input. * @return the {@code Scanner} itself. * @throws IllegalStateException * if the {@code Scanner} is closed. */ public Scanner skip(String pattern) { return skip(Pattern.compile(pattern)); } /** * Returns a string representation of this {@code Scanner}. The information * returned may be helpful for debugging. 
The format of the string is unspecified. * * @return a string representation of this {@code Scanner}. */ @Override public String toString() { return getClass().getName() + "[delimiter=" + delimiter + ",findStartIndex=" + findStartIndex + ",matchSuccessful=" + matchSuccessful + ",closed=" + closed + "]"; } /** * Sets the delimiting pattern of this {@code Scanner}. * * @param pattern * the delimiting pattern to use. * @return this {@code Scanner}. */ public Scanner useDelimiter(Pattern pattern) { delimiter = pattern; return this; } /** * Sets the delimiting pattern of this {@code Scanner} with a pattern compiled from * the supplied string value. * * @param pattern * a string from which a {@code Pattern} can be compiled. * @return this {@code Scanner}. */ public Scanner useDelimiter(String pattern) { return useDelimiter(Pattern.compile(pattern)); } /** * Sets the {@code Locale} of this {@code Scanner} to a specified {@code Locale}. * * @param l * the specified {@code Locale} to use. * @return this {@code Scanner}. */ public Scanner useLocale(Locale l) { if (l == null) { throw new NullPointerException("l == null"); } setLocale(l); return this; } /** * Sets the radix of this {@code Scanner} to the specified radix. * * @param radix * the specified radix to use. * @return this {@code Scanner}. */ public Scanner useRadix(int radix) { checkRadix(radix); this.currentRadix = radix; return this; } private void checkRadix(int radix) { if (radix < Character.MIN_RADIX || radix > Character.MAX_RADIX) { throw new IllegalArgumentException("Invalid radix: " + radix); } } /** * Remove is not a supported operation on {@code Scanner}. * * @throws UnsupportedOperationException * if this method is invoked. 
*/ public void remove() { throw new UnsupportedOperationException(); } private void checkOpen() { if (closed) { throw new IllegalStateException(); } } private void checkNotNull(Pattern pattern) { if (pattern == null) { throw new NullPointerException("pattern == null"); } } /* * Change the matcher's input after modifying the contents of the buffer. * The current implementation of Matcher causes a copy of the buffer to be taken. */ private void resetMatcher() { matcher.reset(buffer); matcher.region(findStartIndex, bufferLength); } /* * Recover buffer space for characters that are already processed and save the matcher's state * in case parsing fails. See recoverPrevousState. This method must be called before * any buffer offsets are calculated. */ private void prepareForScan() { // Compacting the buffer recovers space taken by already processed characters. This does not // prevent the buffer growing in all situations but keeps the buffer small when delimiters // exist regularly. if (findStartIndex >= buffer.capacity() / 2) { // When over half the buffer is filled with characters no longer being considered by the // scanner we take the cost of compacting the buffer. // Move all characters from [findStartIndex, findStartIndex + remaining()) to // [0, remaining()). int oldPosition = buffer.position(); buffer.position(findStartIndex); buffer.compact(); buffer.position(oldPosition); // Update Scanner state to reflect the new buffer state. bufferLength -= findStartIndex; findStartIndex = 0; preStartIndex = -1; // The matcher must also be informed that the buffer has changed because it operates on // a String copy. resetMatcher(); } // Save the matcher's last find position so it can be returned to if the next token cannot // be parsed. 
preStartIndex = findStartIndex; } /* * Change the matcher's status to last find position */ private void recoverPreviousStatus() { findStartIndex = preStartIndex; } private Pattern getIntegerPattern(int radix) { checkRadix(radix); if (decimalFormat == null) { decimalFormat = (DecimalFormat) NumberFormat.getInstance(locale); } if (cachedIntegerPatternRadix == radix) { return cachedIntegerPattern; } String digits = "0123456789abcdefghijklmnopqrstuvwxyz"; String ASCIIDigit = digits.substring(0, radix); String nonZeroASCIIDigit = digits.substring(1, radix); String digit = "((?i)[" + ASCIIDigit + "]|\\p{javaDigit})"; String nonZeroDigit = "((?i)[" + nonZeroASCIIDigit + "]|([\\p{javaDigit}&&[^0]]))"; String numeral = getNumeral(digit, nonZeroDigit); String regex = "(([-+]?(" + numeral + ")))|" + "(" + addPositiveSign(numeral) + ")|" + "(" + addNegativeSign(numeral) + ")"; cachedIntegerPatternRadix = radix; cachedIntegerPattern = Pattern.compile(regex); return cachedIntegerPattern; } private Pattern getFloatPattern() { if (decimalFormat == null) { decimalFormat = (DecimalFormat) NumberFormat.getInstance(locale); } if (cachedFloatPattern != null) { return cachedFloatPattern; } DecimalFormatSymbols dfs = decimalFormat.getDecimalFormatSymbols(); String digit = "([0-9]|(\\p{javaDigit}))"; String nonZeroDigit = "[\\p{javaDigit}&&[^0]]"; String numeral = getNumeral(digit, nonZeroDigit); String decimalSeparator = "\\" + dfs.getDecimalSeparator(); String decimalNumeral = "(" + numeral + "|" + numeral + decimalSeparator + digit + "*+|" + decimalSeparator + digit + "++)"; String exponent = "([eE][+-]?" + digit + "+)?"; String decimal = "(([-+]?" 
+ decimalNumeral + "(" + exponent + "?)" + ")|" + "(" + addPositiveSign(decimalNumeral) + "(" + exponent + "?)" + ")|" + "(" + addNegativeSign(decimalNumeral) + "(" + exponent + "?)" + "))"; String hexFloat = "([-+]?0[xX][0-9a-fA-F]*\\.[0-9a-fA-F]+([pP][-+]?[0-9]+)?)"; String localNaN = dfs.getNaN(); String localeInfinity = dfs.getInfinity(); String nonNumber = "(NaN|\\Q" + localNaN + "\\E|Infinity|\\Q" + localeInfinity + "\\E)"; String signedNonNumber = "((([-+]?(" + nonNumber + ")))|" + "(" + addPositiveSign(nonNumber) + ")|" + "(" + addNegativeSign(nonNumber) + "))"; cachedFloatPattern = Pattern.compile(decimal + "|" + hexFloat + "|" + signedNonNumber); return cachedFloatPattern; } private String getNumeral(String digit, String nonZeroDigit) { String groupSeparator = "\\" + decimalFormat.getDecimalFormatSymbols().getGroupingSeparator(); String groupedNumeral = "(" + nonZeroDigit + digit + "?" + digit + "?" + "(" + groupSeparator + digit + digit + digit + ")+)"; return "((" + digit + "++)|" + groupedNumeral + ")"; } /* * Add the locale specific positive prefixes and suffixes to the pattern */ private String addPositiveSign(String unsignedNumeral) { String positivePrefix = ""; String positiveSuffix = ""; if (!decimalFormat.getPositivePrefix().isEmpty()) { positivePrefix = "\\Q" + decimalFormat.getPositivePrefix() + "\\E"; } if (!decimalFormat.getPositiveSuffix().isEmpty()) { positiveSuffix = "\\Q" + decimalFormat.getPositiveSuffix() + "\\E"; } return positivePrefix + unsignedNumeral + positiveSuffix; } /* * Add the locale specific negative prefixes and suffixes to the pattern */ private String addNegativeSign(String unsignedNumeral) { String negativePrefix = ""; String negativeSuffix = ""; if (!decimalFormat.getNegativePrefix().isEmpty()) { negativePrefix = "\\Q" + decimalFormat.getNegativePrefix() + "\\E"; } if (!decimalFormat.getNegativeSuffix().isEmpty()) { negativeSuffix = "\\Q" + decimalFormat.getNegativeSuffix() + "\\E"; } return negativePrefix + 
unsignedNumeral + negativeSuffix; } /* * Remove locale related information from float String */ private String removeLocaleInfoFromFloat(String floatString) { // If the token is HexFloat if (floatString.indexOf('x') != -1 || floatString.indexOf('X') != -1) { return floatString; } // If the token is scientific notation int exponentIndex; if ((exponentIndex = floatString.indexOf('e')) != -1 || (exponentIndex = floatString.indexOf('E')) != -1) { String decimalNumeralString = floatString.substring(0, exponentIndex); String exponentString = floatString.substring(exponentIndex + 1, floatString.length()); decimalNumeralString = removeLocaleInfo(decimalNumeralString, float.class); return decimalNumeralString + "e" + exponentString; } return removeLocaleInfo(floatString, float.class); } /* * Remove the locale specific prefixes, group separators, and locale * specific suffixes from input string */ private String removeLocaleInfo(String token, Class<?> type) { DecimalFormatSymbols dfs = decimalFormat.getDecimalFormatSymbols(); StringBuilder tokenBuilder = new StringBuilder(token); boolean negative = removeLocaleSign(tokenBuilder); // Remove group separator String groupSeparator = String.valueOf(dfs.getGroupingSeparator()); int separatorIndex; while ((separatorIndex = tokenBuilder.indexOf(groupSeparator)) != -1) { tokenBuilder.delete(separatorIndex, separatorIndex + 1); } // Remove decimal separator String decimalSeparator = String.valueOf(dfs.getDecimalSeparator()); separatorIndex = tokenBuilder.indexOf(decimalSeparator); StringBuilder result = new StringBuilder(""); if (type == int.class) { for (int i = 0; i < tokenBuilder.length(); i++) { if (Character.digit(tokenBuilder.charAt(i), Character.MAX_RADIX) != -1) { result.append(tokenBuilder.charAt(i)); } } } else if (type == float.class) { if (tokenBuilder.toString().equals(dfs.getNaN())) { result.append("NaN"); } else if (tokenBuilder.toString().equals(dfs.getInfinity())) { result.append("Infinity"); } else { for (int i = 0; 
i < tokenBuilder.length(); i++) { if (Character.digit(tokenBuilder.charAt(i), 10) != -1) { result.append(Character.digit(tokenBuilder.charAt(i), 10)); } } } } else { throw new AssertionError("Unsupported type: " + type); } // Token is NaN or Infinity if (result.length() == 0) { result = tokenBuilder; } if (separatorIndex != -1) { result.insert(separatorIndex, "."); } // If input is negative if (negative) { result.insert(0, '-'); } return result.toString(); } /* * Remove positive and negative sign from the parameter stringBuilder, and * return whether the input string is negative */ private boolean removeLocaleSign(StringBuilder tokenBuilder) { String positivePrefix = decimalFormat.getPositivePrefix(); String positiveSuffix = decimalFormat.getPositiveSuffix(); String negativePrefix = decimalFormat.getNegativePrefix(); String negativeSuffix = decimalFormat.getNegativeSuffix(); if (tokenBuilder.indexOf("+") == 0) { tokenBuilder.delete(0, 1); } if (!positivePrefix.isEmpty() && tokenBuilder.indexOf(positivePrefix) == 0) { tokenBuilder.delete(0, positivePrefix.length()); } if (!positiveSuffix.isEmpty() && tokenBuilder.indexOf(positiveSuffix) != -1) { tokenBuilder.delete(tokenBuilder.length() - positiveSuffix.length(), tokenBuilder.length()); } boolean negative = false; if (tokenBuilder.indexOf("-") == 0) { tokenBuilder.delete(0, 1); negative = true; } if (!negativePrefix.isEmpty() && tokenBuilder.indexOf(negativePrefix) == 0) { tokenBuilder.delete(0, negativePrefix.length()); negative = true; } if (!negativeSuffix.isEmpty() && tokenBuilder.indexOf(negativeSuffix) != -1) { tokenBuilder.delete(tokenBuilder.length() - negativeSuffix.length(), tokenBuilder.length()); negative = true; } return negative; } /* * Find the prefixed delimiter and suffixed delimiter in the input resource * and set the start index and end index of Matcher region. If the suffixed * delimiter does not exist, the end index is set to be end of input. 
*/
    private boolean setTokenRegion() {
        // Search with the delimiter pattern over the part of the buffer not yet consumed.
        matcher.usePattern(delimiter);
        matcher.region(findStartIndex, bufferLength);

        final int regionStart = findPreDelimiter();
        if (setHeadTokenRegion(regionStart)) {
            return true;
        }

        int regionEnd = findDelimiterAfter();
        if (regionEnd == -1) {
            // No delimiter follows. If the find position already sits at the end of the
            // buffer there is no token; otherwise the token runs to the end of the buffer.
            if (findStartIndex == bufferLength) {
                return false;
            }
            regionEnd = bufferLength;
            findStartIndex = bufferLength;
        }

        matcher.region(regionStart, regionEnd);
        return true;
    }

    /*
     * Locates the delimiter in front of the token. Returns the index just past that
     * delimiter and moves findStartIndex there, or returns -1 when no delimiter is found
     * and the input is exhausted.
     */
    private int findPreDelimiter() {
        while (true) {
            if (!matcher.find()) {
                if (inputExhausted) {
                    return -1;
                }
                readMore();
                resetMatcher();
                continue;
            }
            // The delimiter covers everything that is left in the buffer: when more input
            // can be read, fetch it and search again.
            final boolean onlyDelimiterLeft =
                    matcher.start() == findStartIndex && matcher.end() == bufferLength;
            if (onlyDelimiterLeft && !inputExhausted) {
                readMore();
                resetMatcher();
                continue;
            }
            break;
        }
        findStartIndex = matcher.end();
        return findStartIndex;
    }

    /*
     * Handles the cases in which the token starts at preStartIndex rather than after a
     * delimiter. Returns true when the matcher region has been set by this method.
     */
    private boolean setHeadTokenRegion(int findIndex) {
        if (findIndex == -1) {
            // No delimiter at all: everything from preStartIndex to the end of the buffer
            // is the token, provided there is anything left.
            if (preStartIndex == bufferLength) {
                return false;
            }
            findStartIndex = bufferLength;
            matcher.region(preStartIndex, bufferLength);
            return true;
        }

        // A delimiter was found, but not at the position where the search started: the text
        // in front of it is the token.
        final int delimiterStart = matcher.start();
        if (preStartIndex == delimiterStart) {
            return false;
        }
        findStartIndex = delimiterStart;
        matcher.region(preStartIndex, delimiterStart);
        return true;
    }

    // Locates the delimiter behind the token. Returns its start index and moves
    // findStartIndex there, or returns -1 when no delimiter is found and the input is
    // exhausted.
    private int findDelimiterAfter() {
        while (true) {
            if (matcher.find()) {
                // Skip an empty match located exactly at the find position.
                final boolean emptyMatchAtStart =
                        matcher.start() == findStartIndex && matcher.start() == matcher.end();
                if (!emptyMatchAtStart) {
                    break;
                }
            } else if (inputExhausted) {
                return -1;
            } else {
                readMore();
                resetMatcher();
            }
        }
        findStartIndex = matcher.start();
        return findStartIndex;
    }

    /*
     * Read more data from the underlying Readable and append it to the buffer. If the
     * Readable reports end of input or the I/O operation fails, inputExhausted is set to
     * true; otherwise bufferLength grows by the number of characters read.
     */
    private void readMore() {
        final int savedPosition = buffer.position();
        final int writeStart = bufferLength;
        // Grow the buffer when there is no free space left.
        if (bufferLength >= buffer.capacity()) {
            expandBuffer();
        }

        int charsRead;
        try {
            buffer.limit(buffer.capacity());
            buffer.position(writeStart);
            // Retry for as long as the Readable returns without delivering anything.
            do {
                charsRead = input.read(buffer);
            } while (charsRead == 0);
        } catch (IOException e) {
            // The Readable may have stored some characters before it failed; the buffer
            // position tells how far valid content reaches.
            bufferLength = buffer.position();
            // -1 records that an IOException occurred and no more input can be read.
            charsRead = -1;
            lastIOException = e;
        }

        buffer.flip();
        buffer.position(savedPosition);
        if (charsRead == -1) {
            inputExhausted = true;
        } else {
            bufferLength = charsRead + bufferLength;
        }
    }

    // Expand the size of internal buffer.
private void expandBuffer() {
        final int savedPosition = buffer.position();
        final int savedLimit = buffer.limit();

        // Double the capacity and carry over everything up to the old limit.
        final char[] grown = new char[buffer.capacity() * 2];
        System.arraycopy(buffer.array(), 0, grown, 0, savedLimit);

        buffer = CharBuffer.wrap(grown, 0, grown.length);
        buffer.position(savedPosition);
        buffer.limit(savedLimit);
    }

    /**
     * Restores this scanner's delimiter and radix to their defaults and its locale to
     * the current default locale.
     *
     * @return this scanner
     * @since 1.6
     */
    public Scanner reset() {
        this.delimiter = DEFAULT_DELIMITER;
        setLocale(Locale.getDefault());
        this.currentRadix = DEFAULT_RADIX;
        return this;
    }
}