/**
 * Copyright © 2010-2012 Atilika Inc.  All rights reserved.
 *
 * Atilika Inc. licenses this file to you under the Apache License, Version
 * 2.0 (the "License"); you may not use this file except in compliance with
 * the License.  A copy of the License is distributed with this work in the
 * LICENSE.txt file.  You may also obtain a copy of the License from
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.atilika.kuromoji.util;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for reading and writing single CSV lines.
 *
 * <p>The dialect is the one produced by {@link #quoteEscape(String)}: every
 * double quote in a value is doubled, and the value is additionally wrapped
 * in double quotes only when it contains a comma. {@link #parse(String)}
 * accepts that dialect and also unwraps enclosed fields whose content holds
 * doubled quotes.
 */
public class CSVUtil {

    private static final char QUOTE = '"';

    private static final char COMMA = ',';

    /**
     * Matches a field that is enclosed in double quotes. Group 1 is the
     * enclosed content, in which every quote must be part of a doubled pair
     * and at least one non-quote character must be present. A field made up
     * only of quote characters is deliberately NOT matched: quoteEscape
     * writes such values without enclosing quotes, so they must be decoded by
     * plain un-escaping.
     */
    private static final Pattern QUOTE_REPLACE_PATTERN =
            Pattern.compile("^\"((?:\"\")*[^\"]+(?:\"\"[^\"]*)*)\"$");

    private static final String ESCAPED_QUOTE = "\"\"";

    /**
     * Parses one CSV line into its field values.
     *
     * <p>Commas split fields unless they occur inside a quoted section. Every
     * field, including the last one, has enclosing quotes removed and doubled
     * quotes collapsed into a single quote.
     *
     * @param line the CSV line to split; must not be {@code null}
     * @return the decoded field values in order of appearance, or an empty
     *         array when the line contains an odd number of double quotes
     * @throws NullPointerException if {@code line} is {@code null}
     */
    public static String[] parse(String line) {
        boolean insideQuote = false;
        List<String> result = new ArrayList<String>();
        int quoteCount = 0;
        StringBuilder sb = new StringBuilder();

        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);

            if (c == QUOTE) {
                insideQuote = !insideQuote;
                quoteCount++;
            }

            if (c == COMMA && !insideQuote) {
                result.add(unQuoteUnEscape(sb.toString()));
                // Reuse the buffer for the next field instead of reallocating.
                sb.setLength(0);
                continue;
            }

            sb.append(c);
        }

        // Validate: unbalanced quotes make the whole line unusable.
        if (quoteCount % 2 != 0) {
            return new String[0];
        }

        // The trailing field gets the same decoding as every other field.
        result.add(unQuoteUnEscape(sb.toString()));

        return result.toArray(new String[result.size()]);
    }

    /**
     * Decodes a single raw field: strips enclosing quotes when the field
     * matches {@link #QUOTE_REPLACE_PATTERN}, then collapses each doubled
     * quote into one quote.
     *
     * @param original the raw field text as it appeared between separators
     * @return the decoded value
     */
    private static String unQuoteUnEscape(String original) {
        String result = original;

        // Unquote
        Matcher m = QUOTE_REPLACE_PATTERN.matcher(original);
        if (m.matches()) {
            result = m.group(1);
        }

        // Unescape (literal replacement, no regular expression involved)
        return result.replace(ESCAPED_QUOTE, "\"");
    }

    /**
     * Quotes and escapes a value so it can be written as one CSV field.
     *
     * <p>Every double quote is doubled. The result is wrapped in double
     * quotes only if it contains a comma.
     *
     * @param original the value to encode; must not be {@code null}
     * @return the encoded field text
     * @throws NullPointerException if {@code original} is {@code null}
     */
    public static String quoteEscape(String original) {
        String result = original.replace("\"", ESCAPED_QUOTE);

        if (result.indexOf(COMMA) >= 0) {
            result = "\"" + result + "\"";
        }

        return result;
    }
}