/* * Copyright 2012-2017 CodeLibs Project and the Others. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.util; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.util.ArrayList; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Utility class for parsing CSV text */ public final class KuromojiCSVUtil { private static final char QUOTE = '"'; private static final char COMMA = ','; private static final Pattern QUOTE_REPLACE_PATTERN = Pattern.compile("^\"([^\"]+)\"$"); private static final String ESCAPED_QUOTE = "\"\""; private KuromojiCSVUtil() { } // no instance!!! /** * Parse CSV line * * @param line * line containing csv-encoded data * @return Array of values */ public static String[] parse(final String line) { boolean insideQuote = false; final ArrayList<String> result = new ArrayList<>(); int quoteCount = 0; final StringBuilder sb = new StringBuilder(); for (int i = 0; i < line.length(); i++) { final char c = line.charAt(i); if (c == QUOTE) { insideQuote = !insideQuote; quoteCount++; } if (c == COMMA && !insideQuote) { String value = sb.toString(); value = unQuoteUnEscape(value); result.add(value); sb.setLength(0); continue; } sb.append(c); } result.add(sb.toString()); // Validate if (quoteCount % 2 != 0) { return new String[0]; } return result.toArray(new String[result.size()]); } private static String unQuoteUnEscape(final String original) { String result = original; // Unquote if (result.indexOf('\"') >= 0) { final Matcher m = QUOTE_REPLACE_PATTERN.matcher(original); if (m.matches()) { result = m.group(1); } // Unescape if (result.indexOf(ESCAPED_QUOTE) >= 0) { result = result.replace(ESCAPED_QUOTE, "\""); } } return result; } /** * Quote and escape input value for CSV * * @param original Original text. * @return Escaped text. */ public static String quoteEscape(final String original) { String result = original; if (result.indexOf('\"') >= 0) { result = result.replace("\"", ESCAPED_QUOTE); } if (result.indexOf(COMMA) >= 0) { result = "\"" + result + "\""; } return result; } }