/* * chombo: Hadoop Map Reduce utility * Author: Pranab Ghosh * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package org.chombo.transformer; import java.io.Serializable; import java.util.regex.Pattern; import org.chombo.util.BasicUtils; /** * Flattens multi line JSON * @author pranab * */ public class MultiLineJsonFlattener implements Serializable { private StringBuilder flattenedLineBld = new StringBuilder(); private String rec; private int braceMatchCount; private int lineCounter; private Pattern openBrace; private Pattern closeBrace; private boolean atBegin = true; private int numBraces; private boolean debugOn; /** * */ public MultiLineJsonFlattener() { openBrace = Pattern.compile("\\{"); closeBrace = Pattern.compile("\\}"); } public void setDebugOn(boolean debugOn) { this.debugOn = debugOn; } /** * @param rawLine * @return */ public String processRawLine(String rawLine) { String current = null; rec = null; flattenedLineBld.append(" ").append(rawLine); String jsonRec = null; ++lineCounter; if (debugOn) System.out.println("lineCounter: " + lineCounter + " raw line: " + rawLine); if (atBegin) { if (rawLine.indexOf("[") != -1) { truncateBegin(); atBegin = false; } else { if (debugOn) System.out.println("still in begin segment"); } } else { current = flattenedLineBld.toString(); braceCount(current); //got complete JSON if (numBraces > 0 && braceMatchCount == 0) { rec = flattenedLineBld.toString(); int firstBrace = rec.indexOf("{"); int lastBrace = rec.lastIndexOf("}"); System.out.println("brace matched rec:" + rec); if (lastBrace == rec.length() - 1) { //line ends with } jsonRec = rec.substring(firstBrace); flattenedLineBld.delete(0, flattenedLineBld.length()); } else { //there is more in the line after } ++lastBrace; jsonRec = rec.substring(firstBrace, lastBrace); flattenedLineBld.delete(0, flattenedLineBld.length()); flattenedLineBld.append(rec.substring(lastBrace)); } } else { if (debugOn) System.out.println("brace not matched"); } } return jsonRec; } /** * */ private void truncateBegin() { String current = null; //remove {..[ from beginning String lines = flattenedLineBld.toString(); int pos = lines.indexOf("["); if (pos == -1) { throw new IllegalStateException("invalid json"); } else { flattenedLineBld.delete(0, flattenedLineBld.length()); current = lines.substring(pos + 1); flattenedLineBld.append(current); braceCount(current); } if (debugOn) System.out.println("after trucating doc begin: " + flattenedLineBld.toString()); } /** * @param current */ private void braceCount(String current) { braceMatchCount = numBraces = BasicUtils.findNumOccureneces(current, openBrace); braceMatchCount -= BasicUtils.findNumOccureneces(current, closeBrace); } }