/*
* chombo: Hadoop Map Reduce utility
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.chombo.transformer;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Flattens multi line record to singlr line
* @author pranab
*
*/
public class MultiLineFlattener {
private List<Pattern> begPatterns = new ArrayList<Pattern>();
private List<String> begStrings;
private boolean regExRecordBegDetectors;
private StringBuilder flattenedLineBld = new StringBuilder();
private String falttenedRec;
private Matcher matcher;
/**
* @param rawSchema
*/
public MultiLineFlattener(RawAttributeSchema rawSchema) {
regExRecordBegDetectors = rawSchema.isRegExRecordBegDetectors();
if (regExRecordBegDetectors) {
for (String regEx : rawSchema.getRecordBegDetectors()) {
begPatterns.add(Pattern.compile(regEx));
}
} else {
begStrings = rawSchema.getRecordBegDetectors();
}
}
/**
* @param rawLine
* @return
*/
public String processRawLine(String rawLine) {
falttenedRec = null;
boolean newRecord = false;
if (regExRecordBegDetectors) {
//pattern
for (Pattern begPattern : begPatterns) {
matcher = begPattern.matcher(rawLine);
if (matcher.find()) {
newRecord = true;
break;
}
}
} else {
//string
for (String begString : begStrings) {
if (rawLine.startsWith(begString)) {
newRecord = true;
break;
}
}
}
if (newRecord) {
if (flattenedLineBld.length() > 0) {
falttenedRec = flattenedLineBld.toString();
flattenedLineBld.delete(0, flattenedLineBld.length());
}
} else {
flattenedLineBld.append(" ").append(rawLine);
}
return falttenedRec;
}
/**
* @return
*/
public String processCleanup() {
falttenedRec = null;
if (flattenedLineBld.length() > 0) {
falttenedRec = flattenedLineBld.toString();
flattenedLineBld.delete(0, flattenedLineBld.length());
}
return falttenedRec;
}
}