/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nutchbase.parse; // Commons Logging imports import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; // Nutch Imports import org.apache.nutch.parse.ParseException; import org.apache.nutch.parse.ParserNotFound; import org.apache.nutchbase.util.hbase.RowPart; // Hadoop imports import org.apache.hadoop.conf.Configuration; /** * A Utility class containing methods to simply perform parsing utilities such * as iterating through a preferred list of {@link Parser}s to obtain * {@link Parse} objects. * * @author mattmann * @author Jérôme Charron * @author Sébastien Le Callonnec */ public class ParseUtilHbase { /* our log stream */ public static final Log LOG = LogFactory.getLog(ParseUtilHbase.class); private ParserFactoryHbase parserFactory; /** * * @param conf */ public ParseUtilHbase(Configuration conf) { this.parserFactory = new ParserFactoryHbase(conf); } /** * Performs a parse by iterating through a List of preferred {@link Parser}s * until a successful parse is performed and a {@link Parse} object is * returned. If the parse is unsuccessful, a message is logged to the * <code>WARNING</code> level, and an empty parse is returned. * * @throws ParseException If no suitable parser is found to perform the parse. */ public ParseHbase parse(String url, RowPart row) throws ParseException { ParserHbase[] parsers = null; String contentType = row.getContentType(); try { parsers = this.parserFactory.getParsers(contentType, url); } catch (ParserNotFound e) { if (LOG.isWarnEnabled()) { LOG.warn("No suitable parser found when trying to parse content " + url + " of type " + contentType); } throw new ParseException(e.getMessage()); } ParseHbase parse = null; for (int i=0; i<parsers.length; i++) { if (LOG.isDebugEnabled()) { LOG.debug("Parsing [" + url + "] with [" + parsers[i] + "]"); } parse = parsers[i].getParse(url, row); if (parse != null) return parse; } if (LOG.isWarnEnabled()) { LOG.warn("Unable to successfully parse content " + url + " of type " + contentType); } return null; } }