/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package focusedCrawler.crawler.crawlercommons.fetcher;

import java.io.Serializable;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.tika.mime.MediaType;

/**
 * Base class for fetchers. It holds the configuration shared by all
 * implementations: a default maximum content size, optional per-MIME-type
 * overrides of that size, and a set of MIME types considered valid.
 *
 * <p>This class only stores these settings; how (and whether) they are
 * enforced is up to the concrete subclass.
 */
@SuppressWarnings("serial")
public abstract class BaseFetcher implements Serializable {

    /** Default maximum content size (64 * 1024); presumably a byte count. */
    public static final int DEFAULT_MAX_CONTENT_SIZE = 64 * 1024;

    /** Per-MIME-type maximum content sizes, keyed by MIME type string. */
    protected Map<String, Integer> _maxContentSizes = new HashMap<String, Integer>();

    /** Maximum content size used when no per-MIME-type value is configured. */
    protected int _defaultMaxContentSize = DEFAULT_MAX_CONTENT_SIZE;

    /** MIME types registered as valid for this fetcher. */
    protected Set<String> _validMimeTypes = new HashSet<String>();

    /** Creates a fetcher with default settings. */
    public BaseFetcher() {
    }

    /**
     * Sets the maximum content size used for MIME types that have no
     * specific limit.
     *
     * @param defaultMaxContentSize the new default maximum content size
     */
    public void setDefaultMaxContentSize(int defaultMaxContentSize) {
        _defaultMaxContentSize = defaultMaxContentSize;
    }

    /**
     * Returns the maximum content size used for MIME types that have no
     * specific limit.
     *
     * @return the default maximum content size
     */
    public int getDefaultMaxContentSize() {
        return _defaultMaxContentSize;
    }

    /**
     * Sets the maximum content size for one MIME type, replacing any value
     * previously stored for it.
     *
     * @param mimeType the MIME type the limit applies to
     * @param maxContentSize the maximum content size for that MIME type
     */
    public void setMaxContentSize(String mimeType, int maxContentSize) {
        _maxContentSizes.put(mimeType, maxContentSize);
    }

    /**
     * Returns the maximum content size for the given MIME type.
     *
     * @param mimeType the MIME type to look up
     * @return the limit configured for {@code mimeType}, or
     *         {@link #getDefaultMaxContentSize()} if none was configured
     */
    public int getMaxContentSize(String mimeType) {
        Integer configured = _maxContentSizes.get(mimeType);
        if (configured != null) {
            return configured;
        }
        return getDefaultMaxContentSize();
    }

    /**
     * Returns the set of valid MIME types.
     *
     * <p>The returned set is the internal instance, not a copy, so changes
     * made to it are visible to this fetcher.
     *
     * @return the internal set of valid MIME types, never {@code null}
     */
    public Set<String> getValidMimeTypes() {
        return _validMimeTypes;
    }

    /**
     * Replaces the set of valid MIME types with a copy of the given set.
     *
     * @param validMimeTypes the new valid MIME types; {@code null} is
     *        treated as an empty set
     */
    public void setValidMimeTypes(Set<String> validMimeTypes) {
        // Guard against null: the HashSet copy constructor would throw a
        // NullPointerException and leave the caller with no useful context.
        if (validMimeTypes == null) {
            _validMimeTypes = new HashSet<String>();
        } else {
            _validMimeTypes = new HashSet<String>(validMimeTypes);
        }
    }

    /**
     * Adds all of the given MIME types to the set of valid MIME types.
     *
     * @param validMimeTypes the MIME types to add; {@code null} adds nothing
     */
    public void addValidMimeTypes(Set<String> validMimeTypes) {
        // addAll(null) throws a NullPointerException; treat null as "nothing to add".
        if (validMimeTypes != null) {
            _validMimeTypes.addAll(validMimeTypes);
        }
    }

    /**
     * Adds a single MIME type to the set of valid MIME types.
     *
     * @param validMimeType the MIME type to add
     */
    public void addValidMimeType(String validMimeType) {
        _validMimeTypes.add(validMimeType);
    }

    /**
     * Gets the content stored in the resource referenced by {@code url},
     * without a payload. Equivalent to {@code get(url, null)}.
     *
     * @param url the URL of the resource to fetch
     * @return the result produced by {@link #get(String, Payload)}
     * @throws BaseFetchException if {@link #get(String, Payload)} throws it
     */
    public FetchedResult get(String url) throws BaseFetchException {
        return get(url, null);
    }

    /**
     * Extracts the bare {@code type/subtype} MIME type from a Content-Type
     * value, dropping anything else the parsed media type carries.
     *
     * @param contentType the Content-Type value to parse
     * @return the {@code type/subtype} string, or an empty string when
     *         {@link MediaType#parse(String)} returns {@code null}
     */
    protected static String getMimeTypeFromContentType(String contentType) {
        MediaType mediaType = MediaType.parse(contentType);
        if (mediaType == null) {
            return "";
        }
        return mediaType.getType() + "/" + mediaType.getSubtype();
    }

    /**
     * Gets the content stored in the resource referenced by {@code url}.
     *
     * @param url the URL of the resource to fetch
     * @param payload payload associated with the request; may be
     *        {@code null} (the single-argument {@link #get(String)} passes
     *        {@code null})
     * @return the result of the fetch
     * @throws BaseFetchException if the implementation reports a fetch failure
     */
    public abstract FetchedResult get(String url, Payload payload) throws BaseFetchException;

    /**
     * Terminates any async request being processed.
     */
    public abstract void abort();
}