/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.jboss.elasticsearch.river.remote.sitemap; import java.net.URL; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Locale; import java.util.TimeZone; /** * SiteMap or SiteMapIndex. * * @author http://code.google.com/p/crawler-commons */ public abstract class AbstractSiteMap { /** Various Sitemap types */ public enum SitemapType { INDEX, XML, ATOM, RSS, TEXT }; /** W3C date the Sitemap was last modified */ private Date lastModified; /** This Sitemap's type */ private SitemapType type; /** Indicates if we have tried to process this Sitemap or not */ private boolean processed; protected URL url; public AbstractSiteMap() { lastModified = null; } public static DateFormat getFullDateFormat() { return dateFormats.get()[1]; } /** * lastModified uses the W3C date format (http://www.w3.org/TR/NOTE-datetime) */ private static final ThreadLocal<DateFormat[]> dateFormats = new ThreadLocal<DateFormat[]>() { protected DateFormat[] initialValue() { DateFormat[] df = new DateFormat[] { new SimpleDateFormat("yyyy-MM-dd'T'HH:mm+hh:00",Locale.US), new SimpleDateFormat("yyyy-MM-dd'T'HH:mm-hh:00",Locale.US), new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss+hh:00",Locale.US), new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss-hh:00",Locale.US), new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz",Locale.US), new SimpleDateFormat("yyyy-MM-dd",Locale.US) }; for (DateFormat d : df) { d.setTimeZone(TimeZone.getTimeZone("UTC")); } return df; } }; public boolean isIndex() { if (type == SitemapType.INDEX) { return true; } return false; }; /** * @return the URL of the Sitemap */ public URL getUrl() { return url; } /** * @param type the Sitemap type to set */ void setType(SitemapType type) { this.type = type; } /** * @return the Sitemap type */ public SitemapType getType() { return type; } /** * @param processed - indicate if the Sitemap has been processed. */ public void setProcessed(boolean processed) { this.processed = processed; } /** * @return true if the Sitemap has been processed i.e it contains at least one SiteMapURL */ public boolean isProcessed() { return processed; } /** * @param lastModified - the lastModified to set */ public void setLastModified(Date lastModified) { this.lastModified = lastModified; } /** * @param lastModified - the lastModified to set */ public void setLastModified(String lastModified) { this.lastModified = SiteMap.convertToDate(lastModified); } /** * @return the lastModified date of the Sitemap */ public Date getLastModified() { return lastModified; } /** * Convert the given date (given in an acceptable DateFormat), null if the date is not in the correct format. * * @param date - the date to be parsed * @return the Date equivalent */ public static Date convertToDate(String date) { if (date != null) { for (DateFormat df : dateFormats.get()) { try { return df.parse(date); } catch (ParseException e) { // do nothing } } } // Not successful parsing any dates return null; } }