/*
* This file is part of the Wayback archival access software
* (http://archive-access.sourceforge.net/projects/wayback/).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.wayback.replay.html.transformer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.archive.wayback.replay.html.ReplayParseContext;
/**
* Translates URL found in META-REFRESH's <code>CONTENT</code> attribute.
* <p>
* Extracts URL part in <code>CONTENT</code> attribute value, and translates it.
* </p>
* <p>Possible Refactoring:
* There's no strong reason this class should extend {@link URLStringTransformer}.
* Since <code>javascript:</code> URI in META-REFRESH is rejected by most browsers,
* <code>transform</code> method could simply call {@link ReplayParseContext#contextualizeUrl(String)}
* rather than <code>super.transform</code>. As META-REFRESH is found only in HTML
* document, there's no need for <code>flags</code> member either.
* </p>
*
* @author brad
*
*/
public class MetaRefreshUrlStringTransformer extends URLStringTransformer {
private final static Pattern refreshURLPattern = Pattern.compile(
"^[\\d.]+\\s*;\\s*url\\s*=\\s*(.+?)\\s*$", Pattern.CASE_INSENSITIVE
| Pattern.MULTILINE);
/*
* (non-Javadoc)
*
* @see
* org.archive.wayback.replay.html.StringTransformer#transform(org.archive
* .wayback.replay.html.ReplayParseContext, java.lang.String)
*/
public String transform(ReplayParseContext context, String input) {
// Ex. <META HTTP-EQUIV="Refresh" CONTENT="0; URL=/ics/default.asp">
// input receives the value of the "CONTENT" attribute.
// So, we need to search for the "URL=", take everything to the right of
// that, trim it, contextualize it, and return that.
Matcher m = refreshURLPattern.matcher(input);
if (m.matches()) {
if (m.groupCount() == 1) {
String url = m.group(1);
String replayUrl = context.contextualizeUrl(url);
if (replayUrl != url) {
input = input.substring(0, m.start(1)) + replayUrl +
input.substring(m.end(1));
}
}
}
return input;
}
}