/*
 *  This file is part of the Wayback archival access software
 *   (http://archive-access.sourceforge.net/projects/wayback/).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual
 *  contributors.
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.archive.wayback.replay.html.transformer;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;

import junit.framework.TestCase;

import org.archive.wayback.replay.html.ContextResultURIConverterFactory;
import org.archive.wayback.replay.html.IdentityResultURIConverterFactory;
import org.archive.wayback.replay.html.ReplayParseContext;

/**
 * Tests for {@link JSStringTransformer}.
 *
 * <p>Each test feeds a JavaScript fragment to the transformer and inspects
 * which URL strings were handed to
 * {@link ReplayParseContext#contextualizeUrl(String, String)}, as recorded by
 * {@link RecordingReplayParseContext}.</p>
 *
 * @author brad
 */
public class JSStringTransformerTest extends TestCase {

	/** Base URL passed to every {@link RecordingReplayParseContext} created here. */
	URL baseURL;

	// TODO: extract interface from ReplayParseContext and
	// use EasyMock instead of hand-writing mock object.
	/** Recording context; rebuilt in {@link #setUp()} and replaced by some tests. */
	RecordingReplayParseContext rc;

	/** Transformer under test; a fresh instance is created for every test. */
	JSStringTransformer jst;

	/**
	 * Creates a fresh stub context (no URI converter factory) and a fresh
	 * transformer before each test.
	 */
	@Override
	protected void setUp() throws Exception {
		super.setUp();
		baseURL = new URL("http://foo.com");
		rc = new RecordingReplayParseContext(null, baseURL, null);
		jst = new JSStringTransformer();
	}

	/**
	 * Test method for {@link org.archive.wayback.replay.html.transformer.JSStringTransformer#transform(org.archive.wayback.replay.html.ReplayParseContext, java.lang.String)}.
	 * A URL consisting of scheme and host only is passed to the context as is.
	 * @throws MalformedURLException
	 */
	public void testTransform_HostOnly() throws MalformedURLException {
		final String input = "'<a href=\'http://www.gavelgrab.org\' target=\'_blank\'>Learn more in Gavel Grab</a>'";

		// uses the transformer created in setUp(), like the other tests.
		jst.transform(rc, input);

		assertEquals(1, rc.got.size());
		assertEquals("http://www.gavelgrab.org", rc.got.get(0));
	}

	/**
	 * A URL with a path: only scheme and host are passed to the context.
	 */
	public void testTransform_WithPath() {
		final String input = "'<a href=\'http://www.gavelgrab.org/foobla/blah\' target=\'_blank\'>Learn more in Gavel Grab</a>'";

		jst.transform(rc, input);

		assertEquals(1, rc.got.size());
		assertEquals("http://www.gavelgrab.org", rc.got.get(0));
	}

	/**
	 * slash is often escaped with backslash in JavaScript (esp. JSON).
	 */
	public void testTransform_EscapedSlashes() {
		final String input = "onloadRegister(function (){window.location.href=\"http:\\/\\/www.facebook.com\\/barrettforwisconsin?v=info\";});";

		jst.transform(rc, input);

		assertEquals(1, rc.got.size());
		// the escaping is still present in the string handed to the context.
		assertEquals("http:\\/\\/www.facebook.com", rc.got.get(0));
	}

	/**
	 * {@code rewriteHttpsOnly} property is used to limit URL rewrite
	 * to HTTPS ones (intended for proxy mode). That should affect how
	 * StringTransformer picks up URLs in text for translation.
	 * <p>Now {@code rewriteHttpsOnly} has no effect on {@code JSStringTransformer}'s
	 * behavior and picks up all full URLs.</p>
	 * @throws Exception
	 */
	public void testRewriteHttpsOnly() throws Exception {
		rc = new RecordingReplayParseContext(null, baseURL, null);
		rc.setRewriteHttpsOnly(true);

		final String input =
				"var img1 = 'http://www1.example.com/img/1.jpeg';\n" +
				"var img2 = 'https://secure1.example.com/img/2.jpeg';\n" +
				"var img3 = '/img/3.jpeg';\n" +
				"var host1 = 'http://www2.example.com';\n" +
				"var host2 = 'https://secure2.example.com';\n";

		jst.transform(rc, input);

		// the relative URL (img3) is not picked up; the four full URLs are.
		assertEquals(4, rc.got.size());
		// with default regex, JSStringTransformer captures
		// scheme and netloc only (no path).
		assertTrue(rc.got.contains("http://www1.example.com"));
		assertTrue(rc.got.contains("https://secure1.example.com"));
		assertTrue(rc.got.contains("http://www2.example.com"));
		assertTrue(rc.got.contains("https://secure2.example.com"));
	}

	/**
	 * same as above, slashes are backslash-escaped.
	 * @throws Exception
	 */
	public void testRewriteHttpsOnlyEscapedSlashes() throws Exception {
		// using custom RecordingReplayParseContext for testing actual rewrite. This is more than
		// a unit test of JSStringTransformer, but it is useful to capture bugs caused by inconsistency
		// among JSStringTransformer, ReplayParseContext and ResultURIConverterFactory (hopefully
		// they should be refactored into coherent, easier-to-test components.) this is a common
		// setup for proxy-mode (IdentityResultURIConverterFactory returns ProxyHttpsResultURIConverter.)
		IdentityResultURIConverterFactory uriConverterFactory = new IdentityResultURIConverterFactory();
		rc = new RecordingReplayParseContext(uriConverterFactory, baseURL, null);
		rc.setRewriteHttpsOnly(true);

		// Note: ParseContext.resolve(String) uses UsableURIFactory.getInstance() for
		// making URL absolute. It not only prepends baseURL but also removes escaping
		// like "\/", "%3A". So, depending on the URL pattern, more "\/" may be replaced
		// by "/" (as the default pattern matches scheme and netloc only, "\/" in
		// path part is retained here).
		// NOTE(review): the original comment ends with a dangling "ResultURIConverter";
		// the intended remark is not recoverable from this file.
		final String input =
				"var img1 = 'http:\\/\\/example.com\\/img\\/1.jpeg';\n" +
				"var img2 = 'https:\\/\\/secure1.example.com\\/img\\/2.jpeg';\n" +
				"var img3 = '\\/img\\/3.jpeg';\n" +
				"var host1 = 'http:\\/\\/example.com';\n" +
				"var host2 = 'https:\\/\\/secure2.example.com';\n";
		final String expected =
				"var img1 = 'http:\\/\\/example.com\\/img\\/1.jpeg';\n" +
				"var img2 = 'http://secure1.example.com\\/img\\/2.jpeg';\n" +
				"var img3 = '\\/img\\/3.jpeg';\n" +
				"var host1 = 'http:\\/\\/example.com';\n" +
				"var host2 = 'http://secure2.example.com';\n";

		String out = jst.transform(rc, input);

		assertEquals(expected, out);
	}

	/**
	 * test of rewriting protocol relative URLs ({@code "//www.example.com/..."})
	 * with non-default regex.
	 * <p>check if text preceding the first group is preserved in the result.
	 * also make sure it works with URLs with protocol.</p>
	 * @throws Exception
	 */
	public void testRewriteProtocolRelativeWithCustomRegex() throws Exception {
		jst.setRegex("[\"']((?:https?:)?//(?:[^/]+@)?[^@:/]+(?:\\.[^@:/]+)+(?:[0-9]+)?)");

		final String input =
				"js=d.createElement(s);js.id=id;js.src=\"//platform.twitter.com/widgets.js\";" +
				"js.src2=\"https://platform2.twitter.com/widgets2.js\";" +
				"fjs.parentNode.insertBefore(js,fjs);";
		// the stub context prefixes every URL it is given with "###".
		final String expected =
				"js=d.createElement(s);js.id=id;js.src=\"###//platform.twitter.com/widgets.js\";" +
				"js.src2=\"###https://platform2.twitter.com/widgets2.js\";" +
				"fjs.parentNode.insertBefore(js,fjs);";

		String output = jst.transform(rc, input);

		assertEquals(2, rc.got.size());
		assertTrue(rc.got.contains("//platform.twitter.com"));
		assertTrue(rc.got.contains("https://platform2.twitter.com"));

		assertEquals(expected, output);
	}

	/**
	 * test of rewriting corner case where URL contains special chars for
	 * {@code Matcher#appendReplacement}.
	 *
	 * @throws Exception
	 */
	public void testRewriteSpecialCharURL() throws Exception {
		// using custom regex, as default pattern does not allow backslash in URL.
		// this regex also deliberately excludes single quote so that replacement
		// text ends with backslash.
		jst.setRegex("[\"']((?:https?:)?//(?:[^/]+@)?[^@:/']+(?:\\.[^@:/']+)+(?:[0-9]+)?)");

		final String input = "var b='http://www.example.com\\'";
		final String expected = "var b='###http://www.example.com\\'";

		// throws an exception if replacement text is not properly escaped.
		String output = jst.transform(rc, input);

		assertEquals(1, rc.got.size());
		assertEquals("http://www.example.com\\", rc.got.get(0));

		assertEquals(expected, output);
	}

	/**
	 * ReplayParseContext mock that records every URL passed to
	 * {@link #contextualizeUrl(String, String)}.
	 * <p>When constructed without a {@code uriConverterFactory} it acts as a
	 * stub and returns the URL prefixed with {@code "###"}; otherwise it
	 * delegates to the real {@link ReplayParseContext} implementation.</p>
	 * TODO: move to package-level as this is useful for testing other
	 * {@code StringTransformer}s.
	 */
	public static class RecordingReplayParseContext extends ReplayParseContext {
		/** URLs received by {@link #contextualizeUrl(String, String)}, in call order. */
		ArrayList<String> got;
		/** {@code true} when no converter factory was given, i.e. rewriting is faked. */
		boolean stub;

		/**
		 * @param uriConverterFactory factory handed to the superclass;
		 *        {@code null} puts this object in stub mode
		 * @param baseUrl base URL handed to the superclass
		 * @param datespec datespec handed to the superclass
		 */
		public RecordingReplayParseContext(
				ContextResultURIConverterFactory uriConverterFactory,
				URL baseUrl, String datespec) {
			super(uriConverterFactory, baseUrl, datespec);
			got = new ArrayList<String>();
			stub = (uriConverterFactory == null);
		}

		/**
		 * Records {@code url}, then returns either the stubbed rewrite
		 * ({@code "###" + url}) or the superclass's result.
		 */
		@Override
		public String contextualizeUrl(String url, String flags) {
			// TODO record flags, too
			got.add(url);
			if (stub) {
				return "###" + url;
			} else {
				return super.contextualizeUrl(url, flags);
			}
		}
	}
}