/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.beans.standardize;
import java.util.Arrays;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.data.MockInputRow;
import junit.framework.TestCase;
/**
 * Unit tests for {@link UrlStandardizerTransformer}.
 *
 * <p>
 * The tests check the names of the five output columns and the five-element
 * array (protocol, domain, port, path, querystring) that the transformer
 * produces for well-formed URLs, for malformed URLs and for an empty input
 * row. Transformation results are compared through their
 * {@link Arrays#toString(Object[])} rendering.
 */
public class UrlStandardizerTransformerTest extends TestCase {

    /** Transformer under test; replaced with a new instance before every test. */
    UrlStandardizerTransformer transformer;

    /**
     * Creates a fresh transformer so that tests do not share an instance.
     */
    @Override
    protected void setUp() throws Exception {
        super.setUp();
        transformer = new UrlStandardizerTransformer();
    }

    /**
     * Verifies that exactly five output columns are declared and that they
     * are named, in order, Protocol, Domain, Port, Path and Querystring.
     */
    public void testGetOutputColumns() throws Exception {
        final OutputColumns columns = transformer.getOutputColumns();

        assertEquals(5, columns.getColumnCount());
        assertEquals("Protocol", columns.getColumnName(0));
        assertEquals("Domain", columns.getColumnName(1));
        assertEquals("Port", columns.getColumnName(2));
        assertEquals("Path", columns.getColumnName(3));
        assertEquals("Querystring", columns.getColumnName(4));
    }

    /**
     * Verifies the parts extracted from a range of well-formed URLs: with and
     * without port, path and query string, with a fragment, with user info
     * and with a non-HTTP protocol.
     */
    public void testTransformValidUrls() throws Exception {
        // The first URL additionally pins the size of the returned array.
        final Object[] searchUrlParts = transformer.transform("http://www.google.com/search?q=eobjects");
        assertEquals(5, searchUrlParts.length);
        assertEquals("[http, www.google.com, null, /search, q=eobjects]", Arrays.toString(searchUrlParts));

        assertStandardized("[https, localhost, null, null, null]", "https://localhost");
        assertStandardized("[http, localhost, 8080, null, null]", "http://localhost:8080");
        assertStandardized("[http, www.yahoo.com, null, /, null]", "http://www.yahoo.com/");

        // An equals sign inside the path is expected to stay part of the path.
        assertStandardized("[http, www.rethe.com, null, /ref=bleh/234-2565344-2354454, null]",
                "http://www.rethe.com/ref=bleh/234-2565344-2354454");
        assertStandardized("[https, www.ghzsffs.com, null, /gswdp/nav/redir.html/ref=some-page, null]",
                "https://www.ghzsffs.com/gswdp/nav/redir.html/ref=some-page");

        assertStandardized("[http, localhost, 8080, /trac, null]", "http://localhost:8080/trac");

        // The "#comment:1" fragment is expected to be absent from the result.
        assertStandardized("[http, eobjects.org, null, /trac/ticket/395, null]",
                "http://eobjects.org/trac/ticket/395#comment:1");

        // The percent-encoded space is expected to be returned as-is.
        assertStandardized("[http, localhost, null, null, string=hello%20world]",
                "http://localhost?string=hello%20world");

        assertStandardized("[https, foo.bar.foobar.w00p, 1234, /hello/world, who=eobjects]",
                "https://foo.bar.foobar.w00p:1234/hello/world?who=eobjects");

        // The "username@" user info is expected to be absent from the domain.
        assertStandardized("[ftp, hostname, null, /path, null]", "ftp://username@hostname/path");
    }

    /**
     * Verifies the best-effort results for malformed URLs: the array still
     * has five elements, and the parts listed in the expected strings below
     * are {@code null}.
     */
    public void testInvalidUrls() throws Exception {
        // A space inside the host name: every part is expected to be null.
        final Object[] spaceInHostParts = transformer.transform("http://www.google com/search?q=eobjects");
        assertEquals(5, spaceInHostParts.length);
        assertEquals("[null, null, null, null, null]", Arrays.toString(spaceInHostParts));

        // A semicolon where the port delimiter should be: domain and port are
        // expected to be null while protocol, path and query string remain.
        final Object[] semicolonPortParts = transformer.transform("http://www.google.com;8080/search?q=eobjects");
        assertEquals(5, semicolonPortParts.length);
        assertEquals("[http, null, null, /search, q=eobjects]", Arrays.toString(semicolonPortParts));
    }

    /**
     * Verifies that transforming a newly created {@link MockInputRow}, on
     * which no values have been set, yields five {@code null} elements.
     */
    public void testTransformNull() throws Exception {
        // Typed as InputRow so the row-based transform overload is selected.
        final InputRow emptyRow = new MockInputRow();
        final Object[] parts = transformer.transform(emptyRow);

        assertEquals(5, parts.length);
        assertEquals("[null, null, null, null, null]", Arrays.toString(parts));
    }

    /**
     * Transforms {@code url} and asserts that the string rendering of the
     * resulting array equals {@code expected}.
     *
     * @param expected
     *            the expected {@link Arrays#toString(Object[])} rendering
     * @param url
     *            the URL string handed to the transformer
     */
    private void assertStandardized(final String expected, final String url) throws Exception {
        final Object[] parts = transformer.transform(url);
        assertEquals(expected, Arrays.toString(parts));
    }
}