/*
* Copyright (c) 2013 Websquared, Inc.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the GNU Public License v2.0
* which accompanies this distribution, and is available at
* http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
*
* Contributors:
* swsong - initial API and implementation
*/
package org.fastcatsearch.util;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import junit.framework.TestCase;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.protocol.HTTP;
import org.fastcatsearch.ir.common.IRException;
import org.junit.Test;
/**
 * Smoke tests for {@link HTMLTagRemover#clean(String)}.
 *
 * <p>Each test feeds one input to {@code HTMLTagRemover.clean} and prints the
 * result to standard output; no assertions are made on the cleaned text. A test
 * therefore only fails when {@code clean} throws, which is why
 * {@link IRException} is propagated to the test runner instead of being caught
 * and printed.
 *
 * <p>NOTE(review): the class extends {@link TestCase} while some methods also
 * carry {@code @Test}; the annotations are kept exactly as they were.
 */
public class HTMLTagRemoverTest extends TestCase {

    /**
     * Downloads the fastcatsearch.org front page with an HTTP GET and prints the
     * cleaned body.
     *
     * <p>I/O failures (for example, no network) are printed and otherwise
     * ignored so the test stays best-effort on machines without connectivity.
     *
     * @throws IRException if {@code HTMLTagRemover.clean} fails on the page body
     */
    public void test1() throws IRException {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            ResponseHandler<String> responseHandler = new BasicResponseHandler();
            HttpGet httpGet = new HttpGet("http://www.fastcatsearch.org/");
            String responseBody = httpclient.execute(httpGet, responseHandler);
            System.out.println(HTMLTagRemover.clean(responseBody));
        } catch (IOException e) {
            // Covers ClientProtocolException as well (it is an IOException).
            e.printStackTrace();
        } finally {
            // Release pooled connections and sockets held by the client.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Cleans a short string consisting of an {@code <img>} tag followed by
     * Korean text and prints the result.
     *
     * @throws IRException if {@code HTMLTagRemover.clean} fails
     */
    public void test2() throws IRException {
        System.out.println(HTMLTagRemover.clean("<img src=\"sdfsdfds.jp\"> 홍삼 저"));
    }

    /**
     * Cleans a product description that ends in an {@code <IMG>} tag which is
     * cut off in the middle of an attribute value, and prints the result.
     *
     * @throws IRException if {@code HTMLTagRemover.clean} fails
     */
    @Test
    public void test3() throws IRException {
        String str = "김치냉장고_스탠드형|232L|2룸|소비전력:17.4kwh(월)|나노항균|냉장+냉동겸용|야채,과일보관|색상:함연주화이트|<IMG src=\"http://office.danawa.com/prod_img/500000/975/502/img/1502975_1.jpg?time=1348054028\" style=\"FILTER: RevealTrans(duration=0,transition=X)\" OnmouseOver=\"this.filters[0].apply(); this.src='http:";
        str = HTMLTagRemover.clean(str);
        System.out.println(str);
    }

    /**
     * Cleans tag-free text containing decimal numbers and several line breaks,
     * and prints the result.
     *
     * @throws IRException if {@code HTMLTagRemover.clean} fails
     */
    @Test
    public void test4() throws IRException {
        String str = "abc4.0 qwe 3.0 tyu 9.0 \n123 \n\n456\n789";
        str = HTMLTagRemover.clean(str);
        System.out.println(str);
    }

    /**
     * Reads a local HTML file as UTF-8, joins its lines with {@code "\r"},
     * cleans the text and prints the result.
     *
     * <p>The path points at a developer-local file. If it cannot be read, the
     * error is printed and whatever was read so far (possibly nothing) is
     * cleaned instead.
     *
     * @throws Exception if {@code HTMLTagRemover.clean} fails
     */
    @Test
    public void testfile() throws Exception {
        String strFilePath = "/Users/swsong/Desktop/a.html";
        StringBuilder sb = new StringBuilder();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(strFilePath), "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                // NOTE(review): lines are joined with a bare carriage return, not
                // "\n" - presumably intentional for this input; confirm.
                sb.append(line).append("\r");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always release the file handle, even when reading failed midway.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        String str = HTMLTagRemover.clean(sb.toString());
        System.out.println(str);
    }
}