package com.livingsocial.hive.udf;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
import org.jsoup.Jsoup;
/**
 * Hive UDF that reduces an HTML string to its text content.
 *
 * <p>The input is parsed with jsoup and the text of the resulting document is
 * returned. The function is described to Hive under the name {@code striphtml}.
 */
@Description(
    name = "striphtml",
    value = "_FUNC_(str) - Returns str with all HTML tags removed."
)
public class StripHTML extends UDF {

  /**
   * Strips the markup from {@code html} and returns the remaining text.
   *
   * @param html the HTML to strip; may be {@code null} (e.g. a SQL NULL column value)
   * @return a new {@link Text} holding the text of the parsed document, or
   *     {@code null} when {@code html} is {@code null}
   */
  public Text evaluate(Text html) {
    // A NULL argument must yield NULL rather than failing the query with a
    // NullPointerException on html.toString().
    if (html == null) {
      return null;
    }
    String stripped = Jsoup.parse(html.toString()).text();
    return new Text(stripped);
  }
}