package com.alimama.quanjingmonitor.mdrillImport.parse.for416;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import com.alimama.mdrill.json.JSONException;
import com.alimama.mdrill.json.JSONObject;
import com.alimama.mdrillImport.InvalidEntryException;
import com.alimama.quanjingmonitor.mdrillImport.parse.FetchAdid2Pid;
/**
* usertrack日志
* @author yannian.mu
*
*/
public class app_log_parse extends com.alimama.mdrillImport.DataParser{
private static final long serialVersionUID = 1L;
public volatile long groupCreateerror=0;
private static Logger LOG = Logger.getLogger(app_log_parse.class);
private volatile long lines=0;
private volatile long lines_sb=0;
private volatile long lines_sb3=0;
private static long TS_MAX=3600l*24*31;
private volatile long laststartts=System.currentTimeMillis()/1000-TS_MAX;
private volatile long lastendts=System.currentTimeMillis()/1000+TS_MAX;
private volatile long timediff=System.currentTimeMillis();
private volatile long timediff3=System.currentTimeMillis();
public static void main(String[] args) {
System.out.println(111);
}
public static JSONObject parseSb(String str) throws JSONException
{
String[] split=str.split("[ ]*,[ ]*",-1);
for(String s:split)
{
if(!s.startsWith("_sb="))
{
continue;
}
String json=decodeString(s.substring(4));
return new JSONObject(json);
}
return new JSONObject();
}
private static String decodeString(String args) {
try {
return new String(java.net.URLDecoder.decode(args,"UTF-8") .getBytes("UTF-8"), "UTF-8");
} catch (Throwable e) {
try {
return new String(java.net.URLDecoder.decode(args,"GBK") .getBytes("UTF-8"), "UTF-8");
} catch (Throwable e2) {
return args;
}
}
}
public String formatRows(String[] clicklog)
{
StringBuilder b = new StringBuilder();
for (int i = 0; i < clicklog.length; i++) {
b.append(i);
b.append("=");
b.append(String.valueOf(clicklog[i]));
b.append(",");
}
return b.toString();
}
private String matchGet(String s,Pattern pat,int index)
{
if(s==null)
{
return null;
}
Matcher mat = pat.matcher(s);
while (mat.find()) {
return mat.group(index);
}
return null;
}
private String get_json_object(String s,String key)
{
if(s==null)
{
return null;
}
try{
JSONObject obj= new JSONObject(s);
if(obj.has(key))
{
return String.valueOf(obj.get(key));
}
}catch(Throwable e)
{
return null;
}
return null;
}
private static Pattern pat_arg1=Pattern.compile("(.*)(lwfrom|point|_sb)=([^&=,]+)(.*)");
private static Pattern pat_reserves=Pattern.compile("(.*)(lwfrom|_sb|point)=([^&=,]+)(.*)");
private static Pattern pat_args=Pattern.compile("(.*)(lwfrom|_sb|point)=([^&=,]+)(.*)");
private static Pattern pat_last=Pattern.compile("(\\w+)[#]*(.*)");
/**
2014-04-10 00:04:48 0=59.174.28.74,1=5.0.1,2=860623029116421,3=460012710506535,4=Huawei,5=ARMv7 Processor rev 2 (v7l),6=860623029116421,7=HUAWEI G610-U00,8=960*540,9=中国联通,10=Wi-Fi,11=Unknown,12=227200,13=12278902,14=4.1.2,15=tbzhanglihua,16=tbzhanglihua,17=-,18=Unknown,19=Unknown,20=Android,21=4.2.1,22=Android,23=1.3.8,24=KORRWMGMFVKHQQLJLUTIBEED_12278902_1397059405807,25=KORRWMGMFVKHQQLJLUTIBEED,26=-,27=-,28=-,29=_uid=121123981,_cc=227200,_oc=227200,30=2014-04-10 00:03:38,31=1397059418449,32=Page_Webview,33=2001,34=Page_Home,35=Page_Home_Button-home-1-5-1,36=6161,37=action=kpv,list_param=1_72091_h18019_首焦-来往-0409-0410_home-1-5-1,list_type=activity,from=lw,url=http://m.laiwang.com/go/market/laiwang/mingrenmingxing.php?locate=home-1-5-1&actparam=1_72091_h18019_%E9%A6%96%E7%84%A6-%E6%9D%A5%E5%BE%80-0409-0410&lwfrom=20140404152605385&imei=860623029116421&imsi=460012710506535&ttid=227200@taobao_android_4.1.2,dep=16,idx=1955,38=1397059200,39=1397059200,40=1397059488,41=0,
* @param arg1
* @param reserves
* @param args
* @param key
* @return
*/
private String parseGet(String arg1,String reserves,String args)
{
String rtn=null;
if(rtn==null&&arg1!=null&&arg1.indexOf("lwfrom")>=0)
{
String lower_decode_arg1=decodeString(String.valueOf(arg1)).toLowerCase();
rtn=matchGet(lower_decode_arg1,pat_arg1,3);
}
if(rtn==null&&reserves!=null&&reserves.indexOf("lwfrom")>=0)
{
String lower_decode_reserves=decodeString(String.valueOf(reserves)).toLowerCase();
rtn=matchGet(lower_decode_reserves,pat_reserves,3);
}
if(rtn==null&&args!=null&&args.indexOf("lwfrom")>=0)
{
String lower_decode_args=decodeString(String.valueOf(args)).toLowerCase();
rtn=matchGet(lower_decode_args,pat_args,3);
}
if(rtn==null)
{
return null;
}
return matchGet(rtn,pat_last,1);
}
private boolean isempty(String refpid)
{
return refpid==null||refpid.isEmpty()||refpid.length()<5||refpid.length()>500;
}
@Override
public DataIter parseLine(String line) throws InvalidEntryException {
try {
if(line==null)
{
return null;
}
String[] clicklog=line.split("\001",-1);
if(clicklog.length<41)
{
return null;
}
if(clicklog[40].isEmpty()||clicklog[40].length()<=5)
{
return null;
}
String app_key = clicklog[13];//应用的标识
String event_id=clicklog[33];
String arg1=clicklog[34];
String args=clicklog[37];
String reserves =clicklog[29];
// boolean match_app_key="12278902".equals(app_key)||"12087020".equals(app_key)||"12500477".equals(app_key);
// if(!match_app_key)
// {
// return null;
// }
//
// boolean match_event_id="21032".equals(event_id)||"2001".equals(event_id)||"30001".equals(event_id);
// if(!match_event_id)
// {
// return null;
//
// }
boolean match_lwfrom=arg1.indexOf("lwfrom")>=0||args.indexOf("lwfrom")>=0||reserves.indexOf("lwfrom")>=0;
boolean match_refpid=arg1.indexOf("refpid")>=0||args.indexOf("refpid")>=0||reserves.indexOf("refpid")>=0;
if(!(match_lwfrom||match_refpid))
{
return null;
}
this.lines++;
if(this.lines>100000)
{
this.laststartts=(System.currentTimeMillis()/1000)-TS_MAX;
this.lastendts=(System.currentTimeMillis()/1000)+TS_MAX;
this.lines=0;
}
long ts = Long.parseLong(clicklog[40]);
this.lines_sb++;
if(this.lines_sb>5000)
{
this.lines_sb=0;
long nowts=System.currentTimeMillis();
if(nowts-timediff>30000)
{
timediff=nowts;
LOG.info("parseLine_sb_"+ColsDefine.formatDayMin.format(new Date(ts*1000))+" "+formatRows(clicklog));
}
}
if(ts<laststartts||ts>lastendts)
{
return null;
}
String refpid=null;//parseGet(arg1, reserves, args, "refpid");;
String lwfrom=parseGet(arg1, reserves, args);;
if(refpid==null&&lwfrom!=null)
{
String strday=ColsDefine.formatDay.format(new Date(ts*1000));
refpid=FetchAdid2PidWireLess.fetch().get(strday+"@"+String.valueOf(lwfrom));
}
if(isempty(refpid))
{
this.lines_sb3++;
if(this.lines_sb3>100)
{
this.lines_sb3=0;
long nowts=System.currentTimeMillis();
if(nowts-timediff3>30000)
{
timediff3=nowts;
LOG.info("parse error :"+ColsDefine.formatDayMin.format(new Date(ts*1000))+" "+formatRows(clicklog));
}
}
return null;
}
return new DataIterParse(ts,clicklog,refpid);
} catch (Throwable nfe) {
if(groupCreateerror<100)
{
LOG.error("InvalidEntryException:"+line,nfe);
groupCreateerror++;
}
throw new InvalidEntryException("Invalid log `" + line + "'\n" , nfe);
}
}
public static class DataIterParse implements DataIter{
private String[] pvlog=null;
String refpid=null;
long ts;
public DataIterParse(long ts,String[] pvlog,String refpid) {
this.pvlog = pvlog;
this.ts=ts;
this.refpid=refpid;
}
@Override
public boolean next() {
return false;
}
@Override
public Number[] getSum() {
return new Number[] { 0, 0, 0, 0, 0, 1, 0, 0,0 };
}
@Override
public long getTs() {
return (ts/10)*10000;
}
//wdm_v3_user_track
@Override
public Object[] getGroup() {
long ts300 = (this.ts / 300) * 300000;
Date d = new Date(ts300);
String channel = String.valueOf(pvlog[20]).toLowerCase();
if (channel.indexOf("android") >= 0) {
channel = "android";
} else if (channel.indexOf("iphone") >= 0||channel.indexOf("ios") >= 0) {
channel = "ios";
} else {
channel = "other";
}
return new String[] {
String.valueOf(ColsDefine.formatDay.format(d)),
String.valueOf(ColsDefine.formatMin.format(d)),
"wireless",
"app_log_parse",
String.valueOf(this.refpid)
, channel // channel
,""
,""
,ColsDefine.version
};
}
}
@Override
public String[] getSumName() {
return ColsDefine.colSumName;
}
@Override
public String getTableName() {
return ColsDefine.tablename;
}
@Override
public String[] getGroupName() {
return ColsDefine.colname;
}
}