/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.net.*; import gplx.xowa.wikis.xwikis.*;
import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.wkrs.lnkis.htmls.*; import gplx.xowa.htmls.hrefs.*;
import gplx.xowa.wikis.domains.*;
public class Xop_link_parser {
public byte[] Html_xowa_ttl() {return html_xowa_ttl;} private byte[] html_xowa_ttl;
public byte Html_anchor_cls() {return html_anchor_cls;} private byte html_anchor_cls;
public byte Html_anchor_rel() {return html_anchor_rel;} private byte html_anchor_rel;
public byte[] Parse(Bry_bfr tmp_bfr, Xoa_url tmp_url, Xowe_wiki wiki, byte[] raw, byte[] or) {
html_xowa_ttl = null; html_anchor_cls = Xoh_lnki_consts.Tid_a_cls_image; html_anchor_rel = Xoh_lnki_consts.Tid_a_rel_none; // default member variables for html
Xoae_app app = wiki.Appe(); int raw_len = raw.length;
wiki.Utl__url_parser().Parse(tmp_url, raw);
switch (tmp_url.Protocol_tid()) {
case Gfo_protocol_itm.Tid_http: case Gfo_protocol_itm.Tid_https: // "http:" or "https:"; check if to offline wiki and redirect
byte[] wiki_bry = tmp_url.Wiki_bry(), page_bry = tmp_url.Page_bry();
if ( !tmp_url.Wiki_is_missing() // https://www.a.org and others will be marked "missing" by Xow_url_parser
&&( Bry_.Eq(wiki_bry, wiki.Domain_bry()) // link is to this wiki; check if alias
|| app.Xwiki_mgr__exists(wiki_bry) // link is to an xwiki
)
) {
page_bry = tmp_url.Page_for_lnki();
Parse__ttl(tmp_bfr, wiki, wiki_bry, page_bry);
}
else { // http is to an unknown site
if (tmp_url.Protocol_is_relative()) { // relative protocol; EX:"//www.a.org";
Gfo_protocol_itm protocol_itm = Gfo_protocol_itm.Get_or(wiki.Props().Protocol_tid(), Gfo_protocol_itm.Itm_https);
tmp_bfr.Add(protocol_itm.Key_w_colon_bry()); // prepend protocol b/c mozilla cannot launch "//www.a.org", but can launch "https://www.a.org"; DATE:2015-07-27
}
tmp_bfr.Add(raw); // dump everything
}
raw = tmp_bfr.To_bry_and_clear();
html_anchor_cls = Xoh_lnki_consts.Tid_a_cls_none;
Xow_domain_itm domain_itm = Xow_domain_itm_.parse(wiki_bry);
html_anchor_rel = domain_itm.Domain_type().Tid() == Xow_domain_tid_.Tid__other ? Xoh_lnki_consts.Tid_a_rel_nofollow : Xoh_lnki_consts.Tid_a_rel_none; // rel=nofollow if not WM wiki; DATE:2015-11-19
break;
case Gfo_protocol_itm.Tid_file: // "file:///" or "File:A.png"
int proto_len = Gfo_protocol_itm.Bry_file.length; // "file:"
if (proto_len + 1 < raw_len && raw[proto_len + 1] == Byte_ascii.Slash) { // next char is slash, assume xfer_itm refers to protocol; EX: file:///C/A.png
int slash_pos = Bry_find_.Find_bwd(raw, Byte_ascii.Slash);
if (slash_pos != Bry_find_.Not_found) // set xowa_title to file_name; TODO_OLD: call Xoa_url.build; note that this will fail sometimes when (a) xfer_itm is very long (File:ReallyLongName will be shortened to 128 chars) or (b) xfer_itm has invalid windows characters (EX:File:a"b"c.jpg)
html_xowa_ttl = Bry_.Mid(raw, slash_pos + Int_.Const_dlm_len, raw.length);
}
else // next char is not slash; assume xfer_itm refers to ns; EX:File:A.png
raw = tmp_bfr.Add(Xoh_href_.Bry__wiki).Add(raw).To_bry_and_clear();
break;
default: // is page only; EX: Abc
if (Bry_.Len_eq_0(raw)) // empty link should not create anchor; EX:[[File:A.png|link=|abc]]; [[File:Loudspeaker.svg|11px|link=|alt=play]]; PAGE:en.w:List_of_counties_in_New_York; DATE:2016-01-10;
raw = Bry_.Empty;
else {
if (raw[0] == Byte_ascii.Colon) raw = Bry_.Mid(raw, 1, raw.length); // ignore initial colon; EX: [[:commons:A.png]]
if (!Parse__ttl(tmp_bfr, wiki, wiki.Domain_bry(), raw)) {
tmp_bfr.Clear();
return null;
}
raw = tmp_bfr.To_bry_and_clear();
}
break;
}
return raw;
}
private static boolean Parse__ttl(Bry_bfr tmp_bfr, Xowe_wiki wiki, byte[] wiki_bry, byte[] page_bry) {
// handle colon-only aliases; EX:"link:" PAGE:en.w:Wikipedia:Main_Page_alternative_(CSS_Update) DATE:2016-08-18
Xoa_ttl page_ttl = wiki.Ttl_parse(page_bry);
Xow_xwiki_itm xwiki_itm = page_ttl == null ? null : page_ttl.Wik_itm();
if ( xwiki_itm != null // ttl is xwiki; EX:[[File:A.png|link=wikt:A]]
&& page_ttl.Page_db().length == 0) { // ttl is empty; EX:[[File:A.png|link=wikt:]]
Xow_wiki xwiki_wiki = wiki.App().Wiki_mgri().Get_by_or_make_init_n(page_ttl.Wik_itm().Domain_bry());
page_bry = Bry_.Add(page_bry, xwiki_wiki.Props().Main_page()); // append Main_Page to ttl; EX:"wikt:" + "Wikipedia:Main_Page" -> "wikt:Wikipedia:Main_Page"
page_ttl = wiki.Ttl_parse(page_bry);
xwiki_itm = page_ttl.Wik_itm(); // should still be the same, but re-set it for good form
}
// identify wiki / page
boolean page_ttl_is_valid = page_ttl != null;
if (page_ttl_is_valid) { // xwiki; need to define wiki / page
if (xwiki_itm != null) { // is alias; set wiki, page
wiki_bry = xwiki_itm.Domain_bry();
page_bry = Bry_.Mid(page_bry, xwiki_itm.Key_bry().length + 1, page_bry.length); // +1 to skip ":"
}
else // basic; just define page; use ttl.Full_db() to normalize; EX: -> _
page_bry = page_ttl.Full_db_w_anch(); // add anch; PAGE:en.w:History_of_Nauru; DATE:2015-12-27
}
// build either "/wiki/Page" or "/site/domain/wiki/Page"
if (Bry_.Eq(wiki_bry, wiki.Domain_bry())) { // NOTE: check against wiki.Key_bry() again; EX: in en_wiki, and http://commons.wikimedia.org/wiki/w:A
// title-case by ns; needed to handle "link=w:Help:a" which needs to generate "w:Help:A"
if (page_ttl_is_valid) { // valid_ttl; parse in same ns to title-case; EX:link=w:Help:a -> Help:A; DATE:2016-01-11
page_ttl = wiki.Ttl_parse(page_ttl.Full_db_wo_xwiki());
page_bry = page_ttl.Full_db_w_anch();
}
tmp_bfr.Add(Xoh_href_.Bry__wiki).Add(page_bry);
}
else
tmp_bfr.Add(Xoh_href_.Bry__site).Add(wiki_bry).Add(Xoh_href_.Bry__wiki).Add(page_bry);
return page_ttl_is_valid;
}
}