/* * Copyright 2013 Dmitry Monakhov. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package monakhv.samlib.db.entity; import monakhv.samlib.data.AbstractSettings; import monakhv.samlib.exception.SamlibParseException; import monakhv.samlib.log.Log; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * * @author monakhv */ public class SamLibConfig { static final String SPLIT = "\\|";//Use To parse Book and author Card object data static final String SLASH = "/"; public static final int SEARCH_LIMIT=100;//maximum number of results can be returned by the search procedure public static final int SELECTED_BOOK_ID=-1;//Special id for selected book public static final int GROUP_ID_ALL=-1;//Group for all books of the author public static final int GROUP_ID_SELECTED=-2;//Group for selected books public static final int TAG_AUTHOR_ALL =-1; public static final int TAG_AUTHOR_NEW =-2; //public static final int TAG_AUTHOR_ID = -10; // for test parsing use http://samlib.ru/s/seryj_d_m/indexdate.shtml use shtml inside base file name into URL public static final Pattern BOOK_PATTERN = Pattern.compile("^<DL><DT><li>.*HREF=(.*)\\.shtml><b>(.*)</b>.*<b>(\\d+)k</b>.*\\s+\"(.*)\"\\s+(.*\\b|\\S*)\\s*<.*?<br>(<DD><font\\scolor=\"#555555\">(.*)</font>|)(</DL>|<DD>)"); public static final Pattern AUTHOR_NAME_PATTERN =Pattern.compile("<h3>(.*):<br>"); public static final String COLLATION_RULES_NEW = "&' '<'-'<'_'<','<';'<':'<'!'<'?'<'/'<'.'<0<1<2<3<4<5<6<7<8<9<a,A<b,B<c,C<d,D<ð,Ð<e,E<f,F<g,G<h,H<i,I<j,J<k,K<l,L<m,M<n,N<o,O" + "<p,P<q,Q<r,R<s,S<t,T<u,U<v,V<w,W<x,X<y,Y<z,Z <а,А< б,Б<в,В<г,Г< д , Д< е , Е< ё , Ё< ж , Ж< з , З< и , И< й , Й< к , К< л ,Л< м , М" + "< н , Н< о , О< п , П< р , Р< с , С< т , Т< у , У< ф , Ф< х , Х< ц , Ц< ч , Ч< ш , Ш< щ , Щ< ъ , Ъ< ы , Ы< ь , Ь< э , Э< ю , Ю< я , Я"; public static final String COLLATION_RULES_OLD = "<' '<'-'<'_'<','<';'<':'<'!'<'?'<'/'<'.'<0<1<2<3<4<5<6<7<8<9<a,A<b,B<c,C<d,D<ð,Ð<e,E<f,F<g,G<h,H<i,I<j,J<k,K<l,L<m,M<n,N<o,O" + "<p,P<q,Q<r,R<s,S<t,T<u,U<v,V<w,W<x,X<y,Y<z,Z <а,А< б,Б<в,В<г,Г< д , Д< е , Е< ё , Ё< ж , Ж< з , З< и , И< й , Й< к , К< л ,Л< м , М" + "< н , Н< о , О< п , П< р , Р< с , С< т , Т< у , У< ф , Ф< х , Х< ц , Ц< ч , Ч< ш , Ш< щ , Щ< ъ , Ъ< ы , Ы< ь , Ь< э , Э< ю , Ю< я , Я"; private static final int AUTHOR_PAGE_SIZE = 500;//page size for author search // private static final SamIzdat[] ForwardURLsOrder = {SamIzdat.SamLib, SamIzdat.BudClub};//Samizdat mirrors. Order is important this is the order mirror is selected by // private static final SamIzdat[] ReverseURLsOrder = {SamIzdat.BudClub,SamIzdat.SamLib }; private static final SamIzdat[] ForwardURLOrder= {SamIzdat.SamLib, SamIzdat.ZhurnalLib};//Samizdat mirrors. Order is important this is the order mirror is selected by private static final SamIzdat[] ReverseURLOrder = {SamIzdat.ZhurnalLib,SamIzdat.SamLib }; private static final SamIzdat[] AllUrl = {SamIzdat.SamLib,SamIzdat.ZhurnalLib,SamIzdat.BudClub}; private static final String DEBUG_TAG = "SamLibConfig"; private static final String URLPTR = "/\\w/\\w+/"; private static final String SAMLIB_PROTO = "http://"; private static final String TMPL_ANUM="_ANUM_"; private static final String TMPL_PAGE="_PAGE_"; private static final String TMPL_PAGELEN ="_PAGELEN_"; //private static final String REQUEST_AUTHOR_DATA = "/cgi-bin/areader?q=razdel&order=date&object="; private static final String REQUEST_BOOK_TEXT = "/cgi-bin/areader?q=book&object="; private static final String REQUEST_AUTHOR_SEARCH = "/cgi-bin/areader?q=alpha&anum=_ANUM_&page=_PAGE_&pagelen=_PAGELEN_"; private static final String REQUEST_INDEXDATE="indexdate.shtml"; private static final String[] ABC_LETTER = new String[]{ "А", "Б", "В", "Г", "Д", "Е", "Ё", "Ж", "З", "И", "Й", "К", "Л", "М", "Н", "О", "П", "Р", "С", "Т", "У", "Ф", "Х", "Ц", "Ч", "Ш", "Щ", "Ъ", "Ы", "Ь", "Э", "Ю", "Я", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"}; private static final String[] ABC_CODE = new String[]{ "225", "226", "247", "231", "228", "229", "179", "246", "250", "233", "234", "235", "236", "237", "238", "239", "240", "242", "243", "244", "245", "230", "232", "227", "254", "251", "253", "255", "249", "248", "252", "224", "241", "048", "049", "050", "051", "052", "053", "054", "055", "056", "057", "065", "066", "067", "068", "069", "070", "071", "072", "073", "074", "075", "076", "077", "078", "079", "080", "081", "082", "083", "084", "085", "086", "087", "088", "089", "090"}; private static final HashMap<String, String> ABC; static { ABC = new HashMap<>(); for (int i = 0; i < ABC_CODE.length; i++) { ABC.put(ABC_LETTER[i], ABC_CODE[i]); } } private static SamLibConfig instance = null; private final LinkedList<SamIzdat> linkedSZ;//actual list of Samizdat URLs private AbstractSettings settings; public static SamLibConfig getInstance(AbstractSettings settings){ if (instance == null){ instance = new SamLibConfig(settings); } return instance; } private SamLibConfig(AbstractSettings settings){ this.settings=settings; linkedSZ = new LinkedList<>(); refreshData( ); } /** * Load Samizdat data according to the preference data * We have two possible redefined orders */ public void refreshData( ) { String fm =settings.getFirstMirror(); linkedSZ.clear(); if (fm.equals(SamIzdat.SamLib.getName())){ linkedSZ.addAll(Arrays.asList(ForwardURLOrder)); } else { linkedSZ.addAll(Arrays.asList(ReverseURLOrder)); } } /** * Change order of the elements in the LinkedList the first element goes to the end of list */ public void flipOrder(){ SamIzdat theFirst = linkedSZ.poll(); linkedSZ.add(theFirst); } private Iterator<SamIzdat> getIterator(){ return linkedSZ.listIterator(); } /** * Small Internal class to store Samizdat mirrors data */ private enum SamIzdat { SamLib("SamLib","samlib.ru","81.176.66.171"), BudClub("BudClub","budclub.ru","194.63.140.119"), ZhurnalLib("ZhurnalLib","zhurnal.lib.ru","81.176.66.169"); private static final String ZIP =".zip" ; private final String name; private final Pattern pattern;//search url pattern private final String urlH;//Host URL for browser usage private final String urlIP;//For internal update usage SamIzdat(String name, String host, String ip) { this.name = name; urlH = SAMLIB_PROTO+host; urlIP=SAMLIB_PROTO+ip; pattern=Pattern.compile(".*("+urlH+"/\\w/\\w+)($|\\b)"); } public String getName(){ return name; } public Pattern getSearchPattern(){ return pattern; } /** * Test whether URL has a form http://<url>/q/qqqq_qq_q/ * * @param txt url to test * @return true id the success */ private boolean testFullUrl(String txt) { //All URL must be closed by / if (!txt.endsWith(SLASH)) { txt = txt + SLASH; } String ptr = urlH + URLPTR; return txt.matches(ptr); } /** * Construct URL to get Author data * @param uu reduced author URL * @return URL used to get author data from the site */ private String getAuthorIndexDate(String uu) { return urlIP + uu+REQUEST_INDEXDATE; } /** * Construct URL to download the book * @param uu book url * @return URL to download the book */ private String getBookURL(String uu,AbstractSettings.FileType fileType){ switch (fileType){ case HTML: return urlIP+REQUEST_BOOK_TEXT+uu; case FB2: return urlIP+SLASH+uu+fileType.ext+ZIP; default: return null; } } /** * Construct URL to search Author * * @param pattern string pattern to search * @param page number of page * @return URL to make search */ private String getSearchAuthorURL(String pattern,int page) throws SamlibParseException { //Log.i(DEBUG_TAG, "Got pattern: "+pattern); String res = urlIP+REQUEST_AUTHOR_SEARCH; //Log.i(DEBUG_TAG, "Template string: "+res); String first = pattern.substring(0, 1); first = first.toUpperCase(); if (!ABC.containsKey(first)){ Log.w(DEBUG_TAG,"Can not find Code for letter: "+first); Log.d(DEBUG_TAG, "ABC length for keys: "+ABC.keySet().size()+" values: "+ABC.values().size()); Log.d(DEBUG_TAG,"Letter - 4 "+ABC_LETTER[4]); Log.d(DEBUG_TAG, "The code "+ABC.get(first)); throw new SamlibParseException("Pattern: " + pattern); } res = res. replaceFirst(TMPL_ANUM, ABC.get(first)). replaceFirst(TMPL_PAGE, String.valueOf(page)). replaceFirst(TMPL_PAGELEN, String.valueOf(AUTHOR_PAGE_SIZE)); return res; } } //End SamIzdat class /** * Construct URL to open the book in WEB browser * * @param book the Book object to open * @return URL to open the book in browser */ String getBookUrlForBrowser(Book book){ return getDefaultURL() + SLASH + book.getUri() + ".shtml"; } /** * Construct URL to open the book in WEB browser and to store bookmark list * * @param author Author object * @return URL to open author page in browser */ String getAuthorUrlForBrowser(Author author){ return getDefaultURL() + author.getUrl() ; } /** * Test whether URL has a form http://<url>/w/www_w_w/ Must be ended by / * Must be begin with one of the valid URL * * @param txt url to make test * @return true if success */ /* public static boolean testFullUrl(String txt) { for (SamIzdat sz : URLs) { if (sz.testFullUrl(txt)) { return true; } } return false; } */ /** * Make parsed URL use for import bookmarks data into Database * * See examples * <PRE> * Found Good SSN: http://samlib.ru/a/ab -- http://samlib.ru/a/ab * Found Good SSN: http://samlib.ru/a/ab/ -- http://samlib.ru/a/ab * Found Good SSN: http://samlib.ru/a/ab/qwqwqwqw.html -- http://samlib.ru/a/ab * Found Good SSN: href = http://samlib.ru/a/ab -- http://samlib.ru/a/ab * Found Bad SSN: aaaa href = sdsds * Found Bad SSN: /a/asasa_q * Found Bad SSN: /a/asasa_q/ * Found Bad SSN: /a/asasa_q_e/ * Found Bad SSN: /asasas * Found Bad SSN: /asasas/asasas/ * Found Good SSN: ><A HREF="http://samlib.ru/a/abwow_a_s/" -- http://samlib.ru/a/abwow_a_s * </PRE> * * * @param str String to parse * @return parsed URL or null if Can not be parsed */ public static String getParsedUrl(String str){ String res ; for (SamIzdat sz : AllUrl) { Matcher m = sz.getSearchPattern().matcher(str); if (m.find()){ res = m.group(1); return res; } } return null; } /** * Take URL check syntax * * @param str full URL String * @return reduced URL or NULL if the syntax is wrong */ public static String reduceUrl(String str) { //All URL must be closed by / if (!str.endsWith(SamLibConfig.SLASH)) { str = str + SamLibConfig.SLASH; } if (str.startsWith(SAMLIB_PROTO)) {//full URL case for (SamIzdat sz : AllUrl) { if (sz.testFullUrl(str)) { return str.replaceAll(sz.urlH, ""); } } return null; } else {//reduced AUTHOR URL if (str.matches(URLPTR)) {//checking syntax return str; } else { return null;//wrong syntax retrn null } } } /** * Return the list of request URLs to get Author data * * @param a the author object to get data for * @return the list of url */ public List<String> getAuthorIndexDate(Author a) { List<String> res = new ArrayList<>(); Iterator<SamIzdat> itr = getIterator(); while(itr.hasNext()){ res.add(itr.next().getAuthorIndexDate(a.getUrl())); } return res; } /** * Get Default URL to use for browser open intend * @return Default URL to use for browser open intend */ private String getDefaultURL(){ Iterator<SamIzdat> itr = getIterator(); return itr.next().urlH; } /** * Return the list of request URLs to search authors * @param pattern search pattern * @param page number of page * @return List of URL to make search */ public List<String> getSearchAuthorURL(String pattern,int page) throws SamlibParseException { List<String> res = new ArrayList<>(); Iterator<SamIzdat> itr = getIterator(); while(itr.hasNext()){ res.add(itr.next().getSearchAuthorURL(pattern, page)); } return res; } /** * Get book url to download html content * @param b Book object * @return List of URLs */ public List<String> getBookUrl(Book b) { List<String> res = new ArrayList<>(); Iterator<SamIzdat> itr = getIterator(); while(itr.hasNext()){ res.add(itr.next().getBookURL(b.getUri(),b.getFileType())); } return res; } public static void transformBook(File orig) throws IOException { File tmp = new File(orig.getAbsoluteFile() + ".tmp"); if (! orig.renameTo(tmp)){ Log.e(DEBUG_TAG, "Error to rename file to tmp"); } BufferedWriter bw = new BufferedWriter(new FileWriter(orig)); BufferedReader br = new BufferedReader(new FileReader(tmp)); String line = br.readLine(); String[] str = line.split("\\|"); bw.write("<html><head>"); bw.write("<title>" + str[1] + "</title>"); bw.write("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"); bw.write("</head><body>"); bw.newLine(); bw.write("<center><h3>" + str[0] + "</h3>"); bw.write("<h2>" + str[1] + "</h2></center>"); line = br.readLine(); while (line != null) { bw.write(line); line = br.readLine(); } bw.write("</body></html>"); bw.flush(); bw.close(); br.close(); if (! tmp.delete()){ Log.e(DEBUG_TAG,"Error to delete tmp file"); } } public static int testSplit(String str) { String[] arr = str.split(SamLibConfig.SPLIT); return arr.length; } }