package com.statusParser.parsers; import java.io.IOException; import com.statusParser.Configuring; import com.statusParser.Mysql; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.sql.SQLException; import java.util.Properties; import java.util.regex.Matcher; import java.util.regex.Pattern; public class VkXml { private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36"; private static final int POSTS_ON_ROTATION = 100; private static boolean mysqlOn = false; private static Mysql mysql; private int parseOwnerId(String groupName) { String regex = "\\d+"; //Только цифры Pattern p = Pattern.compile(regex); Matcher m = p.matcher(groupName); if (m.matches()) { return Integer.parseInt(groupName); } else try { Document xmlDocScreenName = Jsoup.connect("https://api.vk.com/method/utils.resolveScreenName.xml?screen_name=" + groupName) .userAgent(USER_AGENT) .get(); Element ownerIdElement = xmlDocScreenName.select("object_id").first(); return Integer.parseInt(ownerIdElement.text()); } catch (IOException e) { e.printStackTrace(); } return -1; } public void parse(String group) { int ownerId = parseOwnerId(group); Configuring conf = new Configuring(); Properties prop = conf.load(); if (prop.getProperty("mysql") != null) { if (prop.getProperty("mysql").equals("on")) { mysqlOn = true; mysql = new Mysql(); } } try { Document xmlDocCountPosts = Jsoup.connect("https://api.vk.com/method/wall.get.xml?owner_id=-" + ownerId + "&filte=owner&offset=0&count=1") .userAgent(USER_AGENT) .get(); Element counterPostsElement = xmlDocCountPosts.select("count").first(); int countPosts = Integer.parseInt(counterPostsElement.text()); int countRotations = (countPosts + (POSTS_ON_ROTATION - 1)) / POSTS_ON_ROTATION; for (int i = 0; i < countRotations; i++) { String[][] parsedData = partParse(ownerId, i * POSTS_ON_ROTATION); //System.out.println("Progress..." + (i * POSTS_ON_ROTATION * 100) / countPosts); if (mysqlOn) { for (String post[] : parsedData) { //postIdOk, statusOk, imgOk, likesOk mysql.sentData(Integer.parseInt(post[0]), post[1], post[2], Integer.parseInt(post[3])); //FIXME!!! Null pointer exception when running in terminal } } } } catch (IOException e) { e.printStackTrace(); } } private String[][] partParse(int group, int offset) { int iter = 0; String[][] parsedData =new String[POSTS_ON_ROTATION][4]; try { Document xmlDoc = Jsoup.connect("https://api.vk.com/method/wall.get.xml?owner_id=-" + group + "&filte=owner&offset=" + offset + "&count=" + POSTS_ON_ROTATION) .userAgent(USER_AGENT) .get(); Elements posts = xmlDoc.select("post"); for (Element post : posts) { //postid parsing Element postid = post.select("id").first(); parsedData[iter][0] = postid.text(); //text parsing Element text = post.select("text").first(); String status = text.text(); if (!status.equals("")) { if (status.length() <= 500) { System.out.println(iter + ": " + status); parsedData[iter][1] = status; /*FIXME!!! Incorrect string value with emoticons*/ } else { System.out.println(iter + ": Too long post"); } } else { System.out.println(iter + ": No text in post"); } //image parsing if (post.select("attachment > photo > src_big").first() != null) { Element img = post.select("attachment > photo > src_big").first(); parsedData[iter][2] = img.text(); } //likes parsing Element likes = post.select("likes").first(); parsedData[iter][3] = likes.text(); iter++; } } catch (IOException e) { e.printStackTrace(); } return (parsedData); } }