import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import net.minidev.json.JSONArray; import net.minidev.json.JSONObject; import net.minidev.json.parser.JSONParser; import net.minidev.json.parser.ParseException; public class NeweggParser { private static final boolean PARSE_IMAGES = true; private static final byte PRODUCT_LIST_REQUEST = 0; private static final byte PRODUCT_INFO_REQUEST = 1; private static final byte MODULE_RESPONSE = 0; private static final byte MODULE_HTTP_GET_REQUEST = 1; private static final int TYPE_LONG = 0; private static final int TYPE_STRING = 1; private static final int BUFFER_SIZE = 4096; private static final int DOWNLOAD_OK = 0; private static final Charset ASCII = Charset.forName("US-ASCII"); private static final Charset UTF8 = Charset.forName("UTF-8"); private static final String ROOT_URL = "http://www.ows.newegg.com/Stores.egg/Menus"; private static final String ROOT_KEY = "StoreDepa"; private static final String ROOT_VALUE = "ComputerHardware"; private static final String STORE_ID = "StoreID"; private static final String STORE_URL = "http://www.ows.newegg.com/Stores.egg/Categories/"; private static final String DESCRIPTION_KEY = "Description"; private static final String CATEGORY_ID = "CategoryID"; private static final String NODE_ID = "NodeId"; private static final String CATEGORY_URL = "http://www.ows.newegg.com/Stores.egg/Navigation/"; private static final String SUBCATEGORY_URL = "http://m.newegg.com/ProductList?"; private static final String ITEM_NUMBER = "itemNumber="; private static final String PRODUCT_URL = "http://www.ows.newegg.com/Products.egg/"; private static final String FULL_PRODUCT_URL = "http://www.newegg.com/Product/Product.aspx?Item="; private static final String PRODUCT_URL_SUFFIX = "/Specification"; private static final HashSet<Object> STORE_DESCRIPTIONS = new HashSet<Object>(Arrays.asList( "CD / DVD Burners & Media", "Computer Accessories", "Computer Cases", "CPUs / Processors", "Fans & Heatsinks", "Hard Drives", "Keyboards & Mice", "Memory", "Monitors", "Motherboards", "Networking", "Power Supplies", "Printers / Scanners & Supplies", "Soundcards, Speakers & Headsets", "Video Cards & Video Devices" )); private static final HashSet<Object> SUBCATEGORY_DESCRIPTIONS = new HashSet<Object>(Arrays.asList( /* for "CD / DVD Burners & Media" */ "Blu-Ray Burners", "Blu-Ray Drives", "CD / DVD Burners", "CD / DVD Drives", "Duplicators", "External CD / DVD / Blu-Ray Drives", "CD / DVD / Blu-ray Media", /* for "Computer Accessories" */ "Cables", "Adapters & Gender Changers", "Add-On Cards", "Cable Management", "Card Readers", "Case Accessories", "Controller Panels", "CPU Accessories", "Mouse Pads & Accessories", "Power Strips", "SSD/ HDD Accessory", /* for "Computer Cases" */ "Computer Cases", "Server Chassis", /* for "CPUs / Processors" */ "Processors - Desktops", "Processors - Servers", "Processors - Mobile", /* for "Fans & Heatsinks" */ "Case Fans", "CPU Fans & Heatsinks", "Hard Drive Cooling", "Memory & Chipset Cooling", "Thermal Compound / Grease", "VGA Cooling", "Water / Liquid Cooling", /* for "Hard Drives" */ "Internal Hard Drives", "SSD", "Laptop Hard Drives", "Mac Hard Drives", "External Hard Drives", "Controllers / RAID Cards", /* for "Keyboards & Mice" */ "Keyboards", "Mice", /* for "Memory" */ "Desktop Memory", "Flash Memory", "Laptop Memory", "Mac Memory", "Server Memory", "System Specific Memory", "USB Flash Drives", /* for "Monitors" */ "LCD Monitors", "Large Format Display", "Touchscreen Monitors", "Monitor Accessories", /* for "Motherboards" */ "AMD Motherboards", "Intel Motherboards", "Motherboard / CPU / VGA Combo", "Motherboard Accessories", "Server Motherboards", /* for "Networking" */ "Wireless Networking", "Wired Networking", "VoIP", "Firewalls/Security Appliances", "Security & Surveillance", "Modems", "Powerline Networking", /* for "Power Supplies" */ "Power Supplies", "Server Power Supplies", /* for "Printers / Scanners & Supplies" */ "Laser Printers", "Inkjet Printers", "Document Scanners", "Flatbed Scanners", "Fax Machines & Copiers", /* for "Soundcards, Speakers & Headsets" */ "Headsets & Accessories", "Microphones", "Sound Cards", "Speakers", /* for "Video Cards & Video Devices" */ "Desktop Graphics Cards", "Professional Graphics Cards" )); private static final JSONParser parser = new JSONParser(JSONParser.DEFAULT_PERMISSIVE_MODE); private static final DataOutputStream out = new DataOutputStream(System.out); private static final DataInputStream in = new DataInputStream(System.in); private static byte[] httpResponse() throws IOException { /* ignore the HTTP content type field */ int length = in.readUnsignedShort(); byte[] data = new byte[length]; in.readFully(data); ByteArrayOutputStream response = new ByteArrayOutputStream(BUFFER_SIZE); length = in.readUnsignedShort(); while (length != 0) { data = new byte[length]; in.readFully(data); response.write(data); length = in.readUnsignedShort(); } int errorCode = in.readUnsignedByte(); if (errorCode != DOWNLOAD_OK) { System.err.println("NeweggParser.httpResponse ERROR:" + " Error occurred during download."); return null; } return response.toByteArray(); } private static byte[] httpGetRequest(String url) throws IOException { out.writeByte(MODULE_HTTP_GET_REQUEST); out.writeShort(url.length()); out.write(url.getBytes(ASCII)); out.flush(); return httpResponse(); } private static byte[] encodeState(Subcategory subcategory, int page) { String state = subcategory.getCategoryId() + "." + subcategory.getSubcategoryId() + "." + page; return state.getBytes(UTF8); } private static State decodeState(byte[] state) { if (state == null || state.length == 0) return null; try { String[] tokens = new String(state, UTF8).split("\\."); int categoryId = Integer.parseInt(tokens[0]); int subcategoryId = Integer.parseInt(tokens[1]); int page = Integer.parseInt(tokens[2]); return new State(new Subcategory(null, categoryId, subcategoryId, null), page); } catch (Exception e) { System.err.println("NeweggParser.decodeState: Could not decode state. " + e.getMessage()); return null; } } private static void respond(ArrayList<String> productIds, Subcategory subcategory, int page) throws IOException { byte[] state = encodeState(subcategory, page); out.writeByte(MODULE_RESPONSE); out.writeShort(state.length); out.write(state); out.writeShort(productIds.size()); for (String productId : productIds) { byte[] data = productId.getBytes(UTF8); out.writeShort(data.length); out.write(data); } out.flush(); } private static void respond(Map<String, Object> keyValues) throws IOException { keyValues.remove("features"); keyValues.remove("windows vista"); keyValues.remove("operating systems supported"); out.writeByte(MODULE_RESPONSE); out.writeShort(keyValues.size()); for (Entry<String, Object> pair : keyValues.entrySet()) { String keyString = pair.getKey().trim().toLowerCase(); byte[] key = keyString.getBytes(UTF8); out.writeShort(key.length); out.write(key); Object valueObject = pair.getValue(); if (valueObject instanceof String) { byte[] value = ((String) valueObject).getBytes(UTF8); out.writeByte(TYPE_STRING); out.writeShort(value.length); out.write(value); } else if (valueObject instanceof Number) { out.writeByte(TYPE_LONG); out.writeLong(((Number) valueObject).longValue()); } } out.flush(); } private static void findKeyValues( HashMap<String, Object> keyValues, Object json) { if (json instanceof JSONArray) { /* look for the key-value pair in this array */ JSONArray array = (JSONArray) json; for (int i = 0; i < array.size(); i++) findKeyValues(keyValues, array.get(i)); } else if (json instanceof JSONObject) { /* look for the key-value pair in this map */ JSONObject jsonMap = (JSONObject) json; if (jsonMap.containsKey("Key") && jsonMap.containsKey("Value")) { Object key = jsonMap.get("Key"); Object value = jsonMap.get("Value"); if (!(key instanceof String) || !(value instanceof String)) { System.err.println("NeweggParser.findKeyValues ERROR:" + " Expected String key-value pair."); } else keyValues.put(((String) key).toLowerCase(), (String) value); } /* it could be in its children */ for (Object child : jsonMap.values()) findKeyValues(keyValues, child); } } private static HashMap<Object, JSONObject> findKeyValues( Object key, Set<Object> values, Object json) { HashMap<Object, JSONObject> map = new HashMap<Object, JSONObject>(); if (json instanceof JSONArray) { /* look for the key-value pair in this array */ JSONArray array = (JSONArray) json; for (int i = 0; i < array.size(); i++) map.putAll(findKeyValues(key, values, array.get(i))); } else if (json instanceof JSONObject) { /* look for the key-value pair in this map */ JSONObject jsonMap = (JSONObject) json; if (jsonMap.containsKey(key) && values.contains(jsonMap.get(key))) map.put(jsonMap.get(key), jsonMap); /* it could be in its children */ for (Object child : jsonMap.values()) map.putAll(findKeyValues(key, values, child)); } else { return map; } return map; } private static JSONObject findKeyValue(Object key, Object value, Object json) { HashMap<Object, JSONObject> result = findKeyValues(key, Collections.singleton(value), json); if (result == null || result.size() == 0) return null; return result.values().iterator().next(); } private static HashSet<Subcategory> parseCategory( Object storeId, Object categoryId, Object nodeId) { byte[] data; String url = CATEGORY_URL + storeId + '/' + categoryId + '/' + nodeId; try { data = httpGetRequest(url); } catch (IOException e) { System.err.println("NeweggParser.parseCategory ERROR:" + " Error requesting URL '" + url + "'."); return null; } Object parsed; try { parsed = parser.parse(data); } catch (ParseException e) { System.err.println("NeweggParser.parseCategory ERROR:" + " Error parsing JSON."); return null; } HashMap<Object, JSONObject> result = findKeyValues(DESCRIPTION_KEY, SUBCATEGORY_DESCRIPTIONS, parsed); HashSet<Subcategory> subcategories = new HashSet<Subcategory>(); for (JSONObject map : result.values()) { Subcategory subcategory = new Subcategory( storeId, categoryId, map.get(CATEGORY_ID), map.get(NODE_ID)); subcategories.add(subcategory); } return subcategories; } private static void parseSubcategory(Subcategory subcategory, int pageStart) { ArrayList<String> productIds = new ArrayList<String>(20); for (int pageNumber = pageStart;; pageNumber++) { String url = SUBCATEGORY_URL + "categoryId=" + subcategory.getSubcategoryId() + "&storeId=" + subcategory.getStoreId() + "&nodeId=" + subcategory.getNodeId() + "&parentCategoryId=" + subcategory.getCategoryId() + "&isSubCategory=true&Page=" + pageNumber; byte[] data; try { data = httpGetRequest(url); } catch (IOException e) { System.err.println("NeweggParser.parseSubcategory ERROR:" + " Error requesting URL '" + url + "'."); return; } Document document = Jsoup.parse(new String(data, UTF8)); Elements elements = document.select("a.listCell"); if (elements.size() == 0) break; for (Element element : elements) { String link = element.attr("href"); int index = link.indexOf(ITEM_NUMBER); productIds.add(link.substring(index + ITEM_NUMBER.length())); } /* send the product IDs to the core */ try { respond(productIds, subcategory, pageNumber); } catch (IOException e) { System.err.println("NeweggParser.parseSubcategory ERROR:" + " Error responding with product ID list."); return; } productIds.clear(); } } private static void getProductList(State previous) { /* first get the list of stores from the root JSON document */ byte[] data; try { data = httpGetRequest(ROOT_URL); } catch (IOException e) { System.err.println("NeweggParser.getProductList ERROR:" + " Error requesting URL '" + ROOT_URL + "'."); return; } Object parsed; try { parsed = parser.parse(data); } catch (ParseException e) { System.err.println("NeweggParser.getProductList ERROR:" + " Error parsing JSON."); return; } /* find the computer hardware store ID */ JSONObject map = findKeyValue(ROOT_KEY, ROOT_VALUE, parsed); if (map == null || !map.containsKey(STORE_ID)) { System.err.println("NeweggParser.getProductList ERROR:" + " Could not determine 'ComputerHardware' store ID."); return; } /* get the list of categories in that store */ Object storeId = map.get(STORE_ID); String url = STORE_URL + storeId; try { data = httpGetRequest(url); } catch (IOException e) { System.err.println("NeweggParser.getProductList ERROR:" + " Error requesting URL '" + url + "'."); return; } try { parsed = parser.parse(data); } catch (ParseException e) { System.err.println("NeweggParser.getProductList ERROR:" + " Error parsing JSON."); return; } /* for each category, get a list of subcategories */ HashMap<Object, JSONObject> result = findKeyValues(DESCRIPTION_KEY, STORE_DESCRIPTIONS, parsed); HashSet<Subcategory> subcategories = new HashSet<Subcategory>(); for (JSONObject category : result.values()) { subcategories.addAll(parseCategory( storeId, category.get(CATEGORY_ID), category.get(NODE_ID))); } /* for each subcategory, get a list of products */ for (Subcategory subcategory : subcategories) { if (previous == null) parseSubcategory(subcategory, 1); else if (subcategory.equals(previous.getSubcategory())) { parseSubcategory(subcategory, previous.getPage()); previous = null; } } } private static Integer parsePrice(Object price) { if (price == null || !price.getClass().equals(String.class)) return null; try { String parsed = (String) price; return Integer.parseInt(parsed.trim().replaceAll("\\$", "").replaceAll("\\.", "")); } catch (NumberFormatException e) { return null; } } private static boolean parseProductInfo(String productId) { byte[] data; String url = PRODUCT_URL + productId + PRODUCT_URL_SUFFIX; HashMap<String, Object> keyValues = new HashMap<String, Object>(); try { data = httpGetRequest(url); if (data == null || data.length == 0) { respond(keyValues); return true; } } catch (IOException e) { System.err.println("NeweggParser.parseProductInfo ERROR:" + " Error requesting URL '" + url + "'."); return false; } Object parsed; try { parsed = parser.parse(data); } catch (ParseException e) { System.err.println("NeweggParser.parseProductInfo ERROR:" + " Error parsing JSON."); return false; } /* parse product specifications */ findKeyValues(keyValues, parsed); /* parse general product info */ url = PRODUCT_URL + productId; try { data = httpGetRequest(url); if (data == null || data.length == 0) { respond(keyValues); return true; } } catch (IOException e) { System.err.println("NeweggParser.parseProductInfo ERROR:" + " Error requesting URL '" + url + "'."); return false; } try { parsed = parser.parse(data); } catch (ParseException e) { System.err.println("NeweggParser.parseProductInfo ERROR:" + " Error parsing JSON."); return false; } if (!(parsed instanceof JSONObject)) { System.err.println("NeweggParser.parseProductInfo ERROR:" + " Expected a JSON map at root."); return false; } JSONObject map = (JSONObject) parsed; Object name = map.get("Title"); if (name != null) keyValues.put("name", name.toString()); Object price = map.get("FinalPrice"); Integer parsedPrice = parsePrice(price); if (parsedPrice == null) parsedPrice = parsePrice(map.get("MappingFinalPrice")); if (parsedPrice != null) keyValues.put("price", parsedPrice); keyValues.put("url", FULL_PRODUCT_URL + productId); if (PARSE_IMAGES) { Object image = map.get("Image"); if (!(image instanceof JSONObject)) { System.err.println("NeweggParser.parseProductInfo ERROR:" + " Expected a map value for key 'Image'."); return false; } JSONObject imageMap = (JSONObject) image; Object imagePath = imageMap.get("PathSize640"); if (imagePath != null) keyValues.put("image", imagePath.toString()); } try { respond(keyValues); } catch (IOException e) { System.err.println("NeweggParser.parseProductInfo ERROR:" + " Error responding with product information."); return false; } return true; } public static void main(String[] args) { try { /* wait for the type of request */ switch (in.readUnsignedByte()) { case PRODUCT_LIST_REQUEST: State previous = null; int length = in.readUnsignedShort(); if (length > 0) { byte[] data = new byte[length]; in.readFully(data); previous = decodeState(data); } getProductList(previous); break; case PRODUCT_INFO_REQUEST: length = in.readUnsignedShort(); while (length > 0) { byte[] data = new byte[length]; in.readFully(data); if (!parseProductInfo(new String(data, UTF8))) break; length = in.readUnsignedShort(); } break; default: } } catch (IOException e) { System.err.println("NeweggParser.main ERROR:" + " Error communicating with core."); return; } } } class State { private Subcategory subcategory; private int page; public State(Subcategory subcategory, int page) { this.subcategory = subcategory; this.page = page; } public Subcategory getSubcategory() { return subcategory; } public int getPage() { return page; } } class Subcategory { private Object storeId; private Object categoryId; private Object subcategoryId; private Object nodeId; public Subcategory(Object storeId, Object categoryId, Object subcategoryId, Object nodeId) { this.storeId = storeId; this.categoryId = categoryId; this.subcategoryId = subcategoryId; this.nodeId = nodeId; } public Object getStoreId() { return this.storeId; } public Object getCategoryId() { return this.categoryId; } public Object getSubcategoryId() { return this.subcategoryId; } public Object getNodeId() { return this.nodeId; } @Override public int hashCode() { return subcategoryId.hashCode(); } @Override public boolean equals(Object o) { if (o == null) return false; else if (o == this) return true; else if (!o.getClass().equals(this.getClass())) return false; Subcategory other = (Subcategory) o; return categoryId.equals(other.categoryId) && subcategoryId.equals(other.subcategoryId); } }