package zh.solr.se.indexer.chinese;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;

import org.apache.solr.common.SolrInputDocument;
import org.codehaus.jackson.map.ObjectMapper;

import zh.solr.se.indexer.IndexerBase;
import zh.solr.se.indexer.db.entity.ChineseEntity;
import zh.solr.se.indexer.solrproxy.SolrConstants;
import zh.solr.se.indexer.solrproxy.UnsupportedCoreException;

public class ChineseIndexer extends IndexerBase {

    private String sourceFile;

    public ChineseIndexer(String dataType) throws UnsupportedCoreException {
        super(SolrConstants.CORE_NAME_CHINESE, dataType);
    }

    public ChineseIndexer(String dataType, String sourceFile) throws UnsupportedCoreException {
        super(SolrConstants.CORE_NAME_CHINESE, dataType);
        this.setSourceFile(sourceFile);
    }

    @Override
    protected String getFieldBoostsPropertyName() {
        return "";
    }

    @Override
    public int indexAllDocuments() throws Exception {
        // File-based sources (JSON/XML/CSV) are read from sourceFile; MySQL goes through the DB path.
        if (super.dataType.equals(DataType.JSON)
                || super.dataType.equals(DataType.XML)
                || super.dataType.equals(DataType.CSV)) {
            return indexDocumentsFromFile();
        } else if (super.dataType.equals(DataType.MYSQL)) {
            return indexDocumentsFromDB();
        }
        return -1;
    }

    public void setSourceFile(String sourceFile) {
        this.sourceFile = sourceFile;
    }

    public String getSourceFile() {
        return this.sourceFile;
    }

    private int indexDocumentsFromFile() throws Exception {
        int count = 0;
        if (super.dataType.equals(DataType.JSON)) {
            JsonProcesser processer = new JsonProcesser(new ObjectMapper());
            // try-with-resources ensures the reader is closed even if indexing fails
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(new FileInputStream(getSourceFile())))) {
                String line;
                while (null != (line = br.readLine())) {
                    ChineseEntity entity = processer.parseDataModel(line.trim());

                    // index the entity
                    SolrInputDocument solrDoc = toSolrDocument(entity);
                    try {
                        indexDocument(solrDoc);
                        ++count;
                    } catch (final Exception e) {
                        e.printStackTrace();
                        // skip this document
                        continue;
                    }

                    // print progress every 1000 documents
                    if ((count % 1000) == 0) {
                        System.out.println("Already indexed " + count + " documents ...");
                    }
                }
            }
        } else if (super.dataType.equals(DataType.XML)) {
            // add XML parse code here
        } else if (super.dataType.equals(DataType.CSV)) {
            // add CSV parse code here
        }
        return count;
    }

    private int indexDocumentsFromDB() {
        // MySQL-backed indexing is not implemented yet
        return -1;
    }

    private SolrInputDocument toSolrDocument(final ChineseEntity entity) {
        if (entity == null) {
            return null;
        }
        final SolrInputDocument solrDoc = new SolrInputDocument();
        // add the article fields
        addFields(solrDoc, entity);
        return solrDoc;
    }

    private void addFields(final SolrInputDocument solrDoc, final ChineseEntity entity) {
        if (solrDoc == null || entity == null) {
            return;
        }
        addSolrField(solrDoc, SolrConstants.FIELD_CHINESE_ID, entity.getId());
        addSolrField(solrDoc, SolrConstants.FIELD_CHINESE_NAME, entity.getName());
        addSolrField(solrDoc, SolrConstants.FIELD_CHINESE_CONTENT, entity.getContent());
    }

    /**
     * Main method
     *
     * @param args command-line arguments (unused; the source file path is hardcoded)
     * @throws UnsupportedCoreException if the target Solr core is not supported
     */
    public static void main(final String[] args) throws UnsupportedCoreException {
        final ChineseIndexer indexer =
                new ChineseIndexer("json", "/var/zh-solr-se/samples/movie-data.json");
        try {
            indexer.startIndexing();
            System.out.println("Indexing completed successfully.");
            System.exit(0);
        } catch (final Exception e) {
            System.out.println("Indexing error: " + e.getMessage());
            System.exit(1);
        }
    }
}