package org.fastcatsearch.datasource.reader;

import java.io.File;
import java.io.FileFilter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.fastcatsearch.datasource.SourceModifier;
import org.fastcatsearch.datasource.reader.annotation.SourceReader;
import org.fastcatsearch.ir.common.IRException;
import org.fastcatsearch.ir.config.SingleSourceConfig;

/**
 * Source reader that walks a directory tree and emits one record per regular
 * file found, holding the file's absolute path under the configured field id.
 *
 * <p>The walk runs on a background thread started by {@link #init()}. That
 * thread uses this object as a {@link FileFilter} purely as a visitor:
 * {@link #accept(File)} always returns {@code false} and instead pushes
 * accepted paths into {@link #filePaths}. The thread calling
 * {@link #hasNext()} / {@link #next()} drains that queue.
 *
 * <p>Thread-safety: {@code filePaths} is a synchronized list and
 * {@code finished} is volatile, so exactly one crawler thread and one
 * consumer thread may run concurrently. {@code currentDepth} is touched by
 * the crawler thread only.
 */
@SourceReader(name="DIRECTORY_PATH")
@Deprecated
public class DirectoryPathReader extends SingleSourceReader<Map<String,Object>> implements FileFilter, Runnable {

    /** Sleep between polls while the queue is empty (consumer) or full (crawler). */
    private static final long POLL_INTERVAL_MILLIS = 100L;

    /** Lower bound applied to the configured {@code bufferSize}. */
    private static final int MIN_BUFFER_SIZE = 100;

    public DirectoryPathReader() {
        super();
    }

    @SuppressWarnings({ "rawtypes", "unchecked" })
    public DirectoryPathReader(String collectionId, File filePath, SingleSourceConfig singleSourceConfig, SourceModifier sourceModifier, String lastIndexTime) throws IRException {
        super(collectionId, filePath, singleSourceConfig, sourceModifier, lastIndexTime);
    }

    /** Directory at which the walk starts. */
    String rootPath;
    /** Key under which each path is stored in the emitted record (upper-cased in init). */
    String fieldId;
    /** Compiled skip patterns; entries for blank config lines are {@code null}. */
    Pattern[] skipPatterns;
    /** Compiled accept patterns; entries for blank config lines are {@code null}. */
    Pattern[] acceptPatterns;
    /** Queue of accepted paths shared by the crawler and the consumer (synchronized list). */
    List<String> filePaths;
    /** Set once the crawler thread has stopped; volatile because the consumer thread reads it. */
    volatile boolean finished;
    /** The crawler pauses while the queue holds more than this many paths. */
    int bufferSize;
    /** Deepest directory level (relative to the root) the crawler descends into. */
    int maxDepth;
    /**
     * Configured document limit.
     * NOTE(review): read and clamped in init() but never enforced anywhere in
     * this class - confirm whether a caller applies it or the limit is missing.
     */
    int maxCount;
    /** Depth of the directory currently being listed; crawler thread only. */
    int currentDepth;

    /** Record pre-fetched by {@link #hasNext()} and handed out by {@link #next()}. */
    private Map<String, Object> record;

    /**
     * Reads the configuration, compiles the pattern lists and starts the
     * background crawler thread.
     *
     * @throws java.util.regex.PatternSyntaxException if a configured pattern is not a valid regex
     */
    @Override
    public void init() throws IRException {
        rootPath = getConfigString("rootPath","/");
        fieldId = getConfigString("fieldId").trim().toUpperCase();
        String skipPatternConfig = getConfigString("skipPatterns", "");
        String acceptPatternConfig = getConfigString("acceptPatterns", "");

        // Clamp numeric settings to their minimum sane values.
        maxDepth = Math.max(getConfigInt("maxDepth"), 0);
        maxCount = Math.max(getConfigInt("maxCount"), 0);
        bufferSize = Math.max(getConfigInt("bufferSize"), MIN_BUFFER_SIZE);

        skipPatterns = compilePatterns(skipPatternConfig);
        acceptPatterns = compilePatterns(acceptPatternConfig);

        finished = false;
        // Shared between the crawler thread and the consumer, so it must be thread-safe.
        filePaths = Collections.synchronizedList(new ArrayList<String>());
        currentDepth = 0;

        Thread crawler = new Thread(this);
        crawler.start();
    }

    /**
     * Compiles a newline-separated pattern list.
     *
     * @param config raw configuration text, one regex per line
     * @return an array with one slot per line; slots for empty lines stay {@code null}
     */
    private static Pattern[] compilePatterns(String config) {
        String[] lines = config.trim().split("\n");
        Pattern[] compiled = new Pattern[lines.length];
        for (int i = 0; i < lines.length; i++) {
            if (lines[i].length() > 0) {
                compiled[i] = Pattern.compile(lines[i]);
            }
        }
        return compiled;
    }

    /** Registers the configurable parameters of this reader. */
    @Override
    protected void initParameters() {
        registerParameter(new SourceReaderParameter("rootPath", "Data Root Path", "Root Filepath for Indexing. (Absolute Path)"
                , SourceReaderParameter.TYPE_STRING_LONG, true, null));
        registerParameter(new SourceReaderParameter("fieldId", "Mapping Field Id", "Mapping Field-Id In Collection Schema"
                , SourceReaderParameter.TYPE_STRING, true, "path"));
        registerParameter(new SourceReaderParameter("maxDepth", "Max Depth", "Maximum Depth For File Exploring"
                , SourceReaderParameter.TYPE_NUMBER, false, "10"));
        registerParameter(new SourceReaderParameter("skipPatterns", "Skip Patterns", "Skip Patterns (in regex)"
                , SourceReaderParameter.TYPE_TEXT, false, ""));
        registerParameter(new SourceReaderParameter("acceptPatterns", "Accept Only Patterns", "Describe Accept Only Patterns ( blank = accept all ) "
                , SourceReaderParameter.TYPE_TEXT, false, ""));
        registerParameter(new SourceReaderParameter("bufferSize", "Buffer Size", "Reading Buffer Size"
                , SourceReaderParameter.TYPE_NUMBER, false, "100"));
        registerParameter(new SourceReaderParameter("maxCount", "Max Count", "Document limit for indexing. ( 0 = no limit )"
                , SourceReaderParameter.TYPE_NUMBER, false, "0"));
    }

    /**
     * Blocks until a record is available or the crawl is over.
     *
     * @return {@code true} if a record is ready for {@link #next()}
     */
    @Override
    public boolean hasNext() throws IRException {
        if (record == null) {
            record = fill();
        }
        return record != null;
    }

    /**
     * Returns the record pre-fetched by {@link #hasNext()}, or fetches one
     * directly when none is pending.
     *
     * @return the next record, or {@code null} when the crawl is exhausted
     */
    @Override
    protected Map<String, Object> next() throws IRException {
        Map<String, Object> pending = record;
        record = null;
        return pending != null ? pending : fill();
    }

    /**
     * Waits for the crawler to deliver a path and wraps it in a record.
     *
     * @return a single-entry map {@code fieldId -> absolute path}, or
     *         {@code null} once the crawler has finished and the queue is empty
     */
    private Map<String, Object> fill() throws IRException {
        while (true) {
            // Read the flag BEFORE polling: the crawler sets it only after its
            // last add, so "finished seen, then queue empty" proves exhaustion.
            boolean crawlDone = finished;
            String path = pollPath();
            if (path != null) {
                logger.trace("fetch record..");
                Map<String, Object> fetched = new HashMap<String, Object>();
                fetched.put(fieldId, path);
                return fetched;
            }
            if (crawlDone) {
                return null;
            }
            try {
                logger.trace("waiting..");
                Thread.sleep(POLL_INTERVAL_MILLIS);
            } catch (InterruptedException ex) {
                // Existing policy: log and keep waiting for the crawler.
                // NOTE(review): the interrupt status is not restored - confirm
                // whether callers rely on interruption for cancellation.
                logger.debug("CATCH INTERRUPT! {}", ex.getMessage());
            }
        }
    }

    /**
     * Atomically removes and returns the oldest queued path.
     *
     * @return the path, or {@code null} if the queue is currently empty
     */
    private String pollPath() {
        // The synchronized list uses itself as its mutex, so this makes the
        // emptiness check and the removal a single atomic step.
        synchronized (filePaths) {
            return filePaths.isEmpty() ? null : filePaths.remove(0);
        }
    }

    /**
     * Visitor callback invoked by {@link File#listFiles(FileFilter)} on the
     * crawler thread. Directories are descended into (up to {@code maxDepth});
     * regular files that pass the skip/accept patterns are queued.
     *
     * @param file the entry being visited
     * @return always {@code false}; results are delivered through the queue,
     *         not through the array returned by {@code listFiles}
     */
    @Override
    public boolean accept(File file) {
        logger.trace("finished:{} / file:{}", finished, file);
        if (!awaitBufferSpace()) {
            return false;
        }
        if (file.isDirectory()) {
            descend(file);
        } else if (file.isFile()) {
            logger.trace("file : {}", file);
            String path = file.getAbsolutePath();
            if (!isSkipped(path) && isAccepted(path)) {
                filePaths.add(path);
            }
        }
        return false;
    }

    /**
     * Back-pressure: pauses the crawler while the queue holds more than
     * {@code bufferSize} paths.
     *
     * @return {@code true} when there is room to continue, {@code false} if
     *         {@code finished} was set while waiting
     */
    private boolean awaitBufferSpace() {
        while (!finished) {
            if (filePaths.size() <= bufferSize) {
                return true;
            }
            try {
                Thread.sleep(POLL_INTERVAL_MILLIS);
            } catch (InterruptedException ex) {
                // Same policy as fill(): log and keep waiting.
                logger.debug("CATCH INTERRUPT! {}", ex.getMessage());
            }
        }
        return false;
    }

    /** Lists {@code dir} recursively unless doing so would exceed {@code maxDepth}. */
    private void descend(File dir) {
        int depthOnEntry = currentDepth;
        logger.trace("dir current:{} / max:{}", depthOnEntry, maxDepth);
        if (depthOnEntry + 1 <= maxDepth) {
            // Tracking depth in a shared field is safe only because the
            // traversal runs on a single thread.
            currentDepth++;
            dir.listFiles(this);
            currentDepth = depthOnEntry;
        } else {
            logger.trace("Max Depth Over In Exploring..{}", dir);
        }
    }

    /** Returns {@code true} if any configured skip pattern is found in {@code path}. */
    private boolean isSkipped(String path) {
        if (skipPatterns == null) {
            return false;
        }
        for (int i = 0; i < skipPatterns.length; i++) {
            Pattern pattern = skipPatterns[i];
            if (pattern != null && pattern.matcher(path).find()) {
                logger.trace("Skip Pattern Found In : {}", path);
                return true;
            }
        }
        return false;
    }

    /**
     * Returns {@code true} if every configured accept pattern is found in
     * {@code path} (trivially true when none is configured).
     * NOTE(review): a path must match ALL accept patterns, not any one of
     * them - confirm this is the intended meaning of "accept only patterns".
     */
    private boolean isAccepted(String path) {
        if (acceptPatterns == null) {
            return true;
        }
        for (int i = 0; i < acceptPatterns.length; i++) {
            Pattern pattern = acceptPatterns[i];
            if (pattern != null && !pattern.matcher(path).find()) {
                logger.trace("Not Accepted Pattern Found In : {}", path);
                return false;
            }
        }
        return true;
    }

    /**
     * Crawler thread body: walks the tree rooted at {@code rootPath} and then
     * marks the crawl as finished.
     */
    @Override
    public void run() {
        try {
            File rootFile = new File(rootPath);
            rootFile.listFiles(this);
        } finally {
            // Always signal completion, even if the walk dies with a runtime
            // exception; otherwise the consumer would wait forever in fill().
            finished = true;
        }
    }
}