package org.ariadne_eu.utils.rest; import java.io.FileInputStream; import java.io.InputStream; import java.io.InputStreamReader; import java.io.RandomAccessFile; import java.io.Reader; import java.util.ArrayList; import java.util.Arrays; import java.util.Scanner; public class SearchNode { private class MinimumHeap { //when sorted it gives descending results int keyCapacity; int size; boolean built = false; int[] keys; // int[] values; //The value on which is sorted public void initialise(int[] values){ initialise(values.length, values); } public void initialise(int keyCapacity, int[] values){ this.keyCapacity = keyCapacity; keys = new int[keyCapacity]; this.values = values; size = 0; built = false; } /** * if capacity is full then * if value[key] is greater than value[head] replace head * else skip * @param key * @return */ public boolean addWithinCapacity(int key){ if (size<keyCapacity){ return add(key); } else { build(); if (values[key]>values[keys[0]]){ keys[0] = key; return refreshTop(); } } return true; } public boolean add(int key){ if (size >= keyCapacity) return false; keys[size++] = key; built = false; return true; } /** * PRECONDITION: the values are initiated * @return */ public void buildUsingValues(){ size = values.length; for (int i=0;i<size;i++) keys[i]=i; build(); } /** * It is assumed that values & keys are initialised */ public void build(){ if (!built && size>0){ for( int i = size / 2; i >= 0; i-- ){ //Sift down int child; int tmp = keys[ i ]; int j; for( j=i; 2 * j + 1 < size; j = child ) { child = 2 * j + 1; //leftChild( i ) if( child != size - 1 && values[keys[ child ]] > ( values[keys[ child + 1 ]] )) child++; if( values[tmp] > values[keys[child ]] ) keys[ j ] = keys[child ]; else break; } keys[ j ] = tmp; } //System.out.print("MyHeap: "); //for( int i=0;i<keys.length;i++)System.out.print(values[keys[i]]+" "); //System.out.println(); } built = true; } public boolean refreshTop(){ if (size<=0) return false; build(); int child; int j; int tmp=keys[0]; for( j=0; 2 * j + 1 < size; j = child ) { child = 2 * j + 1; //leftChild( j ) if( child != size - 1 && values[keys[ child ]] > ( values[keys[ child + 1 ]] )) child++; if( values[tmp] > values[keys[child ]] ) keys[ j ] = keys[child ]; else break; } keys[ j ] = tmp; return true; } public int poll(){ if (size<=0) return -1; build(); int result = keys[0]; keys[0] = keys[size]; size--; int child; int j; int tmp=keys[0]; for( j=0; 2 * j + 1 < size; j = child ) { child = 2 * j + 1; //leftChild( j ) if( child != size - 1 && values[keys[ child ]] > ( values[keys[ child + 1 ]] )) child++; if( values[tmp] > values[keys[child ]] ) keys[ j ] = keys[child ]; else break; } keys[ j ] = tmp; return result; } public int peek(){ if (size<=0) return -1; build(); return keys[0]; } public int[] sort(boolean ascending){ build(); for( int i = size - 1; i > 0; i-- ) { //Delete max int tmp = keys[i]; keys[i] = keys[0]; keys[0]= tmp; //Sift down int child; int j; for( j=0; 2 * j + 1 < i; j = child ) { child = 2 * j + 1; //leftChild( j ) if( child != i - 1 && values[keys[ child ]] > ( values[keys[ child + 1 ]] )) child++; if( values[tmp] > values[keys[child ]] ) keys[ j ] = keys[child ]; else break; } keys[ j ] = tmp; } return keys; } public int[] toArray(){ build(); int[] result = new int[size]; for (int i=0;i<size;i++) result[i] = keys[i]; return result; } } private int nodeNr; private int nrOfIndexes; private int idOffset; private int nrOfFiles; private String[] indexNames = new String[0]; private int[] indexOffset = new int[0]; private String[][] keys = new String[0][]; //the first dimension is the indexNr, the second the keyNr private int[][] valuesArray = new int[0][]; //the first dimension is the indexOffset + keyNr, the second the values private int[]rating = new int[0]; private long[]cmrPointer = new long[0]; private Reader cmrFile; //private InputStream cmrFile; //private FileInputStream fis; //private InputStreamReader in; //private BufferedReader cmrJsonFile; private RandomAccessFile fcData; public SearchNode(int nr){ nodeNr = nr; } public void copyFrom(SearchNode from, int benchMarkOffset) { idOffset = from.idOffset + benchMarkOffset; nrOfIndexes = from.nrOfIndexes; indexNames = new String[from.indexNames.length]; for (int i=0;i<indexNames.length;i++) indexNames[i] = from.indexNames[i]; indexOffset = new int[from.indexOffset.length]; for (int i=0;i<indexOffset.length;i++) indexOffset[i] = from.indexOffset[i]; keys = new String[from.keys.length][]; for (int i=0;i<keys.length;i++){ keys[i] = new String[from.keys[i].length]; for (int j=0;j<keys[i].length;j++)keys[i][j] = from.keys[i][j]; } valuesArray = new int[from.valuesArray.length][]; for (int i=0;i<valuesArray.length;i++){ valuesArray[i] = new int[from.valuesArray[i].length]; for (int j=0;j<valuesArray[i].length;j++)valuesArray[i][j] = from.valuesArray[i][j]; } cmrPointer = new long[from.cmrPointer.length]; for (int i=0;i<cmrPointer.length;i++)cmrPointer[i] = from.cmrPointer[i]; cmrFile = from.cmrFile; } public QueryResult search(Query qry) { QueryResult result = new QueryResult(qry); result.initialise(this); final int dimConj = qry.searchTerms.length; //Number of disjunctions in the conjunction if (dimConj==0) return result; int indNr; int keyNr=0; //Initialise the facet counting int dimfacets = 0; for (int i=0;i< qry.facets.length;i++) dimfacets = dimfacets+getKeysDimension(qry.facets[i]); int[] facetPositions = new int[dimfacets]; int[] facetKeyNumbers = new int[dimfacets]; int[] facetCurrentValues = new int[dimfacets]; //This is the value (i.e. ID) to which the facetPosition is pointing for (int i=0;i< qry.facets.length;i++){ indNr = getIndexID(qry.facets[i]); if (indNr >=0){ for (int j=indexOffset[indNr];j<indexOffset[indNr+1];j++){ if (valuesArray[j].length>0){ facetCurrentValues[keyNr] = valuesArray[j][0]; facetKeyNumbers[keyNr]=j; keyNr++; } else { facetCurrentValues[j] = Integer.MAX_VALUE; //Integer.MAX_VALUE is the sentinel value } } } } MinimumHeap facetHeap = new MinimumHeap(); facetHeap.initialise(facetCurrentValues); facetHeap.buildUsingValues(); //Check for empty disjunctions and find the disjunction with the smallest nr of values int[] disjRangeTemp = new int[dimConj+1]; // indTemp is the ArrayList holding indexNr and keyNr ArrayList<Integer> indTemp = new ArrayList<Integer>(); int minNrOfValInDisj=0; int smallestDisjunction = 0; for (int i=0;i<dimConj;i++){ int cntDim = 0; disjRangeTemp[i+1] = disjRangeTemp[i]; String[] disjunction = qry.searchTerms[i]; for (int j=0;j<disjunction.length;j++){ //Find the indexNr by name String st = qry.searchTerms[i][j]; //int separatorPos = st.indexOf("="); int separatorPos = st.indexOf(":"); String indNam = st.substring(0, separatorPos); for (indNr=0; indNr < indexNames.length;indNr++) if (indexNames[indNr].equalsIgnoreCase(indNam))break; //Find the keyNr by name String keyNam = st.substring(separatorPos+1); keyNr = -1; if (keys.length > 0) keyNr = Arrays.binarySearch(keys[indNr],keyNam); if (keyNr >= 0){ keyNr = keyNr + indexOffset[indNr]; if (valuesArray[keyNr].length > 0){ disjRangeTemp[i+1]++; indTemp.add(keyNr); cntDim = cntDim + valuesArray[keyNr].length; } } } if (cntDim==0) return result;//This disjunction has no elements if (i==0) minNrOfValInDisj = cntDim; else if (cntDim<minNrOfValInDisj){ minNrOfValInDisj = cntDim; smallestDisjunction = i; } } //Set the disjunction with the least terms in front // fill the disjunction ranges and // select the values from cache into qryValues int nrOfTerms = disjRangeTemp[dimConj]; int[][] qryValues = new int[nrOfTerms][]; int[] disjRange = new int[dimConj+1]; int k=0; for (int j=disjRangeTemp[smallestDisjunction];j<disjRangeTemp[smallestDisjunction+1];j++){ qryValues[k] = valuesArray[indTemp.get(j)]; k++; } disjRange[1]=k; for (int i=0;i<smallestDisjunction;i++){ for (int j=disjRangeTemp[i];j<disjRangeTemp[i+1];j++){ qryValues[j+k] = valuesArray[indTemp.get(j)]; } disjRange[i+2]=disjRangeTemp[i+1]+k; } for (int i=smallestDisjunction+1;i<dimConj;i++){ for (int j=disjRangeTemp[i];j<disjRangeTemp[i+1];j++){ qryValues[j] = valuesArray[indTemp.get(j)]; } disjRange[i+1]=disjRangeTemp[i+1]; } /*for (int i=0;i<dimConj;i++){ System.out.println("==>"+i); for (int j=disjRange[i];j<disjRange[i+1];j++){ System.out.print(j+": "); for (k=0;k<qryValues[j].length;k++) System.out.print(" "+qryValues[j][k]); System.out.println(); } System.out.println(); }*/ int[] pos = new int[nrOfTerms];//position in the qryValues //int cnt = 0; boolean found = true; //TODO verify why found is used; it is never set to false //int candidate = qryValues[0][0]; int candidate = -1; while (found){ //Find a common value in all disjunctions int i=0; //the ith conjunction element while (i<dimConj) { //find the minimum of the i-th disjunction (i.e. the i-th element in the conjunction) int min = -1; for (int j=disjRange[i]; j<disjRange[i+1];){ while((pos[j]<qryValues[j].length) && (qryValues[j][pos[j]]<candidate))pos[j]++; if (pos[j]<qryValues[j].length){ if(min<0){ min = qryValues[j][pos[j]]; } else { if (qryValues[j][pos[j]]<min) min = qryValues[j][pos[j]]; } j++; } else { //Here we are at the end of a list if (disjRange[i+1]==disjRange[i]+1) return result;//There is a disjunction with only one search term and no values for (k=i+1; k<=dimConj; k++)disjRange[k]--; for (k=j+1;k<nrOfTerms;k++){ pos[k-1]=pos[k]; qryValues[k-1] = qryValues[k]; } nrOfTerms--; } } if (min > candidate){ candidate = min; if(i==0) i = 1; else i=0; //if i=0 there is no need to visit it again; we can move to the next } else { i++; } } //All conjunctions have been processed if (found){ //System.out.println(candidate); //TODO // Check whether candidate is not in the exclusion list, // compute here the rank and store in the result heap //Add to the results list (intIDs) if (result.nrOfResults<result.intIDs.length) result.intIDs[result.nrOfResults]=candidate+idOffset; result.nrOfResults++; //Add to the facets if (qry.facets.length > 0) { int currentFacetKey = facetHeap.peek(); while (facetCurrentValues[currentFacetKey]<=candidate){ int facetPos = facetPositions[currentFacetKey]; int facetKeyNr = facetKeyNumbers[currentFacetKey]; //Find a value that is >= the candidate while ((facetPos<valuesArray[facetKeyNr].length) &&(valuesArray[facetKeyNr][facetPos]<candidate))facetPos++; //If there is a match then add 1 to the facet count & refresh the top of the facetHeap if ((facetPos<valuesArray[facetKeyNr].length) && (valuesArray[facetKeyNr][facetPos]==candidate)) { //System.out.println(candidate +" "+currentFacetKey); result.facetCounts[currentFacetKey]++; facetPos++; } if (facetPos<valuesArray[facetKeyNr].length){ facetCurrentValues[currentFacetKey] = valuesArray[facetKeyNr][facetPos]; facetPositions[currentFacetKey]=facetPos; } else{ facetCurrentValues[currentFacetKey] = Integer.MAX_VALUE; } facetHeap.refreshTop(); currentFacetKey = facetHeap.peek(); } //for (int f=0;f<dimfacets;f++){ // int facetPos = facetPositions[f]; // int facetKeyNr = facetKeyNumbers[f]; // while ((facetPos<valuesArray[facetKeyNr].length) // &&(valuesArray[facetKeyNr][facetPos]<candidate))facetPos++; // if((facetPos<valuesArray[facetKeyNr].length) // &&(valuesArray[facetKeyNr][facetPos]==candidate)){ // result.facetCounts[f]++; // facetPos++; // } // facetPositions[f]=facetPos; //} } candidate++; } } return result; } /** * * @param a * @param b * @return */ public int countIntersection(int[] a, int[] b) { int i=0, j=0,cnt=0,aLength = a.length,bLength =b.length; while (i<aLength && j<bLength) { if (a[i]<= b[j]){ if (a[i]== b[j]) { cnt++; } i++; } else j++; } return cnt; } /** * PRECONDITION loadTerms must be run first in order to set nrOfFiles * @param inFileNam * @throws Exception */ public void loadPointers(String inDir,String fileNam) throws Exception { cmrPointer = new long[nrOfFiles*4+1]; InputStream fis = new FileInputStream (inDir+"pre_point/"+fileNam+".txt"); //Reader isr = new InputStreamReader (fis, "UTF-8"); //Scanner s = new Scanner(isr); Scanner s = new Scanner(fis); //Get rid of possible garbage in the beginning. Sometimes UTF-8 has this problem while (!s.hasNextInt())s.nextLine(); int k = 0; while (s.hasNextInt()){ cmrPointer[k] = s.nextInt(); if (s.hasNextLine()) s.nextLine(); k++; } System.out.println(k); //cmrFile = new InputStreamReader (new FileInputStream (inFileNam+"pre/part"+nod+".txt"), "UTF-8"); //fcData = new RandomAccessFile(inFileNam+"pre/part"+nod+".data", "r").getChannel(); fcData = new RandomAccessFile(inDir+"pre/"+fileNam+".data", "r"); //fcData.read(ByteBuffer dst, long position) } /** * PRECONDITION: the terms must be generated by the IndexBuilder * @throws Exception */ public int loadTerms(String inDir, String fileNam, int ios) throws Exception { int result = ios; String debugStr = "Node: "+nodeNr; InputStream fis = new FileInputStream (inDir+"index/"+fileNam+".txt"); Reader isr = new InputStreamReader (fis, "UTF-8"); Scanner s = new Scanner(isr); //Get rid of possible garbage in the beginning. Sometimes UTF-8 has this problem while (!s.hasNextInt())s.nextLine(); //Read offSet and nr of files //idOffset = s.nextInt();s.nextLine(); idOffset = ios; nrOfFiles = s.nextInt();s.nextLine(); rating = new int[nrOfFiles]; //Read nr of indexes and initialise indexNames and keys nrOfIndexes = s.nextInt()+1;//+1 because we also make an extra key which is 'all' indexNames = new String[nrOfIndexes]; indexNames[0] = "collection"; indexOffset = new int[nrOfIndexes+1]; indexOffset[0] = 0; indexOffset[1] = 1; keys = new String[nrOfIndexes][]; keys[0] = new String[]{"all"}; //Read total nr of keys and initialise valuesArray int totalNrOfKeys = s.nextInt()+1; valuesArray = new int[totalNrOfKeys][]; try { //For each index read its name, the nr of keys for this index, and {key,{value}*}* for (int i=1; i <nrOfIndexes;i++){ //Read the name of the index s.nextLine();//read the rest of the previous line first String indexNam = s.nextLine(); debugStr = "Node: "+nodeNr+" index "+indexNam; indexNames[i] = indexNam.substring(1, indexNam.length()-1); //Read nr of keys in the index int nrOfKeys = s.nextInt(); keys[i] = new String[nrOfKeys]; indexOffset[i+1] = indexOffset[i]+nrOfKeys; for (int k=0; k <nrOfKeys;k++){ //Read the key s.nextLine();//read the rest of the previous line first String key = s.nextLine(); debugStr = "Node="+nodeNr+" index="+indexNam+" key="+key; key = key.substring(1, key.length()-1); //Read the number of values for the key keys[i][k] = key; int nrOfValues = s.nextInt(); //Read all the values valuesArray[indexOffset[i]+k] = new int[nrOfValues]; for (int v=0;v<nrOfValues;v++) valuesArray[indexOffset[i]+k][v] = s.nextInt(); } } //Read the 'all' index int nrOfValues = s.nextInt(); result = result + nrOfValues; System.out.println(nrOfValues); valuesArray[0] = new int[nrOfValues]; s.nextLine(); for (int i=0;i<nrOfValues;i++){ valuesArray[0][i] = s.nextInt(); rating[i] = s.nextInt(); s.nextLine(); //Read the rest of the line } } catch (Exception e) { System.out.println("Exception in load terms reading "+debugStr+". Error:"+e.getMessage()); throw e; } return result; } public void loadTerms2(String inFileNam) throws Exception { String debugStr = "Node: "+nodeNr; InputStream fis = new FileInputStream (inFileNam); Reader isr = new InputStreamReader (fis, "UTF-8"); Scanner s = new Scanner(isr); //Get rid of possible garbage in the beginning. Sometimes UTF-8 has this problem while (!s.hasNextInt())s.nextLine(); //Read nr of indexes and initialise indexNames and keys nrOfIndexes = s.nextInt(); indexNames = new String[nrOfIndexes]; indexOffset = new int[nrOfIndexes+1]; indexOffset[0] = 0; keys = new String[nrOfIndexes][]; //Read total nr of keys and initialise valuesArray int totalNrOfKeys = s.nextInt(); valuesArray = new int[totalNrOfKeys][]; try { //For each index read its name, the nr of keys for this index, and {key,{value}*}* for (int i=0; i <nrOfIndexes;i++){ //Read the name of the index s.nextLine();//read the rest of the previous line first String indexNam = s.nextLine(); debugStr = "Node: "+nodeNr+" index "+indexNam; indexNames[i] = indexNam.substring(1, indexNam.length()-1); //Read nr of keys in the index int nrOfKeys = s.nextInt(); keys[i] = new String[nrOfKeys]; indexOffset[i+1] = indexOffset[i]+nrOfKeys; for (int k=0; k <nrOfKeys;k++){ //Read the key s.nextLine();//read the rest of the previous line first String key = s.nextLine(); debugStr = "Node="+nodeNr+" index="+indexNam+" key="+key; key = key.substring(1, key.length()-1); //Read the number of values for the key keys[i][k] = key; int nrOfValues = s.nextInt(); //Read all the values valuesArray[indexOffset[i]+k] = new int[nrOfValues]; for (int v=0;v<nrOfValues;v++) valuesArray[indexOffset[i]+k][v] = s.nextInt(); } } } catch (Exception e) { System.out.println("Exception in load terms reading "+debugStr+". Error:"+e.getMessage()); throw e; } } /** * * @param indNr * @param keyNr * @return */ public int[] getvaluesArray(int indNr, int keyNr) { return valuesArray[indexOffset[indNr]+keyNr]; } /** * * @param index * @return */ public String[] getKeys(String indexName) { int keyID = getIndexID(indexName); if (keyID==-1)return new String[0]; return keys[keyID]; } /** * * @param indexName * @return */ public int[] getKeyNumbers(String indexName) { int indID = getIndexID(indexName); int dim = indexOffset[indID+1] - indexOffset[indID]; int[] result = new int[dim]; for (int i=0;i<dim;i++)result[i]=i+indexOffset[indID]; return result; } /** * * @param indexName * @return */ public int getKeysDimension(String indexName) { int keyID = getIndexID(indexName); if (keyID==-1)return 0; return indexOffset[keyID+1] - indexOffset[keyID]; } /** * * @param indexName * @return */ private int getIndexID(String indexName){ for (int i=0;i<indexNames.length;i++) { if (indexNames[i].equalsIgnoreCase(indexName)) return i; } return -1; } public String getJsonResult(int nr) throws Exception { //String result = ""; fcData.seek(cmrPointer[(nr-1)]); return fcData.readUTF(); //int jsonBegin = cmrPointer[(nr-1)]; //int jsonEnd = cmrPointer[(nr-1)+1]; //byte[] jsonBytes = new byte[jsonEnd-jsonBegin]; //ByteBuffer jsonBuf = ByteBuffer.wrap(jsonBytes); //fcData.read(jsonBuf, jsonBegin); //return new String(jsonBuf.array()); //return result; } }