package com.interview.algorithms.design; import java.io.BufferedReader; import java.io.FileInputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.List; import java.util.Random; /** * Created with IntelliJ IDEA. * User: stefanie * Date: 7/30/14 * Time: 12:18 PM * * You have a stream of infinite queries (ie: real time Google search queries that people are entering). * Describe how you would go about finding a good estimate of 1000 samples from this never ending set of data and then write code for it. * * Idea: keep total number count N. If N<=m, just keep it. * For N>m, generate a random number R=rand(N) in [0, N), replace a[R] with new number if R falls in [0, m). */ public class C10_6_SampleForInfiniteSet { public static List<String> simpling(String filePath, int N){ List<String> queries = new ArrayList<>(); try { FileInputStream f = new FileInputStream(filePath); BufferedReader dr = new BufferedReader(new InputStreamReader(f)); String line = dr.readLine(); int count = 0; while (line != null) { count++; if(count <= N) queries.add(line.trim()); else { int r = new Random().nextInt(count); if(r < N) queries.set(r, line.trim()); } line = dr.readLine(); } } catch (Exception e) { e.printStackTrace(); } return queries; } }