/*
This file is part of leafdigital leafChat.
leafChat is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
leafChat is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with leafChat. If not, see <http://www.gnu.org/licenses/>.
Copyright 2011 Samuel Marshall.
*/
package com.leafdigital.logs;
import java.io.*;
import java.lang.Character.UnicodeBlock;
import java.text.*;
import java.util.*;
import java.util.regex.*;
import org.w3c.dom.*;
import util.xml.*;
import com.leafdigital.logs.api.Logger;
import leafchat.core.api.*;
class LoggerImp implements Logger,Runnable
{
/** Date pattern (for SimpleDateFormat) used for the date part of log file names */
static final String ISOFORMAT="yyyy-MM-dd";
/** Messages passed to debugLog with a level above this value are discarded */
private final static int DEBUGLEVEL=20;
/** Name, within the log folder, of the file holding (ID, file name) records */
private static final String INDEX_FILES="index.files";
/** Name, within the log folder, of the file holding fixed-size word entries */
private static final String INDEX_WORDS="index.words";
/** Max. number of bytes stored for a word in the index */
private static final int INDEX_WORDBYTES=8;
/** Max. number of files stored in a single index entry */
private static final int INDEX_FILESPERENTRY=6;
/** Number of bytes per entry as a consequence (word bytes plus one 4-byte int per file) */
private static final int INDEX_BYTESPERENTRY=INDEX_WORDBYTES+INDEX_FILESPERENTRY*4;
/** How many updates we can cache before flushing */
private static final int UPDATES_MAXPENDING=16384;
/** How frequently all logs are flushed (milliseconds) */
private static final int FILEFLUSHTIME=5*1000;
/** How long before an unused logfile is discarded (milliseconds) */
private static final int FILECLOSETIME=60*1000;
/** Log folder */
private File folder;
/** Context */
private PluginContext context;
/** Word index file, kept open for random-access updates */
private RandomAccessFile indexFile;
/** Position at end of the word index file, i.e. where the next appended entry goes */
private int fileEndPos;
/** File list from index, keyed ID->name */
private Map<Integer, String> filesIDToName = new HashMap<Integer, String>();
/** File list from index, keyed name->ID */
private Map<String, Integer> filesNameToID=new HashMap<String, Integer>();
/** Word data from index: (cropped) word -> all index entries for that word */
private Map<String, IndexEntry[]> words = new HashMap<String, IndexEntry[]>();
/** Map from log file (File) -> LogStream */
private Map<File, LogStream> currentStreams = new HashMap<File, LogStream>();
/** Buffered updates to index file, ordered by file position (see IndexUpdate.compareTo) */
private SortedSet<IndexUpdate> indexUpdates = new TreeSet<IndexUpdate>();
/**
 * Regular expression matching log file names. Groups are: 1 = date
 * (yyyy-MM-dd), 2 = server, 3 = category, 4 = item; the last three are in
 * the encoding produced by toFilePart. The dot before the extension is
 * escaped so that only names really ending in ".lclog" match.
 */
private final static Pattern LOGFILENAME=Pattern.compile(
  "([0-9]{4}-[0-9]{2}-[0-9]{2})_([^_]+)_([^_]+)_([^_]+)\\.lclog");
/**
* Maximum number of blank entries to remember the location of (so we don't
* waste memory storing too many of them)
*/
private final static int INDEX_MAXBLANKENTRIES=4096;
/**
* Compaction threshold: when the index is loaded and contains blank entries,
* the file is compacted if the total number of entries (blank ones included)
* divided by the number of blank entries, using integer division, is below
* this value.
*/
private final static int INDEX_COMPACTTHRESHOLD=8;
/**
* List of Integer storing file positions that are available for reuse
*/
private LinkedList<Integer> blankEntrySpaces = new LinkedList<Integer>();
/** Debug switch: when true, loadIndex re-indexes every known log file */
private static boolean DEBUG_REBUILD_INDEX = false;
/** Debug switch: when true, loadIndex prints index statistics (analyseIndex, which then exits the JVM) */
private static boolean DEBUG_ANALYSE_INDEX = false;
/**
* close: set by close() to ask the logger thread to stop.
* closed: set by the logger thread once it has closed all log streams.
*/
private boolean close,closed;
/**
 * Queues a pending change to the word index file. Once the queue holds
 * UPDATES_MAXPENDING changes, all of them are written out immediately.
 * @param iu Change to queue
 * @throws IOException If the queue had to be flushed and writing failed
 */
private void addIndexUpdate(IndexUpdate iu) throws IOException
{
  indexUpdates.add(iu);
  boolean queueFull = indexUpdates.size() >= UPDATES_MAXPENDING;
  if(queueFull)
  {
    applyIndexChanges();
  }
}
/** Stores a currently-open log file stream */
private static class LogStream
{
/** Writer appending to the log file */
Writer w;
/** True if something was written since the last flush by the logger thread */
boolean dirty;
/** Time (System.currentTimeMillis) of the most recent write */
long lastUsed;
}
/** One entry from the index file */
private static class IndexEntry
{
/** Position in file for random updates */
int filePos;
/** List of file IDs stored; a value of 0 marks an unused slot */
int[] files=new int[INDEX_FILESPERENTRY];
}
/**
 * A pending random-access write to the word index file: a target position
 * plus the bytes to put there. Instances are ordered by position.
 */
private class IndexUpdate implements Comparable<IndexUpdate>
{
  /** Byte offset in the index file at which the data is written */
  private int pos;
  /** Collects the bytes to write */
  private ByteArrayOutputStream outputBytes=new ByteArrayOutputStream();
  /** Convenience wrapper used to encode ints and bytes into outputBytes */
  private DataOutputStream outputData=new DataOutputStream(outputBytes);

  /**
   * @param iPos Byte offset in the index file where this update applies
   */
  IndexUpdate(int iPos)
  {
    pos=iPos;
  }

  /** Appends raw bytes to the data of this update. */
  void writeBytes(byte[] ab) throws IOException
  {
    outputData.write(ab);
  }

  /** Appends a 4-byte int to the data of this update. */
  void writeInt(int i) throws IOException
  {
    outputData.writeInt(i);
  }

  /** Appends a single byte to the data of this update. */
  void writeByte(int i) throws IOException
  {
    outputData.writeByte(i);
  }

  /** Seeks to the target position in the index file and writes the data. */
  void apply() throws IOException
  {
    indexFile.seek(pos);
    byte[] payload=outputBytes.toByteArray();
    indexFile.write(payload);
  }

  /**
   * Orders updates by file position, in the hope that this will reduce HD
   * seek time when a batch of changes is written.
   * @throws Error If two distinct updates target the same position
   */
  @Override
  public int compareTo(IndexUpdate otherUpdate)
  {
    if(this == otherUpdate)
    {
      return 0;
    }
    if(pos != otherUpdate.pos)
    {
      return pos < otherUpdate.pos ? -1 : 1;
    }
    // Because of the way writes work (we either write an entire new block,
    // or else a single entry inside a block which will be indexed at least
    // by 12 bytes), we should never try to write to the same position more
    // than once.
    throw new Error("Index writes inconsistent");
  }
}
/**
 * Loads the index from the given folder and starts the background thread
 * that flushes log files and index changes.
 * @param context Plugin context
 * @param plr Reporter for load progress
 * @param folder Folder holding log files and index
 * @throws GeneralException If the index cannot be loaded
 */
LoggerImp(PluginContext context,PluginLoadReporter plr,File folder) throws GeneralException
{
  this.context=context;
  this.folder=folder;

  try
  {
    loadIndex(plr);
  }
  catch(IOException ioe)
  {
    throw new GeneralException("Failed to initialise log index. Are you running "+
      "two copies of the program at once? That isn't supported.",ioe);
  }

  // Flushing runs at the lowest priority
  Thread flusher=new Thread(this,"Logger thread");
  flusher.setPriority(Thread.MIN_PRIORITY);
  flusher.start();
}
/**
 * Body of the logger thread. Until close is set, it wakes up every
 * FILEFLUSHTIME milliseconds (or when notified), flushes dirty log streams,
 * closes streams that have not been used for FILECLOSETIME milliseconds, and
 * writes pending index changes. On exit it closes all streams, writes the
 * remaining index changes, sets closed and notifies waiting threads.
 */
@Override
public synchronized void run()
{
try
{
while(true)
{
try
{
// wait() releases this object's monitor while waiting, so the other
// synchronized methods can run in the meantime
wait(FILEFLUSHTIME);
}
catch(InterruptedException ie)
{
// Ignored; treated the same as a normal wake-up
}
// Shutdown requested; the finally block below does the cleanup
if(close) return;
// Flush and/or close filehandles
long now=System.currentTimeMillis();
for(Iterator<Map.Entry<File, LogStream>> i =
currentStreams.entrySet().iterator();i.hasNext();)
{
Map.Entry<File, LogStream> me = i.next();
LogStream ls=me.getValue();
// Write dirty files each time around this loop (5 seconds)
if(ls.dirty)
{
try
{
ls.w.flush();
}
catch(IOException ioe)
{
// Close it, ignoring close errors
try { ls.w.close(); } catch(IOException ioe2) {}
// Forget the stream; log() opens a new one on the next write
i.remove();
ErrorMsg.report(
"Error writing to log file "+me.getKey(),ioe);
continue;
}
ls.dirty=false;
}
// Chuck files away if they haven't been written to for a minute
if(ls.lastUsed + FILECLOSETIME < now)
{
try { ls.w.close(); } catch(IOException ioe) {}
i.remove();
}
}
// Do index changes
try
{
applyIndexChanges();
}
catch(IOException ioe)
{
ErrorMsg.report("Error writing to log index ",ioe);
}
}
}
finally
{
// Close every remaining stream, ignoring close errors
for(Iterator<LogStream> i=currentStreams.values().iterator();i.hasNext();)
{
LogStream ls = i.next();
try { ls.w.close(); } catch(IOException ioe) {}
i.remove();
}
// Write out whatever index changes are still pending
try
{
applyIndexChanges();
}
catch(IOException ioe)
{
ErrorMsg.report("Error writing to log index ",ioe);
}
// Tell close() that everything has been shut down
closed=true;
notifyAll();
}
}
/**
 * Shuts the logger down: asks the logger thread to stop, blocks until that
 * thread reports that it has finished, then closes the word index file.
 */
synchronized void close()
{
  // Request shutdown and wake the logger thread
  close=true;
  notifyAll();

  // Block until the logger thread sets the closed flag
  while(!closed)
  {
    try
    {
      wait();
    }
    catch(InterruptedException interrupted)
    {
      // Keep waiting
    }
  }

  try
  {
    indexFile.close();
  }
  catch(IOException closeFailure)
  {
    // Ignored
  }

  debugLog(10,"Log files closed");
}
/**
 * Lists the log folder and parses every file name that has the log file
 * format; other files are skipped.
 * @return Array containing information about all log files
 */
LogFileInfo[] getAllLogs()
{
  List<LogFileInfo> parsed = new LinkedList<LogFileInfo>();
  File[] candidates=folder.listFiles();
  if(candidates!=null)
  {
    for(File candidate : candidates)
    {
      try
      {
        parsed.add(new LogFileInfo(candidate));
      }
      catch(GeneralException notALogFile)
      {
        // OK, so it wasn't a logfile
      }
    }
  }
  return parsed.toArray(new LogFileInfo[parsed.size()]);
}
/**
 * Information parsed from the name of a log file: date, server, category
 * and item.
 */
public static class LogFileInfo
{
  private File f;
  private String date;
  private String server;
  private String category;
  private String item;

  /**
   * @param f Candidate log file
   * @throws GeneralException If the name does not have the log file format
   *   or contains an invalid encoded part
   */
  LogFileInfo(File f) throws GeneralException
  {
    Matcher nameMatch=LOGFILENAME.matcher(f.getName());
    if(!nameMatch.matches())
    {
      throw new GeneralException("Not a log file");
    }
    this.f=f;
    this.date=nameMatch.group(1);
    this.server=fromFilePart(nameMatch.group(2));
    this.category=fromFilePart(nameMatch.group(3));
    this.item=fromFilePart(nameMatch.group(4));
  }

  /** @return Date part of the file name (yyyy-MM-dd) */
  public String getDate()
  {
    return date;
  }

  /** @return Decoded server part of the file name */
  public String getServer()
  {
    return server;
  }

  /** @return Decoded category part of the file name */
  public String getCategory()
  {
    return category;
  }

  /** @return Decoded item part of the file name */
  public String getItem()
  {
    return item;
  }

  /** @return The log file itself */
  public File getFile()
  {
    return f;
  }
}
/**
 * Turns a time into the date string used in log file names. The plugin's
 * roll time (in hours) is subtracted first, then the result is formatted
 * with ISOFORMAT by a default SimpleDateFormat.
 * @param time Time in milliseconds
 * @return String representation e.g. 2006-02-04
 */
private String convertTime(long time)
{
  LogsPlugin plugin=(LogsPlugin)context.getPlugin();
  int rollTime=plugin.getRollTime();
  // Hours to milliseconds (int arithmetic)
  int rollMillis=rollTime*60*60*1000;
  Date shifted=new Date(time-rollMillis);
  return new SimpleDateFormat(ISOFORMAT).format(shifted);
}
/**
 * Converts a date from log file name format into a form for display.
 * @param isoDate Date in ISOFORMAT (yyyy-MM-dd)
 * @return "Today" or "Yesterday" where the date equals the current or the
 *   previous log date; otherwise weekday, day and month if the year part
 *   equals that of the current log date, else day, month and year
 * @throws IllegalArgumentException If the date cannot be parsed (the
 *   ParseException is attached as the cause)
 * @throws GeneralException Declared for callers; not thrown by this code
 */
public String displayDate(String isoDate) throws GeneralException
{
  // Read the clock once so that 'today' and 'yesterday' are derived from
  // the same instant
  long now=System.currentTimeMillis();
  String today=convertTime(now);
  if(isoDate.equals(today))
    return "Today";
  String yesterday=convertTime(now-24*60*60*1000);
  if(isoDate.equals(yesterday))
    return "Yesterday";

  // Get ISO time as Java date
  Date d;
  try
  {
    d=new SimpleDateFormat(ISOFORMAT).parse(isoDate);
  }
  catch(ParseException e)
  {
    // Keep the parse failure as the cause so it can be diagnosed
    throw new IllegalArgumentException("Date is not in ISO format",e);
  }

  // Same year: the year is not shown
  boolean sameYear=isoDate.split("-")[0].equals(today.split("-")[0]);
  SimpleDateFormat display=
    new SimpleDateFormat(sameYear ? "E d MMMM" : "d MMMM yyyy");
  return display.format(d);
}
/**
 * Encodes a value so that it is safe for use in filenames (and does not
 * include the _ separator). ASCII letters, digits and a small set of
 * punctuation are kept; every other character becomes % followed by its
 * code as 4 lower-case hex digits.
 * @param value Original string
 * @return Safe string
 */
static String toFilePart(String value)
{
  final String safePunctuation=" ,.#@$^&()-='";
  StringBuilder out=new StringBuilder();
  for(char c : value.toCharArray())
  {
    boolean alphanumeric=
      (c>='A' && c<='Z') || (c>='a' && c<='z') || (c>='0' && c<='9');
    if(alphanumeric || safePunctuation.indexOf(c)>=0)
    {
      out.append(c);
      continue;
    }

    // Special characters are represented as % then 4 hex digits
    String hex=Integer.toHexString(c);
    out.append('%');
    for(int pad=hex.length();pad<4;pad++)
    {
      out.append('0');
    }
    out.append(hex);
  }
  return out.toString();
}
/**
 * Decodes a value encoded for use in a filename: each % followed by 4 hex
 * digits is replaced by the character with that code, all other characters
 * are copied unchanged.
 * @param filePart Value in filename
 * @return Original string
 * @throws GeneralException If a % is not followed by 4 characters that
 *   parse as a hex number
 */
private static String fromFilePart(String filePart) throws GeneralException
{
  StringBuilder decoded=new StringBuilder();
  int length=filePart.length();
  int i=0;
  while(i<length)
  {
    char c=filePart.charAt(i);
    if(c!='%')
    {
      decoded.append(c);
      i++;
      continue;
    }

    // Need four more characters after the %
    if(i+4>=length)
    {
      throw new GeneralException("Invalid part in filename");
    }
    String hex=filePart.substring(i+1,i+5);
    try
    {
      decoded.append((char)Integer.parseInt(hex,16));
    }
    catch(NumberFormatException nfe)
    {
      throw new GeneralException("Invalid part in filename");
    }
    // Skip the % and its 4 digits
    i+=5;
  }
  return decoded.toString();
}
/**
 * Works out which log file an event belongs in. The name is built from the
 * log date for the time plus the encoded source, category and item; nothing
 * else affects the choice of file.
 * @param time Time of event
 * @param source Source e.g. server address
 * @param category Category e.g. chan
 * @param item Item name e.g. channel name
 * @return File for that logging
 */
public File getFile(long time,String source,String category,String item)
{
  StringBuilder name=new StringBuilder();
  name.append(convertTime(time));
  name.append("_").append(toFilePart(source));
  name.append("_").append(toFilePart(category));
  name.append("_").append(toFilePart(item));
  name.append(".lclog");
  return new File(folder,name.toString());
}
/**
 * Extracts words from an XML string. The string is wrapped in a root
 * element, parsed, and the words of all text nodes are collected.
 * @param xml XML string
 * @return Array of words
 * @throws XMLException If string can't be parsed
 */
static String[] extractWords(String xml) throws XMLException
{
  Document parsed=XML.parse("<root>"+xml+"</root>");
  List<String> found = new LinkedList<String>();
  extractWords(parsed.getDocumentElement(),found);
  String[] result=new String[found.size()];
  return found.toArray(result);
}
/**
 * Collects the words of all text nodes at or below the given node.
 * @param n Node to process
 * @param l List that receives the words
 */
private static void extractWords(Node n, List<String> l)
{
  if(n instanceof Text)
  {
    splitWords(n.getNodeValue(),l);
    return;
  }

  // Not text: recurse into each child in document order
  Node child=n.getFirstChild();
  while(child!=null)
  {
    extractWords(child,l);
    child=child.getNextSibling();
  }
}
/**
 * Splits text into lower-cased words and appends them to a list. A word is
 * a run of characters that are letters or digits (Character.isLetterOrDigit)
 * or apostrophes. Any other character ends the current word; if that
 * character belongs to the Hiragana, Katakana or CJK unified ideographs
 * block it is additionally added as an entry of its own.
 * @param s Text to split
 * @param l List that receives the words
 */
static void splitWords(String s, List<String> l)
{
  StringBuilder word=new StringBuilder();
  for(char c : s.toCharArray())
  {
    // Note: This is not the official way of splitting words, as per Unicode
    // standard appendix 29, which is hideously complicated. This only supports
    // English and similar languages and (sketchily) Japanese
    if(c=='\'' || Character.isLetterOrDigit(c))
    {
      word.append(c);
      continue;
    }

    // Separator: emit the word collected so far, if any
    if(word.length()>0)
    {
      l.add(word.toString().toLowerCase());
      word.setLength(0);
    }

    // Some characters get treated as single entries
    UnicodeBlock block=UnicodeBlock.of(c);
    boolean standalone=block==UnicodeBlock.HIRAGANA ||
      block==UnicodeBlock.KATAKANA ||
      block==UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS;
    if(standalone)
    {
      l.add(String.valueOf(c).toLowerCase());
    }
  }

  // Emit the final word, if any
  if(word.length()>0)
  {
    l.add(word.toString().toLowerCase());
  }
}
/**
 * Writes all queued index changes to the index file, in queue order, and
 * removes each one from the queue once it has been written.
 * @throws IOException If there is an error writing the index data
 */
private synchronized void applyIndexChanges() throws IOException
{
  int count=indexUpdates.size();
  long start=System.currentTimeMillis();

  Iterator<IndexUpdate> pending=indexUpdates.iterator();
  while(pending.hasNext())
  {
    IndexUpdate update=pending.next();
    debugLog(40,"Saving changes @ "+update.pos+" ("+update.outputBytes.size()+" bytes)");
    update.apply();
    pending.remove();
  }

  if(count>0)
  {
    long elapsed=System.currentTimeMillis()-start;
    debugLog(25,"Saved "+count+" index changes ("+elapsed+" ms)");
  }
}
/**
 * Sends a message to the context's debug log, unless its level is above
 * DEBUGLEVEL.
 * @param iLevel Level of the message
 * @param sText Message text
 */
private void debugLog(int iLevel,String sText)
{
  if(iLevel > DEBUGLEVEL)
  {
    return;
  }
  context.logDebug(sText);
}
/**
 * @return The fixed name "Logger"
 */
@Override
public String toString()
{
return "Logger";
}
/**
 * Loads the index from disk.
 * <p>
 * Steps: read the file index (ID and name records) and note files that no
 * longer exist or should expire; read the word index (fixed-size entries),
 * remembering blank entries for reuse; compact the word index and load
 * again if too much of it is blank; open the word index for random-access
 * updates; finally remove expired files from the index and archive or
 * delete them.
 * <p>
 * When the index is compacted, the nested call on the compacted file does
 * all remaining work (opening the index file, expiring files, debug hooks),
 * so this call returns straight afterwards. Carrying on would open the index
 * file a second time and try to expire files which the nested call has
 * already removed from the index.
 * @param plr Reporter for information on load progress
 * @throws IOException File error loading index
 * @throws GeneralException Other error loading index
 */
private synchronized void loadIndex(PluginLoadReporter plr) throws IOException,GeneralException
{
  plr.reportProgress("Loading full-text log index...");

  if(!folder.exists()) folder.mkdirs();

  // Clear existing data (if any; only needed after a compact run)
  filesIDToName.clear();
  filesNameToID.clear();
  blankEntrySpaces.clear();
  words.clear();

  long startTime=System.currentTimeMillis();

  // Read file list
  List<File> expireFiles = new LinkedList<File>();
  File fileIndex=new File(folder,INDEX_FILES);
  if(!fileIndex.exists())
  {
    new FileOutputStream(fileIndex).close();
  }
  DataInputStream dis=new DataInputStream(
    new BufferedInputStream(new FileInputStream(fileIndex)));
  try
  {
    while(true)
    {
      int number = dis.readInt();
      String name = dis.readUTF();
      Integer key = number;
      filesIDToName.put(key,name);
      filesNameToID.put(name,key);

      // Check file expiry
      try
      {
        File f=new File(folder,name);
        // Is this file due for expiry yet? OR does it not exist?
        if(!f.exists())
          expireFiles.add(f);
        else
        {
          LogFileInfo lfi=new LogFileInfo(f);
          if(((LogsPlugin)context.getPlugin()).shouldExpire(lfi.date,lfi.category,lfi.item))
            expireFiles.add(f);
        }
      }
      catch(GeneralException e)
      {
        // Shouldn't happen. Ignore for now.
        System.err.println("Problem loading log "+name+" ("+e.getMessage()+")");
      }
    }
  }
  catch(EOFException eof)
  {
    // Normal end of the file index
  }
  finally
  {
    // Close on every path, including read errors other than end of file
    dis.close();
  }
  debugLog(10,"File index loaded with "+
    filesIDToName.keySet().size()+" files: "+(System.currentTimeMillis()-startTime)+"ms");
  startTime=System.currentTimeMillis();

  // Read main index
  File wordsIndex=new File(folder,INDEX_WORDS);
  if(!wordsIndex.exists())
  {
    new FileOutputStream(wordsIndex).close();
  }
  dis=new DataInputStream(
    new BufferedInputStream(new FileInputStream(wordsIndex)));
  byte[] wordBuffer=new byte[INDEX_WORDBYTES];
  int filePos=0,blank=0;
  try
  {
    while(true)
    {
      // Read 8-byte max word; shorter words are padded with zero bytes
      dis.readFully(wordBuffer);
      int length=INDEX_WORDBYTES;
      while(length>0 && wordBuffer[length-1]==0) length--;
      String word=new String(wordBuffer,0,length,"UTF-8");

      // Read 6 * int file IDs that contain word
      boolean blankEntry=true;
      IndexEntry thisEntry=new IndexEntry();
      for(int i=0;i<INDEX_FILESPERENTRY;i++)
      {
        int fileID=dis.readInt();
        thisEntry.files[i]=fileID;
        if(fileID!=0) blankEntry=false;
      }
      // An entry without a word counts as blank too
      blankEntry|=word.length()==0;

      if(blankEntry)
      {
        // Remember blank entries for later reuse
        if(blankEntrySpaces.size() < INDEX_MAXBLANKENTRIES)
        {
          blankEntrySpaces.addLast(filePos);
        }
        blank++;
      }
      else
      {
        // Find existing entries for that word
        IndexEntry[] aie=words.get(word);
        int newPos;
        if(aie==null) // No existing entries, start new array
        {
          aie=new IndexEntry[1];
          words.put(word,aie);
          newPos=0;
        }
        else // Reallocate array to add entry
        {
          IndexEntry[] aieNew=new IndexEntry[aie.length+1];
          System.arraycopy(aie,0,aieNew,0,aie.length);
          aie=aieNew;
          words.put(word,aie);
          newPos=aie.length-1;
        }
        thisEntry.filePos=filePos;
        aie[newPos]=thisEntry;
      }

      filePos+=INDEX_BYTESPERENTRY;
    }
  }
  catch(EOFException eof)
  {
    // Normal end of the word index
  }
  finally
  {
    dis.close();
  }
  fileEndPos=filePos;

  int totalEntries=filePos/INDEX_BYTESPERENTRY;
  debugLog(10,"Word index loaded with "+
    words.keySet().size()+" words in "+totalEntries+" entries ("+
    (filePos / 1024)+"KB), of which "+blank+" blank: "+(System.currentTimeMillis()-startTime)+"ms");

  if(blank>0 && (totalEntries / blank < INDEX_COMPACTTHRESHOLD))
  {
    compactWordIndex(plr);
    // Load again from the compacted file. The nested call opens the index
    // file and handles expiry, so there is nothing left to do here.
    loadIndex(plr);
    return;
  }

  // Open file now (and keep it open so that we can't end up with two
  // clients writing to it at once, which would be bad)
  indexFile=new RandomAccessFile(wordsIndex,"rw");

  // Expire files
  if(!expireFiles.isEmpty())
  {
    File[] expired=expireFiles.toArray(new File[expireFiles.size()]);

    boolean archive=((LogsPlugin)context.getPlugin()).shouldArchive();
    plr.reportProgress((archive ? "Archiving ": "Deleting ")+
      expired.length+" old log files...");
    startTime=System.currentTimeMillis();

    removeFiles(expired);
    if(archive)
    {
      File archiveFolder=new File(folder,"archive");
      if(!archiveFolder.exists()) archiveFolder.mkdirs();
      for(int i=0;i<expired.length;i++)
      {
        if(!expired[i].exists()) continue;
        if(!expired[i].renameTo(new File(archiveFolder,expired[i].getName())))
          throw new IOException("Failed to move log into archive: "+expired[i]);
      }
    }
    else
    {
      for(int i=0;i<expired.length;i++)
      {
        if(!expired[i].exists()) continue;
        if(!expired[i].delete())
          throw new IOException("Failed to delete log: "+expired[i]);
      }
    }
    debugLog(10,(archive ? "Archived " : "Deleted ")+expired.length+
      " old logs: "+(System.currentTimeMillis()-startTime)+"ms");
  }

  // Debug hook: rebuild index (should probably clear it first)
  if(DEBUG_REBUILD_INDEX) regenerateWordIndex();

  // Debug hook: obtain stats about index
  if(DEBUG_ANALYSE_INDEX) analyseIndex();
}
/**
 * Compacts the word index, removing any unused entries from the file's disk
 * footprint. A new file is written from the in-memory word data, packing
 * the non-zero file IDs of each word into as few entries as possible; the
 * new file then replaces the existing word index. The in-memory data is not
 * updated here.
 * @param plr Reporter for information on progress
 * @throws IOException If the new index cannot be written or the files
 *   cannot be renamed or deleted
 */
private synchronized void compactWordIndex(PluginLoadReporter plr) throws IOException
{
  plr.reportProgress("Compacting full-text log index...");
  long startTime=System.currentTimeMillis();

  // New file
  File newWordsIndex=new File(folder,INDEX_WORDS+".new");
  DataOutputStream dos=new DataOutputStream(
    new BufferedOutputStream(new FileOutputStream(newWordsIndex)));

  int totalEntries=0,writtenEntries=0;
  try
  {
    // Go through each word...
    for(Map.Entry<String, IndexEntry[]> me : words.entrySet())
    {
      // writtenStart: an output entry is currently open for this word
      // entryPos: number of file IDs written to the open output entry
      boolean writtenStart=false;
      int entryPos=0;
      String word = me.getKey();
      IndexEntry[] aie = me.getValue();
      for(int entry=0;entry<aie.length;entry++)
      {
        totalEntries++;
        for(int file=0;file<INDEX_FILESPERENTRY;file++)
        {
          int fileID=aie[entry].files[file];
          if(fileID!=0)
          {
            if(!writtenStart)
            {
              // Write word, zero-padded
              byte[] abWord=word.getBytes("UTF-8");
              dos.write(abWord);
              for(int i=0;i<INDEX_WORDBYTES-abWord.length;i++)
                dos.write(0);
              writtenStart=true;
              writtenEntries++;
            }
            dos.writeInt(fileID);
            entryPos++;
            if(entryPos==INDEX_FILESPERENTRY)
            {
              // Output entry is full; the next ID starts a new one
              writtenStart=false;
              entryPos=0;
            }
          }
        }
      }
      // Pad a partly-filled final entry with blank file IDs
      if(writtenStart)
      {
        for(;entryPos<INDEX_FILESPERENTRY;entryPos++)
          dos.writeInt(0);
      }
    }
  }
  finally
  {
    // Close even if writing failed, so the file handle is not leaked
    dos.close();
  }

  // Rename files. The backup name is derived from the word index name so
  // that it cannot collide with the backup of the file index.
  File
    wordsIndex=new File(folder,INDEX_WORDS),
    oldWordsIndex=new File(folder,INDEX_WORDS+".old");
  if(oldWordsIndex.exists())
  {
    if(!oldWordsIndex.delete()) throw new IOException(
      "Failed to delete old log file index "+oldWordsIndex);
  }
  if(!wordsIndex.renameTo(oldWordsIndex))
    throw new IOException("Failed to rename old log file index "+wordsIndex);
  if(!newWordsIndex.renameTo(wordsIndex))
    throw new IOException("Failed to rename new log file index "+newWordsIndex);
  oldWordsIndex.delete();

  debugLog(10,"Word index compacted from "+totalEntries+" to "+writtenEntries+
    " entries: "+
    (System.currentTimeMillis()-startTime)+"ms");
}
/**
 * Debug helper: re-indexes every file listed in the file index.
 * @throws IOException Index write error
 * @throws XMLException If a log line is not valid XML
 * @throws GeneralException If a log file cannot be read
 */
private synchronized void regenerateWordIndex() throws IOException,XMLException,GeneralException
{
  for(String name : filesNameToID.keySet())
  {
    debugLog(10,"Regenerating index for "+name);
    File logFile=new File(folder,name);
    reindex(logFile);
  }
}
/**
 * Debug helper: prints statistics about the in-memory word index (used and
 * allocated file slots, entries that could be saved) and an estimate of the
 * index size for entry sizes of 1 to 8 files, then exits the JVM.
 */
private void analyseIndex()
{
  // Number of files referenced by a word -> number of words with that count
  Map<Integer, Integer> histogram = new TreeMap<Integer, Integer>();
  int usedSlots=0,allocatedSlots=0,wastedEntries=0;

  for(Map.Entry<String, IndexEntry[]> wordEntry : words.entrySet())
  {
    IndexEntry[] entries = wordEntry.getValue();

    // Count the file slots in use for this word
    int count=0;
    for(IndexEntry entry : entries)
    {
      for(int fileID : entry.files)
      {
        if(fileID!=0) count++;
      }
    }

    allocatedSlots+=INDEX_FILESPERENTRY*entries.length;

    // Entries that would be needed if the files were packed tightly
    int fullEntryEquivs=(count-1)/INDEX_FILESPERENTRY+1;
    if(fullEntryEquivs < entries.length)
    {
      System.err.println("Wasted entries: <"+wordEntry.getKey()+"> (wanted "+count+" files, using "+entries.length+" entries)");
    }
    wastedEntries+=entries.length-fullEntryEquivs;

    Integer previous=histogram.get(count);
    histogram.put(count,previous==null ? 1 : previous+1);

    usedSlots+=count;
  }

  System.out.println("Total file/word references: "+usedSlots);
  System.out.println(" out of allocated: "+allocatedSlots);
  System.out.println(" Wasted entire entries: "+wastedEntries);

  final int minFiles=1,maxFiles=9;
  int[] sizes=new int[maxFiles];
  for(Map.Entry<Integer, Integer> bucket : histogram.entrySet())
  {
    int usage = bucket.getKey();
    int instances = bucket.getValue();
    for(int blockSize=minFiles;blockSize<maxFiles;blockSize++)
    {
      // Work out how many blocks it's taking per instance of this many words
      // being full.
      int blocks=(usage-1)/blockSize+1;
      // OK now multiply by number of instances and size of block
      int bytesPerBlock=blockSize*4+INDEX_WORDBYTES;
      sizes[blockSize]+=blocks*instances*bytesPerBlock;
    }
  }

  for(int blockSize=minFiles;blockSize<maxFiles;blockSize++)
  {
    System.out.println(blockSize+": "+(sizes[blockSize]/1024)+"KB");
  }

  System.exit(0);
}
/**
 * Find all files that contain a word. The word is cropped in the same way
 * as when indexing, so only its first 8 UTF-8 bytes take part in the match.
 * @param s Word being searched; null or empty gives an empty result
 * @return Set of File objects (empty if the word is not in the index)
 */
synchronized Set<File> findWord(String s)
{
  Set<File> files=new HashSet<File>();
  // Blank words are never stored in the index, so nothing can match
  if(s==null || s.length()==0) return files;

  IndexEntry[] entries=words.get(getCroppedString(s));
  if(entries==null) return files;

  for(IndexEntry entry : entries)
  {
    for(int fileID : entry.files)
    {
      // 0 marks an unused slot
      if(fileID==0) continue;

      // Look up in file list. An ID without a name (e.g. if the word index
      // and file index on disk were out of step) is skipped, because
      // new File(folder,null) would throw NullPointerException.
      String name=filesIDToName.get(fileID);
      if(name==null) continue;

      files.add(new File(folder,name));
    }
  }
  return files;
}
/**
 * Gets ID for the given file. If necessary, adds a new file to the file index.
 * A new file gets an ID one higher than the highest ID currently known; the
 * record is appended to the file index on disk before the in-memory maps
 * are updated.
 * @param f File to find or add (only its name is used)
 * @return ID of requested file
 * @throws IOException Any problem updating file index
 */
synchronized private int getFileID(File f) throws IOException
{
  // Look for existing file
  String file=f.getName();
  Integer id=filesNameToID.get(file);
  if(id!=null)
  {
    debugLog(50,"File "+id+" ("+file+"): already indexed");
    return id.intValue();
  }

  // OK, add new file. Begin by finding max ID of existing files
  int max=0;
  for(Integer i : filesIDToName.keySet())
  {
    max = Math.max(max, i);
  }
  int newID=max+1;

  // All good, so add that file to the end of the index on disk...
  DataOutputStream dos=new DataOutputStream(
    new FileOutputStream(new File(folder,INDEX_FILES),true));
  try
  {
    dos.writeInt(newID);
    dos.writeUTF(file);
  }
  finally
  {
    // Close even if a write failed, so the file handle is not leaked
    dos.close();
  }

  // ...and to the in-memory index
  Integer key = newID;
  filesIDToName.put(key,file);
  filesNameToID.put(file,key);

  debugLog(20,"File "+key+" ("+file+"): Added to index");
  return newID;
}
/**
 * Removes files from the index: drops them from the in-memory file maps,
 * rewrites the file index on disk without them, and blanks every reference
 * to them in the word index (in memory and, via queued updates that are
 * applied before returning, on disk). The log files themselves are not
 * touched.
 * @param files Files to remove; every one must be present in the index
 * @throws IOException If an index file cannot be written, renamed or deleted
 * @throws GeneralException If a file is not in the index (checked before
 *   anything is changed)
 */
synchronized private void removeFiles(File[] files) throws IOException,GeneralException
{
  // Find IDs for files and remove from memory maps
  Set<Integer> allIDs = new HashSet<Integer>();
  int[] ids=new int[files.length];
  for(int i=0;i<files.length;i++)
  {
    Integer idInt=filesNameToID.get(files[i].getName());
    if(idInt==null) throw new GeneralException("Log file "+files[i]+" not found in index");
    allIDs.add(idInt);
    ids[i]=idInt.intValue();
  }
  for(int i=0;i<files.length;i++)
  {
    filesIDToName.remove(ids[i]);
    filesNameToID.remove(files[i].getName());
  }

  // Rewrite file index
  File newFileIndex=new File(folder,INDEX_FILES+".new");
  DataOutputStream dos=new DataOutputStream(
    new FileOutputStream(newFileIndex));
  try
  {
    for(Map.Entry<Integer, String> me : filesIDToName.entrySet())
    {
      int id=me.getKey().intValue();
      dos.writeInt(id);
      dos.writeUTF(me.getValue());
    }
  }
  finally
  {
    // Close even if a write failed, so the file handle is not leaked
    dos.close();
  }

  // Now the time-consuming bit! Scan entire word index...
  for(Map.Entry<String, IndexEntry[]> me : words.entrySet())
  {
    IndexEntry[] aie = me.getValue();
    for(int entry=0;entry<aie.length;entry++)
    {
      IndexEntry currentEntry=aie[entry];
      for(int entryFile=0;entryFile<INDEX_FILESPERENTRY;entryFile++)
      {
        int compare=currentEntry.files[entryFile];
        if(compare==0) continue;
        if(allIDs.contains(compare))
        {
          // Squash this one...
          currentEntry.files[entryFile]=0;
          // Save the change: overwrite just this 4-byte slot, which lies
          // after the word bytes at the start of the entry
          IndexUpdate iu=new IndexUpdate(currentEntry.filePos+INDEX_WORDBYTES+entryFile*4);
          iu.writeInt(0);
          addIndexUpdate(iu);
        }
      }
    }
  }

  // Switch new index for old
  File
    fileIndex=new File(folder,INDEX_FILES),
    oldFileIndex=new File(folder,INDEX_FILES+".old");
  if(oldFileIndex.exists())
  {
    if(!oldFileIndex.delete()) throw new IOException(
      "Failed to delete old log file index "+oldFileIndex);
  }
  if(!fileIndex.renameTo(oldFileIndex))
    throw new IOException("Failed to rename old log file index "+fileIndex);
  if(!newFileIndex.renameTo(fileIndex))
    throw new IOException("Failed to rename new log file index "+newFileIndex);
  oldFileIndex.delete();

  // Must apply all index changes now as these aren't compatible with normal
  // writing (they can write to the same place, which causes a
  // consistency-check error later on)
  applyIndexChanges();
}
/**
* Adds a word to the index if necessary, updating in memory and on disk.
* The word is first cropped to at most 8 UTF-8 bytes. Then, in order:
* if the word already lists the file, nothing happens; if one of the word's
* entries has a free slot, the file ID is stored there; otherwise a new
* entry is created, at a remembered blank position in the index file if
* there is one, else at the end of the file. Disk changes are queued with
* addIndexUpdate rather than written directly.
* @param fileID Log file that contains the given word
* @param s Word in question
* @throws IOException If there's any error writing the index changes
*/
synchronized private void addWord(int fileID,String s) throws IOException
{
String cropped=getCroppedString(s);
assert(cropped.length()>0);
IndexEntry[] entries=words.get(cropped);
if(entries!=null)
{
// We already have entries for this word. See if one of them matches
// the file in question; otherwise, look for a blank space
int blankEntry=-1,iBlankFile=-1;
for(int entry=0;entry<entries.length;entry++)
{
for(int file=0;file<INDEX_FILESPERENTRY;file++)
{
int thisFileID=entries[entry].files[file];
// Remember only the first free slot; keep scanning in case the file
// is already listed further on
if(thisFileID==0 && blankEntry==-1)
{
blankEntry=entry;
iBlankFile=file;
}
if(fileID==thisFileID)
{
debugLog(50,"Word entry '"+cropped+"': already exists");
return; // Yay! No need to do anything.
}
}
}
// Did we find a blank space? If so, use it
if(blankEntry!=-1)
{
// Store in memory...
entries[blankEntry].files[iBlankFile]=fileID;
// ...and on disk (just the 4-byte slot, which follows the word bytes)
IndexUpdate iu=new IndexUpdate(entries[blankEntry].filePos+INDEX_WORDBYTES+4*iBlankFile);
iu.writeInt(fileID);
addIndexUpdate(iu);
// and we're done
debugLog(30,"Word entry '"+cropped+"': adding to existing entry");
return;
}
}
// Need to add a new entry in memory...
int newPos;
if(entries==null) // No existing entries, start new array
{
entries=new IndexEntry[1];
words.put(cropped,entries);
newPos=0;
debugLog(30,"Word entry '"+cropped+"': creating first entry");
}
else // Reallocate array to add entry
{
IndexEntry[] newEntries=new IndexEntry[entries.length+1];
System.arraycopy(entries,0,newEntries,0,entries.length);
entries=newEntries;
words.put(cropped,entries);
newPos=entries.length-1;
debugLog(30,"Word entry '"+cropped+"': creating additional entry");
}
entries[newPos]=new IndexEntry();
// If there's a blank space, add it there instead of at the end
if(!blankEntrySpaces.isEmpty())
{
Integer i=blankEntrySpaces.getFirst();
blankEntrySpaces.removeFirst();
entries[newPos].filePos=i.intValue();
}
else
{
// Append: the new entry goes at the current end, which then moves on
entries[newPos].filePos=fileEndPos;
fileEndPos+=INDEX_BYTESPERENTRY;
}
entries[newPos].files[0]=fileID;
// Queue a write of the complete entry at its position
IndexUpdate iu=new IndexUpdate(entries[newPos].filePos);
byte[] word=cropped.getBytes("UTF-8");
// Write word, zero-padded
iu.writeBytes(word);
for(int i=0;i<INDEX_WORDBYTES-word.length;i++)
iu.writeByte(0);
// Write this file ID and the blank spaces
iu.writeInt(fileID);
for(int i=0;i<INDEX_FILESPERENTRY-1;i++) iu.writeInt(0);
addIndexUpdate(iu);
}
/**
 * Crops a string to no more than 8 bytes in UTF-8, by dropping characters
 * from the end until the encoded form fits.
 * @param s The string; an empty string is returned unchanged
 * @return Longest prefix of s whose UTF-8 encoding is at most 8 bytes
 */
private String getCroppedString(String s)
{
  // Every char encodes to at least one byte, so a prefix of more than 8
  // chars can never fit; there is no point trying longer ones. Going down
  // to length 0 means the empty string is accepted instead of falling
  // through to the error below.
  for(int end=Math.min(s.length(),8);end>=0;end--)
  {
    String candidate=s.substring(0,end);
    try
    {
      if(candidate.getBytes("UTF-8").length<=8) return candidate;
    }
    catch(UnsupportedEncodingException e)
    {
      throw new Error(e);
    }
  }
  // Not reachable in practice: the empty prefix always fits
  throw new Error("Word could not be limited to 8 bytes");
}
/**
 * Reindexes a particular file (if the index may have been damaged). Each
 * line has its enclosing e element stripped, where present, and the rest is
 * indexed.
 * @param f File to reindex
 * @throws GeneralException Any problem reading file
 * @throws IOException Index write error
 * @throws XMLException Line not valid XML.
 */
private void reindex(File f) throws GeneralException, XMLException, IOException
{
  LinkedList<String> lines=readFileLines(f);
  for(String line : lines)
  {
    String content=line.replaceAll("^<e[^>]*>(.*)</e>","$1");
    index(f,content);
  }
}
/**
 * Adds the words found in a piece of XML to the index for a file. The file
 * is given an ID (and added to the file index) if it does not have one yet.
 * @param f File in question
 * @param xml Some XML
 * @throws IOException If addWord gives an error
 * @throws XMLException If the string isn't valid XML
 */
private void index(File f,String xml) throws IOException,XMLException
{
  int fileID=getFileID(f);
  String[] tokens=extractWords(xml);
  for(String token : tokens)
  {
    if(token.length()==0) continue;
    addWord(fileID,token);
  }
}
/**
 * Reads all lines of a log file as UTF-8.
 * Reading of log files is managed from here so that it can be synchronized.
 * Doing it unsynchronized caused problems in very rare cases, I think when
 * it tried to open a file for writing at the same time as reading it.
 * @param f File to read
 * @return List of all lines in file
 * @throws GeneralException If there's a problem loading the file
 */
public synchronized LinkedList<String> readFileLines(File f)
  throws GeneralException
{
  LinkedList<String> lines = new LinkedList<String>();
  BufferedReader reader = null;
  try
  {
    reader = new BufferedReader(
      new InputStreamReader(new FileInputStream(f),"UTF-8"));
    for(String line=reader.readLine();line!=null;line=reader.readLine())
    {
      lines.addLast(line);
    }
    return lines;
  }
  catch(IOException ioe)
  {
    throw new GeneralException("Failed to load log file",ioe);
  }
  finally
  {
    if(reader!=null)
    {
      try
      {
        reader.close();
      }
      catch(IOException closeFailure)
      {
        // Ignore exception on close
      }
    }
  }
}
/**
 * Logs one event. Does nothing once close has been requested or if the
 * plugin says the category/item should not be logged. Otherwise the event
 * is indexed and appended, as an e element with time and type attributes,
 * to the log file chosen by getFile; the file is opened (in append mode) if
 * there is no open stream for it. I/O errors are reported through ErrorMsg
 * and not thrown.
 */
@Override
public synchronized void log(String source,String category,String item,String type,String displayXML)
{
  if(close) return;

  try
  {
    LogsPlugin plugin=(LogsPlugin)context.getPlugin();
    if(!plugin.shouldLog(category,item)) return;

    // Pick current time and find file for it
    long time=System.currentTimeMillis();
    File f=getFile(time,source,category,item);

    // Do we have a stream for that already? If not, make one
    LogStream ls=currentStreams.get(f);
    if(ls==null)
    {
      ls=new LogStream();
      ls.w=new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f,true),"UTF-8"));
      currentStreams.put(f,ls);
    }

    // Index data (note: this checks it's valid XML so do it first)
    index(f,displayXML);

    // Write data
    StringBuilder entry=new StringBuilder();
    entry.append("<e time='").append(time);
    entry.append("' type='").append(type).append("'>");
    entry.append(displayXML).append("</e>\n");
    ls.w.write(entry.toString());

    // The logger thread flushes dirty streams and closes unused ones
    ls.dirty=true;
    ls.lastUsed=time;
  }
  catch(IOException ioe)
  {
    ErrorMsg.report("Error logging data",ioe);
  }
}
}