/****************************************************************************************
 * Copyright (c) 2012 Norbert Nagold <norbert.nagold@gmail.com>                         *
 * Copyright (c) 2016 Houssam Salem <houssam.salem.au@gmail.com>                        *
 *                                                                                      *
 * This program is free software; you can redistribute it and/or modify it under        *
 * the terms of the GNU General Public License as published by the Free Software        *
 * Foundation; either version 3 of the License, or (at your option) any later           *
 * version.                                                                             *
 *                                                                                      *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY      *
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A      *
 * PARTICULAR PURPOSE. See the GNU General Public License for more details.             *
 *                                                                                      *
 * You should have received a copy of the GNU General Public License along with         *
 * this program.  If not, see <http://www.gnu.org/licenses/>.                           *
 ****************************************************************************************/

package com.ichi2.libanki.importer;

import android.database.Cursor;
import android.text.TextUtils;

import com.ichi2.anki.R;
import com.ichi2.async.DeckTask;
import com.ichi2.libanki.Collection;
import com.ichi2.libanki.Media;
import com.ichi2.libanki.Storage;
import com.ichi2.libanki.Utils;

import org.json.JSONException;
import org.json.JSONObject;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import timber.log.Timber;

/**
 * Imports the notes, cards, decks, models, revlog and static media of another .anki2
 * collection file into the currently open collection ({@code mCol}/{@code mDst}).
 */
public class Anki2Importer extends Importer {

    // Indices into the per-note Object[] row, matching the notes table column order:
    // id(0), guid(1), mid(2), mod(3), usn(4), tags(5), flds(6), sfld(7), csum(8), flags(9), data(10)
    private static final int GUID = 1;
    private static final int MID = 2;
    private static final int MOD = 3;

    /** Max number of bytes compared per file when deciding whether two media files match. */
    private static final int MEDIAPICKLIMIT = 1024;

    // When non-empty, imported decks are re-rooted under this deck name.
    private String mDeckPrefix;
    // Whether existing notes may be updated from newer incoming duplicates.
    private boolean mAllowUpdate;
    // Whether a note whose model schema changed gets a new guid (and is added) instead of being skipped.
    private boolean mDupeOnSchemaChange;

    /** guid -> (id, mod, mid) of notes known to the destination collection. */
    private Map<String, Object[]> mNotes;

    /**
     * Since we can't use a tuple as a key in Java, we resort to indexing twice with nested maps.
     * Python: (guid, ord) -> cid
     * Java: guid -> ord -> cid
     */
    private Map<String, Map<Integer, Long>> mCards;
    // src deck id -> dst deck id
    private Map<Long, Long> mDecks;
    // src model id -> dst model id
    private Map<Long, Long> mModelMap;
    // original guid -> rewritten guid (when schemas differed and mDupeOnSchemaChange is set)
    private Map<String, String> mChangedGuids;
    // guids whose updates were ignored due to a changed schema; their cards are skipped too
    private Map<String, Boolean> mIgnoredGuids;

    // Counters exported to the caller after the import (see getters below).
    private int mDupes;
    private int mAdded;
    private int mUpdated;


    public Anki2Importer(Collection col, String file) {
        super(col, file);
        mNeedMapper = false;
        mDeckPrefix = null;
        mAllowUpdate = true;
        mDupeOnSchemaChange = false;
    }


    @Override
    public void run() {
        publishProgress(0, 0, 0);
        try {
            _prepareFiles();
            try {
                _import();
            } finally {
                // always release the source collection, even if the import failed
                mSrc.close(false);
            }
        } catch (RuntimeException e) {
            Timber.e(e, "RuntimeException while importing");
        }
    }


    /** Open the source collection; the destination is the currently loaded collection. */
    private void _prepareFiles() {
        mDst = mCol;
        mSrc = Storage.Collection(mContext, mFile);
    }


    /** Run the whole import inside DB transactions, then vacuum/analyze the destination. */
    private void _import() {
        mDecks = new HashMap<>();
        try {
            // Use transactions for performance and rollbacks in case of error
            mDst.getDb().getDatabase().beginTransaction();
            mDst.getMedia().getDb().getDatabase().beginTransaction();
            if (!TextUtils.isEmpty(mDeckPrefix)) {
                long id = mDst.getDecks().id(mDeckPrefix);
                mDst.getDecks().select(id);
            }
            _prepareTS();
            _prepareModels();
            _importNotes();
            _importCards();
            _importStaticMedia();
            publishProgress(100, 100, 25);
            _postImport();
            publishProgress(100, 100, 50);
            mDst.getDb().getDatabase().setTransactionSuccessful();
            mDst.getMedia().getDb().getDatabase().setTransactionSuccessful();
        } finally {
            mDst.getDb().getDatabase().endTransaction();
            mDst.getMedia().getDb().getDatabase().endTransaction();
        }
        mDst.getDb().execute("vacuum");
        publishProgress(100, 100, 65);
        mDst.getDb().execute("analyze");
        publishProgress(100, 100, 75);
    }


    /**
     * Notes
     * ***********************************************************
     */

    /** Import all notes from the source collection, adding new ones and updating newer duplicates. */
    private void _importNotes() {
        // build guid -> (id,mod,mid) hash & map of existing note ids
        mNotes = new HashMap<>();
        Map<Long, Boolean> existing = new HashMap<>();
        Cursor cur = null;
        try {
            cur = mDst.getDb().getDatabase().rawQuery("select id, guid, mod, mid from notes", null);
            while (cur.moveToNext()) {
                long id = cur.getLong(0);
                String guid = cur.getString(1);
                long mod = cur.getLong(2);
                long mid = cur.getLong(3);
                mNotes.put(guid, new Object[] { id, mod, mid });
                existing.put(id, true);
            }
        } finally {
            if (cur != null) {
                cur.close();
            }
        }
        // we may need to rewrite the guid if the model schemas don't match,
        // so we need to keep track of the changes for the card import stage
        mChangedGuids = new HashMap<>();
        // apart from upgrading from anki1 decks, we ignore updates to changed
        // schemas. we need to note the ignored guids, so we avoid importing
        // invalid cards
        mIgnoredGuids = new HashMap<>();
        // iterate over source collection
        ArrayList<Object[]> add = new ArrayList<>();
        ArrayList<Object[]> update = new ArrayList<>();
        ArrayList<Long> dirty = new ArrayList<>();
        int usn = mDst.usn();
        int dupes = 0;
        ArrayList<String> dupesIgnored = new ArrayList<>();
        try {
            cur = mSrc.getDb().getDatabase().rawQuery("select * from notes", null);
            // Counters for progress updates
            int total = cur.getCount();
            boolean largeCollection = total > 200;
            int onePercent = total / 100;
            int i = 0;
            while (cur.moveToNext()) {
                // turn the db result into a mutable list
                Object[] note = new Object[] { cur.getLong(0), cur.getString(1), cur.getLong(2),
                        cur.getLong(3), cur.getInt(4), cur.getString(5), cur.getString(6),
                        cur.getString(7), cur.getLong(8), cur.getInt(9), cur.getString(10) };
                boolean shouldAdd = _uniquifyNote(note);
                if (shouldAdd) {
                    // ensure id is unique
                    while (existing.containsKey(note[0])) {
                        note[0] = ((Long) note[0]) + 999;
                    }
                    existing.put((Long) note[0], true);
                    // bump usn
                    note[4] = usn;
                    // update media references in case of dupes
                    note[6] = _mungeMedia((Long) note[MID], (String) note[6]);
                    add.add(note);
                    dirty.add((Long) note[0]);
                    // note we have the added guid
                    mNotes.put((String) note[GUID], new Object[] { note[0], note[3], note[MID] });
                } else {
                    // a duplicate or changed schema - safe to update?
                    dupes += 1;
                    if (mAllowUpdate) {
                        Object[] n = mNotes.get(note[GUID]);
                        long oldNid = (Long) n[0];
                        long oldMod = (Long) n[1];
                        long oldMid = (Long) n[2];
                        // will update if incoming note more recent
                        if (oldMod < (Long) note[MOD]) {
                            // safe if note types identical
                            if (oldMid == (Long) note[MID]) {
                                // incoming note should use existing id
                                note[0] = oldNid;
                                note[4] = usn;
                                note[6] = _mungeMedia((Long) note[MID], (String) note[6]);
                                update.add(note);
                                dirty.add((Long) note[0]);
                            } else {
                                dupesIgnored.add(String.format("%s: %s",
                                        mCol.getModels().get(oldMid).getString("name"),
                                        ((String) note[6]).replace("\u001f", ",")));
                                mIgnoredGuids.put((String) note[GUID], true);
                            }
                        }
                    }
                }
                i++;
                if (total != 0 && (!largeCollection || i % onePercent == 0)) {
                    // Calls to publishProgress are reasonably expensive due to res.getString()
                    publishProgress(i * 100 / total, 0, 0);
                }
            }
            publishProgress(100, 0, 0);
        } catch (JSONException e) {
            throw new RuntimeException(e);
        } finally {
            if (cur != null) {
                cur.close();
            }
        }
        if (dupes > 0) {
            mLog.add(getRes().getString(R.string.import_update_details, update.size(), dupes));
            if (dupesIgnored.size() > 0) {
                mLog.add(getRes().getString(R.string.import_update_ignored));
                // TODO: uncomment this and fix above string if we implement a detailed
                // log viewer dialog type.
                //mLog.addAll(dupesIgnored);
            }
        }
        // export info for calling code
        mDupes = dupes;
        mAdded = add.size();
        mUpdated = update.size();
        // add to col
        mDst.getDb().executeMany("insert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)", add);
        mDst.getDb().executeMany("insert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)", update);
        long[] das = Utils.arrayList2array(dirty);
        mDst.updateFieldCache(das);
        mDst.getTags().registerNotes(das);
    }


    // determine if note is a duplicate, and adjust mid and/or guid as required
    // returns true if note should be added
    private boolean _uniquifyNote(Object[] note) {
        String origGuid = (String) note[GUID];
        long srcMid = (Long) note[MID];
        long dstMid = _mid(srcMid);
        // duplicate Schemas?
        if (srcMid == dstMid) {
            return !mNotes.containsKey(origGuid);
        }
        // differing schemas and note doesn't exist?
        note[MID] = dstMid;
        if (!mNotes.containsKey(origGuid)) {
            return true;
        }
        // as the schemas differ and we already have a note with a different
        // note type, this note needs a new guid
        if (!mDupeOnSchemaChange) {
            return false;
        }
        while (true) {
            note[GUID] = Utils.incGuid((String) note[GUID]);
            mChangedGuids.put(origGuid, (String) note[GUID]);
            // if we don't have an existing guid, we can add
            if (!mNotes.containsKey(note[GUID])) {
                return true;
            }
            // if the existing guid shares the same mid, we can reuse
            if (dstMid == (Long) mNotes.get(note[GUID])[MID]) {
                return false;
            }
        }
    }


    /**
     * Models
     * ***********************************************************
     * Models in the two decks may share an ID but not a schema, so we need to
     * compare the field & template signature rather than just rely on ID. If
     * the schemas don't match, we increment the mid and try again, creating a
     * new model if necessary.
     */

    /** Prepare index of schema hashes. */
    private void _prepareModels() {
        mModelMap = new HashMap<>();
    }


    /** Return local id for remote MID. */
    private long _mid(long srcMid) {
        try {
            // already processed this mid?
            if (mModelMap.containsKey(srcMid)) {
                return mModelMap.get(srcMid);
            }
            long mid = srcMid;
            JSONObject srcModel = mSrc.getModels().get(srcMid);
            String srcScm = mSrc.getModels().scmhash(srcModel);
            while (true) {
                // missing from target col?
                if (!mDst.getModels().have(mid)) {
                    // copy it over
                    JSONObject model = new JSONObject(Utils.jsonToString(srcModel));
                    model.put("id", mid);
                    model.put("mod", Utils.intNow());
                    model.put("usn", mCol.usn());
                    mDst.getModels().update(model);
                    break;
                }
                // there's an existing model; do the schemas match?
                JSONObject dstModel = mDst.getModels().get(mid);
                String dstScm = mDst.getModels().scmhash(dstModel);
                if (srcScm.equals(dstScm)) {
                    // they do; we can reuse this mid
                    JSONObject model = new JSONObject(Utils.jsonToString(srcModel));
                    model.put("id", mid);
                    model.put("mod", Utils.intNow());
                    model.put("usn", mCol.usn());
                    mDst.getModels().update(model);
                    break;
                }
                // as they don't match, try next id
                mid += 1;
            }
            // save map and return new mid
            mModelMap.put(srcMid, mid);
            return mid;
        } catch (JSONException e) {
            throw new RuntimeException(e);
        }
    }


    /**
     * Decks
     * ***********************************************************
     */

    /** Given did in src col, return local id. */
    private long _did(long did) {
        try {
            // already converted?
            if (mDecks.containsKey(did)) {
                return mDecks.get(did);
            }
            // get the name in src
            JSONObject g = mSrc.getDecks().get(did);
            String name = g.getString("name");
            // if there's a prefix, replace the top level deck
            if (!TextUtils.isEmpty(mDeckPrefix)) {
                List<String> parts = Arrays.asList(name.split("::", -1));
                String tmpname = TextUtils.join("::", parts.subList(1, parts.size()));
                name = mDeckPrefix;
                if (!TextUtils.isEmpty(tmpname)) {
                    name += "::" + tmpname;
                }
            }
            // Manually create any parents so we can pull in descriptions
            String head = "";
            List<String> parents = Arrays.asList(name.split("::", -1));
            for (String parent : parents.subList(0, parents.size() - 1)) {
                if (!TextUtils.isEmpty(head)) {
                    head += "::";
                }
                head += parent;
                long idInSrc = mSrc.getDecks().id(head);
                _did(idInSrc);
            }
            // create in local
            long newid = mDst.getDecks().id(name);
            // pull conf over
            if (g.has("conf") && g.getLong("conf") != 1) {
                JSONObject conf = mSrc.getDecks().getConf(g.getLong("conf"));
                mDst.getDecks().save(conf);
                mDst.getDecks().updateConf(conf);
                JSONObject g2 = mDst.getDecks().get(newid);
                g2.put("conf", g.getLong("conf"));
                mDst.getDecks().save(g2);
            }
            // save desc
            JSONObject deck = mDst.getDecks().get(newid);
            deck.put("desc", g.getString("desc"));
            mDst.getDecks().save(deck);
            // add to deck map and return
            mDecks.put(did, newid);
            return newid;
        } catch (JSONException e) {
            throw new RuntimeException(e);
        }
    }


    /**
     * Cards
     * ***********************************************************
     */

    /** Import cards (and their revlog entries) whose notes made it into the destination. */
    private void _importCards() {
        // build map of guid -> (ord -> cid) and used id cache
        mCards = new HashMap<>();
        Map<Long, Boolean> existing = new HashMap<>();
        Cursor cur = null;
        try {
            cur = mDst.getDb().getDatabase().rawQuery(
                    "select f.guid, c.ord, c.id from cards c, notes f " +
                    "where c.nid = f.id", null);
            while (cur.moveToNext()) {
                String guid = cur.getString(0);
                int ord = cur.getInt(1);
                long cid = cur.getLong(2);
                existing.put(cid, true);
                if (mCards.containsKey(guid)) {
                    mCards.get(guid).put(ord, cid);
                } else {
                    Map<Integer, Long> map = new HashMap<>();
                    map.put(ord, cid);
                    mCards.put(guid, map);
                }
            }
        } finally {
            if (cur != null) {
                cur.close();
            }
        }
        // loop through src
        List<Object[]> cards = new ArrayList<>();
        List<Object[]> revlog = new ArrayList<>();
        int cnt = 0;
        int usn = mDst.usn();
        // offset applied to relative due dates of review cards
        long aheadBy = mSrc.getSched().getToday() - mDst.getSched().getToday();
        try {
            cur = mSrc.getDb().getDatabase().rawQuery(
                    "select f.guid, f.mid, c.* from cards c, notes f " +
                    "where c.nid = f.id", null);
            // Counters for progress updates
            int total = cur.getCount();
            boolean largeCollection = total > 200;
            int onePercent = total / 100;
            int i = 0;
            while (cur.moveToNext()) {
                // row layout: guid(0), mid(1), then the 18 columns of the cards table
                Object[] card = new Object[] { cur.getString(0), cur.getLong(1), cur.getLong(2),
                        cur.getLong(3), cur.getLong(4), cur.getInt(5), cur.getLong(6),
                        cur.getInt(7), cur.getInt(8), cur.getInt(9), cur.getLong(10),
                        cur.getLong(11), cur.getLong(12), cur.getInt(13), cur.getInt(14),
                        cur.getInt(15), cur.getLong(16), cur.getLong(17), cur.getInt(18),
                        cur.getString(19) };
                String guid = (String) card[0];
                if (mChangedGuids.containsKey(guid)) {
                    guid = mChangedGuids.get(guid);
                }
                if (mIgnoredGuids.containsKey(guid)) {
                    continue;
                }
                // does the card's note exist in dst col?
                if (!mNotes.containsKey(guid)) {
                    continue;
                }
                // does the card already exist in the dst col?
                int ord = (Integer) card[5];
                if (mCards.containsKey(guid) && mCards.get(guid).containsKey(ord)) {
                    // fixme: in future, could update if newer mod time
                    continue;
                }
                // doesn't exist. strip off note info, and save src id for later
                Object[] oc = card;
                card = new Object[oc.length - 2];
                System.arraycopy(oc, 2, card, 0, card.length);
                long scid = (Long) card[0];
                // ensure the card id is unique
                while (existing.containsKey(card[0])) {
                    card[0] = (Long) card[0] + 999;
                }
                existing.put((Long) card[0], true);
                // update cid, nid, etc
                card[1] = mNotes.get(guid)[0];
                card[2] = _did((Long) card[2]);
                card[4] = Utils.intNow();
                card[5] = usn;
                // review cards have a due date relative to collection
                if ((Integer) card[7] == 2 || (Integer) card[7] == 3 || (Integer) card[6] == 2) {
                    card[8] = (Long) card[8] - aheadBy;
                }
                // if odid true, convert card from filtered to normal
                if ((Long) card[15] != 0) {
                    // odid
                    card[15] = 0;
                    // odue
                    card[8] = card[14];
                    card[14] = 0;
                    // queue
                    if ((Integer) card[6] == 1) { // type
                        card[7] = 0;
                    } else {
                        card[7] = card[6];
                    }
                    // type
                    if ((Integer) card[6] == 1) {
                        card[6] = 0;
                    }
                }
                cards.add(card);
                // we need to import revlog, rewriting card ids and bumping usn
                Cursor cur2 = null;
                try {
                    // parameterized to avoid string-built SQL and allow statement caching
                    cur2 = mSrc.getDb().getDatabase().rawQuery(
                            "select * from revlog where cid = ?",
                            new String[] { Long.toString(scid) });
                    while (cur2.moveToNext()) {
                        Object[] rev = new Object[] { cur2.getLong(0), cur2.getLong(1),
                                cur2.getInt(2), cur2.getInt(3), cur2.getLong(4), cur2.getLong(5),
                                cur2.getLong(6), cur2.getLong(7), cur2.getInt(8) };
                        rev[1] = card[0];
                        rev[2] = mDst.usn();
                        revlog.add(rev);
                    }
                } finally {
                    if (cur2 != null) {
                        cur2.close();
                    }
                }
                cnt += 1;
                i++;
                if (total != 0 && (!largeCollection || i % onePercent == 0)) {
                    publishProgress(100, i * 100 / total, 0);
                }
            }
            publishProgress(100, 100, 0);
        } finally {
            if (cur != null) {
                cur.close();
            }
        }
        // apply
        mDst.getDb().executeMany("insert or ignore into cards values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", cards);
        mDst.getDb().executeMany("insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)", revlog);
        mLog.add(getRes().getString(R.string.import_complete_count, cnt));
    }


    /**
     * Media
     * ***********************************************************
     */

    // note: this func only applies to imports of .anki2. For .apkg files, the
    // apkg importer does the copying
    private void _importStaticMedia() {
        // Import any '_foo' prefixed media files regardless of whether
        // they're used on notes or not
        String dir = mSrc.getMedia().dir();
        if (!new File(dir).exists()) {
            return;
        }
        File[] files = new File(dir).listFiles();
        if (files == null) {
            // listFiles() returns null on I/O error or if dir is not a directory
            return;
        }
        for (File f : files) {
            String fname = f.getName();
            if (fname.startsWith("_") && !mDst.getMedia().have(fname)) {
                _writeDstMedia(fname, _srcMediaData(fname));
            }
        }
    }


    /** Stream for FNAME in the source media dir, or null if it can't be opened. */
    private BufferedInputStream _mediaData(String fname) {
        return _mediaData(fname, null);
    }


    /** Stream for FNAME in DIR (source media dir if null), or null if it can't be opened. */
    private BufferedInputStream _mediaData(String fname, String dir) {
        if (dir == null) {
            dir = mSrc.getMedia().dir();
        }
        String path = new File(dir, fname).getAbsolutePath();
        try {
            // buffer sized so a full _mediaPick() comparison never outgrows the mark
            return new BufferedInputStream(new FileInputStream(path), MEDIAPICKLIMIT * 2);
        } catch (IOException e) {
            return null;
        }
    }


    /**
     * Data for FNAME in src collection.
     */
    protected BufferedInputStream _srcMediaData(String fname) {
        return _mediaData(fname, mSrc.getMedia().dir());
    }


    /**
     * Data for FNAME in dst collection.
     */
    private BufferedInputStream _dstMediaData(String fname) {
        return _mediaData(fname, mDst.getMedia().dir());
    }


    /** Write DATA to FNAME in the destination media dir and record it in the media db. */
    private void _writeDstMedia(String fname, BufferedInputStream data) {
        try {
            String path = new File(mDst.getMedia().dir(), Utils.nfcNormalized(fname)).getAbsolutePath();
            Utils.writeToFile(data, path);
            // Mark file addition to media db (see note in Media.java)
            mDst.getMedia().markFileAdd(fname);
        } catch (IOException e) {
            // the user likely used subdirectories
            Timber.e(e, "Error copying file %s.", fname);
        }
    }


    // running splitFields() on every note is fairly expensive and actually not necessary
    /**
     * Rewrite media references in FIELDS, copying referenced files into the destination
     * and renaming to a model-local name ("name_mid.ext") when a different file with the
     * same name already exists there.
     */
    private String _mungeMedia(long mid, String fields) {
        for (Pattern p : Media.mRegexps) {
            Matcher m = p.matcher(fields);
            StringBuffer sb = new StringBuffer();
            int fnameIdx = Media.indexOfFname(p);
            while (m.find()) {
                String fname = m.group(fnameIdx);
                BufferedInputStream srcData = _srcMediaData(fname);
                BufferedInputStream dstData = _dstMediaData(fname);
                if (srcData == null) {
                    // file was not in source, ignore
                    m.appendReplacement(sb, Matcher.quoteReplacement(m.group(0)));
                    continue;
                }
                // if model-local file exists from a previous import, use that
                String[] split = Utils.splitFilename(fname);
                String name = split[0];
                String ext = split[1];
                String lname = String.format(Locale.US, "%s_%s%s", name, mid, ext);
                if (mDst.getMedia().have(lname)) {
                    m.appendReplacement(sb, Matcher.quoteReplacement(m.group(0).replace(fname, lname)));
                    continue;
                } else if (dstData == null || compareMedia(srcData, dstData)) {
                    // if missing or the same, pass unmodified
                    // need to copy?
                    if (dstData == null) {
                        _writeDstMedia(fname, srcData);
                    }
                    m.appendReplacement(sb, Matcher.quoteReplacement(m.group(0)));
                    continue;
                }
                // exists but does not match, so we need to dedupe
                _writeDstMedia(lname, srcData);
                m.appendReplacement(sb, Matcher.quoteReplacement(m.group(0).replace(fname, lname)));
            }
            m.appendTail(sb);
            fields = sb.toString();
        }
        return fields;
    }


    /**
     * Post-import cleanup
     * ***********************************************************
     */

    private void _postImport() {
        try {
            for (long did : mDecks.values()) {
                mCol.getSched().maybeRandomizeDeck(did);
            }
            // make sure new position is correct
            mDst.getConf().put("nextPos",
                    mDst.getDb().queryLongScalar("select max(due)+1 from cards where type = 0"));
            mDst.save();
        } catch (JSONException e) {
            throw new RuntimeException(e);
        }
    }


    /**
     * The methods below are not in LibAnki.
     * ***********************************************************
     */

    /** Compare the first MEDIAPICKLIMIT bytes of two media streams for equality. */
    private boolean compareMedia(BufferedInputStream lhis, BufferedInputStream rhis) {
        byte[] lhbytes = _mediaPick(lhis);
        byte[] rhbytes = _mediaPick(rhis);
        return Arrays.equals(lhbytes, rhbytes);
    }


    /**
     * Return the contents of the given input stream, limited to Anki2Importer.MEDIAPICKLIMIT bytes
     * This is only used for comparison of media files with the limited resources of mobile devices
     */
    byte[] _mediaPick(BufferedInputStream is) {
        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream(MEDIAPICKLIMIT * 2);
            byte[] buf = new byte[MEDIAPICKLIMIT];
            int readLen;
            int readSoFar = 0;
            is.mark(MEDIAPICKLIMIT * 2);
            while (true) {
                readLen = is.read(buf);
                if (readLen == -1) {
                    break;
                }
                // only append the bytes actually read; appending the whole buffer
                // would copy stale data on short reads and duplicate it at EOF
                baos.write(buf, 0, readLen);
                readSoFar += readLen;
                if (readSoFar > MEDIAPICKLIMIT) {
                    break;
                }
            }
            // rewind so the stream can still be written out by the caller
            is.reset();
            // result is always exactly MEDIAPICKLIMIT bytes, zero-padded/truncated
            byte[] result = new byte[MEDIAPICKLIMIT];
            System.arraycopy(baos.toByteArray(), 0, result, 0, Math.min(baos.size(), MEDIAPICKLIMIT));
            return result;
        } catch (FileNotFoundException e) {
            return null;
        } catch (IOException e) {
            return null;
        }
    }


    /**
     * @param notesDone Percentage of notes complete.
     * @param cardsDone Percentage of cards complete.
     * @param postProcess Percentage of remaining tasks complete.
     */
    protected void publishProgress(int notesDone, int cardsDone, int postProcess) {
        if (mProgress != null) {
            mProgress.publishProgress(new DeckTask.TaskData(
                    getRes().getString(R.string.import_progress, notesDone, cardsDone, postProcess)));
        }
    }


    /* The methods below are only used for testing. */

    public void setDupeOnSchemaChange(boolean b) {
        mDupeOnSchemaChange = b;
    }


    public int getDupes() {
        return mDupes;
    }


    public int getAdded() {
        return mAdded;
    }


    public int getUpdated() {
        return mUpdated;
    }
}