| /**************************************************************************** |
| ** |
| ** Copyright (C) 2016 The Qt Company Ltd. |
| ** Contact: https://www.qt.io/licensing/ |
| ** |
| ** This file is part of the Qt Assistant of the Qt Toolkit. |
| ** |
| ** $QT_BEGIN_LICENSE:LGPL$ |
| ** Commercial License Usage |
| ** Licensees holding valid commercial Qt licenses may use this file in |
| ** accordance with the commercial license agreement provided with the |
| ** Software or, alternatively, in accordance with the terms contained in |
| ** a written agreement between you and The Qt Company. For licensing terms |
| ** and conditions see https://www.qt.io/terms-conditions. For further |
| ** information use the contact form at https://www.qt.io/contact-us. |
| ** |
| ** GNU Lesser General Public License Usage |
| ** Alternatively, this file may be used under the terms of the GNU Lesser |
| ** General Public License version 3 as published by the Free Software |
| ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
| ** packaging of this file. Please review the following information to |
| ** ensure the GNU Lesser General Public License version 3 requirements |
| ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
| ** |
| ** GNU General Public License Usage |
| ** Alternatively, this file may be used under the terms of the GNU |
| ** General Public License version 2.0 or (at your option) the GNU General |
| ** Public license version 3 or any later version approved by the KDE Free |
| ** Qt Foundation. The licenses are as published by the Free Software |
| ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
| ** included in the packaging of this file. Please review the following |
| ** information to ensure the GNU General Public License requirements will |
| ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
| ** https://www.gnu.org/licenses/gpl-3.0.html. |
| ** |
| ** $QT_END_LICENSE$ |
| ** |
| ****************************************************************************/ |
| |
| #include "qhelpsearchindexwriter_default_p.h" |
| #include "qhelp_global.h" |
| #include "qhelpenginecore.h" |
| #include "qhelpdbreader_p.h" |
| |
| #include <QtCore/QDataStream> |
| #include <QtCore/QDateTime> |
| #include <QtCore/QDir> |
| #include <QtCore/QTextCodec> |
| #include <QtCore/QTextStream> |
| #include <QtCore/QSet> |
| #include <QtCore/QUrl> |
| #include <QtCore/QVariant> |
| #include <QtSql/QSqlDatabase> |
| #include <QtSql/QSqlDriver> |
| #include <QtSql/QSqlError> |
| #include <QtSql/QSqlQuery> |
| |
| #include <QTextDocument> |
| |
| QT_BEGIN_NAMESPACE |
| |
| namespace fulltextsearch { |
| namespace qt { |
| |
| /* File name of the SQLite full-text search database inside the index folder. */ const char FTS_DB_NAME[] = "fts"; |
| |
| Writer::Writer(const QString &path) |
| : m_dbDir(path) |
| { |
| clearLegacyIndex(); |
| QDir().mkpath(m_dbDir); |
| m_uniqueId = QHelpGlobal::uniquifyConnectionName(QLatin1String("QHelpWriter"), this); |
| m_db = new QSqlDatabase(); |
| *m_db = QSqlDatabase::addDatabase(QLatin1String("QSQLITE"), m_uniqueId); |
| const QString dbPath = m_dbDir + QLatin1Char('/') + QLatin1String(FTS_DB_NAME); |
| m_db->setDatabaseName(dbPath); |
| if (!m_db->open()) { |
| const QString &error = QHelpSearchIndexWriter::tr("Cannot open database \"%1\" using connection \"%2\": %3") |
| .arg(dbPath, m_uniqueId, m_db->lastError().text()); |
| qWarning("%s", qUtf8Printable(error)); |
| delete m_db; |
| m_db = nullptr; |
| QSqlDatabase::removeDatabase(m_uniqueId); |
| m_uniqueId = QString(); |
| } else { |
| startTransaction(); |
| } |
| } |
| |
| bool Writer::tryInit(bool reindex) |
| { |
| if (!m_db) |
| return true; |
| |
| QSqlQuery query(*m_db); |
| // HACK: we try to perform any modifying command just to check if |
| // we don't get SQLITE_BUSY code (SQLITE_BUSY is defined to 5 in sqlite driver) |
| if (!query.exec(QLatin1String("CREATE TABLE foo ();"))) { |
| if (query.lastError().nativeErrorCode() == QLatin1String("5")) // db is locked |
| return false; |
| } |
| // HACK: clear what we have created |
| query.exec(QLatin1String("DROP TABLE foo;")); |
| |
| init(reindex); |
| return true; |
| } |
| |
| bool Writer::hasDB() |
| { |
| if (!m_db) |
| return false; |
| |
| QSqlQuery query(*m_db); |
| |
| query.prepare(QLatin1String("SELECT id FROM info LIMIT 1")); |
| query.exec(); |
| |
| return query.next(); |
| } |
| |
| void Writer::clearLegacyIndex() |
| { |
| // Clear old legacy clucene index. |
| // More important in case of Creator, since |
| // the index folder is common for all Creator versions |
| QDir dir(m_dbDir); |
| if (!dir.exists()) |
| return; |
| |
| const QStringList &list = dir.entryList(QDir::Files | QDir::Hidden); |
| if (!list.contains(QLatin1String(FTS_DB_NAME))) { |
| for (const QString &item : list) |
| dir.remove(item); |
| } |
| } |
| |
| void Writer::init(bool reindex) |
| { |
| if (!m_db) |
| return; |
| |
| QSqlQuery query(*m_db); |
| |
| if (reindex && hasDB()) { |
| m_needOptimize = true; |
| |
| query.exec(QLatin1String("DROP TABLE titles;")); |
| query.exec(QLatin1String("DROP TABLE contents;")); |
| query.exec(QLatin1String("DROP TABLE info;")); |
| } |
| |
| query.exec(QLatin1String("CREATE TABLE info (id INTEGER PRIMARY KEY, namespace, attributes, url, title, data);")); |
| |
| query.exec(QLatin1String("CREATE VIRTUAL TABLE titles USING fts5(" |
| "namespace UNINDEXED, attributes UNINDEXED, " |
| "url UNINDEXED, title, " |
| "tokenize = 'porter unicode61', content = 'info', content_rowid='id');")); |
| query.exec(QLatin1String("CREATE TRIGGER titles_insert AFTER INSERT ON info BEGIN " |
| "INSERT INTO titles(rowid, namespace, attributes, url, title) " |
| "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); " |
| "END;")); |
| query.exec(QLatin1String("CREATE TRIGGER titles_delete AFTER DELETE ON info BEGIN " |
| "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) " |
| "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); " |
| "END;")); |
| query.exec(QLatin1String("CREATE TRIGGER titles_update AFTER UPDATE ON info BEGIN " |
| "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) " |
| "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); " |
| "INSERT INTO titles(rowid, namespace, attributes, url, title) " |
| "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); " |
| "END;")); |
| |
| query.exec(QLatin1String("CREATE VIRTUAL TABLE contents USING fts5(" |
| "namespace UNINDEXED, attributes UNINDEXED, " |
| "url UNINDEXED, title, data, " |
| "tokenize = 'porter unicode61', content = 'info', content_rowid='id');")); |
| query.exec(QLatin1String("CREATE TRIGGER contents_insert AFTER INSERT ON info BEGIN " |
| "INSERT INTO contents(rowid, namespace, attributes, url, title, data) " |
| "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); " |
| "END;")); |
| query.exec(QLatin1String("CREATE TRIGGER contents_delete AFTER DELETE ON info BEGIN " |
| "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) " |
| "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); " |
| "END;")); |
| query.exec(QLatin1String("CREATE TRIGGER contents_update AFTER UPDATE ON info BEGIN " |
| "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) " |
| "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); " |
| "INSERT INTO contents(rowid, namespace, attributes, url, title, data) " |
| "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); " |
| "END;")); |
| } |
| |
| Writer::~Writer() |
| { |
| if (m_db) { |
| m_db->close(); |
| delete m_db; |
| } |
| |
| if (!m_uniqueId.isEmpty()) |
| QSqlDatabase::removeDatabase(m_uniqueId); |
| } |
| |
| void Writer::flush() |
| { |
| if (!m_db) |
| return; |
| |
| QSqlQuery query(*m_db); |
| |
| query.prepare(QLatin1String("INSERT INTO info (namespace, attributes, url, title, data) VALUES (?, ?, ?, ?, ?)")); |
| query.addBindValue(m_namespaces); |
| query.addBindValue(m_attributes); |
| query.addBindValue(m_urls); |
| query.addBindValue(m_titles); |
| query.addBindValue(m_contents); |
| query.execBatch(); |
| |
| m_namespaces = QVariantList(); |
| m_attributes = QVariantList(); |
| m_urls = QVariantList(); |
| m_titles = QVariantList(); |
| m_contents = QVariantList(); |
| } |
| |
| void Writer::removeNamespace(const QString &namespaceName) |
| { |
| if (!m_db) |
| return; |
| |
| if (!hasNamespace(namespaceName)) |
| return; // no data to delete |
| |
| m_needOptimize = true; |
| |
| QSqlQuery query(*m_db); |
| |
| query.prepare(QLatin1String("DELETE FROM info WHERE namespace = ?")); |
| query.addBindValue(namespaceName); |
| query.exec(); |
| } |
| |
| bool Writer::hasNamespace(const QString &namespaceName) |
| { |
| if (!m_db) |
| return false; |
| |
| QSqlQuery query(*m_db); |
| |
| query.prepare(QLatin1String("SELECT id FROM info WHERE namespace = ? LIMIT 1")); |
| query.addBindValue(namespaceName); |
| query.exec(); |
| |
| return query.next(); |
| } |
| |
| void Writer::insertDoc(const QString &namespaceName, |
| const QString &attributes, |
| const QString &url, |
| const QString &title, |
| const QString &contents) |
| { |
| m_namespaces.append(namespaceName); |
| m_attributes.append(attributes); |
| m_urls.append(url); |
| m_titles.append(title); |
| m_contents.append(contents); |
| } |
| |
| void Writer::startTransaction() |
| { |
| if (!m_db) |
| return; |
| |
| m_needOptimize = false; |
| if (m_db && m_db->driver()->hasFeature(QSqlDriver::Transactions)) |
| m_db->transaction(); |
| } |
| |
| void Writer::endTransaction() |
| { |
| if (!m_db) |
| return; |
| |
| QSqlQuery query(*m_db); |
| |
| if (m_needOptimize) { |
| query.exec(QLatin1String("INSERT INTO titles(titles) VALUES('rebuild')")); |
| query.exec(QLatin1String("INSERT INTO contents(contents) VALUES('rebuild')")); |
| } |
| |
| if (m_db && m_db->driver()->hasFeature(QSqlDriver::Transactions)) |
| m_db->commit(); |
| |
| if (m_needOptimize) |
| query.exec(QLatin1String("VACUUM")); |
| } |
| |
| QHelpSearchIndexWriter::QHelpSearchIndexWriter() |
| : QThread() |
| , m_cancel(false) |
| { |
| } |
| |
| QHelpSearchIndexWriter::~QHelpSearchIndexWriter() |
| { |
| m_mutex.lock(); |
| this->m_cancel = true; |
| m_mutex.unlock(); |
| |
| wait(); |
| } |
| |
| void QHelpSearchIndexWriter::cancelIndexing() |
| { |
| QMutexLocker lock(&m_mutex); |
| m_cancel = true; |
| } |
| |
| void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile, |
| const QString &indexFilesFolder, |
| bool reindex) |
| { |
| wait(); |
| QMutexLocker lock(&m_mutex); |
| |
| m_cancel = false; |
| m_reindex = reindex; |
| m_collectionFile = collectionFile; |
| m_indexFilesFolder = indexFilesFolder; |
| |
| lock.unlock(); |
| |
| start(QThread::LowestPriority); |
| } |
| |
| /* Key of the help engine custom value that stores the serialized map of indexed namespaces. */ static const char IndexedNamespacesKey[] = "FTS5IndexedNamespaces"; |
| |
| static QMap<QString, QDateTime> readIndexMap(const QHelpEngineCore &engine) |
| { |
| QMap<QString, QDateTime> indexMap; |
| QDataStream dataStream(engine.customValue( |
| QLatin1String(IndexedNamespacesKey)).toByteArray()); |
| dataStream >> indexMap; |
| return indexMap; |
| } |
| |
| static bool writeIndexMap(QHelpEngineCore *engine, |
| const QMap<QString, QDateTime> &indexMap) |
| { |
| QByteArray data; |
| |
| QDataStream dataStream(&data, QIODevice::ReadWrite); |
| dataStream << indexMap; |
| |
| return engine->setCustomValue( |
| QLatin1String(IndexedNamespacesKey), data); |
| } |
| |
| static bool clearIndexMap(QHelpEngineCore *engine) |
| { |
| return engine->removeCustomValue(QLatin1String(IndexedNamespacesKey)); |
| } |
| |
| void QHelpSearchIndexWriter::run() |
| { |
| QMutexLocker lock(&m_mutex); |
| |
| if (m_cancel) |
| return; |
| |
| const bool reindex(m_reindex); |
| const QString collectionFile(m_collectionFile); |
| const QString indexPath(m_indexFilesFolder); |
| |
| lock.unlock(); |
| |
| QHelpEngineCore engine(collectionFile, nullptr); |
| if (!engine.setupData()) |
| return; |
| |
| if (reindex) |
| clearIndexMap(&engine); |
| |
| emit indexingStarted(); |
| |
| Writer writer(indexPath); |
| |
| while (!writer.tryInit(reindex)) |
| sleep(1); |
| |
| const QStringList ®isteredDocs = engine.registeredDocumentations(); |
| QMap<QString, QDateTime> indexMap = readIndexMap(engine); |
| |
| if (!reindex) { |
| for (const QString &namespaceName : registeredDocs) { |
| if (indexMap.contains(namespaceName)) { |
| const QString path = engine.documentationFileName(namespaceName); |
| if (indexMap.value(namespaceName) < QFileInfo(path).lastModified()) { |
| // Remove some outdated indexed stuff |
| indexMap.remove(namespaceName); |
| writer.removeNamespace(namespaceName); |
| } else if (!writer.hasNamespace(namespaceName)) { |
| // No data in fts db for namespace. |
| // The namespace could have been removed from fts db |
| // or the whole fts db have been removed |
| // without removing it from indexMap. |
| indexMap.remove(namespaceName); |
| } |
| } else { |
| // Needed in case namespaceName was removed from indexMap |
| // without removing it from fts db. |
| // May happen when e.g. qch file was removed manually |
| // without removing fts db. |
| writer.removeNamespace(namespaceName); |
| } |
| // TODO: we may also detect if there are any other data |
| // and remove it |
| } |
| } else { |
| indexMap.clear(); |
| } |
| |
| for (const QString &namespaceName : indexMap.keys()) { |
| if (!registeredDocs.contains(namespaceName)) { |
| indexMap.remove(namespaceName); |
| writer.removeNamespace(namespaceName); |
| } |
| } |
| |
| for (const QString &namespaceName : registeredDocs) { |
| lock.relock(); |
| if (m_cancel) { |
| // store what we have done so far |
| writeIndexMap(&engine, indexMap); |
| writer.endTransaction(); |
| emit indexingFinished(); |
| return; |
| } |
| lock.unlock(); |
| |
| // if indexed, continue |
| if (indexMap.contains(namespaceName)) |
| continue; |
| |
| const QString fileName = engine.documentationFileName(namespaceName); |
| QHelpDBReader reader(fileName, QHelpGlobal::uniquifyConnectionName( |
| fileName, this), nullptr); |
| if (!reader.init()) |
| continue; |
| |
| const QString virtualFolder = reader.virtualFolder(); |
| |
| const QList<QStringList> &attributeSets = |
| engine.filterAttributeSets(namespaceName); |
| |
| for (const QStringList &attributes : attributeSets) { |
| const QString &attributesString = attributes.join(QLatin1Char('|')); |
| |
| const QMap<QString, QByteArray> htmlFiles |
| = reader.filesData(attributes, QLatin1String("html")); |
| const QMap<QString, QByteArray> htmFiles |
| = reader.filesData(attributes, QLatin1String("htm")); |
| const QMap<QString, QByteArray> txtFiles |
| = reader.filesData(attributes, QLatin1String("txt")); |
| |
| QMap<QString, QByteArray> files = htmlFiles; |
| files.unite(htmFiles); |
| files.unite(txtFiles); |
| |
| for (auto it = files.cbegin(), end = files.cend(); it != end ; ++it) { |
| lock.relock(); |
| if (m_cancel) { |
| // store what we have done so far |
| writeIndexMap(&engine, indexMap); |
| writer.endTransaction(); |
| emit indexingFinished(); |
| return; |
| } |
| lock.unlock(); |
| |
| const QString &file = it.key(); |
| const QByteArray &data = it.value(); |
| |
| if (data.isEmpty()) |
| continue; |
| |
| QUrl url; |
| url.setScheme(QLatin1String("qthelp")); |
| url.setAuthority(namespaceName); |
| url.setPath(QLatin1Char('/') + virtualFolder + QLatin1Char('/') + file); |
| |
| if (url.hasFragment()) |
| url.setFragment(QString()); |
| |
| const QString &fullFileName = url.toString(); |
| if (!fullFileName.endsWith(QLatin1String(".html")) |
| && !fullFileName.endsWith(QLatin1String(".htm")) |
| && !fullFileName.endsWith(QLatin1String(".txt"))) { |
| continue; |
| } |
| |
| QTextStream s(data); |
| const QString &en = QHelpGlobal::codecFromData(data); |
| s.setCodec(QTextCodec::codecForName(en.toLatin1().constData())); |
| |
| const QString &text = s.readAll(); |
| if (text.isEmpty()) |
| continue; |
| |
| QString title; |
| QString contents; |
| if (fullFileName.endsWith(QLatin1String(".txt"))) { |
| title = fullFileName.mid(fullFileName.lastIndexOf(QLatin1Char('/')) + 1); |
| contents = text.toHtmlEscaped(); |
| } else { |
| QTextDocument doc; |
| doc.setHtml(text); |
| |
| title = doc.metaInformation(QTextDocument::DocumentTitle).toHtmlEscaped(); |
| contents = doc.toPlainText().toHtmlEscaped(); |
| } |
| |
| writer.insertDoc(namespaceName, attributesString, fullFileName, title, contents); |
| } |
| } |
| writer.flush(); |
| const QString &path = engine.documentationFileName(namespaceName); |
| indexMap.insert(namespaceName, QFileInfo(path).lastModified()); |
| } |
| |
| writeIndexMap(&engine, indexMap); |
| |
| writer.endTransaction(); |
| emit indexingFinished(); |
| } |
| |
| } // namespace qt |
| } // namespace fulltextsearch |
| |
| QT_END_NAMESPACE |