blob: 37b6e01714005f0299d660f3d141e4437dc4efcd [file] [log] [blame]
/******************************************************************************
** This is just a slightly modified version of convert_dict.cc
** chromium/chrome/tools/convert_dict/convert_dict.cc
**
** Original work:
** Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
** Modified work:
** Copyright (C) 2016 The Qt Company Ltd.
**
** Use of this source code is governed by a BSD-style license that can be
** found in the LICENSE.Chromium file.
**
** This tool converts Hunspell .aff/.dic pairs to a combined binary dictionary
** format (.bdic). This format is more compact, and can be more efficiently
** read by the client application.
**
******************************************************************************/
#include <base/at_exit.h>
#include <base/containers/span.h>
#include <base/files/file_path.h>
#include <base/files/file_util.h>
#include <base/i18n/icu_util.h>
#include <build/build_config.h>
#include <chrome/tools/convert_dict/aff_reader.h>
#include <chrome/tools/convert_dict/dic_reader.h>
#include <third_party/hunspell/google/bdict_reader.h>
#include <third_party/hunspell/google/bdict_writer.h>
#include <base/path_service.h>
#include <QTextStream>
#include <QLibraryInfo>
#include <QDir>
#include <QCoreApplication>
// see also src/core/type_conversion.h
// Converts a Qt path string into the string type used by base::FilePath:
// a wide string with native separators on Windows, and the result of
// QString::toStdString() everywhere else.
inline base::FilePath::StringType toFilePathString(const QString &str)
{
#if defined(Q_OS_WIN)
    const QString nativePath = QDir::toNativeSeparators(str);
    return nativePath.toStdWString();
#else
    return str.toStdString();
#endif
}
// Builds a base::FilePath from a Qt path string, going through
// toFilePathString() for the platform-specific string conversion.
inline base::FilePath toFilePath(const QString &str)
{
    const base::FilePath::StringType nativeString = toFilePathString(str);
    return base::FilePath(nativeString);
}
// Converts a Chromium UTF-16 string into a QString.
//
// The string length is passed explicitly, so the conversion neither scans for
// a terminating NUL nor stops early at an embedded one, and (on Windows) no
// temporary std::wstring has to be built from the raw pointer.
inline QString toQt(const base::string16 &string)
{
#if defined(OS_WIN)
    return QString::fromWCharArray(string.data(), static_cast<int>(string.size()));
#else
    return QString::fromUtf16(string.data(), static_cast<int>(string.size()));
#endif
}
// Converts a std::string into a QString via QString::fromStdString().
inline QString toQt(const std::string &string)
{
    QString converted = QString::fromStdString(string);
    return converted;
}
// Streams a span as a bracketed, comma-separated list, e.g. "[1,2,3]".
// An empty span is written as "[]".
template<class T>
QTextStream &operator<<(QTextStream &out, base::span<T> span)
{
    // Empty before the first element, "," before every following one.
    QString separator;
    out << '[';
    for (auto it = span.begin(); it != span.end(); ++it) {
        out << separator << *it;
        separator = QStringLiteral(",");
    }
    return out << ']';
}
// Round-trip check of a freshly serialized dictionary.
//
// Opens |serialized| with a BDictReader and walks its word iterator in step
// with |org_words|, comparing both the word text and the affix ids of every
// entry. On the first discrepancy a diagnostic is written to |out| and false
// is returned; true is returned once every entry of |org_words| has matched.
inline bool VerifyWords(const convert_dict::DicReader::WordList& org_words,
                        const std::string& serialized, QTextStream& out)
{
    hunspell::BDictReader reader;
    const unsigned char *rawData = reinterpret_cast<const unsigned char*>(serialized.data());
    if (!reader.Init(rawData, serialized.size())) {
        out << "BDict is invalid\n";
        return false;
    }

    hunspell::WordIterator wordIterator = reader.GetAllWordIterator();

    // Scratch storage filled by each call to Advance().
    static const int kWordBufferSize = 128;
    char word[kWordBufferSize];
    int foundAffixIds[hunspell::BDict::MAX_AFFIXES_PER_WORD];

    for (size_t index = 0; index < org_words.size(); ++index) {
        // A result of zero means the iterator ran out of words too early.
        const int foundAffixCount = wordIterator.Advance(word, kWordBufferSize, foundAffixIds);
        if (foundAffixCount == 0) {
            out << "Found the end before we expected\n";
            return false;
        }

        const auto &expectedEntry = org_words[index];
        if (expectedEntry.first != word) {
            out << "Word does not match!\n";
            out << "  Index:    " << index << "\n";
            out << "  Expected: " << QString::fromStdString(expectedEntry.first) << "\n";
            out << "  Actual:   " << QString::fromUtf8(word) << "\n";
            return false;
        }

        // The affix lists must agree in both length and content.
        base::span<const int> expectedAffixes(expectedEntry.second);
        base::span<const int> actualAffixes(foundAffixIds, foundAffixCount);
        const bool affixesMatch = std::equal(expectedAffixes.begin(), expectedAffixes.end(),
                                             actualAffixes.begin(), actualAffixes.end());
        if (!affixesMatch) {
            out << "Affixes do not match!\n";
            out << "  Index:    " << index << "\n";
            out << "  Word:     " << QString::fromUtf8(word) << "\n";
            out << "  Expected: " << expectedAffixes << "\n";
            out << "  Actual:   " << actualAffixes << "\n";
            return false;
        }
    }
    return true;
}
#if defined(OS_MACOSX) && defined(QT_MAC_FRAMEWORK_BUILD)
// Returns the Resources directory of QtWebEngineCore.framework, located below
// the Qt libraries path reported by QLibraryInfo.
QString frameworkIcuDataPath()
{
    const QString librariesDir = QLibraryInfo::location(QLibraryInfo::LibrariesPath);
    return librariesDir + QStringLiteral("/QtWebEngineCore.framework/Resources/");
}
#endif
// Entry point of the dictionary conversion tool.
//
// Expects exactly two arguments: the input .dic file and the output .bdic
// file. The .aff file is looked up next to the .dic file by replacing the
// extension. The steps are:
//   1. locate the ICU data directory and initialize ICU,
//   2. read the .aff and .dic files,
//   3. serialize them into the BDict format and verify the result by reading
//      it back,
//   4. write the serialized dictionary to the output file.
//
// Returns 0 on success and 1 on any failure; progress and error messages are
// written to stdout.
int main(int argc, char *argv[])
{
    // Required only for making QLibraryInfo::location() return a valid path, when the application
    // picks up a qt.conf file (which is the case for official Qt packages).
    QCoreApplication app(argc, argv);
    Q_UNUSED(app);

    // The one stdout stream used for every message below.
    QTextStream out(stdout);

    if (argc != 3) {
        out << "Usage: qwebengine_convert_dict <dic file> <bdic file>\n\nExample:\n"
               "qwebengine_convert_dict ./en-US.dic ./en-US.bdic\nwill read en-US.dic, "
               "en-US.dic_delta, and en-US.aff from the current directory and generate "
               "en-US.bdic\n\n";
        return 1;
    }

    bool icuDataDirFound = false;
    QString icuDataDir = QLibraryInfo::location(QLibraryInfo::DataPath)
            % QLatin1String("/resources");

    // Try to look up the path to the ICU data directory via an environment variable
    // (e.g. for the case when the tool is run during the build phase, and the regular installed
    // ICU data file is not available).
    const QString icuPossibleEnvDataDir = qEnvironmentVariable("QT_WEBENGINE_ICU_DATA_DIR");
    if (!icuPossibleEnvDataDir.isEmpty() && QFileInfo::exists(icuPossibleEnvDataDir)) {
        icuDataDir = icuPossibleEnvDataDir;
        icuDataDirFound = true;
    }
#if defined(OS_MACOSX) && defined(QT_MAC_FRAMEWORK_BUILD)
    // In a macOS Qt framework build, the resources are inside the QtWebEngineCore framework
    // Resources directory, rather than in the Qt install location.
    else if (QFileInfo::exists(frameworkIcuDataPath())) {
        icuDataDir = frameworkIcuDataPath();
        icuDataDirFound = true;
    }
#endif
    // Try to find the ICU data directory in the installed Qt location.
    else if (QFileInfo::exists(icuDataDir)) {
        icuDataDirFound = true;
    }

    if (icuDataDirFound) {
        base::PathService::Override(base::DIR_QT_LIBRARY_DATA, toFilePath(icuDataDir));
    } else {
        // The variable named here must match the one queried above.
        out << "Couldn't find ICU data directory. Please check that the following path exists: "
            << icuDataDir
            << "\nAlternatively provide the directory path via the QT_WEBENGINE_ICU_DATA_DIR "
               "environment variable.\n\n";
        return 1;
    }

    base::AtExitManager exit_manager;
    base::i18n::InitializeICU();

    base::FilePath file_in_path = toFilePath(argv[1]);
    base::FilePath file_out_path = toFilePath(argv[2]);

    // The affix file shares the base name of the input file.
    base::FilePath aff_path = file_in_path.ReplaceExtension(FILE_PATH_LITERAL(".aff"));
    out << "Reading " << toQt(aff_path.value()) << "\n";
    convert_dict::AffReader aff_reader(aff_path);
    if (!aff_reader.Read()) {
        out << "Unable to read the aff file.\n";
        return 1;
    }

    base::FilePath dic_path = file_in_path.ReplaceExtension(FILE_PATH_LITERAL(".dic"));
    out << "Reading " << toQt(dic_path.value()) << "\n";
    // DicReader will also read the .dic_delta file.
    convert_dict::DicReader dic_reader(dic_path);
    if (!dic_reader.Read(&aff_reader)) {
        out << "Unable to read the dic file.\n";
        return 1;
    }

    hunspell::BDictWriter writer;
    writer.SetComment(aff_reader.comments());
    writer.SetAffixRules(aff_reader.affix_rules());
    writer.SetAffixGroups(aff_reader.GetAffixGroups());
    writer.SetReplacements(aff_reader.replacements());
    writer.SetOtherCommands(aff_reader.other_commands());
    writer.SetWords(dic_reader.words());

    out << "Serializing...\n";
    std::string serialized = writer.GetBDict();

    out << "Verifying...\n";
    if (!VerifyWords(dic_reader.words(), serialized, out)) {
        out << "ERROR converting, the dictionary does not check out OK.\n";
        return 1;
    }

    out << "Writing " << toQt(file_out_path.value()) << "\n";
    FILE *out_file = base::OpenFile(file_out_path, "wb");
    if (!out_file) {
        out << "ERROR writing file\n";
        return 1;
    }

    // Check the outcome at run time rather than with Q_ASSERT: the assertion
    // is compiled out in release builds, which would let a short write pass
    // as success. The file is closed on every path, and a failing close is
    // treated as a write error as well.
    const size_t written = fwrite(serialized.data(), 1, serialized.size(), out_file);
    const bool closed = base::CloseFile(out_file);
    if (written != serialized.size() || !closed) {
        out << "ERROR writing file\n";
        return 1;
    }

    out << "Success. Dictionary converted.\n";
    return 0;
}