blob: a97fe5bdc47f8af6ebb73207cfbd3839f3ead5dd [file] [log] [blame]
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#define QT_NO_CAST_FROM_ASCII
#include "qmimemagicrule_p.h"
#include "qmimetypeparser_p.h"
#include <QtCore/QList>
#include <QtCore/QDebug>
#include <qendian.h>
QT_BEGIN_NAMESPACE
// in the same order as Type!
static const char magicRuleTypes_string[] =
"invalid\0"
"string\0"
"host16\0"
"host32\0"
"big16\0"
"big32\0"
"little16\0"
"little32\0"
"byte\0"
"\0";
static const int magicRuleTypes_indices[] = {
0, 8, 15, 22, 29, 35, 41, 50, 59, 64, 0
};
QMimeMagicRule::Type QMimeMagicRule::type(const QByteArray &theTypeName)
{
for (int i = String; i <= Byte; ++i) {
if (theTypeName == magicRuleTypes_string + magicRuleTypes_indices[i])
return Type(i);
}
return Invalid;
}
QByteArray QMimeMagicRule::typeName(QMimeMagicRule::Type theType)
{
return magicRuleTypes_string + magicRuleTypes_indices[theType];
}
bool QMimeMagicRule::operator==(const QMimeMagicRule &other) const
{
return m_type == other.m_type &&
m_value == other.m_value &&
m_startPos == other.m_startPos &&
m_endPos == other.m_endPos &&
m_mask == other.m_mask &&
m_pattern == other.m_pattern &&
m_number == other.m_number &&
m_numberMask == other.m_numberMask &&
m_matchFunction == other.m_matchFunction;
}
// Used by both providers
bool QMimeMagicRule::matchSubstring(const char *dataPtr, int dataSize, int rangeStart, int rangeLength,
int valueLength, const char *valueData, const char *mask)
{
// Size of searched data.
// Example: value="ABC", rangeLength=3 -> we need 3+3-1=5 bytes (ABCxx,xABCx,xxABC would match)
const int dataNeeded = qMin(rangeLength + valueLength - 1, dataSize - rangeStart);
if (!mask) {
// callgrind says QByteArray::indexOf is much slower, since our strings are typically too
// short for be worth Boyer-Moore matching (1 to 71 bytes, 11 bytes on average).
bool found = false;
for (int i = rangeStart; i < rangeStart + rangeLength; ++i) {
if (i + valueLength > dataSize)
break;
if (memcmp(valueData, dataPtr + i, valueLength) == 0) {
found = true;
break;
}
}
if (!found)
return false;
} else {
bool found = false;
const char *readDataBase = dataPtr + rangeStart;
// Example (continued from above):
// deviceSize is 4, so dataNeeded was max'ed to 4.
// maxStartPos = 4 - 3 + 1 = 2, and indeed
// we need to check for a match a positions 0 and 1 (ABCx and xABC).
const int maxStartPos = dataNeeded - valueLength + 1;
for (int i = 0; i < maxStartPos; ++i) {
const char *d = readDataBase + i;
bool valid = true;
for (int idx = 0; idx < valueLength; ++idx) {
if (((*d++) & mask[idx]) != (valueData[idx] & mask[idx])) {
valid = false;
break;
}
}
if (valid)
found = true;
}
if (!found)
return false;
}
//qDebug() << "Found" << value << "in" << searchedData;
return true;
}
bool QMimeMagicRule::matchString(const QByteArray &data) const
{
const int rangeLength = m_endPos - m_startPos + 1;
return QMimeMagicRule::matchSubstring(data.constData(), data.size(), m_startPos, rangeLength, m_pattern.size(), m_pattern.constData(), m_mask.constData());
}
template <typename T>
bool QMimeMagicRule::matchNumber(const QByteArray &data) const
{
const T value(m_number);
const T mask(m_numberMask);
//qDebug() << "matchNumber" << "0x" << QString::number(m_number, 16) << "size" << sizeof(T);
//qDebug() << "mask" << QString::number(m_numberMask, 16);
const char *p = data.constData() + m_startPos;
const char *e = data.constData() + qMin(data.size() - int(sizeof(T)), m_endPos);
for ( ; p <= e; ++p) {
if ((qFromUnaligned<T>(p) & mask) == (value & mask))
return true;
}
return false;
}
static inline QByteArray makePattern(const QByteArray &value)
{
QByteArray pattern(value.size(), Qt::Uninitialized);
char *data = pattern.data();
const char *p = value.constData();
const char *e = p + value.size();
for ( ; p < e; ++p) {
if (*p == '\\' && ++p < e) {
if (*p == 'x') { // hex (\\xff)
char c = 0;
for (int i = 0; i < 2 && p + 1 < e; ++i) {
++p;
if (*p >= '0' && *p <= '9')
c = (c << 4) + *p - '0';
else if (*p >= 'a' && *p <= 'f')
c = (c << 4) + *p - 'a' + 10;
else if (*p >= 'A' && *p <= 'F')
c = (c << 4) + *p - 'A' + 10;
else
continue;
}
*data++ = c;
} else if (*p >= '0' && *p <= '7') { // oct (\\7, or \\77, or \\377)
char c = *p - '0';
if (p + 1 < e && p[1] >= '0' && p[1] <= '7') {
c = (c << 3) + *(++p) - '0';
if (p + 1 < e && p[1] >= '0' && p[1] <= '7' && p[-1] <= '3')
c = (c << 3) + *(++p) - '0';
}
*data++ = c;
} else if (*p == 'n') {
*data++ = '\n';
} else if (*p == 'r') {
*data++ = '\r';
} else if (*p == 't') {
*data++ = '\t';
} else { // escaped
*data++ = *p;
}
} else {
*data++ = *p;
}
}
pattern.truncate(data - pattern.data());
return pattern;
}
// Evaluate a magic match rule like
// <match value="must be converted with BinHex" type="string" offset="11"/>
// <match value="0x9501" type="big16" offset="0:64"/>
QMimeMagicRule::QMimeMagicRule(const QString &type,
const QByteArray &value,
const QString &offsets,
const QByteArray &mask,
QString *errorString)
: m_type(QMimeMagicRule::type(type.toLatin1())),
m_value(value),
m_mask(mask),
m_matchFunction(nullptr)
{
if (Q_UNLIKELY(m_type == Invalid))
*errorString = QLatin1String("Type ") + type + QLatin1String(" is not supported");
// Parse for offset as "1" or "1:10"
const int colonIndex = offsets.indexOf(QLatin1Char(':'));
const QStringRef startPosStr = offsets.midRef(0, colonIndex); // \ These decay to returning 'offsets'
const QStringRef endPosStr = offsets.midRef(colonIndex + 1);// / unchanged when colonIndex == -1
if (Q_UNLIKELY(!QMimeTypeParserBase::parseNumber(startPosStr, &m_startPos, errorString)) ||
Q_UNLIKELY(!QMimeTypeParserBase::parseNumber(endPosStr, &m_endPos, errorString))) {
m_type = Invalid;
return;
}
if (Q_UNLIKELY(m_value.isEmpty())) {
m_type = Invalid;
if (errorString)
*errorString = QStringLiteral("Invalid empty magic rule value");
return;
}
if (m_type >= Host16 && m_type <= Byte) {
bool ok;
m_number = m_value.toUInt(&ok, 0); // autodetect base
if (Q_UNLIKELY(!ok)) {
m_type = Invalid;
if (errorString)
*errorString = QLatin1String("Invalid magic rule value \"") + QLatin1String(m_value) + QLatin1Char('"');
return;
}
m_numberMask = !m_mask.isEmpty() ? m_mask.toUInt(&ok, 0) : 0; // autodetect base
}
switch (m_type) {
case String:
m_pattern = makePattern(m_value);
m_pattern.squeeze();
if (!m_mask.isEmpty()) {
if (Q_UNLIKELY(m_mask.size() < 4 || !m_mask.startsWith("0x"))) {
m_type = Invalid;
if (errorString)
*errorString = QLatin1String("Invalid magic rule mask \"") + QLatin1String(m_mask) + QLatin1Char('"');
return;
}
const QByteArray &tempMask = QByteArray::fromHex(QByteArray::fromRawData(
m_mask.constData() + 2, m_mask.size() - 2));
if (Q_UNLIKELY(tempMask.size() != m_pattern.size())) {
m_type = Invalid;
if (errorString)
*errorString = QLatin1String("Invalid magic rule mask size \"") + QLatin1String(m_mask) + QLatin1Char('"');
return;
}
m_mask = tempMask;
} else {
m_mask.fill(char(-1), m_pattern.size());
}
m_mask.squeeze();
m_matchFunction = &QMimeMagicRule::matchString;
break;
case Byte:
if (m_number <= quint8(-1)) {
if (m_numberMask == 0)
m_numberMask = quint8(-1);
m_matchFunction = &QMimeMagicRule::matchNumber<quint8>;
}
break;
case Big16:
case Little16:
if (m_number <= quint16(-1)) {
m_number = m_type == Little16 ? qFromLittleEndian<quint16>(m_number) : qFromBigEndian<quint16>(m_number);
if (m_numberMask != 0)
m_numberMask = m_type == Little16 ? qFromLittleEndian<quint16>(m_numberMask) : qFromBigEndian<quint16>(m_numberMask);
}
Q_FALLTHROUGH();
case Host16:
if (m_number <= quint16(-1)) {
if (m_numberMask == 0)
m_numberMask = quint16(-1);
m_matchFunction = &QMimeMagicRule::matchNumber<quint16>;
}
break;
case Big32:
case Little32:
m_number = m_type == Little32 ? qFromLittleEndian<quint32>(m_number) : qFromBigEndian<quint32>(m_number);
if (m_numberMask != 0)
m_numberMask = m_type == Little32 ? qFromLittleEndian<quint32>(m_numberMask) : qFromBigEndian<quint32>(m_numberMask);
Q_FALLTHROUGH();
case Host32:
if (m_numberMask == 0)
m_numberMask = quint32(-1);
m_matchFunction = &QMimeMagicRule::matchNumber<quint32>;
break;
default:
break;
}
}
QByteArray QMimeMagicRule::mask() const
{
QByteArray result = m_mask;
if (m_type == String) {
// restore '0x'
result = "0x" + result.toHex();
}
return result;
}
bool QMimeMagicRule::matches(const QByteArray &data) const
{
const bool ok = m_matchFunction && (this->*m_matchFunction)(data);
if (!ok)
return false;
// No submatch? Then we are done.
if (m_subMatches.isEmpty())
return true;
//qDebug() << "Checking" << m_subMatches.count() << "sub-rules";
// Check that one of the submatches matches too
for ( QList<QMimeMagicRule>::const_iterator it = m_subMatches.begin(), end = m_subMatches.end() ;
it != end ; ++it ) {
if ((*it).matches(data)) {
// One of the hierarchies matched -> mimetype recognized.
return true;
}
}
return false;
}
QT_END_NAMESPACE