No public description
PiperOrigin-RevId: 700634225
diff --git a/sandbox/README.md b/sandbox/README.md
new file mode 100644
index 0000000..aeedf11
--- /dev/null
+++ b/sandbox/README.md
@@ -0,0 +1,82 @@
+<!--* freshness: { owner: 'ise-sandboxing-members' reviewed: '2024-10-28' } *-->
+
+# Sandboxed hunspell API
+
+As part of ISE-Sandboxing's Sandboxing Enforcement Program
+(go/ise-sandboxing-enforcement), we have identified the `hunspell` library
+as a target that requires sandboxing if processing data originating from outside
+of Alphabet. This is in accordance with our guideline go/untrusted-workloads.
+
+This package contains a sandboxed version of the **spellcheck APIs** of
+[//third_party/hunspell:hunspell](http:///BUILD).
+
+Sandboxing `hunspell` provides an additional security boundary that requires
+a malicious actor to successfully exploit two vulnerabilities before they are
+able to laterally move away from the initial entry point.
+
+TODO: Update the next paragraph depending on the `sapi_library` target in BUILD.
+
+The `sandboxed_hunspell`
+([//third_party/hunspell/sandbox:sandboxed_hunspell](http:///sandbox/BUILD))
+library provides only a subset of the unsandboxed spellcheck
+APIs. This subset is listed in the `functions` argument of the `sapi_library`
+BUILD rule. If more functions are needed, reach out to ISE Sandboxing
+(go/ise-sandboxing) for guidance.
+
+## sandboxed_hunspell
+
+This is the SAPI implementation of `//third_party/hunspell:hunspell` and provides
+users of the sandboxed API with the most control over the sandbox's lifetime,
+as well as the possibility to implement additional customizations. This comes
+at the cost of simplicity.
+
+### Getting started
+
+Follow these steps:
+
+1. In your BUILD file, add the target to your `deps` list:
+
+ ```
+ "//third_party/hunspell/sandbox:sandboxed_hunspell",
+ ```
+
+1. Add the library headers in your source files:
+
+ ```
+ #include "third_party/hunspell/sandbox/sandbox.h"
+ #include "third_party/hunspell/sandbox/sandboxed_hunspell.sapi.h"
+ ```
+
+1. Create the sandbox for the affix and dictionary files, then initialize it.
+
+ ```
+ sandboxed_hunspell::LibHunspellSapiSandbox sbx("utf8.aff", "utf8.dic");
+ SAPI_RETURN_IF_ERROR(sbx.Init());
+ ```
+
+1. Create the API object with the initialized sandbox:
+
+ ```
+ sandboxed_hunspell::LibHunspellApi api(&sbx);
+ ```
+
+1. Prepare the SAPI variables for the sandboxed API function call. For an
+ explanation of SAPI variables, please consult go/sapi/variables.
+
+ ```
+ std::string s_afn = "utf8.aff";
+ std::string s_dfn = "utf8.dic";
+
+ sapi::v::ConstCStr c_afn(s_afn.c_str());
+ sapi::v::ConstCStr c_dfn(s_dfn.c_str());
+
+ SAPI_ASSIGN_OR_RETURN(
+ sandboxed_hunspell::Hunhandle * hunspell,
+ api.Hunspell_create(c_afn.PtrBefore(), c_dfn.PtrBefore()));
+ ```
+
+### sandboxed_hunspell_test
+
+This tests the basic functionality of `sandboxed_hunspell` and benchmarks it
+against the unsandboxed `hunspell` library. It is also a good reference for
+understanding how the SAPI variables need to be prepared.
diff --git a/sandbox/example_suggestions.cc b/sandbox/example_suggestions.cc
new file mode 100644
index 0000000..d9dcace
--- /dev/null
+++ b/sandbox/example_suggestions.cc
@@ -0,0 +1,125 @@
+// This program checks spelling and suggests corrections for misspelled words.
+//
+// This example illustrates the use of the sandboxed_hunspell library with an
+// affix file (definitions for the dictionary) and a dictionary file to check
+// every line of the file given by --in_file.
+
+#include <unistd.h>
+
+#include <cstdlib>
+#include <string>
+
+#include "base/init_google.h"
+#include "file/base/filelineiter.h"
+#include "third_party/absl/flags/flag.h"
+#include "third_party/absl/log/flags.h"
+#include "third_party/absl/log/log.h"
+#include "third_party/absl/status/status.h"
+#include "third_party/absl/status/statusor.h"
+#include "third_party/hunspell/sandbox/sandbox.h"
+#include "third_party/hunspell/sandbox/sandboxed_hunspell.sapi.h"
+#include "third_party/sandboxed_api/util/status_macros.h"
+#include "third_party/sandboxed_api/var_array.h"
+#include "third_party/sandboxed_api/var_int.h"
+#include "third_party/sandboxed_api/var_ptr.h"
+
+ABSL_FLAG(std::string, affix_file, "", "File path to affix file.");
+ABSL_FLAG(std::string, dict_file, "", "File path to dictionary file.");
+ABSL_FLAG(std::string, in_file, "", "File path to file to check.");
+
+// Logs hunspell's suggestions for `word`. The sandboxee-side list is always
+// freed, even if reading it fails; an untrusted count <= 0 means no results.
+absl::Status PrintSuggest(sandboxed_hunspell::LibHunspellApi& api,
+                          sapi::v::RemotePtr& hunspellrp,
+                          sapi::v::ConstCStr& word) {
+  sapi::v::GenericPtr outptr;
+  SAPI_ASSIGN_OR_RETURN(
+      int nlist,
+      api.Hunspell_suggest(&hunspellrp, outptr.PtrAfter(), word.PtrBefore()));
+  if (nlist <= 0) {  // Never size a host buffer from a non-positive count.
+    LOG(INFO) << "No suggestions.";
+    return absl::OkStatus();
+  }
+  absl::Status status = [&]() -> absl::Status {  // Must not skip the free.
+    sapi::v::Array<char*> list(nlist);
+    list.SetRemote(reinterpret_cast<void*>(outptr.GetValue()));
+    SAPI_RETURN_IF_ERROR(api.GetSandbox()->TransferFromSandboxee(&list));
+    LOG(INFO) << "Suggestions:";
+    for (int i = 0; i < nlist; ++i) {
+      sapi::v::RemotePtr sugrp(list[i]);
+      SAPI_ASSIGN_OR_RETURN(std::string suggestion,
+                            api.GetSandbox()->GetCString(sugrp));
+      LOG(INFO) << suggestion;
+    }
+    return absl::OkStatus();
+  }();
+  status.Update(api.Hunspell_free_list(&hunspellrp, outptr.PtrNone(), nlist));
+  return status;
+}
+
+// Spell-checks every line of --in_file inside the sandbox and logs suggestions
+// for each misspelled word. Returns EXIT_FAILURE on bad flags or a SAPI error.
+int main(int argc, char* argv[]) {
+  InitGoogle(argv[0], &argc, &argv, /*remove_flags=*/true);
+
+  // Always log to stderr
+  absl::SetFlag(&FLAGS_logtostderr, true);
+
+  const std::string affix_file = absl::GetFlag(FLAGS_affix_file);
+  const std::string dict_file = absl::GetFlag(FLAGS_dict_file);
+  const std::string in_file = absl::GetFlag(FLAGS_in_file);
+  // --in_file is validated too: the message below lists it as required.
+  if (affix_file.empty() || dict_file.empty() || in_file.empty()) {
+    LOG(ERROR) << "--" << absl::GetFlagReflectionHandle(FLAGS_affix_file).Name()
+               << ", --"
+               << absl::GetFlagReflectionHandle(FLAGS_dict_file).Name()
+               << ", and --"
+               << absl::GetFlagReflectionHandle(FLAGS_in_file).Name()
+               << " are required";
+    return EXIT_FAILURE;
+  }
+
+  // The sandbox gets both paths so that its policy can expose them read-only.
+  sandboxed_hunspell::LibHunspellSapiSandbox sandbox(affix_file, dict_file);
+  const absl::Status init_status = sandbox.Init();
+  if (!init_status.ok()) {
+    LOG(ERROR) << "Unable to start sandbox: " << init_status;
+    return EXIT_FAILURE;
+  }
+  sapi::v::ConstCStr c_affix_file(affix_file.c_str());
+  sapi::v::ConstCStr c_dictionary_file(dict_file.c_str());
+  sandboxed_hunspell::LibHunspellApi api(&sandbox);
+  absl::StatusOr<sandboxed_hunspell::Hunhandle*> hunspell = api.Hunspell_create(
+      c_affix_file.PtrBefore(), c_dictionary_file.PtrBefore());
+  if (!hunspell.ok() || *hunspell == nullptr) {
+    LOG(ERROR) << "Could not initialize hunspell";
+    return EXIT_FAILURE;
+  }
+
+  sapi::v::RemotePtr hunspellrp(*hunspell);
+  for (std::string& buf : FileLines(in_file, FileLineIterator::NO_LF)) {
+    sapi::v::ConstCStr cbuf(buf.c_str());
+    absl::StatusOr<int> result =
+        api.Hunspell_spell(&hunspellrp, cbuf.PtrBefore());
+    if (!result.ok()) {
+      LOG(ERROR) << "Could not check word: " << result.status();
+      return EXIT_FAILURE;
+    }
+    if (*result) {
+      LOG(INFO) << "Word " << buf << " is ok";
+      continue;
+    }
+    LOG(INFO) << "Word " << buf << " is incorrect";
+    absl::Status status = PrintSuggest(api, hunspellrp, cbuf);
+    if (!status.ok()) {
+      LOG(ERROR) << "Unable to get all suggestion: " << status;
+      return EXIT_FAILURE;
+    }
+  }
+
+  if (!api.Hunspell_destroy(&hunspellrp).ok()) {
+    LOG(ERROR) << "Unable to destroy hunspell";
+    return EXIT_FAILURE;
+  }
+  return EXIT_SUCCESS;
+}
diff --git a/sandbox/sandbox.h b/sandbox/sandbox.h
new file mode 100644
index 0000000..9896b34
--- /dev/null
+++ b/sandbox/sandbox.h
@@ -0,0 +1,34 @@
+#ifndef THIRD_PARTY_HUNSPELL_SANDBOX_SANDBOX_H_
+#define THIRD_PARTY_HUNSPELL_SANDBOX_SANDBOX_H_
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "third_party/hunspell/sandbox/sandboxed_hunspell.sapi.h"
+#include "third_party/sandboxed_api/sandbox2/policy.h"
+
+namespace sandboxed_hunspell {
+
+// Hunspell SAPI sandbox; its policy adds the affix/dictionary files read-only.
+class LibHunspellSapiSandbox : public LibHunspellSandbox {
+ public:
+  explicit LibHunspellSapiSandbox(std::string affix_file_name,
+                                  std::string dictionary_file_name)
+      : affix_file_name_(std::move(affix_file_name)),
+        dictionary_file_name_(std::move(dictionary_file_name)) {}
+
+ private:
+  std::unique_ptr<sandbox2::Policy> ModifyPolicy(
+      sandbox2::PolicyBuilder* builder) override {
+    builder->AddFile(affix_file_name_, /*is_ro=*/true);
+    builder->AddFile(dictionary_file_name_, /*is_ro=*/true);
+    return builder->BuildOrDie();
+  }
+  std::string affix_file_name_;
+  std::string dictionary_file_name_;
+};
+
+} // namespace sandboxed_hunspell
+
+#endif // THIRD_PARTY_HUNSPELL_SANDBOX_SANDBOX_H_
diff --git a/sandbox/sandboxed_hunspell_test.cc b/sandbox/sandboxed_hunspell_test.cc
new file mode 100644
index 0000000..2477da9
--- /dev/null
+++ b/sandbox/sandboxed_hunspell_test.cc
@@ -0,0 +1,197 @@
+// Unit test for sandboxed_hunspell
+
+#include <optional>
+#include <string>
+
+#include "devtools/build/runtime/get_runfiles_dir.h"
+#include "file/base/filelineiter.h"
+#include "file/base/path.h"
+#include "testing/base/public/benchmark.h"
+#include "testing/base/public/gmock.h"
+#include "testing/base/public/gunit.h"
+#include "third_party/absl/log/check.h"
+#include "third_party/absl/log/log.h"
+#include "third_party/absl/status/status.h"
+#include "third_party/absl/strings/string_view.h"
+#include "third_party/hunspell/sandbox/sandbox.h"
+#include "third_party/hunspell/sandbox/sandboxed_hunspell.sapi.h"
+#include "third_party/hunspell/src/hunspell/hunspell.h"
+#include "third_party/sandboxed_api/util/status_matchers.h"
+#include "third_party/sandboxed_api/vars.h"
+
+namespace {
+
+// Directory handed to GetDataDependencyFilepath() to locate the test data.
+constexpr absl::string_view test_dir_ = "/tests";
+// Affix and dictionary files the test fixture loads.
+constexpr absl::string_view kAffixFileName = "utf8.aff";
+constexpr absl::string_view kDictionaryFileName = "utf8.dic";
+// Word lists whose lines the tests expect to be accepted resp. rejected.
+constexpr absl::string_view kGoodFileName = "utf8.good";
+constexpr absl::string_view kWrongFileName = "utf8_nonbmp.wrong";
+// Word CheckSuggestion expects suggestions for; word CheckAddToDict adds.
+constexpr absl::string_view kSuggestion = "fo";
+constexpr absl::string_view kRandomWord = "random_word123";
+
+std::string GetTestFilePath(absl::string_view filename) {  // Test-data path.
+  return file::JoinPath(devtools_build::GetDataDependencyFilepath(test_dir_),
+                        filename);
+}
+
+// Fixture that starts a sandbox and creates a hunspell handle from the UTF-8
+// test dictionary; the handle is destroyed again when the fixture goes away.
+class HunspellTest : public ::testing::Test {
+ public:
+  HunspellTest()
+      : s_affix_filename_(GetTestFilePath(kAffixFileName)),
+        s_dictionary_filename_(GetTestFilePath(kDictionaryFileName)),
+        sandbox_(s_affix_filename_, s_dictionary_filename_),
+        api_(&sandbox_) {
+    sapi::v::ConstCStr c_affix_filename(s_affix_filename_.c_str());
+    sapi::v::ConstCStr c_dictionary_filename(s_dictionary_filename_.c_str());
+    // CHECK_OK instead of CHECK(x.ok()): a failure then logs the status.
+    CHECK_OK(sandbox_.Init());
+    absl::StatusOr<sandboxed_hunspell::Hunhandle*> hunspell =
+        api_.Hunspell_create(c_affix_filename.PtrBefore(),
+                             c_dictionary_filename.PtrBefore());
+    CHECK_OK(hunspell.status());
+    CHECK(*hunspell != nullptr) << "Hunspell_create returned a null handle";
+    hunspellrp_.emplace(*hunspell);
+  }
+
+  ~HunspellTest() override {
+    if (hunspellrp_) CHECK_OK(api_.Hunspell_destroy(&(*hunspellrp_)));
+  }
+
+ protected:
+  // Order matters: sandbox_ is built from the names, api_ from &sandbox_.
+  const std::string s_affix_filename_;
+  const std::string s_dictionary_filename_;
+  sandboxed_hunspell::LibHunspellSapiSandbox sandbox_;
+  sandboxed_hunspell::LibHunspellApi api_;
+  std::optional<sapi::v::RemotePtr> hunspellrp_;
+};
+
+TEST_F(HunspellTest, CheckEncoding) {  // The dictionary must report UTF-8.
+  sapi::v::RemotePtr& handle = *hunspellrp_;
+  SAPI_ASSERT_OK_AND_ASSIGN(char* enc, api_.Hunspell_get_dic_encoding(&handle));
+  SAPI_ASSERT_OK_AND_ASSIGN(std::string encoding, api_.GetSandbox()->GetCString(
+                                                      sapi::v::RemotePtr(enc)));
+  EXPECT_EQ(encoding, "UTF-8");
+}
+
+// The test expects every line of utf8.good to be accepted, i.e.
+// Hunspell_spell() to return 1 for it.
+TEST_F(HunspellTest, CheckGoodSpell) {
+  for (std::string& buf :
+       FileLines(GetTestFilePath(kGoodFileName), FileLineIterator::NO_LF)) {
+    sapi::v::ConstCStr cbuf(buf.c_str());
+    SAPI_ASSERT_OK_AND_ASSIGN(
+        int result, api_.Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
+    // Name the word: a bare "0 vs 1" failure does not say which line broke.
+    ASSERT_EQ(result, 1) << "rejected word: " << buf;
+  }
+}
+
+// The test expects every line of utf8_nonbmp.wrong to be rejected, i.e.
+// Hunspell_spell() to return 0 for it.
+TEST_F(HunspellTest, CheckWrongSpell) {
+  for (std::string& buf :
+       FileLines(GetTestFilePath(kWrongFileName), FileLineIterator::NO_LF)) {
+    sapi::v::ConstCStr cbuf(buf.c_str());
+    SAPI_ASSERT_OK_AND_ASSIGN(
+        int result, api_.Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
+    // Name the word: a bare "1 vs 0" failure does not say which line broke.
+    ASSERT_EQ(result, 0) << "accepted word: " << buf;
+  }
+}
+
+// Walks a word through the runtime dictionary: unknown at first, accepted
+// after Hunspell_add(), and unknown again after Hunspell_remove().
+TEST_F(HunspellTest, CheckAddToDict) {
+  sapi::v::ConstCStr cbuf(kRandomWord.data());
+  sapi::v::RemotePtr& handle = *hunspellrp_;
+
+  SAPI_ASSERT_OK_AND_ASSIGN(int known_before,
+                            api_.Hunspell_spell(&handle, cbuf.PtrBefore()));
+  ASSERT_EQ(known_before, 0);
+
+  SAPI_ASSERT_OK_AND_ASSIGN(int add_result,
+                            api_.Hunspell_add(&handle, cbuf.PtrBefore()));
+  ASSERT_EQ(add_result, 0);
+  SAPI_ASSERT_OK_AND_ASSIGN(int known_after_add,
+                            api_.Hunspell_spell(&handle, cbuf.PtrBefore()));
+  ASSERT_EQ(known_after_add, 1);
+
+  SAPI_ASSERT_OK_AND_ASSIGN(int remove_result,
+                            api_.Hunspell_remove(&handle, cbuf.PtrBefore()));
+  ASSERT_EQ(remove_result, 0);
+  SAPI_ASSERT_OK_AND_ASSIGN(int known_after_remove,
+                            api_.Hunspell_spell(&handle, cbuf.PtrBefore()));
+  ASSERT_EQ(known_after_remove, 0);
+}
+
+// A misspelled word must yield suggestions; the remote list is then freed.
+TEST_F(HunspellTest, CheckSuggestion) {
+  sapi::v::ConstCStr cbuf(kSuggestion.data());
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      int result, api_.Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
+  ASSERT_EQ(result, 0);
+  sapi::v::GenericPtr outptr;
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      int nlist, api_.Hunspell_suggest(&(*hunspellrp_), outptr.PtrAfter(),
+                                       cbuf.PtrBefore()));
+  ASSERT_GT(nlist, 0);
+  ASSERT_OK(api_.Hunspell_free_list(&(*hunspellrp_), outptr.PtrNone(), nlist));
+}
+
+// The following go/benchmark tests allow viewing the performance overhead
+// the sandboxed API causes. Due to the nature of SAPI sandboxes, the only
+// reliable metric is `time/op`.
+// Please note that every SAPI sandbox cold start takes ~10ms.
+
+// Times one complete sandboxed round trip per iteration: sandbox start-up,
+// Hunspell_create(), one Hunspell_spell() call and Hunspell_destroy().
+void BM_Sandboxed_hunspell_spell(benchmark::State& state) {
+  const std::string word(kSuggestion);
+  for (const auto _ : state) {
+    const std::string affix_path = GetTestFilePath(kAffixFileName);
+    const std::string dictionary_path = GetTestFilePath(kDictionaryFileName);
+
+    // A new sandbox per iteration, so its start-up cost is part of the timing.
+    sandboxed_hunspell::LibHunspellSapiSandbox sandbox(affix_path,
+                                                       dictionary_path);
+    ASSERT_OK(sandbox.Init());
+    sandboxed_hunspell::LibHunspellApi api(&sandbox);
+
+    sapi::v::ConstCStr c_affix_path(affix_path.c_str());
+    sapi::v::ConstCStr c_dictionary_path(dictionary_path.c_str());
+    SAPI_ASSERT_OK_AND_ASSIGN(
+        sandboxed_hunspell::Hunhandle * handle,
+        api.Hunspell_create(c_affix_path.PtrBefore(),
+                            c_dictionary_path.PtrBefore()));
+    sapi::v::RemotePtr hunspellrp(handle);
+
+    sapi::v::ConstCStr c_word(word.c_str());
+    SAPI_ASSERT_OK_AND_ASSIGN(
+        int spell_result, api.Hunspell_spell(&hunspellrp, c_word.PtrBefore()));
+    ASSERT_EQ(spell_result, 0);
+
+    ASSERT_THAT(api.Hunspell_destroy(&hunspellrp), sapi::IsOk());
+  }
+}
+
+// Baseline for BM_Sandboxed_hunspell_spell: the same cycle, run in-process.
+void BM_Unsandboxed_hunspell_spell(benchmark::State& state) {
+  for (const auto _ : state) {
+    Hunhandle* handle = Hunspell_create(GetTestFilePath("utf8.aff").c_str(),
+                                        GetTestFilePath("utf8.dic").c_str());
+    ASSERT_EQ(Hunspell_spell(handle, kSuggestion.data()), 0);
+    Hunspell_destroy(handle);
+  }
+}
+
+BENCHMARK(BM_Sandboxed_hunspell_spell);  // hunspell behind the SAPI sandbox
+BENCHMARK(BM_Unsandboxed_hunspell_spell);  // in-process baseline
+
+} // namespace