No public description

PiperOrigin-RevId: 700634225
diff --git a/sandbox/README.md b/sandbox/README.md
new file mode 100644
index 0000000..aeedf11
--- /dev/null
+++ b/sandbox/README.md
@@ -0,0 +1,82 @@
+<!--* freshness: { owner: 'ise-sandboxing-members' reviewed: '2024-10-28' } *-->
+
+# Sandboxed hunspell API
+
+As part of ISE-Sandboxing's Sandboxing Enforcement Program
+(go/ise-sandboxing-enforcement), we have identified the `hunspell` library
+as a target that requires sandboxing if processing data originating from outside
+of Alphabet. This is in accordance with our guideline go/untrusted-workloads.
+
+This package contains a sandboxed version of the **spellcheck APIs** of
+[//third_party/hunspell:hunspell](http:///BUILD).
+
+Sandboxing `hunspell` provides an additional security boundary that requires
+a malicious actor to successfully exploit two vulnerabilities before they are
+able to laterally move away from the initial entry point.
+
+TODO: Update the next paragraph depending on the `sapi_library` target in BUILD.
+
+The `sandboxed_hunspell`
+([//third_party/hunspell/sandbox:sandboxed_hunspell](http:///sandbox/BUILD))
+library provides only a subset of the unsandboxed spellcheck
+APIs. This subset is listed in the `functions` argument of the `sapi_library`
+BUILD rule. If more functions are needed, reach out to ISE Sandboxing
+(go/ise-sandboxing) for guidance.
+
+## sandboxed_hunspell
+
+This is the SAPI implementation of `//third_party/hunspell:hunspell`. It gives
+users of the sandboxed API the most control over the sandbox's lifetime and
+the ability to implement additional customizations. This comes
+at the cost of simplicity.
+
+### Getting started
+
+Follow these steps:
+
+1. In your BUILD file, add the target to your `deps` list:
+
+   ```
+   "//third_party/hunspell/sandbox:sandboxed_hunspell",
+   ```
+
+1. Add the library headers in your source files:
+
+   ```
+   #include "third_party/hunspell/sandbox/sandbox.h"
+   #include "third_party/hunspell/sandbox/sandboxed_hunspell.sapi.h"
+   ```
+
+1. Create and then initialize the sandbox.
+
+   ```
+   sandboxed_hunspell::LibHunspellSapiSandbox sbx("utf8.aff", "utf8.dic");
+   SAPI_RETURN_IF_ERROR(sbx.Init());
+   ```
+
+1. Create the API object with the initialized sandbox:
+
+   ```
+   sandboxed_hunspell::LibHunspellApi api(&sbx);
+   ```
+
+1. Prepare the SAPI variables for the sandboxed API function call. For an
+   explanation of SAPI variables, please consult go/sapi/variables.
+
+   ```
+    std::string s_afn = "utf8.aff";
+    std::string s_dfn = "utf8.dic";
+
+    sapi::v::ConstCStr c_afn(s_afn.c_str());
+    sapi::v::ConstCStr c_dfn(s_dfn.c_str());
+
+    SAPI_ASSERT_OK_AND_ASSIGN(
+        sandboxed_hunspell::Hunhandle * hunspell,
+        api.Hunspell_create(c_afn.PtrBefore(), c_dfn.PtrBefore()));
+   ```
+
+### sandboxed_hunspell_test
+
+This tests the basic functionality of `sandboxed_hunspell` and benchmarks it
+against the unsandboxed `hunspell`. It is also a good reference for
+understanding how the SAPI variables need to be prepared.
diff --git a/sandbox/example_suggestions.cc b/sandbox/example_suggestions.cc
new file mode 100644
index 0000000..d9dcace
--- /dev/null
+++ b/sandbox/example_suggestions.cc
@@ -0,0 +1,125 @@
+// This program checks spelling and suggests corrections for misspelled words.
+//
+// This example illustrates the use of the sandboxed_hunspell library with an
+// affix file (definitions for the dictionary) and a dictionary file to check
+// the words listed, one per line, in the file given by --in_file.
+
+#include <unistd.h>
+
+#include <cstdlib>
+#include <string>
+
+#include "base/init_google.h"
+#include "file/base/filelineiter.h"
+#include "third_party/absl/flags/flag.h"
+#include "third_party/absl/log/flags.h"
+#include "third_party/absl/log/log.h"
+#include "third_party/absl/status/status.h"
+#include "third_party/absl/status/statusor.h"
+#include "third_party/hunspell/sandbox/sandbox.h"
+#include "third_party/hunspell/sandbox/sandboxed_hunspell.sapi.h"
+#include "third_party/sandboxed_api/util/status_macros.h"
+#include "third_party/sandboxed_api/var_array.h"
+#include "third_party/sandboxed_api/var_int.h"
+#include "third_party/sandboxed_api/var_ptr.h"
+
+ABSL_FLAG(std::string, affix_file, "", "File path to affix file.");
+ABSL_FLAG(std::string, dict_file, "", "File path to dictionary file.");
+ABSL_FLAG(std::string, in_file, "", "File path to file to check.");
+
+// Logs hunspell's suggestions for `word`, then frees the remote list. A failed
+// transfer or string read returns early, before the list is freed.
+absl::Status PrintSuggest(sandboxed_hunspell::LibHunspellApi& api,
+                          sapi::v::RemotePtr& hunspellrp,
+                          sapi::v::ConstCStr& word) {
+  sapi::v::GenericPtr outptr;
+  SAPI_ASSIGN_OR_RETURN(
+      int nlist,
+      api.Hunspell_suggest(&hunspellrp, outptr.PtrAfter(), word.PtrBefore()));
+  // The sandboxee reports this count; do not size a host array from <= 0.
+  if (nlist <= 0) {
+    LOG(INFO) << "No suggestions.";
+    return absl::OkStatus();
+  }
+
+  sapi::v::Array<char*> suggestion_listp(nlist);
+  suggestion_listp.SetRemote(reinterpret_cast<void*>(outptr.GetValue()));
+  SAPI_RETURN_IF_ERROR(
+      api.GetSandbox()->TransferFromSandboxee(&suggestion_listp));
+
+  LOG(INFO) << "Suggestions:";
+  for (int i = 0; i < nlist; ++i) {
+    sapi::v::RemotePtr sugrp(suggestion_listp[i]);
+    SAPI_ASSIGN_OR_RETURN(std::string suggestion,
+                          api.GetSandbox()->GetCString(sugrp));
+    LOG(INFO) << suggestion;
+  }
+  return api.Hunspell_free_list(&hunspellrp, outptr.PtrNone(), nlist);
+}
+
+// Spell-checks each line of --in_file via the sandboxed hunspell API, logging
+// suggestions for misspelled words. Any failed sandbox call aborts the run.
+int main(int argc, char* argv[]) {
+  InitGoogle(argv[0], &argc, &argv, /*remove_flags=*/true);
+  // Always log to stderr
+  absl::SetFlag(&FLAGS_logtostderr, true);
+
+  const std::string affix_file = absl::GetFlag(FLAGS_affix_file);
+  const std::string dict_file = absl::GetFlag(FLAGS_dict_file);
+  const std::string in_file = absl::GetFlag(FLAGS_in_file);
+  // The message below names all three flags, so all three are validated.
+  if (affix_file.empty() || dict_file.empty() || in_file.empty()) {
+    LOG(ERROR) << "--" << absl::GetFlagReflectionHandle(FLAGS_affix_file).Name()
+               << ", --"
+               << absl::GetFlagReflectionHandle(FLAGS_dict_file).Name()
+               << ", and --"
+               << absl::GetFlagReflectionHandle(FLAGS_in_file).Name()
+               << " are required";
+    return EXIT_FAILURE;
+  }
+
+  sapi::v::ConstCStr c_affix_file(affix_file.c_str());
+  sapi::v::ConstCStr c_dictionary_file(dict_file.c_str());
+
+  sandboxed_hunspell::LibHunspellSapiSandbox sandbox(affix_file, dict_file);
+  if (absl::Status status = sandbox.Init(); !status.ok()) {
+    LOG(ERROR) << "Unable to start sandbox: " << status;
+    return EXIT_FAILURE;
+  }
+
+  sandboxed_hunspell::LibHunspellApi api(&sandbox);
+  absl::StatusOr<sandboxed_hunspell::Hunhandle*> hunspell = api.Hunspell_create(
+      c_affix_file.PtrBefore(), c_dictionary_file.PtrBefore());
+  if (!hunspell.ok()) {
+    LOG(ERROR) << "Could not initialize hunspell: " << hunspell.status();
+    return EXIT_FAILURE;
+  }
+
+  sapi::v::RemotePtr hunspellrp(*hunspell);
+  for (std::string& buf : FileLines(in_file, FileLineIterator::NO_LF)) {
+    sapi::v::ConstCStr cbuf(buf.c_str());
+    absl::StatusOr<int> result =
+        api.Hunspell_spell(&hunspellrp, cbuf.PtrBefore());
+    if (!result.ok()) {
+      LOG(ERROR) << "Could not check word: " << result.status();
+      return EXIT_FAILURE;
+    }
+    if (*result) {
+      LOG(INFO) << "Word " << buf << " is ok";
+      continue;
+    }
+    LOG(INFO) << "Word " << buf << " is incorrect";
+    absl::Status status = PrintSuggest(api, hunspellrp, cbuf);
+    if (!status.ok()) {
+      LOG(ERROR) << "Unable to get all suggestions: " << status;
+      return EXIT_FAILURE;
+    }
+  }
+
+  if (absl::Status status = api.Hunspell_destroy(&hunspellrp); !status.ok()) {
+    LOG(ERROR) << "Unable to destroy hunspell: " << status;
+    return EXIT_FAILURE;
+  }
+
+  return EXIT_SUCCESS;
+}
diff --git a/sandbox/sandbox.h b/sandbox/sandbox.h
new file mode 100644
index 0000000..9896b34
--- /dev/null
+++ b/sandbox/sandbox.h
@@ -0,0 +1,34 @@
+#ifndef THIRD_PARTY_HUNSPELL_SANDBOX_SANDBOX_H_
+#define THIRD_PARTY_HUNSPELL_SANDBOX_SANDBOX_H_
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "third_party/hunspell/sandbox/sandboxed_hunspell.sapi.h"
+#include "third_party/sandboxed_api/sandbox2/policy.h"
+
+namespace sandboxed_hunspell {
+
+// Hunspell sandbox whose policy exposes the affix/dictionary files read-only.
+class LibHunspellSapiSandbox : public LibHunspellSandbox {
+ public:
+  explicit LibHunspellSapiSandbox(std::string affix_file_name,
+                                  std::string dictionary_file_name)
+      : affix_file_name_(std::move(affix_file_name)),
+        dictionary_file_name_(std::move(dictionary_file_name)) {}
+
+ private:
+  std::unique_ptr<sandbox2::Policy> ModifyPolicy(
+      sandbox2::PolicyBuilder* builder) override {
+    builder->AddFile(affix_file_name_, /*is_ro=*/true);
+    return builder->AddFile(dictionary_file_name_, /*is_ro=*/true).BuildOrDie();
+  }
+
+  std::string affix_file_name_;
+  std::string dictionary_file_name_;
+};
+
+}  // namespace sandboxed_hunspell
+
+#endif  // THIRD_PARTY_HUNSPELL_SANDBOX_SANDBOX_H_
diff --git a/sandbox/sandboxed_hunspell_test.cc b/sandbox/sandboxed_hunspell_test.cc
new file mode 100644
index 0000000..2477da9
--- /dev/null
+++ b/sandbox/sandboxed_hunspell_test.cc
@@ -0,0 +1,197 @@
+// Unit test for sandboxed_hunspell
+
+#include <optional>
+#include <string>
+
+#include "devtools/build/runtime/get_runfiles_dir.h"
+#include "file/base/filelineiter.h"
+#include "file/base/path.h"
+#include "testing/base/public/benchmark.h"
+#include "testing/base/public/gmock.h"
+#include "testing/base/public/gunit.h"
+#include "third_party/absl/log/check.h"
+#include "third_party/absl/log/log.h"
+#include "third_party/absl/status/status.h"
+#include "third_party/absl/strings/string_view.h"
+#include "third_party/hunspell/sandbox/sandbox.h"
+#include "third_party/hunspell/sandbox/sandboxed_hunspell.sapi.h"
+#include "third_party/hunspell/src/hunspell/hunspell.h"
+#include "third_party/sandboxed_api/util/status_matchers.h"
+#include "third_party/sandboxed_api/vars.h"
+
+namespace {
+
+// Directory, resolved via GetDataDependencyFilepath, holding the test data.
+constexpr absl::string_view kTestDir = "/tests";
+
+constexpr absl::string_view kAffixFileName = "utf8.aff";
+constexpr absl::string_view kDictionaryFileName = "utf8.dic";
+constexpr absl::string_view kGoodFileName = "utf8.good";
+constexpr absl::string_view kWrongFileName = "utf8_nonbmp.wrong";
+
+constexpr absl::string_view kSuggestion = "fo";
+constexpr absl::string_view kRandomWord = "random_word123";
+
+// Returns the path of test data file `filename` inside the test directory.
+std::string GetTestFilePath(absl::string_view filename) {
+  return file::JoinPath(devtools_build::GetDataDependencyFilepath(kTestDir),
+                        filename);
+}
+
+// Fixture giving each test a fresh sandbox and a remote hunspell handle.
+class HunspellTest : public ::testing::Test {
+ public:
+  HunspellTest()
+      : s_affix_filename_(GetTestFilePath(kAffixFileName)),
+        s_dictionary_filename_(GetTestFilePath(kDictionaryFileName)),
+        sandbox_(s_affix_filename_, s_dictionary_filename_),
+        api_(&sandbox_) {
+    sapi::v::ConstCStr affix_arg(s_affix_filename_.c_str());
+    sapi::v::ConstCStr dictionary_arg(s_dictionary_filename_.c_str());
+    CHECK(sandbox_.Init().ok());
+
+    // Create the handle inside the sandboxee and keep its remote address.
+    absl::StatusOr<sandboxed_hunspell::Hunhandle*> hunspell =
+        api_.Hunspell_create(affix_arg.PtrBefore(), dictionary_arg.PtrBefore());
+    CHECK(hunspell.ok());
+    hunspellrp_.emplace(*hunspell);
+  }
+
+  // Destroys the remote handle; nothing to do when it was never created.
+  ~HunspellTest() override {
+    if (!hunspellrp_.has_value()) return;
+    absl::Status status = api_.Hunspell_destroy(&hunspellrp_.value());
+    CHECK(status.ok());
+  }
+
+ protected:
+  const std::string s_affix_filename_;
+  const std::string s_dictionary_filename_;
+  sandboxed_hunspell::LibHunspellSapiSandbox sandbox_;
+  sandboxed_hunspell::LibHunspellApi api_;
+  std::optional<sapi::v::RemotePtr> hunspellrp_;
+};
+
+// The dictionary encoding string, copied out of the sandboxee, must be UTF-8.
+TEST_F(HunspellTest, CheckEncoding) {
+  SAPI_ASSERT_OK_AND_ASSIGN(char* ret,
+                            api_.Hunspell_get_dic_encoding(&(*hunspellrp_)));
+  SAPI_ASSERT_OK_AND_ASSIGN(std::string encoding, api_.GetSandbox()->GetCString(
+                                                      sapi::v::RemotePtr(ret)));
+  EXPECT_EQ(encoding, "UTF-8");
+}
+
+// Every line of the known-good word file must spell-check as correct (1).
+TEST_F(HunspellTest, CheckGoodSpell) {
+  SAPI_ASSERT_OK_AND_ASSIGN(char* _,
+                            api_.Hunspell_get_dic_encoding(&(*hunspellrp_)));
+  // NOTE(review): result unused; presumably a sanity round trip - confirm.
+  for (std::string& buf :
+       FileLines(GetTestFilePath(kGoodFileName), FileLineIterator::NO_LF)) {
+    sapi::v::ConstCStr cbuf(buf.c_str());
+    SAPI_ASSERT_OK_AND_ASSIGN(
+        int result, api_.Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
+    ASSERT_EQ(result, 1);
+  }
+}
+
+// Every line of the known-wrong word file must be rejected (0).
+TEST_F(HunspellTest, CheckWrongSpell) {
+  SAPI_ASSERT_OK_AND_ASSIGN(char* _,
+                            api_.Hunspell_get_dic_encoding(&(*hunspellrp_)));
+  // NOTE(review): result unused; presumably a sanity round trip - confirm.
+  for (std::string& buf :
+       FileLines(GetTestFilePath(kWrongFileName), FileLineIterator::NO_LF)) {
+    sapi::v::ConstCStr cbuf(buf.c_str());
+    SAPI_ASSERT_OK_AND_ASSIGN(
+        int result, api_.Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
+    ASSERT_EQ(result, 0);
+  }
+}
+
+// A word unknown to the dictionary is accepted only between add and remove.
+TEST_F(HunspellTest, CheckAddToDict) {
+  sapi::v::ConstCStr cbuf(kRandomWord.data());
+  int result;
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      result, api_.Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
+  ASSERT_EQ(result, 0);
+
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      result, api_.Hunspell_add(&(*hunspellrp_), cbuf.PtrBefore()));
+  ASSERT_EQ(result, 0);
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      result, api_.Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
+  ASSERT_EQ(result, 1);
+
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      result, api_.Hunspell_remove(&(*hunspellrp_), cbuf.PtrBefore()));
+  ASSERT_EQ(result, 0);
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      result, api_.Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
+  ASSERT_EQ(result, 0);
+}
+
+// A misspelled word must yield at least one suggestion.
+TEST_F(HunspellTest, CheckSuggestion) {
+  sapi::v::ConstCStr cbuf(kSuggestion.data());
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      int result, api_.Hunspell_spell(&(*hunspellrp_), cbuf.PtrBefore()));
+  ASSERT_EQ(result, 0);
+  sapi::v::GenericPtr outptr;
+  SAPI_ASSERT_OK_AND_ASSIGN(
+      int nlist, api_.Hunspell_suggest(&(*hunspellrp_), outptr.PtrAfter(),
+                                       cbuf.PtrBefore()));
+  ASSERT_GT(nlist, 0);
+}
+
+// The following go/benchmark tests allow viewing the performance overhead
+// the sandboxed API causes. Due to the nature of SAPI sandboxes, the only
+// reliable metric is `time/op`.
+// Please note that every SAPI sandbox cold start takes ~10ms.
+
+// Times one complete sandboxed round trip per iteration: sandbox start-up,
+// handle creation, a single spell check, and handle destruction.
+void BM_Sandboxed_hunspell_spell(benchmark::State& state) {
+  for (const auto _ : state) {
+    const std::string affix_path = GetTestFilePath("utf8.aff");
+    const std::string dictionary_path = GetTestFilePath("utf8.dic");
+
+    sandboxed_hunspell::LibHunspellSapiSandbox sandbox(affix_path,
+                                                       dictionary_path);
+    ASSERT_OK(sandbox.Init());
+    sandboxed_hunspell::LibHunspellApi api(&sandbox);
+
+    sapi::v::ConstCStr affix_arg(affix_path.c_str());
+    sapi::v::ConstCStr dictionary_arg(dictionary_path.c_str());
+    SAPI_ASSERT_OK_AND_ASSIGN(
+        sandboxed_hunspell::Hunhandle * hunspell,
+        api.Hunspell_create(affix_arg.PtrBefore(),
+                            dictionary_arg.PtrBefore()));
+    sapi::v::RemotePtr hunspellrp(hunspell);
+
+    // kSuggestion is a misspelling, so the expected spell result is 0.
+    sapi::v::ConstCStr cbuf(kSuggestion.data());
+    SAPI_ASSERT_OK_AND_ASSIGN(
+        int result, api.Hunspell_spell(&hunspellrp, cbuf.PtrBefore()));
+    ASSERT_EQ(result, 0);
+
+    absl::Status status = api.Hunspell_destroy(&hunspellrp);
+    ASSERT_THAT(status, sapi::IsOk());
+  }
+}
+
+// Unsandboxed baseline: in-process create, one spell check, and destroy.
+void BM_Unsandboxed_hunspell_spell(benchmark::State& state) {
+  for (const auto _ : state) {
+    Hunhandle* hunspell = Hunspell_create(GetTestFilePath("utf8.aff").c_str(),
+                                          GetTestFilePath("utf8.dic").c_str());
+    ASSERT_EQ(Hunspell_spell(hunspell, kSuggestion.data()), 0);
+    Hunspell_destroy(hunspell);
+  }
+}
+
+BENCHMARK(BM_Sandboxed_hunspell_spell);
+BENCHMARK(BM_Unsandboxed_hunspell_spell);
+
+}  // namespace