From 9fa3bc40d7c0f6a5adce79824ae11a28279b19e3 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 30 Oct 2024 12:13:11 +0800 Subject: [PATCH] Fix reading tokens.txt on Windows. (#1497) --- sherpa-onnx/csrc/symbol-table.cc | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sherpa-onnx/csrc/symbol-table.cc b/sherpa-onnx/csrc/symbol-table.cc index eed7a1e53..173b060b4 100644 --- a/sherpa-onnx/csrc/symbol-table.cc +++ b/sherpa-onnx/csrc/symbol-table.cc @@ -23,6 +23,29 @@ namespace sherpa_onnx { +namespace { +// File-local trimming helpers; `ws` is the default set of characters to strip (space, \t, \n, \r, \f, \v). Copied from +// https://stackoverflow.com/questions/216823/how-to-trim-a-stdstring +const char *ws = " \t\n\r\f\v"; + +// Erases, in place, every trailing character of s that appears in t and returns s; s becomes empty if it contains no other character. +inline std::string &TrimRight(std::string &s, const char *t = ws) { + s.erase(s.find_last_not_of(t) + 1); + return s; +} + +// Erases, in place, every leading character of s that appears in t and returns s; s becomes empty if it contains no other character. +inline std::string &TrimLeft(std::string &s, const char *t = ws) { + s.erase(0, s.find_first_not_of(t)); + return s; +} + +// Trims s in place at both ends (TrimRight first, then TrimLeft) using the characters in t, and returns s. +inline std::string &Trim(std::string &s, const char *t = ws) { + return TrimLeft(TrimRight(s, t), t); +} +} // namespace + std::unordered_map ReadTokens( std::istream &is, std::unordered_map *id2token /*= nullptr*/) { @@ -33,6 +56,7 @@ std::unordered_map ReadTokens( std::string sym; int32_t id = -1; while (std::getline(is, line)) { + Trim(line); /* presumably strips the '\r' left by CRLF line endings -- confirm */ std::istringstream iss(line); iss >> sym; if (iss.eof()) {