nvaccess · seanbudd · May 11, 2026 · May 4, 2026 · May 4, 2026 · May 4, 2026
@@ -15,7 +15,11 @@ JiebaSingleton* JiebaSingleton::instance = nullptr;
 std::once_flag JiebaSingleton::initFlag;
 
 JiebaSingleton& JiebaSingleton::getInstance(const char* dictDir) {
-    // convert incoming C-string+length to std::string (handles dictDir == nullptr)
+    if (!dictDir) {
+        throw std::invalid_argument("JiebaSingleton::getInstance() requires a non-null dictionary path.");
+    }
+
+    // convert incoming C-string to std::string before entering call_once
     std::string dir = dictDir;
 
     // ensure singleton is constructed exactly once
@@ -64,6 +68,8 @@ void JiebaSingleton::getWordEndOffsets(const std::string& text, std::vector<int>
 extern "C" {
 
 bool initJieba(const char* dictDir) {
+    if (!dictDir) return false;
+
     try {
         // simply force the singleton into existence
         (void)JiebaSingleton::getInstance(dictDir);
@@ -105,16 +111,34 @@ bool calculateWordOffsets(const char* text, int** wordEndOffsets, int* outLen) {
     }
 }
 
-bool insertUserWord(const char* word, int freq, const char* tag = cppjieba::UNKNOWN_TAG) {
-	return JiebaSingleton::getInstance().InsertUserWord(string(word), freq, string(tag));
+bool insertUserWord(const char* word, int freq, const char* tag) {
+    if (!word || !tag) return false;  // reject null pointers before they reach the std::string constructors below
+
+    try {
+        return JiebaSingleton::getInstance().InsertUserWord(string(word), freq, string(tag));
+    } catch (...) {  // any exception raised while obtaining the singleton or inserting is reported as failure
+        return false;
+    }
 }
 
-bool deleteUserWord(const char* word, const char* tag = cppjieba::UNKNOWN_TAG) {
-	return JiebaSingleton::getInstance().DeleteUserWord(string(word), string(tag));
+bool deleteUserWord(const char* word, const char* tag) {
+    if (!word || !tag) return false;  // reject null pointers before they reach the std::string constructors below
+
+    try {
+        return JiebaSingleton::getInstance().DeleteUserWord(string(word), string(tag));
+    } catch (...) {  // any exception raised while obtaining the singleton or deleting is reported as failure
+        return false;
+    }
 }
 
 bool find(const char* word) {
-	return JiebaSingleton::getInstance().Find(string(word));
+    if (!word) return false;  // a null word is reported as "not found" instead of being passed to std::string
+
+    try {
+        return JiebaSingleton::getInstance().Find(string(word));
+    } catch (...) {  // any exception raised while obtaining the singleton or searching yields false
+        return false;
+    }
 }
 
 void freeOffsets(int* ptr) {

@@ -27,8 +27,8 @@ We currently have the following templates:
   * Please note that these are reported differently, for more information refer to our [disclosure policy/procedure](https://github.com/nvaccess/nvda/blob/master/security.md)
 * Issues with materials handled by translators should be reported to the [NVDA Translators list](https://groups.io/g/nvda-translations).
 These include:
-  * NVDA interface text that is incorrect in languages other than English
-  * Contents of the User Guide and Changes documents that are incorrect in languages other than English
+  * NVDA interface text that is incorrect in languages other than English
+  * Contents of the User Guide and Changes documents that are incorrect in languages other than English
   * Input gestures, punctuation/symbol pronunciations, and character descriptions in languages other than English
 
 These templates are fillable forms that guide you through the process of providing the necessary information for your issue.

@@ -10,8 +10,8 @@ Do not report security concerns via GitHub issues, instead follow our [security
 Issues with materials handled by translators should be reported to the [NVDA Translators list](https://groups.io/g/nvda-translations).
 These include:
 
-* NVDA interface text that is incorrect in languages other than English
-* Contents of the User Guide and Changes documents that are incorrect in languages other than English
+* NVDA interface text that is incorrect in languages other than English
+* Contents of the User Guide and Changes documents that are incorrect in languages other than English
 * Input gestures, punctuation/symbol pronunciations, and character descriptions in languages other than English
 
 If you are reporting an issue with an application or website, please consider reporting the issue to the [authors of the application/website](./thirdPartyReporting.md) first.

@@ -600,32 +600,33 @@ def update(self):
 		if config.conf["braille"]["expandAtCursor"] and self.cursorPos is not None:
 			mode |= louis.compbrlAtCursor
 
-		converter: OffsetConverter | None = None
+		converters: list[OffsetConverter] = []
 		textToTranslate = self.rawText
 		textToTranslateTypeforms = self.rawTextTypeforms
 		cursorPos = self.cursorPos
+
+		def _applyConverter(converter: OffsetConverter) -> None:  # apply one converter to the pending text, typeforms and cursor
+			nonlocal cursorPos, textToTranslate, textToTranslateTypeforms  # rebinds the locals of the enclosing update()
+			if textToTranslateTypeforms is not None:
+				textToTranslateTypeforms = [  # one typeform per encoded offset, taken from the source offset it maps back to
+					textToTranslateTypeforms[converter.encodedToStrOffsets(encodedOffset)]
+					for encodedOffset in range(converter.encodedStringLength)
+				]
+			if cursorPos is not None:
+				cursorPos = converter.strToEncodedOffsets(cursorPos)  # express the cursor in the encoded text's offsets
+			textToTranslate = converter.encoded  # later converters and the translation operate on the encoded text
+			converters.append(converter)  # kept in application order; update() walks this list in reverse after translating
+
 		if (
 			config.conf["braille"]["translationTable"].startswith("zh")
 			or config.conf["braille"]["translationTable"] == "auto"
 			and brailleTables.getDefaultTableForCurLang(brailleTables.TableType.OUTPUT).startswith("zh")
 		):
 			from textUtils.wordSeg.wordSegUtils import WordSegWithSeparatorOffsetConverter  # noqa: F401
 
-			converter = WordSegWithSeparatorOffsetConverter(textToTranslate)
-			textToTranslate = converter.encoded
-			if cursorPos is not None:
-				cursorPos = converter.strToEncodedOffsets(cursorPos)
+			_applyConverter(WordSegWithSeparatorOffsetConverter(textToTranslate))
 		if config.conf["braille"]["unicodeNormalization"] and not isUnicodeNormalized(textToTranslate):
-			converter = UnicodeNormalizationOffsetConverter(textToTranslate)
-			textToTranslate = converter.encoded
-			if textToTranslateTypeforms is not None:
-				# Typeforms must be adapted to represent normalized characters.
-				textToTranslateTypeforms = [
-					textToTranslateTypeforms[strOffset] for strOffset in converter.computedEncodedToStrOffsets
-				]
-			if cursorPos is not None:
-				# Convert the cursor position to a normalized offset.
-				cursorPos = converter.strToEncodedOffsets(cursorPos)
+			_applyConverter(UnicodeNormalizationOffsetConverter(textToTranslate))
 		self.brailleCells, brailleToRawPos, rawToBraillePos, self.brailleCursorPos = louisHelper.translate(
 			[handler.table.fileName, "braille-patterns.cti"],
 			textToTranslate,
@@ -634,13 +635,13 @@ def update(self):
 			cursorPos=cursorPos,
 		)
 
-		if converter:
-			# The received brailleToRawPos contains braille to normalized positions.
-			# Process them to represent real raw positions by converting them from normalized ones.
+		for converter in reversed(converters):
+			# Convert liblouis offsets from the most recently transformed text
+			# back through each transformation to the original raw text.
 			brailleToRawPos = [converter.encodedToStrOffsets(i) for i in brailleToRawPos]
-			# The received rawToBraillePos contains normalized to braille positions.
-			# Create a new list based on real raw positions.
-			rawToBraillePos = [rawToBraillePos[i] for i in converter.computedStrToEncodedOffsets]
+			rawToBraillePos = [
+				rawToBraillePos[converter.strToEncodedOffsets(i)] for i in range(converter.strLength)
+			]
 		self.brailleToRawPos = brailleToRawPos
 		self.rawToBraillePos = rawToBraillePos
 

@@ -280,6 +280,7 @@
 	reportClickable = boolean(default=true)
 
 [documentNavigation]
+	# Hidden option to eagerly initialize Chinese word segmentation even when the current languages do not use it.
 	initWordSegForUnusedLang = boolean(default=false)
 	wordSegmentationStandard = featureFlag(optionsEnum="WordNavigationUnitFlag", behaviorOfDefault="Auto")
 	paragraphStyle = featureFlag(optionsEnum="ParagraphNavigationFlag", behaviorOfDefault="application")

@@ -171,7 +171,8 @@ def wordSegFlag(self) -> WordSegFlag | None:
 			case config.featureFlagEnums.WordNavigationUnitFlag.CHINESE:
 				return WordSegFlag.CHINESE
 			case _:
-				log.error(f"Unknown word segmentation standard, {self.__wordSegConf.calculated()!r}")
+				log.error(f"Unknown word segmentation standard, {self.wordSegConf.calculated()!r}")
+				return None
 
 	#: The encoding internal to the underlying text info implementation.
 	encoding: Optional[str] = textUtils.WCHAR_ENCODING

@@ -602,7 +602,9 @@ def __init__(self, text: str, encoding: str = "UTF-8", wordSegFlag: WordSegFlag
 		self.wordSegFlag: WordSegFlag = wordSegFlag
 		self.strategy: wordSegStrategy.WordSegmentationStrategy = self._chooseStrategy()
 
-	def _chooseStrategy(self) -> wordSegStrategy.WordSegmentationStrategy:  # TODO: optimize
+	def _chooseStrategy(
+		self,
+	) -> wordSegStrategy.WordSegmentationStrategy:  # TODO: Limit regex scans for large text.
 		"""Choose the appropriate segmentation strategy based on the text content."""
 		if self.wordSegFlag == WordSegFlag.AUTO:
 			if (
@@ -627,6 +629,7 @@ def _chooseStrategy(self) -> wordSegStrategy.WordSegmentationStrategy:  # TODO:
 						return wordSegStrategy.UniscribeWordSegmentationStrategy(self.text, self.encoding)
 				case _:
 					return wordSegStrategy.UniscribeWordSegmentationStrategy(self.text, self.encoding)
+		return wordSegStrategy.UniscribeWordSegmentationStrategy(self.text, self.encoding)
 
 	def getSegmentForOffset(self, offset: int) -> tuple[int, int] | None:
 		"""Get the segment containing the given offset."""

@@ -43,4 +43,3 @@ def initialize():
 			Thread(target=callable_to_call, args=args, kwargs=kwargs, daemon=True).start()
 		except Exception as e:
 			log.debug("Initializer %s.%s failed: %s", module_name, qualname, e)
-		return