Skip to content
Merged
36 changes: 30 additions & 6 deletions nvdaHelper/cppjieba/cppjieba.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ JiebaSingleton* JiebaSingleton::instance = nullptr;
std::once_flag JiebaSingleton::initFlag;

JiebaSingleton& JiebaSingleton::getInstance(const char* dictDir) {
// convert incoming C-string+length to std::string (handles dictDir == nullptr)
if (!dictDir) {
throw std::invalid_argument("JiebaSingleton::getInstance() requires a non-null dictionary path.");
}

// convert incoming C-string to std::string before entering call_once
std::string dir = dictDir;

// ensure singleton is constructed exactly once
Expand Down Expand Up @@ -64,6 +68,8 @@ void JiebaSingleton::getWordEndOffsets(const std::string& text, std::vector<int>
extern "C" {

bool initJieba(const char* dictDir) {
if (!dictDir) return false;

try {
// simply force the singleton into existence
(void)JiebaSingleton::getInstance(dictDir);
Expand Down Expand Up @@ -105,16 +111,34 @@ bool calculateWordOffsets(const char* text, int** wordEndOffsets, int* outLen) {
}
}

/// @brief Adds a user-defined word to the shared Jieba instance.
/// @param word Null-terminated word to insert.
/// @param freq Frequency value forwarded unchanged to InsertUserWord.
/// @param tag Null-terminated tag forwarded to InsertUserWord.
/// @return The result of InsertUserWord; false when word or tag is null,
///         or when any exception is thrown while performing the insert.
bool insertUserWord(const char* word, int freq, const char* tag) {
	// Constructing std::string from a null pointer is undefined behaviour,
	// so reject null arguments before any conversion happens.
	if (!word || !tag) {
		return false;
	}

	try {
		// NOTE(review): getInstance() is called without a dictionary path here,
		// while getInstance(const char*) rejects a null path - confirm the
		// no-argument call is valid once the singleton has been initialized.
		return JiebaSingleton::getInstance().InsertUserWord(std::string(word), freq, std::string(tag));
	} catch (...) {
		// Report failure as false instead of letting an exception escape to the caller.
		return false;
	}
}

/// @brief Removes a user-defined word from the shared Jieba instance.
/// @param word Null-terminated word to delete.
/// @param tag Null-terminated tag forwarded to DeleteUserWord.
/// @return The result of DeleteUserWord; false when word or tag is null,
///         or when any exception is thrown while performing the delete.
bool deleteUserWord(const char* word, const char* tag) {
	// Constructing std::string from a null pointer is undefined behaviour,
	// so reject null arguments before any conversion happens.
	if (!word || !tag) {
		return false;
	}

	try {
		// NOTE(review): getInstance() is called without a dictionary path here,
		// while getInstance(const char*) rejects a null path - confirm the
		// no-argument call is valid once the singleton has been initialized.
		return JiebaSingleton::getInstance().DeleteUserWord(std::string(word), std::string(tag));
	} catch (...) {
		// Report failure as false instead of letting an exception escape to the caller.
		return false;
	}
}

/// @brief Looks up a word in the shared Jieba instance.
/// @param word Null-terminated word to look up.
/// @return The result of Find; false when word is null or when any
///         exception is thrown during the lookup.
bool find(const char* word) {
	// The null check must run before any std::string is built from word:
	// constructing std::string from a null pointer is undefined behaviour.
	if (!word) {
		return false;
	}

	try {
		// NOTE(review): getInstance() is called without a dictionary path here,
		// while getInstance(const char*) rejects a null path - confirm the
		// no-argument call is valid once the singleton has been initialized.
		return JiebaSingleton::getInstance().Find(std::string(word));
	} catch (...) {
		// Report failure as false instead of letting an exception escape to the caller.
		return false;
	}
}

void freeOffsets(int* ptr) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ We currently have the following templates:
* Please note that these are reported differently; for more information, refer to our [disclosure policy/procedure](https://github.com/nvaccess/nvda/blob/master/security.md)
* Issues with materials handled by translators should be reported to the [NVDA Translators list](https://groups.io/g/nvda-translations).
These include:
* NVDA interface text that is incorrect in languages other than English
* Contents of the User Guide and Changes documents that are incorrect in languages other than English
* NVDA interface text that is incorrect in languages other than English
Comment thread
CrazySteve0605 marked this conversation as resolved.
* Contents of the User Guide and Changes documents that are incorrect in languages other than English
* Input gestures, punctuation/symbol pronunciations, and character descriptions in languages other than English

These templates are fillable forms that guide you through the process of providing the necessary information for your issue.
Expand Down
4 changes: 2 additions & 2 deletions projectDocs/issues/readme.md
Comment thread
CrazySteve0605 marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ Do not report security concerns via GitHub issues, instead follow our [security
Issues with materials handled by translators should be reported to the [NVDA Translators list](https://groups.io/g/nvda-translations).
These include:

* NVDA interface text that is incorrect in languages other than English
* Contents of the User Guide and Changes documents that are incorrect in languages other than English
* NVDA interface text that is incorrect in languages other than English
* Contents of the User Guide and Changes documents that are incorrect in languages other than English
* Input gestures, punctuation/symbol pronunciations, and character descriptions in languages other than English

If you are reporting an issue with an application or website, please consider reporting the issue to the [authors of the application/website](./thirdPartyReporting.md) first.
Expand Down
43 changes: 22 additions & 21 deletions source/braille.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,32 +600,33 @@ def update(self):
if config.conf["braille"]["expandAtCursor"] and self.cursorPos is not None:
mode |= louis.compbrlAtCursor

converter: OffsetConverter | None = None
converters: list[OffsetConverter] = []
textToTranslate = self.rawText
textToTranslateTypeforms = self.rawTextTypeforms
cursorPos = self.cursorPos

def _applyConverter(converter: OffsetConverter) -> None:
nonlocal cursorPos, textToTranslate, textToTranslateTypeforms
if textToTranslateTypeforms is not None:
textToTranslateTypeforms = [
textToTranslateTypeforms[converter.encodedToStrOffsets(encodedOffset)]
for encodedOffset in range(converter.encodedStringLength)
]
if cursorPos is not None:
cursorPos = converter.strToEncodedOffsets(cursorPos)
textToTranslate = converter.encoded
converters.append(converter)

if (
config.conf["braille"]["translationTable"].startswith("zh")
or config.conf["braille"]["translationTable"] == "auto"
and brailleTables.getDefaultTableForCurLang(brailleTables.TableType.OUTPUT).startswith("zh")
):
from textUtils.wordSeg.wordSegUtils import WordSegWithSeparatorOffsetConverter # noqa: F401

converter = WordSegWithSeparatorOffsetConverter(textToTranslate)
textToTranslate = converter.encoded
if cursorPos is not None:
cursorPos = converter.strToEncodedOffsets(cursorPos)
_applyConverter(WordSegWithSeparatorOffsetConverter(textToTranslate))
if config.conf["braille"]["unicodeNormalization"] and not isUnicodeNormalized(textToTranslate):
converter = UnicodeNormalizationOffsetConverter(textToTranslate)
textToTranslate = converter.encoded
if textToTranslateTypeforms is not None:
# Typeforms must be adapted to represent normalized characters.
textToTranslateTypeforms = [
textToTranslateTypeforms[strOffset] for strOffset in converter.computedEncodedToStrOffsets
]
if cursorPos is not None:
# Convert the cursor position to a normalized offset.
cursorPos = converter.strToEncodedOffsets(cursorPos)
_applyConverter(UnicodeNormalizationOffsetConverter(textToTranslate))
self.brailleCells, brailleToRawPos, rawToBraillePos, self.brailleCursorPos = louisHelper.translate(
[handler.table.fileName, "braille-patterns.cti"],
textToTranslate,
Expand All @@ -634,13 +635,13 @@ def update(self):
cursorPos=cursorPos,
)

if converter:
# The received brailleToRawPos contains braille to normalized positions.
# Process them to represent real raw positions by converting them from normalized ones.
for converter in reversed(converters):
# Convert liblouis offsets from the most recently transformed text
# back through each transformation to the original raw text.
brailleToRawPos = [converter.encodedToStrOffsets(i) for i in brailleToRawPos]
# The received rawToBraillePos contains normalized to braille positions.
# Create a new list based on real raw positions.
rawToBraillePos = [rawToBraillePos[i] for i in converter.computedStrToEncodedOffsets]
rawToBraillePos = [
rawToBraillePos[converter.strToEncodedOffsets(i)] for i in range(converter.strLength)
]
self.brailleToRawPos = brailleToRawPos
self.rawToBraillePos = rawToBraillePos

Expand Down
1 change: 1 addition & 0 deletions source/config/configSpec.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@
reportClickable = boolean(default=true)

[documentNavigation]
# Hidden option to eagerly initialize Chinese word segmentation even when the current languages do not use it.
initWordSegForUnusedLang = boolean(default=false)
wordSegmentationStandard = featureFlag(optionsEnum="WordNavigationUnitFlag", behaviorOfDefault="Auto")
paragraphStyle = featureFlag(optionsEnum="ParagraphNavigationFlag", behaviorOfDefault="application")
Expand Down
3 changes: 2 additions & 1 deletion source/textInfos/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,8 @@ def wordSegFlag(self) -> WordSegFlag | None:
case config.featureFlagEnums.WordNavigationUnitFlag.CHINESE:
return WordSegFlag.CHINESE
case _:
log.error(f"Unknown word segmentation standard, {self.__wordSegConf.calculated()!r}")
log.error(f"Unknown word segmentation standard, {self.wordSegConf.calculated()!r}")
return None

#: The encoding internal to the underlying text info implementation.
encoding: Optional[str] = textUtils.WCHAR_ENCODING
Expand Down
5 changes: 4 additions & 1 deletion source/textUtils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,9 @@ def __init__(self, text: str, encoding: str = "UTF-8", wordSegFlag: WordSegFlag
self.wordSegFlag: WordSegFlag = wordSegFlag
self.strategy: wordSegStrategy.WordSegmentationStrategy = self._chooseStrategy()

def _chooseStrategy(self) -> wordSegStrategy.WordSegmentationStrategy: # TODO: optimize
def _chooseStrategy(
self,
) -> wordSegStrategy.WordSegmentationStrategy: # TODO: Limit regex scans for large text.
"""Choose the appropriate segmentation strategy based on the text content."""
if self.wordSegFlag == WordSegFlag.AUTO:
if (
Expand All @@ -627,6 +629,7 @@ def _chooseStrategy(self) -> wordSegStrategy.WordSegmentationStrategy: # TODO:
return wordSegStrategy.UniscribeWordSegmentationStrategy(self.text, self.encoding)
case _:
return wordSegStrategy.UniscribeWordSegmentationStrategy(self.text, self.encoding)
return wordSegStrategy.UniscribeWordSegmentationStrategy(self.text, self.encoding)

def getSegmentForOffset(self, offset: int) -> tuple[int, int] | None:
"""Get the segment containing the given offset."""
Expand Down
1 change: 0 additions & 1 deletion source/textUtils/wordSeg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,3 @@ def initialize():
Thread(target=callable_to_call, args=args, kwargs=kwargs, daemon=True).start()
except Exception as e:
log.debug("Initializer %s.%s failed: %s", module_name, qualname, e)
return
Loading
Loading