diff --git a/README.md b/README.md index c09e4d8..e6c1a02 100644 --- a/README.md +++ b/README.md @@ -48,12 +48,14 @@ OCR Plugin enables many use cases for enhancing your stream or recording: Available now: - Add OCR Filter to any source with image or video output - Choose from Scoreboard model or English, French, Spanish, German, Chinese, Japanese, Arabic, Turkish, Portugese, Hindi, Russian and Italian - - Output OCR result to an OBS Text Source - Choose the segmentation mode: Word, Line, Page, etc. - "Semantic Smoothing": getting more consistent outputs with higher accuracy and confidence by "averaging" several text outputs - Timing/Running modes: per X-milliseconds + - Output OCR result to an OBS Text Source + - Output to a text file (with or without aggregation) - Output formatting (with inja): e.g. "Score: {{score}}" - - Output text detection to image source + - Output text detection to image source (draws boxes, text, etc.) + - Output to settings (e.g. for other plugins to use as triggers) - Binarization methods (threshold, Otsu, Triangle, adaptive) - Image Dilation - Rescale (optimal Tesseract performance is at 35 pixels / character) @@ -62,7 +64,7 @@ Coming soon: - More languages built-in (pretrained Tesseract models) - Allowing external model files - More output capabilities e.g. Parsing, websocket event, etc. - - Extracting text from complex image layouts + - Detection area selection (to avoid needing the Crop/Pad Filter) - Different timing/run modes: per X-frames, image change, etc. 
- Image stabilization - Optical flow tracking for fast moving text diff --git a/buildspec.json b/buildspec.json index a569660..8363d4f 100644 --- a/buildspec.json +++ b/buildspec.json @@ -38,7 +38,7 @@ }, "name": "obs-ocr", "displayName": "OBS OCR Plugin", - "version": "0.0.6", + "version": "0.0.7", "author": "Roy Shilkrot", "website": "https://github.com/occ-ai/obs-ocr", "email": "roy.shil@gmail.com", diff --git a/data/locale/ar-SA.ini b/data/locale/ar-SA.ini new file mode 100644 index 0000000..2decd33 --- /dev/null +++ b/data/locale/ar-SA.ini @@ -0,0 +1,36 @@ +OCRFilter="التصفية الضوئية للحروف" +PageSegmentationMode="نمط تجزئة الصفحة" +Language="اللغة" +EnableSmoothing="تفعيل التنعيم" +ConfThreshold="عتبة الثقة" +WordLength="طول الكلمة" +WindowSize="حجم النافذة" +CharWhitelist="قائمة الحروف المسموح بها" +UserPatterns="أنماط المستخدم" +OutputTextSource="مصدر النص الناتج" +NoOutput="لا يوجد ناتج" +UpdateTimer="تحديث المؤقت (مللي ثانية)" +AdvancedSettings="الإعدادات المتقدمة" +UpdateOnChange="التحديث فقط عند تغيير الصورة" +UpdateOnChangeThreshold="عتبة التغيير %" +OutputFormatting="تنسيق الناتج" +OutputTextDetectionMaskSource="مصدر قناع النص المكتشف" +SaveToFile="حفظ في ملف" +OutputFilePath="مسار ملف الناتج" +BinarizationMode="نمط التثنية" +BinarizationThreshold="عتبة التثنية" +BinarizationBlockSize="حجم الكتلة في التثنية" +PreviewBinarization="معاينة التثنية" +RescaleImage="تغيير حجم الصورة" +RescaleTargetSize="حجم الهدف لتغيير الحجم" +DilationIterations="تكرارات التوسيع" +ImageOutputOption="خيار ناتج الصورة" +DetectionBoxesMask="قناع صناديق الكشف" +TextRendering="عرض النص" +TextWithBackground="نص مع خلفية" +SelectPresets="اختيار الإعدادات المسبقة" +CharWhitelistPreset="إعداد قائمة الحروف المسموح بها مسبقًا" +NumericPunctuation="أرقام / علامات ترقيم" +OutputFlatten="تسطيح الناتج إلى سطر واحد" +OutputFileAppend="إضافة إلى الملف؟" +current_output="الناتج الحالي" diff --git a/data/locale/de-DE.ini b/data/locale/de-DE.ini new file mode 100644 index 0000000..b30a16f --- 
/dev/null +++ b/data/locale/de-DE.ini @@ -0,0 +1,36 @@ +OCRFilter="OCR-Filter" +PageSegmentationMode="Seiten Segmentierungsmodus" +Language="Sprache" +EnableSmoothing="Glättung aktivieren" +ConfThreshold="Vertrauensschwelle" +WordLength="Wortlänge" +WindowSize="Fenstergröße" +CharWhitelist="Zeichen-Whitelist" +UserPatterns="Benutzermuster" +OutputTextSource="Quelle des Ausgabetextes" +NoOutput="Keine Ausgabe" +UpdateTimer="Aktualisierungs-Timer (ms)" +AdvancedSettings="Erweiterte Einstellungen" +UpdateOnChange="Nur bei Bildänderung aktualisieren" +UpdateOnChangeThreshold="Änderungsschwelle %" +OutputFormatting="Ausgabeformatierung" +OutputTextDetectionMaskSource="Quelle der Ausgabemaske" +SaveToFile="In Datei speichern" +OutputFilePath="Ausgabedateipfad" +BinarizationMode="Binarisierungsmodus" +BinarizationThreshold="Binarisierungsschwelle" +BinarizationBlockSize="Binarisierungsblockgröße" +PreviewBinarization="Binarisierungsvorschau" +RescaleImage="Bild skalieren" +RescaleTargetSize="Zielgröße skalieren" +DilationIterations="Dilatationsiterationen" +ImageOutputOption="Bildausgabeoption" +DetectionBoxesMask="Erkennungsboxen Maske" +TextRendering="Textüberlagerung" +TextWithBackground="Text mit Hintergrund" +SelectPresets="Voreinstellung auswählen" +CharWhitelistPreset="Zeichen-Whitelist-Voreinstellung" +NumericPunctuation="Numerisch / Interpunktion" +OutputFlatten="Ausgabe zu einer einzigen Zeile glätten" +OutputFileAppend="An Datei anhängen?" 
+current_output="Aktuelle Ausgabe" diff --git a/data/locale/en-US.ini b/data/locale/en-US.ini index dbff779..90aee89 100644 --- a/data/locale/en-US.ini +++ b/data/locale/en-US.ini @@ -28,3 +28,9 @@ ImageOutputOption="Image Output Option" DetectionBoxesMask="Detection Boxes Mask" TextRendering="Text Overlay" TextWithBackground="Text with Background" +SelectPresets="Select Preset" +CharWhitelistPreset="Character Whitelist Preset" +NumericPunctuation="Numeric / Punctuation" +OutputFlatten="Flatten Output to Single Line" +OutputFileAppend="Append to File?" +current_output="Current Output" diff --git a/data/locale/es-ES.ini b/data/locale/es-ES.ini new file mode 100644 index 0000000..f3751b8 --- /dev/null +++ b/data/locale/es-ES.ini @@ -0,0 +1,36 @@ +OCRFilter="OCR" +PageSegmentationMode="Modo de Segmentación de Página" +Language="Idioma" +EnableSmoothing="Habilitar Suavizado" +ConfThreshold="Umbral de Confianza" +WordLength="Longitud de Palabra" +WindowSize="Tamaño de Ventana" +CharWhitelist="Lista de Caracteres Permitidos" +UserPatterns="Patrones de Usuario" +OutputTextSource="Fuente de Texto de Salida" +NoOutput="Sin Salida" +UpdateTimer="Temporizador de Actualización (ms)" +AdvancedSettings="Configuración Avanzada" +UpdateOnChange="Actualizar Solo al Cambiar la Imagen" +UpdateOnChangeThreshold="Umbral de Cambio %" +OutputFormatting="Formato de Salida" +OutputTextDetectionMaskSource="Fuente de Máscara de Detección de Texto" +SaveToFile="Guardar en Archivo" +OutputFilePath="Ruta de Archivo de Salida" +BinarizationMode="Modo de Binario" +BinarizationThreshold="Umbral de Binario" +BinarizationBlockSize="Tamaño de Bloque de Binario" +PreviewBinarization="Vista Previa de Binario" +RescaleImage="Reescalar Imagen" +RescaleTargetSize="Tamaño de Destino de Reescalado" +DilationIterations="Iteraciones de Dilatación" +ImageOutputOption="Opción de Salida de Imagen" +DetectionBoxesMask="Máscara de Cuadros de Detección" +TextRendering="Superposición de Texto" 
+TextWithBackground="Texto con Fondo" +SelectPresets="Seleccionar Preajustes" +CharWhitelistPreset="Preajuste de Lista de Caracteres Permitidos" +NumericPunctuation="Numérico / Puntuación" +OutputFlatten="Aplanar Salida a una Línea Única" +OutputFileAppend="¿Agregar al Archivo?" +current_output="Salida Actual" diff --git a/data/locale/fr-FR.ini b/data/locale/fr-FR.ini new file mode 100644 index 0000000..2a462d8 --- /dev/null +++ b/data/locale/fr-FR.ini @@ -0,0 +1,36 @@ +OCRFilter="OCR" +PageSegmentationMode="Mode de segmentation de page" +Language="Langue" +EnableSmoothing="Activer le lissage" +ConfThreshold="Seuil de confiance" +WordLength="Longueur du mot" +WindowSize="Taille de la fenêtre" +CharWhitelist="Liste blanche de caractères" +UserPatterns="Motifs utilisateur" +OutputTextSource="Source de texte de sortie" +NoOutput="Pas de sortie" +UpdateTimer="Minuterie de mise à jour (ms)" +AdvancedSettings="Paramètres avancés" +UpdateOnChange="Mettre à jour uniquement en cas de changement d'image" +UpdateOnChangeThreshold="Seuil de changement %" +OutputFormatting="Formatage de sortie" +OutputTextDetectionMaskSource="Source de masque de détection de texte" +SaveToFile="Enregistrer dans un fichier" +OutputFilePath="Chemin du fichier de sortie" +BinarizationMode="Mode de binarisation" +BinarizationThreshold="Seuil de binarisation" +BinarizationBlockSize="Taille de bloc de binarisation" +PreviewBinarization="Aperçu de la binarisation" +RescaleImage="Redimensionner l'image" +RescaleTargetSize="Taille cible de redimensionnement" +DilationIterations="Itérations de dilatation" +ImageOutputOption="Option de sortie d'image" +DetectionBoxesMask="Masque de boîtes de détection" +TextRendering="Superposition de texte" +TextWithBackground="Texte avec arrière-plan" +SelectPresets="Sélectionner un préréglage" +CharWhitelistPreset="Préréglage de liste blanche de caractères" +NumericPunctuation="Numérique / Ponctuation" +OutputFlatten="Aplatir la sortie en une seule ligne" 
+OutputFileAppend="Ajouter au fichier ?" +current_output="Sortie actuelle" diff --git a/data/locale/ja-JP.ini b/data/locale/ja-JP.ini new file mode 100644 index 0000000..1f3e47a --- /dev/null +++ b/data/locale/ja-JP.ini @@ -0,0 +1,36 @@ +OCRFilter="OCR" +PageSegmentationMode="ページセグメンテーションモード" +Language="言語" +EnableSmoothing="スムージングを有効にする" +ConfThreshold="信頼度の閾値" +WordLength="単語の長さ" +WindowSize="ウィンドウサイズ" +CharWhitelist="文字のホワイトリスト" +UserPatterns="ユーザーパターン" +OutputTextSource="出力テキストのソース" +NoOutput="出力なし" +UpdateTimer="更新タイマー(ミリ秒)" +AdvancedSettings="高度な設定" +UpdateOnChange="画像の変更時のみ更新" +UpdateOnChangeThreshold="変更の閾値 %" +OutputFormatting="出力の書式設定" +OutputTextDetectionMaskSource="マスクソースの出力テキスト検出" +SaveToFile="ファイルに保存" +OutputFilePath="出力ファイルパス" +BinarizationMode="バイナリ化モード" +BinarizationThreshold="バイナリ化の閾値" +BinarizationBlockSize="バイナリ化のブロックサイズ" +PreviewBinarization="バイナリ化のプレビュー" +RescaleImage="画像のリスケール" +RescaleTargetSize="リスケールのターゲットサイズ" +DilationIterations="膨張の反復回数" +ImageOutputOption="画像の出力オプション" +DetectionBoxesMask="検出ボックスのマスク" +TextRendering="テキストのオーバーレイ" +TextWithBackground="背景付きテキスト" +SelectPresets="プリセットの選択" +CharWhitelistPreset="文字のホワイトリストプリセット" +NumericPunctuation="数字/句読点" +OutputFlatten="出力を単一行にフラット化" +OutputFileAppend="ファイルに追記する?" 
+current_output="現在の出力" diff --git a/data/locale/pt-BR.ini b/data/locale/pt-BR.ini new file mode 100644 index 0000000..e1127dc --- /dev/null +++ b/data/locale/pt-BR.ini @@ -0,0 +1,36 @@ +OCRFilter="Filtro OCR" +PageSegmentationMode="Modo de Segmentação de Página" +Language="Idioma" +EnableSmoothing="Ativar Suavização" +ConfThreshold="Limiar de Confiança" +WordLength="Comprimento da Palavra" +WindowSize="Tamanho da Janela" +CharWhitelist="Lista de Caracteres Permitidos" +UserPatterns="Padrões do Usuário" +OutputTextSource="Fonte do Texto de Saída" +NoOutput="Sem Saída" +UpdateTimer="Tempo de Atualização (ms)" +AdvancedSettings="Configurações Avançadas" +UpdateOnChange="Atualizar Somente na Mudança da Imagem" +UpdateOnChangeThreshold="Limiar de Mudança %" +OutputFormatting="Formatação de Saída" +OutputTextDetectionMaskSource="Fonte da Máscara de Detecção de Texto" +SaveToFile="Salvar em Arquivo" +OutputFilePath="Caminho do Arquivo de Saída" +BinarizationMode="Modo de Binarização" +BinarizationThreshold="Limiar de Binarização" +BinarizationBlockSize="Tamanho do Bloco de Binarização" +PreviewBinarization="Visualizar Binarização" +RescaleImage="Redimensionar Imagem" +RescaleTargetSize="Tamanho de Destino para Redimensionamento" +DilationIterations="Iterações de Dilatação" +ImageOutputOption="Opção de Saída de Imagem" +DetectionBoxesMask="Máscara de Caixas de Detecção" +TextRendering="Sobreposição de Texto" +TextWithBackground="Texto com Fundo" +SelectPresets="Selecionar Preset" +CharWhitelistPreset="Preset de Lista de Caracteres Permitidos" +NumericPunctuation="Numérico / Pontuação" +OutputFlatten="Aplanar Saída para uma Única Linha" +OutputFileAppend="Anexar ao Arquivo?" 
+current_output="Saída Atual" diff --git a/data/locale/ru-RU.ini b/data/locale/ru-RU.ini new file mode 100644 index 0000000..c8f1dae --- /dev/null +++ b/data/locale/ru-RU.ini @@ -0,0 +1,36 @@ +OCRFilter="Фильтр OCR" +PageSegmentationMode="Режим сегментации страницы" +Language="Язык" +EnableSmoothing="Включить сглаживание" +ConfThreshold="Порог уверенности" +WordLength="Длина слова" +WindowSize="Размер окна" +CharWhitelist="Белый список символов" +UserPatterns="Пользовательские шаблоны" +OutputTextSource="Источник выходного текста" +NoOutput="Нет выхода" +UpdateTimer="Таймер обновления (мс)" +AdvancedSettings="Расширенные настройки" +UpdateOnChange="Обновление только при изменении изображения" +UpdateOnChangeThreshold="Порог изменения %" +OutputFormatting="Форматирование вывода" +OutputTextDetectionMaskSource="Источник маски обнаружения текста" +SaveToFile="Сохранить в файл" +OutputFilePath="Путь к выходному файлу" +BinarizationMode="Режим бинаризации" +BinarizationThreshold="Порог бинаризации" +BinarizationBlockSize="Размер блока бинаризации" +PreviewBinarization="Предварительный просмотр бинаризации" +RescaleImage="Масштабирование изображения" +RescaleTargetSize="Целевой размер масштабирования" +DilationIterations="Итерации расширения" +ImageOutputOption="Опция вывода изображения" +DetectionBoxesMask="Маска рамок обнаружения" +TextRendering="Отображение текста" +TextWithBackground="Текст на фоне" +SelectPresets="Выбор предустановок" +CharWhitelistPreset="Предустановленный белый список символов" +NumericPunctuation="Числа / Пунктуация" +OutputFlatten="Вывод в одну строку" +OutputFileAppend="Добавить к файлу?" 
+current_output="Текущий вывод" diff --git a/data/locale/zh-CN.ini b/data/locale/zh-CN.ini new file mode 100644 index 0000000..37f3db4 --- /dev/null +++ b/data/locale/zh-CN.ini @@ -0,0 +1,36 @@ +OCRFilter="OCR" +PageSegmentationMode="页面分割模式" +Language="语言" +EnableSmoothing="启用平滑" +ConfThreshold="置信度阈值" +WordLength="词长度" +WindowSize="窗口大小" +CharWhitelist="字符白名单" +UserPatterns="用户模式" +OutputTextSource="输出文本来源" +NoOutput="无输出" +UpdateTimer="更新定时器(毫秒)" +AdvancedSettings="高级设置" +UpdateOnChange="仅在图像更改时更新" +UpdateOnChangeThreshold="更改阈值 %" +OutputFormatting="输出格式" +OutputTextDetectionMaskSource="输出掩码来源" +SaveToFile="保存到文件" +OutputFilePath="输出文件路径" +BinarizationMode="二值化模式" +BinarizationThreshold="二值化阈值" +BinarizationBlockSize="二值化块大小" +PreviewBinarization="预览二值化" +RescaleImage="重新缩放图像" +RescaleTargetSize="重新缩放目标大小" +DilationIterations="膨胀迭代次数" +ImageOutputOption="图像输出选项" +DetectionBoxesMask="检测框掩码" +TextRendering="文本叠加" +TextWithBackground="带背景的文本" +SelectPresets="选择预设" +CharWhitelistPreset="字符白名单预设" +NumericPunctuation="数字/标点符号" +OutputFlatten="将输出平铺为单行" +OutputFileAppend="追加到文件?" 
+current_output="当前输出" diff --git a/src/consts.h b/src/consts.h index d6a7c47..b04f0f2 100644 --- a/src/consts.h +++ b/src/consts.h @@ -23,9 +23,19 @@ const char *const WHITELIST_CHARS_ITALIAN = // add portuguese characters with accents const char *const WHITELIST_CHARS_PORTUGUESE = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()_+-=[]{}|;':\",./<>?`~\\áàãâéêíóôõúüç "; -// add russian characters +// add russian characters for cyrillic, both upper and lower case const char *const WHITELIST_CHARS_RUSSIAN = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()_+-=[]{}|;':\",./<>?`~\\абвгдеёжзийклмнопрстуфхцчшщъыьэюя "; +const char *const WHITELIST_CHARS_JAPANESE = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()_+-=[]{}|;':\",./<>?`~\\あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをん "; +// hindi characters +const char *const WHITELIST_CHARS_HINDI = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()_+-=[]{}|;':\",./<>?`~\\अआइईउऊऋएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसह "; +// arabic characters +const char *const WHITELIST_CHARS_ARABIC = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()_+-=[]{}|;':\",./<>?`~\\ا ب ت ث ج ح خ د ذ ر ز س ش ص ض ط ظ ع غ ف ق ك ل م ن ه و ي "; +// numeric characters with punctuation for time, date, currency, etc. 
+const char *const WHITELIST_CHARS_NUMERIC = "0123456789!@#$%^&*()_+-=[]{}|;':\",./<>?`~\\ "; const int OUTPUT_IMAGE_OPTION_DETECTION_MASK = 0; const int OUTPUT_IMAGE_OPTION_TEXT_OVERLAY = 1; diff --git a/src/filter-data.h b/src/filter-data.h index c7a53a3..03a7ed7 100644 --- a/src/filter-data.h +++ b/src/filter-data.h @@ -54,6 +54,8 @@ struct filter_data { bool update_on_change; int update_on_change_threshold; int output_image_option; + bool output_file_append; + bool output_flatten; bool isDisabled; diff --git a/src/obs-utils.cpp b/src/obs-utils.cpp index 5bbe3f3..b6efcc9 100644 --- a/src/obs-utils.cpp +++ b/src/obs-utils.cpp @@ -13,6 +13,7 @@ #include #include #include +#include /** * @brief Get RGBA from the stage surface @@ -111,21 +112,39 @@ void acquire_weak_output_source_ref(struct filter_data *usd, char *output_source } } -void setTextCallback(const std::string &str, struct filter_data *usd) +void setTextCallback(const std::string &str_in, struct filter_data *usd) { if (!usd->output_source_mutex) { obs_log(LOG_ERROR, "output_source_mutex is null"); return; } + std::string str = str_in; + if (usd->output_flatten) { + // remove newlines and tabs, replace with spaces + std::replace(str.begin(), str.end(), '\n', ' '); + std::replace(str.begin(), str.end(), '\t', ' '); + std::replace(str.begin(), str.end(), '\r', ' '); + + // remove multiple spaces + str = std::regex_replace(str, std::regex(" +"), " "); + } + + // update internal settings + auto internal_source_settings = obs_source_get_settings(usd->source); + obs_data_set_string(internal_source_settings, "current_output", str.c_str()); + obs_data_release(internal_source_settings); + // check if save_to_file is selected if (strcmp(usd->output_source_name, "!!save_to_file!!") == 0) { // save_to_file is selected, write the text to a file if (usd->output_file_path.empty()) { - obs_log(LOG_ERROR, "output_file_path is empty"); return; } - std::ofstream file(usd->output_file_path); + // append flag according to 
usd->output_file_append + std::ofstream file(usd->output_file_path, usd->output_file_append + ? std::ios_base::app + : std::ios_base::trunc); if (!file.is_open()) { obs_log(LOG_ERROR, "failed to open file %s", usd->output_file_path.c_str()); return; diff --git a/src/ocr-filter.cpp b/src/ocr-filter.cpp index a36bf5b..7c31bc2 100644 --- a/src/ocr-filter.cpp +++ b/src/ocr-filter.cpp @@ -119,7 +119,8 @@ obs_properties_t *ocr_filter_properties(void *data) "update_on_change", "binarization_mode", "preview_binarization", "binarization_threshold", "binarization_block_size", "rescale_image", "rescale_target_size", "update_on_change_threshold", - "dilation_iterations"}) { + "dilation_iterations", "output_flatten", "char_whitelist_preset", + "current_output"}) { obs_property_set_visible(obs_properties_get(props_modified, prop), advanced_settings); } @@ -204,6 +205,47 @@ obs_properties_t *ocr_filter_properties(void *data) return true; }); + // add preset selector for char whitelist + obs_property_t *char_whitelist_preset = obs_properties_add_list( + props, "char_whitelist_preset", obs_module_text("CharWhitelistPreset"), + OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); + obs_property_list_add_string(char_whitelist_preset, obs_module_text("SelectPresets"), + "none"); + obs_property_list_add_string(char_whitelist_preset, obs_module_text("English"), + WHITELIST_CHARS_ENGLISH); + obs_property_list_add_string(char_whitelist_preset, obs_module_text("NumericPunctuation"), + WHITELIST_CHARS_NUMERIC); + obs_property_list_add_string(char_whitelist_preset, obs_module_text("French"), + WHITELIST_CHARS_FRENCH); + obs_property_list_add_string(char_whitelist_preset, obs_module_text("German"), + WHITELIST_CHARS_GERMAN); + obs_property_list_add_string(char_whitelist_preset, obs_module_text("Spanish"), + WHITELIST_CHARS_SPANISH); + obs_property_list_add_string(char_whitelist_preset, obs_module_text("Italian"), + WHITELIST_CHARS_ITALIAN); + obs_property_list_add_string(char_whitelist_preset, 
obs_module_text("Portuguese"), + WHITELIST_CHARS_PORTUGUESE); + obs_property_list_add_string(char_whitelist_preset, obs_module_text("Cyrillic"), + WHITELIST_CHARS_RUSSIAN); + obs_property_list_add_string(char_whitelist_preset, obs_module_text("Japanese"), + WHITELIST_CHARS_JAPANESE); + obs_property_list_add_string(char_whitelist_preset, obs_module_text("Hindi"), + WHITELIST_CHARS_HINDI); + obs_property_list_add_string(char_whitelist_preset, obs_module_text("Arabic"), + WHITELIST_CHARS_ARABIC); + // add callback to set the char whitelist based on the selected preset + obs_property_set_modified_callback( + obs_properties_get(props, "char_whitelist_preset"), + [](obs_properties_t *, obs_property_t *property, obs_data_t *settings) { + const char *selected_preset = + obs_data_get_string(settings, "char_whitelist_preset"); + if (strcmp(selected_preset, "none") != 0) { + obs_data_set_string(settings, "char_whitelist", selected_preset); + } + UNUSED_PARAMETER(property); + return true; + }); + // Add character whitelist obs_properties_add_text(props, "char_whitelist", obs_module_text("CharWhitelist"), OBS_TEXT_DEFAULT); @@ -219,7 +261,6 @@ obs_properties_t *ocr_filter_properties(void *data) 1); obs_properties_add_int_slider(props, "window_size", obs_module_text("WindowSize"), 1, 20, 1); - obs_property_set_modified_callback(enable_smoothing_property, enable_smoothing_modified); // Output formatting @@ -228,6 +269,9 @@ obs_properties_t *ocr_filter_properties(void *data) // hide the output formatting property by default obs_property_set_visible(obs_properties_get(props, "output_formatting"), false); + // add option to "flatten" the output text to a single line + obs_properties_add_bool(props, "output_flatten", obs_module_text("OutputFlatten")); + // Add a property for the output text source obs_property_t *text_sources = obs_properties_add_list(props, "text_sources", obs_module_text("OutputTextSource"), @@ -244,6 +288,8 @@ obs_properties_t *ocr_filter_properties(void *data) // 
Add an option to set the output file path obs_properties_add_path(props, "output_file_path", obs_module_text("OutputFilePath"), OBS_PATH_FILE, nullptr, nullptr); + // add an option to control output file aggregation / "append" mode + obs_properties_add_bool(props, "output_file_append", obs_module_text("OutputFileAppend")); // add callback to enable or disable the output file path property obs_property_set_modified_callback( @@ -255,6 +301,9 @@ obs_properties_t *ocr_filter_properties(void *data) obs_property_set_visible(obs_properties_get(props_modified, "output_file_path"), save_to_file); + obs_property_set_visible(obs_properties_get(props_modified, + "output_file_append"), + save_to_file); // show/hide "output_formatting" property based on the selected output source bool show_output_formatting = strcmp(obs_data_get_string(settings, "text_sources"), "none") != 0; @@ -300,6 +349,11 @@ obs_properties_t *ocr_filter_properties(void *data) obs_property_list_add_int(output_format, obs_module_text("TextWithBackground"), OUTPUT_IMAGE_OPTION_TEXT_BACKGROUND); + // add current output text box, disabled by default + obs_properties_add_text(props, "current_output", obs_module_text("current_output"), + OBS_TEXT_DEFAULT); + obs_property_set_enabled(obs_properties_get(props, "current_output"), false); + // Add a informative text about the plugin obs_properties_add_text( props, "info", @@ -317,13 +371,13 @@ void ocr_filter_defaults(obs_data_t *settings) obs_data_set_default_int(settings, "update_on_change_threshold", 15); obs_data_set_default_string(settings, "language", "eng"); obs_data_set_default_bool(settings, "advanced_settings", false); - obs_data_set_default_int(settings, "page_segmentation_mode", tesseract::PSM_SINGLE_WORD); + obs_data_set_default_int(settings, "page_segmentation_mode", tesseract::PSM_AUTO); obs_data_set_default_int(settings, "binarization_mode", 0); obs_data_set_default_int(settings, "binarization_threshold", 127); obs_data_set_default_int(settings, 
"binarization_block_size", 15); obs_data_set_default_bool(settings, "preview_binarization", false); obs_data_set_default_int(settings, "dilation_iterations", 0); - obs_data_set_default_bool(settings, "rescale_image", true); + obs_data_set_default_bool(settings, "rescale_image", false); obs_data_set_default_int(settings, "rescale_target_size", 35); obs_data_set_default_string(settings, "text_sources", "none"); obs_data_set_default_string(settings, "text_detection_mask_sources", "none"); @@ -335,6 +389,8 @@ void ocr_filter_defaults(obs_data_t *settings) obs_data_set_default_int(settings, "window_size", 10); obs_data_set_default_string(settings, "output_formatting", "{{output}}"); obs_data_set_default_int(settings, "image_output_option", 0); + obs_data_set_default_bool(settings, "output_file_append", false); + obs_data_set_default_bool(settings, "output_flatten", false); } void ocr_filter_update(void *data, obs_data_t *settings) @@ -374,6 +430,8 @@ void ocr_filter_update(void *data, obs_data_t *settings) tf->update_on_change_threshold = (int)obs_data_get_int(settings, "update_on_change_threshold"); tf->output_image_option = (int)obs_data_get_int(settings, "image_output_option"); + tf->output_file_append = obs_data_get_bool(settings, "output_file_append"); + tf->output_flatten = obs_data_get_bool(settings, "output_flatten"); // Initialize the Tesseract OCR model initialize_tesseract_ocr(tf, hard_tesseract_init_required); diff --git a/src/tesseract-ocr-utils.cpp b/src/tesseract-ocr-utils.cpp index 11be005..7668a7b 100644 --- a/src/tesseract-ocr-utils.cpp +++ b/src/tesseract-ocr-utils.cpp @@ -425,24 +425,7 @@ void tesseract_thread(void *data) extract_text_detection_boxes(tf, imageBGRA.size()); if (tf->output_image_option == - OUTPUT_IMAGE_OPTION_TEXT_OVERLAY) { - // Create a text overlay image - QImage text_overlay_image = - render_boxes_with_qtextdocument( - boxes, imageBGRA.cols, - imageBGRA.rows); - cv::Mat text_overlay_image_mat( - text_overlay_image.height(), - 
text_overlay_image.width(), CV_8UC4, - text_overlay_image.bits(), - text_overlay_image.bytesPerLine()); - text_overlay_image_mat.copyTo( - text_detection_output); - // } else if (tf->output_image_option == - // OUTPUT_IMAGE_OPTION_TEXT_BACKGROUND) { - // // Draw the text detection boxes on the image with a background - - } else { + OUTPUT_IMAGE_OPTION_DETECTION_MASK) { text_detection_output.setTo( cv::Scalar(0, 0, 0, 255)); @@ -452,6 +435,19 @@ void tesseract_thread(void *data) text_detection_output, box.box, cv::Scalar(255, 255, 255, 255), -1); } + } else { + // Create a text overlay image + QImage text_overlay_image = render_boxes_with_qtextdocument( + boxes, imageBGRA.cols, imageBGRA.rows, + tf->output_image_option == + OUTPUT_IMAGE_OPTION_TEXT_BACKGROUND); + cv::Mat text_overlay_image_mat( + text_overlay_image.height(), + text_overlay_image.width(), CV_8UC4, + text_overlay_image.bits(), + text_overlay_image.bytesPerLine()); + text_overlay_image_mat.copyTo( + text_detection_output); } setTextDetectionMaskCallback(text_detection_output, tf); diff --git a/src/text-render-helper.cpp b/src/text-render-helper.cpp index ffda1bb..57dc243 100644 --- a/src/text-render-helper.cpp +++ b/src/text-render-helper.cpp @@ -12,7 +12,7 @@ * @param css_props CSS properties to apply to the text */ QImage render_boxes_with_qtextdocument(const std::vector &boxes, uint32_t width, - uint32_t height) + uint32_t height, bool add_background) { QPixmap pixmap(width, height); pixmap.fill(Qt::transparent); @@ -22,6 +22,11 @@ QImage render_boxes_with_qtextdocument(const std::vector &boxes, uint32_ // draw individual boxes on the pixmap for (const OCRBox &box : boxes) { + if (add_background) { + painter.setBrush(Qt::white); + painter.fillRect(box.box.x, box.box.y, box.box.width, box.box.height, + Qt::white); + } painter.setPen(Qt::blue); // set the character size according to the box height QFont font = painter.font(); diff --git a/src/text-render-helper.h b/src/text-render-helper.h index 
e299d1a..a31ef21 100644 --- a/src/text-render-helper.h +++ b/src/text-render-helper.h @@ -9,6 +9,6 @@ #include QImage render_boxes_with_qtextdocument(const std::vector &boxes, uint32_t width, - uint32_t height); + uint32_t height, bool add_background = false); #endif // TEXT_RENDER_HELPER_H