Merge branch 'cuda' into for_webui

This commit is contained in:
misyaguziya
2024-10-23 14:14:16 +09:00
20 changed files with 267 additions and 36 deletions

1
.gitignore vendored
View File

@@ -6,6 +6,7 @@ memo.txt
*.pyc
logs/
.venv/
.venv_cuda/
weights/
.vscode
error.log

View File

@@ -5,7 +5,7 @@ a = Analysis(
['src-python\\webui_mainloop.py'],
pathex=[],
binaries=[],
datas=[('./fonts', 'fonts/'), ('.venv/Lib/site-packages/zeroconf', 'zeroconf/'), ('.venv/Lib/site-packages/openvr', 'openvr/'), ('.venv/Lib/site-packages/pykakasi', 'pykakasi/')],
datas=[('./fonts', 'fonts/'), ('.venv/Lib/site-packages/zeroconf', 'zeroconf/'), ('.venv/Lib/site-packages/openvr', 'openvr/'), ('.venv/Lib/site-packages/pykakasi', 'pykakasi/'), ('.venv/Lib/site-packages/faster_whisper', 'faster_whisper/')],
hiddenimports=[],
hookspath=[],
hooksconfig={},

45
backend_cuda.spec Normal file
View File

@@ -0,0 +1,45 @@
# -*- mode: python ; coding: utf-8 -*-
# PyInstaller spec for the backend built out of the `.venv_cuda` virtual environment.
# `Analysis`, `PYZ`, `EXE` and `COLLECT` are not imported in this file; PyInstaller is
# expected to provide them when it runs the spec.

# Packages copied as plain data from the CUDA venv's site-packages into the bundle.
_site_packages = '.venv_cuda/Lib/site-packages'
_bundled_packages = ('zeroconf', 'openvr', 'pykakasi', 'faster_whisper')

# (source path, destination directory inside the bundle) pairs; fonts come first.
_data_files = [('./fonts', 'fonts/')]
_data_files.extend(
    (f'{_site_packages}/{_package}', f'{_package}/') for _package in _bundled_packages
)

a = Analysis(
    ['src-python\\webui_mainloop.py'],
    pathex=[],
    binaries=[],
    datas=_data_files,
    hiddenimports=[],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    # Modules explicitly kept out of the bundle.
    excludes=['pandas', 'matplotlib', 'PyQt5'],
    noarchive=False,
    optimize=0,
)

pyz = PYZ(a.pure)

# The executable is built without its binaries (exclude_binaries=True);
# COLLECT below gathers the executable, binaries and data files together.
exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='backend-x86_64-pc-windows-msvc',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
    icon=['src-ui\\assets\\chato_icon_fill.png'],
)

coll = COLLECT(
    exe,
    a.binaries,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name='.',
)

View File

@@ -1,2 +1,2 @@
pyinstaller --windowed --clean --noconfirm --icon="./img/vrct_logo_mark_black.ico" --add-data "./img;img/" --add-data "./fonts;fonts/" --add-data "./locales;locales/" --add-data "./batch;batch/" --name VRCT --add-data ".venv\Lib\site-packages\customtkinter;customtkinter/" --add-data ".venv\Lib\site-packages\zeroconf;zeroconf/" --add-data ".venv\Lib\site-packages\openvr;openvr/" --exclude-module pandas --exclude-module matplotlib --exclude-module PyQt5 main.py
"C:\Program Files (x86)\NSIS\makensis.exe" installer/installer.nsi
call .venv/Scripts/activate
pyinstaller backend.spec --distpath src-tauri/bin --clean --noconfirm

2
build_cuda.bat Normal file
View File

@@ -0,0 +1,2 @@
:: Build the Python backend with PyInstaller from the CUDA virtual environment.
:: Stop early when the CUDA venv is missing: otherwise the build would continue
:: with whatever pyinstaller happens to be on PATH and produce a backend that
:: does not contain the CUDA dependencies.
if not exist ".venv_cuda\Scripts\activate.bat" (
    echo [build_cuda] .venv_cuda was not found - run install.bat first. 1>&2
    exit /b 1
)
:: Native backslash separators avoid cmd.exe treating "/" as a switch prefix.
call ".venv_cuda\Scripts\activate.bat"
pyinstaller backend_cuda.spec --distpath src-tauri/bin --clean --noconfirm

View File

@@ -1,2 +1,10 @@
python -m venv .venv
python -m venv .venv_cuda
call .venv/Scripts/activate
python.exe -m pip install --upgrade pip
pip install -r requirements.txt
pip install -r requirements.txt
call .venv_cuda/Scripts/activate
python.exe -m pip install --upgrade pip
pip install -r requirements_cuda.txt

View File

@@ -5,15 +5,18 @@
"type": "module",
"scripts": {
"setup-python": "install.bat",
"build-python": "pyinstaller backend.spec --distpath src-tauri/bin --clean --noconfirm",
"build-python": "build.bat",
"build-python-cuda": "build_cuda.bat",
"vite": "vite",
"vite-build": "vite build",
"vite-preview": "vite preview",
"tauri": "tauri",
"tauri-dev": "tauri dev",
"dev": "npm run build-python && npm run dev-ui",
"dev-cuda": "npm run build-python-cuda && npm run dev-ui",
"dev-ui": "npm-run-all --parallel vite tauri-dev",
"build": "npm run build-python && npm run vite-build && npm run tauri build",
"build-cuda": "npm run build-python-cuda && npm run vite-build && npm run tauri build",
"build-ui": "npm run vite-build && npm run tauri build"
},
"dependencies": {

View File

@@ -1,3 +1,7 @@
torch==2.2.2
faster-whisper==1.0.3
ctranslate2==4.3.1
transformers==4.40.2
pillow == 10.0.0
PyAudioWPatch == 0.2.12.6
python-osc == 1.8.3
@@ -5,11 +9,7 @@ deepl == 1.15.0
flashtext ==2.7
pyinstaller==6.10.0
numpy==1.26.4
torch==2.2.2
transformers==4.37.2
sentencepiece==0.1.99
ctranslate2==4.1.0
faster-whisper==1.0.3
openvr==1.26.701
pydub==0.25.1
psutil==5.9.8

21
requirements_cuda.txt Normal file
View File

@@ -0,0 +1,21 @@
# Dependencies for the CUDA build (installed into .venv_cuda).
# Index options apply to the whole file, so the CUDA 12.1 wheel index is declared first.
--extra-index-url https://download.pytorch.org/whl/cu121
# Pin the CUDA local version explicitly: a bare "torch==2.2.2" is also satisfied by the
# CPU-only wheel on PyPI, which pip would silently pick if the extra index is unreachable.
torch==2.2.2+cu121
faster-whisper==1.0.3
ctranslate2==4.3.1
transformers==4.40.2
pillow == 10.0.0
PyAudioWPatch == 0.2.12.6
python-osc == 1.8.3
deepl == 1.15.0
flashtext ==2.7
pyinstaller==6.10.0
numpy==1.26.4
sentencepiece==0.1.99
openvr==1.26.701
pydub==0.25.1
psutil==5.9.8
pykakasi==2.3.0
pycaw==20240210
translators @ git+https://github.com/misyaguziya/translators@5.9.2.1
SpeechRecognition @ git+https://github.com/misyaguziya/custom_speech_recognition@3.10.4.1
tinyoscquery @ git+https://github.com/cyberkitsune/tinyoscquery@0.1.2

View File

@@ -724,6 +724,28 @@ class Config:
self._USE_WHISPER_FEATURE = value
saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)
@property
@json_serializable('SELECTED_TRANSLATION_COMPUTE_DEVICE')
def SELECTED_TRANSLATION_COMPUTE_DEVICE(self):
    """Compute device selected for translation.

    A dict; the default carries the keys "device", "device_index" and "device_name".
    """
    return self._SELECTED_TRANSLATION_COMPUTE_DEVICE

@SELECTED_TRANSLATION_COMPUTE_DEVICE.setter
def SELECTED_TRANSLATION_COMPUTE_DEVICE(self, value):
    """Validate, store and persist the translation compute device.

    Values that are not a dict, or that lack a usable device/device_index, are
    ignored so that consumers indexing ["device"] / ["device_index"] never hit
    a KeyError. Entries shaped like the compute-device list ("type"/"name") are
    accepted and completed with the canonical "device"/"device_name" keys.
    """
    if not isinstance(value, dict):
        return
    device = value.get("device", value.get("type"))
    device_index = value.get("device_index")
    if not isinstance(device, str) or not isinstance(device_index, int):
        return
    # Keep every key the caller sent; only fill in the canonical ones.
    normalized = dict(value)
    normalized["device"] = device
    normalized.setdefault("device_name", value.get("name", device))
    self._SELECTED_TRANSLATION_COMPUTE_DEVICE = normalized
    # co_name must be evaluated inside this function: it is the key written to the config file.
    saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, normalized)
@property
@json_serializable('SELECTED_TRANSCRIPTION_COMPUTE_DEVICE')
def SELECTED_TRANSCRIPTION_COMPUTE_DEVICE(self):
    """Compute device selected for transcription.

    A dict; the default carries the keys "device", "device_index" and "device_name".
    """
    return self._SELECTED_TRANSCRIPTION_COMPUTE_DEVICE

@SELECTED_TRANSCRIPTION_COMPUTE_DEVICE.setter
def SELECTED_TRANSCRIPTION_COMPUTE_DEVICE(self, value):
    """Validate, store and persist the transcription compute device.

    Values that are not a dict, or that lack a usable device/device_index, are
    ignored so that consumers indexing ["device"] / ["device_index"] never hit
    a KeyError. Entries shaped like the compute-device list ("type"/"name") are
    accepted and completed with the canonical "device"/"device_name" keys.
    """
    if not isinstance(value, dict):
        return
    device = value.get("device", value.get("type"))
    device_index = value.get("device_index")
    if not isinstance(device, str) or not isinstance(device_index, int):
        return
    # Keep every key the caller sent; only fill in the canonical ones.
    normalized = dict(value)
    normalized["device"] = device
    normalized.setdefault("device_name", value.get("name", device))
    self._SELECTED_TRANSCRIPTION_COMPUTE_DEVICE = normalized
    # co_name must be evaluated inside this function: it is the key written to the config file.
    saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, normalized)
@property
@json_serializable('CTRANSLATE2_WEIGHT_TYPE')
def CTRANSLATE2_WEIGHT_TYPE(self):
@@ -1105,8 +1127,10 @@ class Config:
}
self._USE_EXCLUDE_WORDS = True
self._USE_TRANSLATION_FEATURE = True
self._CTRANSLATE2_WEIGHT_TYPE = "Small"
self._USE_WHISPER_FEATURE = False
self._SELECTED_TRANSLATION_COMPUTE_DEVICE = {"device": "cpu", "device_index": 0, "device_name":"cpu"}
self._SELECTED_TRANSCRIPTION_COMPUTE_DEVICE = {"device": "cpu", "device_index": 0, "device_name":"cpu"}
self._CTRANSLATE2_WEIGHT_TYPE = "Small"
self._WHISPER_WEIGHT_TYPE = "base"
self._SEND_MESSAGE_FORMAT = "[message]"
self._SEND_MESSAGE_FORMAT_WITH_T = "[message]([translation])"

View File

@@ -109,7 +109,11 @@ class Model:
return checkCTranslate2Weight(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE)
def changeTranslatorCTranslate2Model(self):
self.translator.changeCTranslate2Model(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE)
self.translator.changeCTranslate2Model(
config.PATH_LOCAL,
config.CTRANSLATE2_WEIGHT_TYPE,
config.SELECTED_TRANSLATION_COMPUTE_DEVICE["device"],
config.SELECTED_TRANSLATION_COMPUTE_DEVICE["device_index"])
def downloadCTranslate2ModelWeight(self, callbackFunc=None):
return downloadCTranslate2Weight(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE, callbackFunc)
@@ -425,6 +429,8 @@ class Model:
transcription_engine=config.SELECTED_TRANSCRIPTION_ENGINE,
root=config.PATH_LOCAL,
whisper_weight_type=config.WHISPER_WEIGHT_TYPE,
device=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device"],
device_index=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device_index"],
)
def sendMicTranscript():
try:
@@ -587,6 +593,8 @@ class Model:
transcription_engine=config.SELECTED_TRANSCRIPTION_ENGINE,
root=config.PATH_LOCAL,
whisper_weight_type=config.WHISPER_WEIGHT_TYPE,
device=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device"],
device_index=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device_index"],
)
def sendSpeakerTranscript():
try:

View File

@@ -18,7 +18,7 @@ PHRASE_TIMEOUT = 3
MAX_PHRASES = 10
class AudioTranscriber:
def __init__(self, speaker, source, phrase_timeout, max_phrases, transcription_engine, root=None, whisper_weight_type=None):
def __init__(self, speaker, source, phrase_timeout, max_phrases, transcription_engine, root=None, whisper_weight_type=None, device="cpu", device_index=0):
self.speaker = speaker
self.phrase_timeout = phrase_timeout
self.max_phrases = max_phrases
@@ -38,7 +38,7 @@ class AudioTranscriber:
}
if transcription_engine == "Whisper" and checkWhisperWeight(root, whisper_weight_type) is True:
self.whisper_model = getWhisperModel(root, whisper_weight_type)
self.whisper_model = getWhisperModel(root, whisper_weight_type, device=device, device_index=device_index)
self.transcription_engine = "Whisper"
def transcribeAudioQueue(self, audio_queue, language, country, avg_logprob=-0.8, no_speech_prob=0.6):

View File

@@ -1,6 +1,7 @@
from os import path as os_path, makedirs as os_makedirs
from requests import get as requests_get
from typing import Callable
import torch
import huggingface_hub
from faster_whisper import WhisperModel
import logging
@@ -75,13 +76,14 @@ def downloadWhisperWeight(root, weight_type, callbackFunc):
url = huggingface_hub.hf_hub_url(_MODELS[weight_type], filename)
downloadFile(url, file_path, func=callbackFunc)
def getWhisperModel(root, weight_type):
def getWhisperModel(root, weight_type, device="cpu", device_index=0):
path = os_path.join(root, "weights", "whisper", weight_type)
compute_type = "int8" if device == "cpu" else "float16"
return WhisperModel(
path,
device="cpu",
device_index=0,
compute_type="int8",
device=device,
device_index=device_index,
compute_type=compute_type,
cpu_threads=4,
num_workers=1,
local_files_only=True,

View File

@@ -29,17 +29,19 @@ class Translator():
result = False
return result
def changeCTranslate2Model(self, path, model_type):
def changeCTranslate2Model(self, path, model_type, device="cpu", device_index=0):
self.is_loaded_ctranslate2_model = False
directory_name = ctranslate2_weights[model_type]["directory_name"]
tokenizer = ctranslate2_weights[model_type]["tokenizer"]
weight_path = os_path.join(path, "weights", "ctranslate2", directory_name)
tokenizer_path = os_path.join(path, "weights", "ctranslate2", directory_name, "tokenizer")
compute_type = "int8" if device == "cpu" else "float16"
self.ctranslate2_translator = ctranslate2.Translator(
weight_path,
device="cpu",
device_index=0,
compute_type="int8",
device=device,
device_index=device_index,
compute_type=compute_type,
inter_threads=1,
intra_threads=4
)

View File

@@ -7,6 +7,7 @@ from device_manager import device_manager
from config import config
from model import model
from utils import isUniqueStrings, printLog
import torch
class Controller:
def __init__(self) -> None:
@@ -363,10 +364,36 @@ class Controller:
def getMessageBoxRatioRange(*args, **kwargs) -> dict:
return {"status":200, "result":config.MESSAGE_BOX_RATIO_RANGE}
@staticmethod
def getComputeDeviceList(*args, **kwargs) -> dict:
    """List the compute devices that can be selected: every CUDA device torch reports, then the CPU.

    Each entry carries the keys used by the stored selection ("device",
    "device_index", "device_name") so an entry can be sent back unchanged as the
    selected device. The earlier "type"/"name" keys are kept as aliases so
    existing consumers of this list keep working.

    Returns:
        {"status": 200, "result": [entry, ...]} with the CPU entry last.
    """
    def describe(device: str, index: int, name: str) -> dict:
        return {
            "device": device,
            "type": device,
            "device_index": index,
            "device_name": name,
            "name": name,
        }

    device_list = [
        describe("cuda", i, torch.cuda.get_device_name(i))
        for i in range(torch.cuda.device_count())
    ]
    device_list.append(describe("cpu", 0, "cpu"))
    return {"status":200, "result":device_list}
@staticmethod
def getSelectedTranslationComputeDevice(*args, **kwargs) -> dict:
    """Return the configured translation compute device in a status-200 response."""
    selected_device = config.SELECTED_TRANSLATION_COMPUTE_DEVICE
    response = {"status": 200, "result": selected_device}
    return response
@staticmethod
def setSelectedTranslationComputeDevice(device: dict, *args, **kwargs) -> dict:
    """Select the compute device used for translation.

    Args:
        device: device description dict (the config setter ignores anything
            that is not a dict); the stored default uses the keys "device",
            "device_index" and "device_name".

    Returns:
        A status-200 response carrying the value held by the config after the
        assignment, which is the previous value if `device` was rejected.
    """
    printLog("setSelectedTranslationComputeDevice", device)
    config.SELECTED_TRANSLATION_COMPUTE_DEVICE = device
    return {"status":200,"result":config.SELECTED_TRANSLATION_COMPUTE_DEVICE}
@staticmethod
def getSelectableCtranslate2WeightTypeDict(*args, **kwargs) -> dict:
    """Return the selectable CTranslate2 weight types from the config in a status-200 response."""
    weight_types = config.SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT
    response = {"status": 200, "result": weight_types}
    return response
@staticmethod
def getSelectedTranscriptionComputeDevice(*args, **kwargs) -> dict:
    """Return the configured transcription compute device in a status-200 response."""
    selected_device = config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE
    response = {"status": 200, "result": selected_device}
    return response
@staticmethod
def setSelectedTranscriptionComputeDevice(device: dict, *args, **kwargs) -> dict:
    """Select the compute device used for transcription.

    Args:
        device: device description dict (the config setter ignores anything
            that is not a dict); the stored default uses the keys "device",
            "device_index" and "device_name".

    Returns:
        A status-200 response carrying the value held by the config after the
        assignment, which is the previous value if `device` was rejected.
    """
    printLog("setSelectedTranscriptionComputeDevice", device)
    config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE = device
    return {"status":200,"result":config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE}
@staticmethod
def getSelectableWhisperModelTypeDict(*args, **kwargs) -> dict:
    """Return the selectable Whisper weight types from the config in a status-200 response."""
    weight_types = config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT
    response = {"status": 200, "result": weight_types}
    return response

View File

@@ -128,6 +128,10 @@ mapping = {
"/set/enable/use_translation_feature": {"status": True, "variable":controller.setEnableUseTranslationFeature},
"/set/disable/use_translation_feature": {"status": True, "variable":controller.setDisableUseTranslationFeature},
"/get/data/translation_compute_device_dict": {"status": True, "variable":controller.getComputeDeviceList},
"/get/data/selected_translation_compute_device": {"status": True, "variable":controller.getSelectedTranslationComputeDevice},
"/set/data/selected_translation_compute_device": {"status": True, "variable":controller.setSelectedTranslationComputeDevice},
"/get/data/selectable_ctranslate2_weight_type_dict": {"status": True, "variable":controller.getSelectableCtranslate2WeightTypeDict},
"/get/data/ctranslate2_weight_type": {"status": True, "variable":controller.getCtranslate2WeightType},
@@ -229,6 +233,10 @@ mapping = {
"/set/enable/check_speaker_threshold": {"status": True, "variable":controller.setEnableCheckSpeakerThreshold},
"/set/disable/check_speaker_threshold": {"status": True, "variable":controller.setDisableCheckSpeakerThreshold},
"/get/data/transcription_compute_device_dict": {"status": True, "variable":controller.getComputeDeviceList},
"/get/data/selected_transcription_compute_device": {"status": True, "variable":controller.getSelectedTranscriptionComputeDevice},
"/set/data/selected_transcription_compute_device": {"status": True, "variable":controller.setSelectedTranscriptionComputeDevice},
"/get/data/selectable_whisper_weight_type_dict": {"status": True, "variable":controller.getSelectableWhisperModelTypeDict},
"/get/data/whisper_weight_type": {"status": True, "variable":controller.getWhisperWeightType},

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -243,7 +243,7 @@ Function PageTranscript
${NSD_CreateDropList} 33% 20u 33% 12u ""
Pop $DropLListTranscriptEngines
${NSD_CB_AddString} $DropLListTranscriptEngines "Google"
${NSD_CB_AddString} $DropLListTranscriptEngines "Wishper(USE CPU)"
${NSD_CB_AddString} $DropLListTranscriptEngines "Wishper"
${NSD_CB_SelectString} $DropLListTranscriptEngines "Google"
${NSD_CreateLabel} 0 52u 33% 12u "Select AI Model Size"
${NSD_CreateDropList} 33% 50u 40% 12u ""
@@ -290,13 +290,41 @@ FunctionEnd
Function OnDropListWishperDownloadWeightClick
${NSD_GetText} $DropLListTranscriptEngines $0
${If} $0 == "Wishper(USE CPU)"
${If} $0 == "Wishper"
EnableWindow $DropListWhisperDownloadWeightType 1
${Else}
EnableWindow $DropListWhisperDownloadWeightType 0
${EndIf}
FunctionEnd
Var CheckboxUseCUDA
Var DialogSelectInstallDeviceVersion
Page custom PageSelectInstallDeviceVersion PageLeaveSelectInstallDeviceVersion
; Custom page asking whether the GPU (CUDA) build should be installed.
Function PageSelectInstallDeviceVersion
  ; After the leave callback has run once, $CheckboxUseCUDA holds the stored
  ; answer ("true"/"false") instead of a control handle. Remember it before it
  ; is overwritten below, so the choice survives Back/Next navigation.
  StrCpy $0 $CheckboxUseCUDA

  !insertmacro MUI_HEADER_TEXT "Initial Settings" "Enable GPUs for translation and transcription."
  nsDialogs::Create 1018
  Pop $DialogSelectInstallDeviceVersion
  ${If} $DialogSelectInstallDeviceVersion == error
    Abort
  ${EndIf}

  ${NSD_CreateLabel} 0 21u 33% 12u "Enable the use of GPUs"
  ${NSD_CreateCheckBox} 33% 20u 33% 12u ""
  Pop $CheckboxUseCUDA

  ; Restore the previous answer; a freshly created checkbox is unchecked.
  ${If} $0 == "true"
    ${NSD_Check} $CheckboxUseCUDA
  ${EndIf}

  nsDialogs::Show
FunctionEnd
; Leave callback of the GPU page: turns the checkbox state into "true"/"false".
Function PageLeaveSelectInstallDeviceVersion
  ; Read the state first: the variable holding the control handle is reused
  ; below to store the answer.
  ${NSD_GetState} $CheckboxUseCUDA $0
  StrCpy $CheckboxUseCUDA "false"
  ${If} $0 == 1
    StrCpy $CheckboxUseCUDA "true"
  ${EndIf}
FunctionEnd
!insertmacro MUI_PAGE_COMPONENTS
; 4-4. Custom page to ask user if he wants to reinstall/uninstall
@@ -715,21 +743,73 @@ Section Install
!insertmacro CheckIfAppIsRunning
; Copy main executable
File "${MAINBINARYSRCPATH}"
; ; Copy main executable
; File "${MAINBINARYSRCPATH}"
; Copy resources
{{#each resources_dirs}}
CreateDirectory "$INSTDIR\\{{this}}"
{{/each}}
{{#each resources}}
File /a "/oname={{this.[1]}}" "{{unescape-dollar-sign @key}}"
{{/each}}
; ; Copy resources
; {{#each resources_dirs}}
; CreateDirectory "$INSTDIR\\{{this}}"
; {{/each}}
; {{#each resources}}
; File /a "/oname={{this.[1]}}" "{{unescape-dollar-sign @key}}"
; {{/each}}
; Copy external binaries
{{#each binaries}}
File /a "/oname={{this}}" "{{unescape-dollar-sign @key}}"
{{/each}}
; ; Copy external binaries
; {{#each binaries}}
; File /a "/oname={{this}}" "{{unescape-dollar-sign @key}}"
; {{/each}}
!addplugindir "..\..\..\..\nsis\plugins\x86-unicode"

; Download the application archive from the latest GitHub release and unpack it
; into $INSTDIR. The CUDA archive is used when the GPU checkbox was ticked.
!define SOFTWARE_RELEASE_URL "https://api.github.com/repos/misyaguziya/VRCT/releases/latest"
!define SOFTWARE_DOWNLOAD_FILENAME "VRCT.zip"
!define SOFTWARE_CUDA_DOWNLOAD_FILENAME "VRCT_cuda.zip"
!define SOFTWARE_JSON_FILENAME "response.json"
Var /GLOBAL i
Var /GLOBAL cmder_dl
Var /GLOBAL cmder_version
Var /GLOBAL file_name

${If} $CheckboxUseCUDA == "true"
  StrCpy $file_name "${SOFTWARE_CUDA_DOWNLOAD_FILENAME}"
${Else}
  StrCpy $file_name "${SOFTWARE_DOWNLOAD_FILENAME}"
${EndIf}

DetailPrint "Fetching Latest Release from GitHub (${SOFTWARE_RELEASE_URL})"
; /END stops inetc from reading further stack entries as extra URL/file pairs.
inetc::get /SILENT "${SOFTWARE_RELEASE_URL}" "$TEMP\${SOFTWARE_JSON_FILENAME}" /END
Pop $0
${If} $0 != "OK"
  DetailPrint "Fetching release information failed: $0"
  Abort
${EndIf}

DetailPrint "Parsing JSON..."
nsJSON::Set /file "$TEMP\${SOFTWARE_JSON_FILENAME}"
nsJSON::Get 'tag_name' /end
Pop $cmder_version
DetailPrint "Found version $cmder_version"

; Look for the asset whose name matches the wanted archive.
nsJSON::Get /count 'assets' /end
Pop $R0
StrCpy $cmder_dl ""
${If} $R0 > 0
  ; Valid asset indexes are 0 .. count-1.
  IntOp $R2 $R0 - 1
  ${ForEach} $i 0 $R2 + 1
    nsJSON::Get 'assets' /index $i 'name' /end
    Pop $R1
    ${If} $R1 == $file_name
      nsJSON::Get 'assets' /index $i 'browser_download_url' /end
      Pop $cmder_dl
      ${ExitFor}
    ${EndIf}
  ${Next}
${EndIf}
${If} $cmder_dl == ""
  ; Without this check a missing asset would lead to a download from an empty URL.
  DetailPrint "Release asset $file_name was not found in version $cmder_version"
  Abort
${EndIf}
DetailPrint "Got URL : $cmder_dl"

DetailPrint "Downloading $file_name..."
inetc::get $cmder_dl "$TEMP\$file_name" /END
Pop $0
${If} $0 != "OK"
  DetailPrint "Download Failed $0"
  Abort
${EndIf}

DetailPrint "Extracting $file_name ..."
nsisunz::UnzipToStack "$TEMP\$file_name" "$INSTDIR"
; The top of the stack is the status; the extracted file names pushed below it
; are left on the stack, as before.
Pop $0
${If} $0 != "success"
  DetailPrint "Extraction Failed $0"
  Abort
${EndIf}

; The downloaded files are no longer needed once extraction succeeded.
Delete "$TEMP\$file_name"
Delete "$TEMP\${SOFTWARE_JSON_FILENAME}"
; Create uninstaller
WriteUninstaller "$INSTDIR\uninstall.exe"