added NLLB translation

2023-03-27 21:28:47 +09:00
parent 4e82fb64b3
commit 3fde2e0f02
4 changed files with 84 additions and 2 deletions
--- a/app.py
+++ b/app.py
@@ -1,7 +1,8 @@
 import gradio as gr
 from modules.whisper_Inference import WhisperInference
+from modules.nllb_inference import NLLBInference
 import os
-from ui.htmls import CSS, MARKDOWN
+from ui.htmls import CSS, MARKDOWN, NLLB_VRAM_TABLE
 from modules.youtube_manager import get_ytmetas


@@ -21,6 +22,7 @@ def on_change_models(model_size):


 whisper_inf = WhisperInference()
+nllb_inf = NLLBInference()  # NLLB translator backing the "T2T Translation" tab below
 block = gr.Blocks(css=CSS).queue(api_open=False)

 with block:
@@ -100,4 +102,29 @@ with block:
            btn_openfolder.click(fn=lambda: open_fodler("outputs"), inputs=None, outputs=None)
            dd_model.change(fn=on_change_models, inputs=[dd_model], outputs=[cb_translate])

+        with gr.TabItem("T2T Translation"):  # tab 4: text-to-text translation of uploaded subtitle files
+            with gr.Row():
+                file_subs = gr.Files(type="file", label="Upload Subtitle Files to translate here",
+                                     file_types=['.vtt', '.srt'])
+
+            with gr.TabItem("NLLB"):  # sub tab 1: controls for the NLLB translation backend
+                with gr.Row():
+                    dd_nllb_model = gr.Dropdown(label="Model", value=nllb_inf.default_model_size,
+                                                choices=nllb_inf.available_models)
+                    dd_nllb_sourcelang = gr.Dropdown(label="Source Language", choices=nllb_inf.available_source_langs)
+                    dd_nllb_targetlang = gr.Dropdown(label="Target Language", choices=nllb_inf.available_target_langs)
+                with gr.Row():
+                    btn_run = gr.Button("TRANSLATE SUBTITLE FILE", variant="primary")
+                with gr.Row():
+                    tb_indicator = gr.Textbox(label="Output")
+                    btn_openfolder = gr.Button('📂').style(full_width=False)
+                with gr.Column():
+                    md_vram_table = gr.HTML(NLLB_VRAM_TABLE, elem_id="md_nllb_vram_table")
+
+            btn_run.click(fn=nllb_inf.translate_file,
+                          inputs=[file_subs, dd_nllb_model, dd_nllb_sourcelang, dd_nllb_targetlang],
+                          outputs=[tb_indicator])
+            btn_openfolder.click(fn=lambda: open_fodler(os.path.join("outputs", "translations")), inputs=None, outputs=None)
+
+
 block.launch()
--- a/modules/nllb_inference.py
+++ b/modules/nllb_inference.py
@@ -68,7 +68,7 @@ class NLLBInference:

                write_file(subtitle, f"{output_path}.srt")

-            elif file_path == ".vtt":
+            elif file_ext == ".vtt":
                parsed_dicts = parse_vtt(file_path=file_path)
                total_progress = len(parsed_dicts)
                for index, dic in enumerate(parsed_dicts):
--- a/outputs/translations/outputs
+++ b/outputs/translations/outputs
--- a/ui/htmls.py
+++ b/ui/htmls.py
@@ -39,4 +39,59 @@ CSS = """

 MARKDOWN = """
 ### [Whisper Web-UI](https://github.com/jhj0517/Whsiper-WebUI)
+"""
+
+# Collapsible HTML table of approximate VRAM needs per NLLB model; rendered via gr.HTML in app.py.
+NLLB_VRAM_TABLE = """
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <style>
+    table {
+      border-collapse: collapse;
+      width: 100%;
+    }
+    th, td {
+      border: 1px solid #dddddd;
+      text-align: left;
+      padding: 8px;
+    }
+    th {
+      background-color: #f2f2f2;
+    }
+  </style>
+</head>
+<body>
+
+<details>
+  <summary>VRAM usage for each model</summary>
+  <table>
+    <thead>
+      <tr>
+        <th>Model name</th>
+        <th>Required VRAM</th>
+      </tr>
+    </thead>
+    <tbody>
+      <tr>
+        <td>nllb-200-3.3B</td>
+        <td>~16GB</td>
+      </tr>
+      <tr>
+        <td>nllb-200-1.3B</td>
+        <td>~8GB</td>
+      </tr>
+      <tr>
+        <td>nllb-200-distilled-600M</td>
+        <td>~4GB</td>
+      </tr>
+    </tbody>
+  </table>
+  <p><strong>Note:</strong> Be mindful of your VRAM! The table above provides an approximate VRAM usage for each model.</p>
+</details>
+
+</body>
+</html>
 """