From 8dff6448b28f14b8a3c88cf9748cd4b1cc80aa11 Mon Sep 17 00:00:00 2001
From: vitaliibudnyi <27superuser@gmail.com>
Date: Sat, 21 Feb 2026 21:10:40 +0200
Subject: [PATCH] add "text only" as another advanced option for captions
 format

---
 README.md           |  2 +-
 app/dl_formats.py   |  6 ++++-
 app/ytdl.py         | 60 ++++++++++++++++++++++++++++++++++++++++++---
 ui/src/app/app.html |  3 +++
 ui/src/app/app.ts   |  1 +
 5 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 8c79ba6..91b7e5e 100644
--- a/README.md
+++ b/README.md
@@ -59,7 +59,7 @@ Certain values can be set via environment variables, using the `-e` parameter on
 * __OUTPUT_TEMPLATE_CHANNEL__: The template for the filenames of the downloaded videos when downloaded as a channel. Defaults to `%(channel)s/%(title)s.%(ext)s`. When empty, then `OUTPUT_TEMPLATE` is used.
 * __YTDL_OPTIONS__: Additional options to pass to yt-dlp in JSON format. [See available options here](https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/YoutubeDL.py#L222). They roughly correspond to command-line options, though some do not have exact equivalents here. For example, `--recode-video` has to be specified via `postprocessors`. Also note that dashes are replaced with underscores. You may find [this script](https://github.com/yt-dlp/yt-dlp/blob/master/devscripts/cli_to_api.py) helpful for converting from command-line options to `YTDL_OPTIONS`.
 * __YTDL_OPTIONS_FILE__: A path to a JSON file that will be loaded and used for populating `YTDL_OPTIONS` above. Please note that if both `YTDL_OPTIONS_FILE` and `YTDL_OPTIONS` are specified, the options in `YTDL_OPTIONS` take precedence. The file will be monitored for changes and reloaded automatically when changes are detected.
-* UI format __Captions__: Downloads subtitles/captions only (no media). Subtitle format, language, and source preference are configurable from Advanced Options (defaults: `srt`, `en`, `prefer_manual`).
+* UI format __Captions__: Downloads subtitles/captions only (no media). Subtitle format, language, and source preference are configurable from Advanced Options (defaults: `srt`, `en`, `prefer_manual`). `txt` is generated from `srt` by stripping timestamps and cue numbers.
 
 ### 🌐 Web Server & URLs
 
diff --git a/app/dl_formats.py b/app/dl_formats.py
index 867ceee..e94ec9a 100644
--- a/app/dl_formats.py
+++ b/app/dl_formats.py
@@ -124,7 +124,11 @@ def get_opts(
         mode = _normalize_caption_mode(subtitle_mode)
         language = _normalize_subtitle_language(subtitle_language)
         opts["skip_download"] = True
-        opts["subtitlesformat"] = subtitle_format or "srt"
+        requested_subtitle_format = (subtitle_format or "srt").lower()
+        # txt is a derived, non-timed format produced from SRT after download.
+        if requested_subtitle_format == "txt":
+            requested_subtitle_format = "srt"
+        opts["subtitlesformat"] = requested_subtitle_format
         if mode == "manual_only":
             opts["writesubtitles"] = True
             opts["writeautomaticsub"] = False
diff --git a/app/ytdl.py b/app/ytdl.py
index f7924de..b1d187d 100644
--- a/app/ytdl.py
+++ b/app/ytdl.py
@@ -69,6 +69,45 @@ def _convert_generators_to_lists(obj):
     else:
         return obj
 
+
+def _convert_srt_to_txt_file(subtitle_path: str):
+    """Convert an SRT subtitle file into plain text, one line per cue.
+
+    Cue numbers, timestamp lines and inline markup tags are stripped and the
+    result is written next to the source file with a ``.txt`` extension.
+
+    Returns the path of the written file, or ``None`` when reading or writing
+    raises ``OSError`` (the failure is logged as a warning).
+    """
+    txt_path = os.path.splitext(subtitle_path)[0] + ".txt"
+    try:
+        # utf-8-sig drops a leading BOM; with plain utf-8 the BOM sticks to the
+        # first cue number, which then fails the digit match and leaks into the text.
+        with open(subtitle_path, "r", encoding="utf-8-sig", errors="replace") as infile:
+            content = infile.read()
+
+        # Normalize newlines so cue splitting is consistent across platforms.
+        content = content.replace("\r\n", "\n").replace("\r", "\n")
+        cues = []
+        for block in re.split(r"\n{2,}", content):
+            lines = [line.strip() for line in block.split("\n") if line.strip()]
+            if not lines:
+                continue
+            if re.fullmatch(r"\d+", lines[0]):
+                lines = lines[1:]
+            # Drop timestamp lines, then strip inline tags such as <i> or <font>.
+            stripped = (re.sub(r"<[^>]+>", "", ln).strip() for ln in lines if "-->" not in ln)
+            text_lines = [ln for ln in stripped if ln]
+            if text_lines:
+                cues.append(" ".join(text_lines))
+
+        with open(txt_path, "w", encoding="utf-8") as outfile:
+            outfile.write("".join(f"{cue}\n" for cue in cues))
+        return txt_path
+    except OSError as exc:
+        log.warning(f"Failed to convert subtitle file {subtitle_path} to txt: {exc}")
+        return None
+
 class DownloadQueueNotifier:
     async def added(self, dl):
         raise NotImplementedError
@@ -298,7 +337,7 @@ class Download:
                 rel_name = os.path.relpath(fileName, self.download_dir)
                 # For captions mode, ignore media-like placeholders and let subtitle_file
                 # statuses define the final file shown in the UI.
-                if not (self.info.format == 'captions' and not rel_name.endswith(('.vtt', '.srt', '.ass', '.ttml'))):
+                if not (self.info.format == 'captions' and not rel_name.endswith(('.vtt', '.srt', '.ttml', '.txt'))):
                     self.info.filename = rel_name
                     self.info.size = os.path.getsize(fileName) if os.path.exists(fileName) else None
                     if self.info.format == 'thumbnail':
@@ -321,10 +360,25 @@ class Download:
 
             if 'subtitle_file' in status:
                 subtitle_file = status.get('subtitle_file')
+                if not subtitle_file:
+                    continue
+                subtitle_output_file = subtitle_file
+
+                # txt mode is derived from SRT by stripping cue metadata.
+                if self.info.format == 'captions' and str(getattr(self.info, 'subtitle_format', '')).lower() == 'txt':
+                    converted_txt = _convert_srt_to_txt_file(subtitle_file)
+                    if converted_txt:
+                        subtitle_output_file = converted_txt
+                        if converted_txt != subtitle_file:
+                            try:
+                                os.remove(subtitle_file)
+                            except OSError as exc:
+                                log.debug(f"Could not remove temporary SRT file {subtitle_file}: {exc}")
+
                 if not hasattr(self.info, 'subtitle_files'):
                     self.info.subtitle_files = []
-                rel_path = os.path.relpath(subtitle_file, self.download_dir)
-                file_size = os.path.getsize(subtitle_file) if os.path.exists(subtitle_file) else None
+                rel_path = os.path.relpath(subtitle_output_file, self.download_dir)
+                file_size = os.path.getsize(subtitle_output_file) if os.path.exists(subtitle_output_file) else None
                 existing = next((sf for sf in self.info.subtitle_files if sf['filename'] == rel_path), None)
                 if not existing:
                     self.info.subtitle_files.append({'filename': rel_path, 'size': file_size})
diff --git a/ui/src/app/app.html b/ui/src/app/app.html
index b7da999..9149a7f 100644
--- a/ui/src/app/app.html
+++ b/ui/src/app/app.html
@@ -232,6 +232,9 @@
                       }
                     </select>
                   </div>
+                  @if (subtitleFormat === 'txt') {
+                    <div class="form-text">TXT is generated from SRT by stripping timestamps and cue numbers.</div>
+                  }
                 </div>
                 <div class="col-md-4">
                   <div class="input-group">
diff --git a/ui/src/app/app.ts b/ui/src/app/app.ts
index d7651d9..525d668 100644
--- a/ui/src/app/app.ts
+++ b/ui/src/app/app.ts
@@ -102,6 +102,7 @@ export class App implements AfterViewInit, OnInit {
   faTachometerAlt = faTachometerAlt;
   subtitleFormats = [
     { id: 'srt', text: 'SRT' },
+    { id: 'txt', text: 'TXT (Text only)' },
     { id: 'vtt', text: 'VTT' },
     { id: 'ttml', text: 'TTML' }
   ];