From 8dff6448b28f14b8a3c88cf9748cd4b1cc80aa11 Mon Sep 17 00:00:00 2001 From: vitaliibudnyi <27superuser@gmail.com> Date: Sat, 21 Feb 2026 21:10:40 +0200 Subject: [PATCH] add "text only" as another advanced option for captions format --- README.md | 2 +- app/dl_formats.py | 6 ++++- app/ytdl.py | 60 ++++++++++++++++++++++++++++++++++++++++++--- ui/src/app/app.html | 3 +++ ui/src/app/app.ts | 1 + 5 files changed, 67 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8c79ba6..91b7e5e 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Certain values can be set via environment variables, using the `-e` parameter on * __OUTPUT_TEMPLATE_CHANNEL__: The template for the filenames of the downloaded videos when downloaded as a channel. Defaults to `%(channel)s/%(title)s.%(ext)s`. When empty, then `OUTPUT_TEMPLATE` is used. * __YTDL_OPTIONS__: Additional options to pass to yt-dlp in JSON format. [See available options here](https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/YoutubeDL.py#L222). They roughly correspond to command-line options, though some do not have exact equivalents here. For example, `--recode-video` has to be specified via `postprocessors`. Also note that dashes are replaced with underscores. You may find [this script](https://github.com/yt-dlp/yt-dlp/blob/master/devscripts/cli_to_api.py) helpful for converting from command-line options to `YTDL_OPTIONS`. * __YTDL_OPTIONS_FILE__: A path to a JSON file that will be loaded and used for populating `YTDL_OPTIONS` above. Please note that if both `YTDL_OPTIONS_FILE` and `YTDL_OPTIONS` are specified, the options in `YTDL_OPTIONS` take precedence. The file will be monitored for changes and reloaded automatically when changes are detected. -* UI format __Captions__: Downloads subtitles/captions only (no media). Subtitle format, language, and source preference are configurable from Advanced Options (defaults: `srt`, `en`, `prefer_manual`). +* UI format __Captions__: Downloads subtitles/captions only (no media). Subtitle format, language, and source preference are configurable from Advanced Options (defaults: `srt`, `en`, `prefer_manual`). `txt` is generated from `srt` by stripping timestamps and cue numbers. ### 🌐 Web Server & URLs diff --git a/app/dl_formats.py b/app/dl_formats.py index 867ceee..e94ec9a 100644 --- a/app/dl_formats.py +++ b/app/dl_formats.py @@ -124,7 +124,11 @@ def get_opts( mode = _normalize_caption_mode(subtitle_mode) language = _normalize_subtitle_language(subtitle_language) opts["skip_download"] = True - opts["subtitlesformat"] = subtitle_format or "srt" + requested_subtitle_format = (subtitle_format or "srt").lower() + # txt is a derived, non-timed format produced from SRT after download. + if requested_subtitle_format == "txt": + requested_subtitle_format = "srt" + opts["subtitlesformat"] = requested_subtitle_format if mode == "manual_only": opts["writesubtitles"] = True opts["writeautomaticsub"] = False diff --git a/app/ytdl.py b/app/ytdl.py index f7924de..b1d187d 100644 --- a/app/ytdl.py +++ b/app/ytdl.py @@ -69,6 +69,45 @@ def _convert_generators_to_lists(obj): else: return obj + +def _convert_srt_to_txt_file(subtitle_path: str): + """Convert an SRT subtitle file into plain text by stripping cue numbers/timestamps.""" + txt_path = os.path.splitext(subtitle_path)[0] + ".txt" + try: + with open(subtitle_path, "r", encoding="utf-8", errors="replace") as infile: + content = infile.read() + + # Normalize newlines so cue splitting is consistent across platforms. + content = content.replace("\r\n", "\n").replace("\r", "\n") + cues = [] + for block in re.split(r"\n{2,}", content): + lines = [line.strip() for line in block.split("\n") if line.strip()] + if not lines: + continue + if re.fullmatch(r"\d+", lines[0]): + lines = lines[1:] + if lines and "-->" in lines[0]: + lines = lines[1:] + + text_lines = [] + for line in lines: + if "-->" in line: + continue + clean_line = re.sub(r"<[^>]+>", "", line).strip() + if clean_line: + text_lines.append(clean_line) + if text_lines: + cues.append(" ".join(text_lines)) + + with open(txt_path, "w", encoding="utf-8") as outfile: + if cues: + outfile.write("\n".join(cues)) + outfile.write("\n") + return txt_path + except OSError as exc: + log.warning(f"Failed to convert subtitle file {subtitle_path} to txt: {exc}") + return None + class DownloadQueueNotifier: async def added(self, dl): raise NotImplementedError @@ -298,7 +337,7 @@ class Download: rel_name = os.path.relpath(fileName, self.download_dir) # For captions mode, ignore media-like placeholders and let subtitle_file # statuses define the final file shown in the UI. - if not (self.info.format == 'captions' and not rel_name.endswith(('.vtt', '.srt', '.ass', '.ttml'))): + if not (self.info.format == 'captions' and not rel_name.endswith(('.vtt', '.srt', '.ttml', '.txt'))): self.info.filename = rel_name self.info.size = os.path.getsize(fileName) if os.path.exists(fileName) else None if self.info.format == 'thumbnail': @@ -321,10 +360,25 @@ class Download: if 'subtitle_file' in status: subtitle_file = status.get('subtitle_file') + if not subtitle_file: + continue + subtitle_output_file = subtitle_file + + # txt mode is derived from SRT by stripping cue metadata. + if self.info.format == 'captions' and str(getattr(self.info, 'subtitle_format', '')).lower() == 'txt': + converted_txt = _convert_srt_to_txt_file(subtitle_file) + if converted_txt: + subtitle_output_file = converted_txt + if converted_txt != subtitle_file: + try: + os.remove(subtitle_file) + except OSError as exc: + log.debug(f"Could not remove temporary SRT file {subtitle_file}: {exc}") + if not hasattr(self.info, 'subtitle_files'): self.info.subtitle_files = [] - rel_path = os.path.relpath(subtitle_file, self.download_dir) - file_size = os.path.getsize(subtitle_file) if os.path.exists(subtitle_file) else None + rel_path = os.path.relpath(subtitle_output_file, self.download_dir) + file_size = os.path.getsize(subtitle_output_file) if os.path.exists(subtitle_output_file) else None existing = next((sf for sf in self.info.subtitle_files if sf['filename'] == rel_path), None) if not existing: self.info.subtitle_files.append({'filename': rel_path, 'size': file_size}) diff --git a/ui/src/app/app.html b/ui/src/app/app.html index b7da999..9149a7f 100644 --- a/ui/src/app/app.html +++ b/ui/src/app/app.html @@ -232,6 +232,9 @@ } + @if (subtitleFormat === 'txt') { +
TXT is generated from SRT by stripping timestamps and cue numbers.
+ }
diff --git a/ui/src/app/app.ts b/ui/src/app/app.ts index d7651d9..525d668 100644 --- a/ui/src/app/app.ts +++ b/ui/src/app/app.ts @@ -102,6 +102,7 @@ export class App implements AfterViewInit, OnInit { faTachometerAlt = faTachometerAlt; subtitleFormats = [ { id: 'srt', text: 'SRT' }, + { id: 'txt', text: 'TXT (Text only)' }, { id: 'vtt', text: 'VTT' }, { id: 'ttml', text: 'TTML' } ];