add "text only" as another advanced option for captions format

This commit is contained in:
vitaliibudnyi
2026-02-21 21:10:40 +02:00
committed by Alex Shnitman
parent dd4e05325a
commit 8dff6448b2
5 changed files with 67 additions and 5 deletions

View File

@@ -59,7 +59,7 @@ Certain values can be set via environment variables, using the `-e` parameter on
* __OUTPUT_TEMPLATE_CHANNEL__: The template for the filenames of the downloaded videos when downloaded as a channel. Defaults to `%(channel)s/%(title)s.%(ext)s`. When empty, `OUTPUT_TEMPLATE` is used instead.
* __YTDL_OPTIONS__: Additional options to pass to yt-dlp in JSON format. [See available options here](https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/YoutubeDL.py#L222). They roughly correspond to command-line options, though some do not have exact equivalents here. For example, `--recode-video` has to be specified via `postprocessors`. Also note that dashes are replaced with underscores. You may find [this script](https://github.com/yt-dlp/yt-dlp/blob/master/devscripts/cli_to_api.py) helpful for converting from command-line options to `YTDL_OPTIONS`.
* __YTDL_OPTIONS_FILE__: A path to a JSON file that will be loaded and used for populating `YTDL_OPTIONS` above. Please note that if both `YTDL_OPTIONS_FILE` and `YTDL_OPTIONS` are specified, the options in `YTDL_OPTIONS` take precedence. The file will be monitored for changes and reloaded automatically when changes are detected.
* UI format __Captions__: Downloads subtitles/captions only (no media). Subtitle format, language, and source preference are configurable from Advanced Options (defaults: `srt`, `en`, `prefer_manual`).
* UI format __Captions__: Downloads subtitles/captions only (no media). Subtitle format, language, and source preference are configurable from Advanced Options (defaults: `srt`, `en`, `prefer_manual`). `txt` is generated from `srt` by stripping timestamps and cue numbers.
### 🌐 Web Server & URLs

View File

@@ -124,7 +124,11 @@ def get_opts(
mode = _normalize_caption_mode(subtitle_mode)
language = _normalize_subtitle_language(subtitle_language)
opts["skip_download"] = True
opts["subtitlesformat"] = subtitle_format or "srt"
requested_subtitle_format = (subtitle_format or "srt").lower()
# txt is a derived, non-timed format produced from SRT after download.
if requested_subtitle_format == "txt":
requested_subtitle_format = "srt"
opts["subtitlesformat"] = requested_subtitle_format
if mode == "manual_only":
opts["writesubtitles"] = True
opts["writeautomaticsub"] = False

View File

@@ -69,6 +69,45 @@ def _convert_generators_to_lists(obj):
else:
return obj
def _convert_srt_to_txt_file(subtitle_path: str):
"""Convert an SRT subtitle file into plain text by stripping cue numbers/timestamps."""
txt_path = os.path.splitext(subtitle_path)[0] + ".txt"
try:
with open(subtitle_path, "r", encoding="utf-8", errors="replace") as infile:
content = infile.read()
# Normalize newlines so cue splitting is consistent across platforms.
content = content.replace("\r\n", "\n").replace("\r", "\n")
cues = []
for block in re.split(r"\n{2,}", content):
lines = [line.strip() for line in block.split("\n") if line.strip()]
if not lines:
continue
if re.fullmatch(r"\d+", lines[0]):
lines = lines[1:]
if lines and "-->" in lines[0]:
lines = lines[1:]
text_lines = []
for line in lines:
if "-->" in line:
continue
clean_line = re.sub(r"<[^>]+>", "", line).strip()
if clean_line:
text_lines.append(clean_line)
if text_lines:
cues.append(" ".join(text_lines))
with open(txt_path, "w", encoding="utf-8") as outfile:
if cues:
outfile.write("\n".join(cues))
outfile.write("\n")
return txt_path
except OSError as exc:
log.warning(f"Failed to convert subtitle file {subtitle_path} to txt: {exc}")
return None
class DownloadQueueNotifier:
async def added(self, dl):
raise NotImplementedError
@@ -298,7 +337,7 @@ class Download:
rel_name = os.path.relpath(fileName, self.download_dir)
# For captions mode, ignore media-like placeholders and let subtitle_file
# statuses define the final file shown in the UI.
if not (self.info.format == 'captions' and not rel_name.endswith(('.vtt', '.srt', '.ass', '.ttml'))):
if not (self.info.format == 'captions' and not rel_name.endswith(('.vtt', '.srt', '.ttml', '.txt'))):
self.info.filename = rel_name
self.info.size = os.path.getsize(fileName) if os.path.exists(fileName) else None
if self.info.format == 'thumbnail':
@@ -321,10 +360,25 @@ class Download:
if 'subtitle_file' in status:
subtitle_file = status.get('subtitle_file')
if not subtitle_file:
continue
subtitle_output_file = subtitle_file
# txt mode is derived from SRT by stripping cue metadata.
if self.info.format == 'captions' and str(getattr(self.info, 'subtitle_format', '')).lower() == 'txt':
converted_txt = _convert_srt_to_txt_file(subtitle_file)
if converted_txt:
subtitle_output_file = converted_txt
if converted_txt != subtitle_file:
try:
os.remove(subtitle_file)
except OSError as exc:
log.debug(f"Could not remove temporary SRT file {subtitle_file}: {exc}")
if not hasattr(self.info, 'subtitle_files'):
self.info.subtitle_files = []
rel_path = os.path.relpath(subtitle_file, self.download_dir)
file_size = os.path.getsize(subtitle_file) if os.path.exists(subtitle_file) else None
rel_path = os.path.relpath(subtitle_output_file, self.download_dir)
file_size = os.path.getsize(subtitle_output_file) if os.path.exists(subtitle_output_file) else None
existing = next((sf for sf in self.info.subtitle_files if sf['filename'] == rel_path), None)
if not existing:
self.info.subtitle_files.append({'filename': rel_path, 'size': file_size})

View File

@@ -232,6 +232,9 @@
}
</select>
</div>
@if (subtitleFormat === 'txt') {
<div class="form-text">TXT is generated from SRT by stripping timestamps and cue numbers.</div>
}
</div>
<div class="col-md-4">
<div class="input-group">

View File

@@ -102,6 +102,7 @@ export class App implements AfterViewInit, OnInit {
faTachometerAlt = faTachometerAlt;
subtitleFormats = [
{ id: 'srt', text: 'SRT' },
{ id: 'txt', text: 'TXT (Text only)' },
{ id: 'vtt', text: 'VTT' },
{ id: 'ttml', text: 'TTML' }
];