mirror of
https://github.com/alexta69/metube.git
synced 2026-03-18 14:33:50 +00:00
add "text only" as another advanced option for captions format
This commit is contained in:
committed by
Alex Shnitman
parent
dd4e05325a
commit
8dff6448b2
@@ -59,7 +59,7 @@ Certain values can be set via environment variables, using the `-e` parameter on
|
||||
* __OUTPUT_TEMPLATE_CHANNEL__: The template for the filenames of the downloaded videos when downloaded as a channel. Defaults to `%(channel)s/%(title)s.%(ext)s`. When empty, `OUTPUT_TEMPLATE` is used instead.
|
||||
* __YTDL_OPTIONS__: Additional options to pass to yt-dlp in JSON format. [See available options here](https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/YoutubeDL.py#L222). They roughly correspond to command-line options, though some do not have exact equivalents here. For example, `--recode-video` has to be specified via `postprocessors`. Also note that dashes are replaced with underscores. You may find [this script](https://github.com/yt-dlp/yt-dlp/blob/master/devscripts/cli_to_api.py) helpful for converting from command-line options to `YTDL_OPTIONS`.
|
||||
* __YTDL_OPTIONS_FILE__: A path to a JSON file that will be loaded and used for populating `YTDL_OPTIONS` above. Please note that if both `YTDL_OPTIONS_FILE` and `YTDL_OPTIONS` are specified, the options in `YTDL_OPTIONS` take precedence. The file will be monitored for changes and reloaded automatically when changes are detected.
|
||||
* UI format __Captions__: Downloads subtitles/captions only (no media). Subtitle format, language, and source preference are configurable from Advanced Options (defaults: `srt`, `en`, `prefer_manual`).
|
||||
* UI format __Captions__: Downloads subtitles/captions only (no media). Subtitle format, language, and source preference are configurable from Advanced Options (defaults: `srt`, `en`, `prefer_manual`). `txt` is generated from `srt` by stripping timestamps and cue numbers.
|
||||
|
||||
### 🌐 Web Server & URLs
|
||||
|
||||
|
||||
@@ -124,7 +124,11 @@ def get_opts(
|
||||
mode = _normalize_caption_mode(subtitle_mode)
|
||||
language = _normalize_subtitle_language(subtitle_language)
|
||||
opts["skip_download"] = True
|
||||
opts["subtitlesformat"] = subtitle_format or "srt"
|
||||
requested_subtitle_format = (subtitle_format or "srt").lower()
|
||||
# txt is a derived, non-timed format produced from SRT after download.
|
||||
if requested_subtitle_format == "txt":
|
||||
requested_subtitle_format = "srt"
|
||||
opts["subtitlesformat"] = requested_subtitle_format
|
||||
if mode == "manual_only":
|
||||
opts["writesubtitles"] = True
|
||||
opts["writeautomaticsub"] = False
|
||||
|
||||
60
app/ytdl.py
60
app/ytdl.py
@@ -69,6 +69,45 @@ def _convert_generators_to_lists(obj):
|
||||
else:
|
||||
return obj
|
||||
|
||||
|
||||
def _convert_srt_to_txt_file(subtitle_path: str):
    """Convert an SRT subtitle file into plain text by stripping cue numbers/timestamps.

    The text is written next to the source file, with the extension replaced
    by ``.txt``. Each cue becomes one output line: the lines of a multi-line
    cue are joined with single spaces and ``<...>`` markup tags are removed.

    Args:
        subtitle_path: Path of the SRT file to read.

    Returns:
        The path of the written ``.txt`` file, or ``None`` when reading or
        writing raised ``OSError`` (the failure is logged as a warning).
    """
    txt_path = os.path.splitext(subtitle_path)[0] + ".txt"
    try:
        # "utf-8-sig" drops a leading byte-order mark. With plain "utf-8" the
        # BOM stays glued to the first cue number, which then fails the digit
        # check below and leaks into the generated text.
        with open(subtitle_path, "r", encoding="utf-8-sig", errors="replace") as infile:
            content = infile.read()

        # Normalize newlines so cue splitting is consistent across platforms.
        content = content.replace("\r\n", "\n").replace("\r", "\n")
        cues = []
        for block in re.split(r"\n{2,}", content):
            lines = [line.strip() for line in block.split("\n") if line.strip()]
            if not lines:
                continue
            # Drop the numeric cue index, then the timing line, when present.
            if re.fullmatch(r"\d+", lines[0]):
                lines = lines[1:]
            if lines and "-->" in lines[0]:
                lines = lines[1:]

            text_lines = []
            for line in lines:
                # Any remaining timing line inside the block is not caption text.
                if "-->" in line:
                    continue
                # Remove inline markup such as <i>...</i> or <font ...>.
                clean_line = re.sub(r"<[^>]+>", "", line).strip()
                if clean_line:
                    text_lines.append(clean_line)
            if text_lines:
                cues.append(" ".join(text_lines))

        with open(txt_path, "w", encoding="utf-8") as outfile:
            # An input without any caption text yields an empty output file.
            if cues:
                outfile.write("\n".join(cues))
                outfile.write("\n")
        return txt_path
    except OSError as exc:
        log.warning(f"Failed to convert subtitle file {subtitle_path} to txt: {exc}")
        return None
|
||||
|
||||
class DownloadQueueNotifier:
|
||||
async def added(self, dl):
|
||||
raise NotImplementedError
|
||||
@@ -298,7 +337,7 @@ class Download:
|
||||
rel_name = os.path.relpath(fileName, self.download_dir)
|
||||
# For captions mode, ignore media-like placeholders and let subtitle_file
|
||||
# statuses define the final file shown in the UI.
|
||||
if not (self.info.format == 'captions' and not rel_name.endswith(('.vtt', '.srt', '.ass', '.ttml'))):
|
||||
if not (self.info.format == 'captions' and not rel_name.endswith(('.vtt', '.srt', '.ttml', '.txt'))):
|
||||
self.info.filename = rel_name
|
||||
self.info.size = os.path.getsize(fileName) if os.path.exists(fileName) else None
|
||||
if self.info.format == 'thumbnail':
|
||||
@@ -321,10 +360,25 @@ class Download:
|
||||
|
||||
if 'subtitle_file' in status:
|
||||
subtitle_file = status.get('subtitle_file')
|
||||
if not subtitle_file:
|
||||
continue
|
||||
subtitle_output_file = subtitle_file
|
||||
|
||||
# txt mode is derived from SRT by stripping cue metadata.
|
||||
if self.info.format == 'captions' and str(getattr(self.info, 'subtitle_format', '')).lower() == 'txt':
|
||||
converted_txt = _convert_srt_to_txt_file(subtitle_file)
|
||||
if converted_txt:
|
||||
subtitle_output_file = converted_txt
|
||||
if converted_txt != subtitle_file:
|
||||
try:
|
||||
os.remove(subtitle_file)
|
||||
except OSError as exc:
|
||||
log.debug(f"Could not remove temporary SRT file {subtitle_file}: {exc}")
|
||||
|
||||
if not hasattr(self.info, 'subtitle_files'):
|
||||
self.info.subtitle_files = []
|
||||
rel_path = os.path.relpath(subtitle_file, self.download_dir)
|
||||
file_size = os.path.getsize(subtitle_file) if os.path.exists(subtitle_file) else None
|
||||
rel_path = os.path.relpath(subtitle_output_file, self.download_dir)
|
||||
file_size = os.path.getsize(subtitle_output_file) if os.path.exists(subtitle_output_file) else None
|
||||
existing = next((sf for sf in self.info.subtitle_files if sf['filename'] == rel_path), None)
|
||||
if not existing:
|
||||
self.info.subtitle_files.append({'filename': rel_path, 'size': file_size})
|
||||
|
||||
@@ -232,6 +232,9 @@
|
||||
}
|
||||
</select>
|
||||
</div>
|
||||
@if (subtitleFormat === 'txt') {
|
||||
<div class="form-text">TXT is generated from SRT by stripping timestamps and cue numbers.</div>
|
||||
}
|
||||
</div>
|
||||
<div class="col-md-4">
|
||||
<div class="input-group">
|
||||
|
||||
@@ -102,6 +102,7 @@ export class App implements AfterViewInit, OnInit {
|
||||
faTachometerAlt = faTachometerAlt;
|
||||
subtitleFormats = [
|
||||
{ id: 'srt', text: 'SRT' },
|
||||
{ id: 'txt', text: 'TXT (Text only)' },
|
||||
{ id: 'vtt', text: 'VTT' },
|
||||
{ id: 'ttml', text: 'TTML' }
|
||||
];
|
||||
|
||||
Reference in New Issue
Block a user