Loading modules/Admin/Language/en/Episode.php +2 −2 Original line number Diff line number Diff line Loading @@ -139,9 +139,9 @@ return [ 'location_name' => 'Location name or address', 'location_name_hint' => 'This can be a real or fictional location', 'transcript' => 'Transcript (subtitles / closed captions)', 'transcript_hint' => 'Only .srt are allowed.', 'transcript_hint' => 'Only .srt or .vtt are allowed.', 'transcript_download' => 'Download transcript', 'transcript_file' => 'Transcript file (.srt)', 'transcript_file' => 'Transcript file (.srt or .vtt)', 'transcript_remote_url' => 'Remote url for transcript', 'transcript_file_delete' => 'Delete transcript file', 'chapters' => 'Chapters', Loading modules/Media/Entities/Transcript.php +13 −4 Original line number Diff line number Diff line Loading @@ -76,16 +76,25 @@ class Transcript extends BaseMedia private function saveJsonTranscript(): void { $srtContent = file_get_contents($this->file->getRealPath()); $transcriptContent = file_get_contents($this->file->getRealPath()); $transcriptParser = new TranscriptParser(); if ($srtContent === false) { if ($transcriptContent === false) { throw new Exception('Could not read transcript file at ' . $this->file->getRealPath()); } $transcriptJson = $transcriptParser->loadString($srtContent) $transcript_format = $this->file->getExtension(); switch ($transcript_format) { case 'vtt': $transcriptJson = $transcriptParser->loadString($transcriptContent) ->parseVtt(); break; case 'srt': default: $transcriptJson = $transcriptParser->loadString($transcriptContent) ->parseSrt(); } $tempFilePath = WRITEPATH . 'uploads/' . $this->file->getRandomName(); file_put_contents($tempFilePath, $transcriptJson); Loading modules/Media/TranscriptParser.php +106 −1 Original line number Diff line number Diff line Loading @@ -3,7 +3,7 @@ declare(strict_types=1); /** * Generates and renders a breadcrumb based on the current url segments * Converts a SRT or VTT file to JSON * * @copyright 2022 Ad Aures * @license https://www.gnu.org/licenses/agpl-3.0.en.html AGPL3 Loading Loading @@ -107,9 +107,114 @@ class TranscriptParser return $jsonString; } public function parseVtt(): string { if (! defined('VTT_STATE_HEADER')) { define('VTT_STATE_HEADER', 0); } if (! defined('VTT_STATE_BLANK')) { define('VTT_STATE_BLANK', 1); } if (! defined('VTT_STATE_TIME')) { define('VTT_STATE_TIME', 2); } if (! defined('VTT_STATE_TEXT')) { define('VTT_STATE_TEXT', 3); } $subs = []; $state = VTT_STATE_HEADER; $subNum = 0; $subText = ''; $subTime = ''; $lines = explode(PHP_EOL, $this->transcriptContent); // add a newline as last item, if it isn't already a newline if ($lines[array_key_last($lines)] !== '') { $lines[] = PHP_EOL; } foreach ($lines as $line) { switch ($state) { case VTT_STATE_HEADER: $state = VTT_STATE_BLANK; break; case VTT_STATE_BLANK: $state = VTT_STATE_TIME; break; case VTT_STATE_TIME: $subTime = trim($line); $state = VTT_STATE_TEXT; break; case VTT_STATE_TEXT: if (trim($line) === '') { $sub = new stdClass(); $sub->number = $subNum; [$startTime, $endTime] = explode(' --> ', $subTime); $sub->startTime = $this->getSecondsFromVTTTimeString($startTime); $sub->endTime = $this->getSecondsFromVTTTimeString($endTime); $sub->text = trim($subText); if ($subSpeaker !== '') { $sub->speaker = trim((string) $subSpeaker); } $subText = ''; $state = VTT_STATE_TIME; $subs[] = $sub; ++$subNum; } elseif ($subText !== '') { $subText .= PHP_EOL . $line; } else { /** VTT includes a lot of information on the spoken line * An example may look like this: * <v.loud.top John>So this is it * We need to break this down into it's components, namely: * 1. The actual words for the caption * 2. Who is speaking * 3. Any styling cues encoded in the VTT (which we dump) * More information: https://www.w3.org/TR/webvtt1/ */ $vtt_speaker_pattern = '/^<.*>/'; $removethese = ['<', '>']; preg_match($vtt_speaker_pattern, $line, $matches); if (isset($matches[0])) { $subVoiceCue = explode(' ', str_replace($removethese, '', $matches[0])); $subSpeaker = $subVoiceCue[1]; } else { $subSpeaker = ''; } $subText .= preg_replace($vtt_speaker_pattern, '', $line); } break; } } $jsonString = json_encode($subs, JSON_PRETTY_PRINT); if (! $jsonString) { throw new Exception('Failed to parse VTT to JSON.'); } return $jsonString; } private function getSecondsFromTimeString(string $timeString): float { $timeString = explode(',', $timeString); return (strtotime($timeString[0]) - strtotime('TODAY')) + (float) "0.{$timeString[1]}"; } private function getSecondsFromVTTTimeString(string $timeString): float { $timeString = explode('.', $timeString); return (strtotime($timeString[0]) - strtotime('TODAY')) + (float) "0.{$timeString[1]}"; } } themes/cp_admin/episode/create.php +1 −1 Original line number Diff line number Diff line Loading @@ -167,7 +167,7 @@ <div class="py-2 tab-panels"> <section id="transcript-file-upload" class="flex items-center tab-panel"> <Forms.Label class="sr-only" for="transcript_file" isOptional="true"><?= lang('Episode.form.transcript_file') ?></Forms.Label> <Forms.Input class="w-full" name="transcript_file" type="file" accept=".txt,.html,.srt,.json" /> <Forms.Input class="w-full" name="transcript_file" type="file" accept=".srt,.vtt" /> </section> <section id="transcript-file-remote-url" class="tab-panel"> <Forms.Label class="sr-only" for="transcript_remote_url" isOptional="true"><?= lang('Episode.form.transcript_remote_url') ?></Forms.Label> Loading themes/cp_admin/episode/edit.php +1 −1 Original line number Diff line number Diff line Loading @@ -197,7 +197,7 @@ </div> <?php endif; ?> <Forms.Label class="sr-only" for="transcript_file" isOptional="true"><?= lang('Episode.form.transcript_file') ?></Forms.Label> <Forms.Input class="w-full" name="transcript_file" type="file" accept=".txt,.html,.srt,.json" /> <Forms.Input class="w-full" name="transcript_file" type="file" accept=".srt,.vtt" /> </section> <section id="transcript-file-remote-url" class="tab-panel"> <Forms.Label class="sr-only" for="transcript_remote_url" isOptional="true"><?= lang('Episode.form.transcript_remote_url') ?></Forms.Label> Loading Loading
modules/Admin/Language/en/Episode.php +2 −2 Original line number Diff line number Diff line Loading @@ -139,9 +139,9 @@ return [ 'location_name' => 'Location name or address', 'location_name_hint' => 'This can be a real or fictional location', 'transcript' => 'Transcript (subtitles / closed captions)', 'transcript_hint' => 'Only .srt are allowed.', 'transcript_hint' => 'Only .srt or .vtt are allowed.', 'transcript_download' => 'Download transcript', 'transcript_file' => 'Transcript file (.srt)', 'transcript_file' => 'Transcript file (.srt or .vtt)', 'transcript_remote_url' => 'Remote url for transcript', 'transcript_file_delete' => 'Delete transcript file', 'chapters' => 'Chapters', Loading
modules/Media/Entities/Transcript.php +13 −4 Original line number Diff line number Diff line Loading @@ -76,16 +76,25 @@ class Transcript extends BaseMedia private function saveJsonTranscript(): void { $srtContent = file_get_contents($this->file->getRealPath()); $transcriptContent = file_get_contents($this->file->getRealPath()); $transcriptParser = new TranscriptParser(); if ($srtContent === false) { if ($transcriptContent === false) { throw new Exception('Could not read transcript file at ' . $this->file->getRealPath()); } $transcriptJson = $transcriptParser->loadString($srtContent) $transcript_format = $this->file->getExtension(); switch ($transcript_format) { case 'vtt': $transcriptJson = $transcriptParser->loadString($transcriptContent) ->parseVtt(); break; case 'srt': default: $transcriptJson = $transcriptParser->loadString($transcriptContent) ->parseSrt(); } $tempFilePath = WRITEPATH . 'uploads/' . $this->file->getRandomName(); file_put_contents($tempFilePath, $transcriptJson); Loading
modules/Media/TranscriptParser.php +106 −1 Original line number Diff line number Diff line Loading @@ -3,7 +3,7 @@ declare(strict_types=1); /** * Generates and renders a breadcrumb based on the current url segments * Converts a SRT or VTT file to JSON * * @copyright 2022 Ad Aures * @license https://www.gnu.org/licenses/agpl-3.0.en.html AGPL3 Loading Loading @@ -107,9 +107,114 @@ class TranscriptParser return $jsonString; } public function parseVtt(): string { if (! defined('VTT_STATE_HEADER')) { define('VTT_STATE_HEADER', 0); } if (! defined('VTT_STATE_BLANK')) { define('VTT_STATE_BLANK', 1); } if (! defined('VTT_STATE_TIME')) { define('VTT_STATE_TIME', 2); } if (! defined('VTT_STATE_TEXT')) { define('VTT_STATE_TEXT', 3); } $subs = []; $state = VTT_STATE_HEADER; $subNum = 0; $subText = ''; $subTime = ''; $lines = explode(PHP_EOL, $this->transcriptContent); // add a newline as last item, if it isn't already a newline if ($lines[array_key_last($lines)] !== '') { $lines[] = PHP_EOL; } foreach ($lines as $line) { switch ($state) { case VTT_STATE_HEADER: $state = VTT_STATE_BLANK; break; case VTT_STATE_BLANK: $state = VTT_STATE_TIME; break; case VTT_STATE_TIME: $subTime = trim($line); $state = VTT_STATE_TEXT; break; case VTT_STATE_TEXT: if (trim($line) === '') { $sub = new stdClass(); $sub->number = $subNum; [$startTime, $endTime] = explode(' --> ', $subTime); $sub->startTime = $this->getSecondsFromVTTTimeString($startTime); $sub->endTime = $this->getSecondsFromVTTTimeString($endTime); $sub->text = trim($subText); if ($subSpeaker !== '') { $sub->speaker = trim((string) $subSpeaker); } $subText = ''; $state = VTT_STATE_TIME; $subs[] = $sub; ++$subNum; } elseif ($subText !== '') { $subText .= PHP_EOL . $line; } else { /** VTT includes a lot of information on the spoken line * An example may look like this: * <v.loud.top John>So this is it * We need to break this down into it's components, namely: * 1. The actual words for the caption * 2. Who is speaking * 3. Any styling cues encoded in the VTT (which we dump) * More information: https://www.w3.org/TR/webvtt1/ */ $vtt_speaker_pattern = '/^<.*>/'; $removethese = ['<', '>']; preg_match($vtt_speaker_pattern, $line, $matches); if (isset($matches[0])) { $subVoiceCue = explode(' ', str_replace($removethese, '', $matches[0])); $subSpeaker = $subVoiceCue[1]; } else { $subSpeaker = ''; } $subText .= preg_replace($vtt_speaker_pattern, '', $line); } break; } } $jsonString = json_encode($subs, JSON_PRETTY_PRINT); if (! $jsonString) { throw new Exception('Failed to parse VTT to JSON.'); } return $jsonString; } private function getSecondsFromTimeString(string $timeString): float { $timeString = explode(',', $timeString); return (strtotime($timeString[0]) - strtotime('TODAY')) + (float) "0.{$timeString[1]}"; } private function getSecondsFromVTTTimeString(string $timeString): float { $timeString = explode('.', $timeString); return (strtotime($timeString[0]) - strtotime('TODAY')) + (float) "0.{$timeString[1]}"; } }
themes/cp_admin/episode/create.php +1 −1 Original line number Diff line number Diff line Loading @@ -167,7 +167,7 @@ <div class="py-2 tab-panels"> <section id="transcript-file-upload" class="flex items-center tab-panel"> <Forms.Label class="sr-only" for="transcript_file" isOptional="true"><?= lang('Episode.form.transcript_file') ?></Forms.Label> <Forms.Input class="w-full" name="transcript_file" type="file" accept=".txt,.html,.srt,.json" /> <Forms.Input class="w-full" name="transcript_file" type="file" accept=".srt,.vtt" /> </section> <section id="transcript-file-remote-url" class="tab-panel"> <Forms.Label class="sr-only" for="transcript_remote_url" isOptional="true"><?= lang('Episode.form.transcript_remote_url') ?></Forms.Label> Loading
themes/cp_admin/episode/edit.php +1 −1 Original line number Diff line number Diff line Loading @@ -197,7 +197,7 @@ </div> <?php endif; ?> <Forms.Label class="sr-only" for="transcript_file" isOptional="true"><?= lang('Episode.form.transcript_file') ?></Forms.Label> <Forms.Input class="w-full" name="transcript_file" type="file" accept=".txt,.html,.srt,.json" /> <Forms.Input class="w-full" name="transcript_file" type="file" accept=".srt,.vtt" /> </section> <section id="transcript-file-remote-url" class="tab-panel"> <Forms.Label class="sr-only" for="transcript_remote_url" isOptional="true"><?= lang('Episode.form.transcript_remote_url') ?></Forms.Label> Loading