fix: fix episode transcription parser

This commit is contained in:
Hossein Mehrabi 2023-11-30 14:32:11 +03:30
parent 8e888f17dd
commit 3d60b443ee
No known key found for this signature in database
GPG Key ID: 45C04964191AFAA1
2 changed files with 37 additions and 21 deletions

View File

@ -2,7 +2,7 @@ import {
extractClassFromFirstTag,
extractIdFromFirstTag,
} from '@/utils/html.utils'
import { parseText, parseTimestamp } from '@/utils/string.utils'
import { parseTranscriptionText } from '@/utils/string.utils'
import { Typography } from '@acid-info/lsd-react'
import styled from '@emotion/styled'
import ReactPlayer from 'react-player'
@ -20,20 +20,27 @@ export const RenderEpisodeBlock = ({
const isYoutube = isYoutubeRegex.test(block.html)
const youtubeLink = block.html.match(isYoutubeRegex) ?? []
return isYoutube ? (
<ReactPlayer url={youtubeLink[0]} />
) : (
<TranscriptionItem variant="body1" component={'p'}>
<span>{parseTimestamp(block.text)}</span>
<span>|</span>
<span
className={extractClassFromFirstTag(block.html) || ''}
id={extractIdFromFirstTag(block.html) || `p-${block.id}`}
>
{parseText(block.text.replace(/-/, '')).trim()}
</span>
</TranscriptionItem>
)
if (isYoutube) return <ReactPlayer url={youtubeLink[0]} />
else {
const { time, transcript } = parseTranscriptionText(block.text)
return (
<TranscriptionItem variant="body1" component={'p'}>
{time && (
<>
<span>{time}</span>
<span>|</span>
</>
)}
<span
className={extractClassFromFirstTag(block.html) || ''}
id={extractIdFromFirstTag(block.html) || `p-${block.id}`}
>
{transcript}
</span>
</TranscriptionItem>
)
}
}
const TranscriptionItem = styled(Typography)`

View File

@ -66,13 +66,22 @@ export function convertToIframe(url: string) {
return `<iframe height="200px" width="100%" frameborder="no" scrolling="no" seamless src="${url}"></iframe>`
}
/**
 * Strips timestamp and footnote noise from a transcription line.
 *
 * Removes a timestamp at the very start of `text` (`mm:ss` or `hh:mm:ss`,
 * two digits per group) together with the single whitespace character that
 * follows it, and removes every bracketed numeric marker such as `[3]`
 * wherever it occurs.
 *
 * @param text Raw transcription text.
 * @returns The text with the leading timestamp and all footnote markers removed.
 */
export function parseText(text: string): string {
  // One global pattern: the first alternative can only match at index 0
  // (it is anchored with `^`), the second matches anywhere in the string.
  const timestampOrFootnote = /^(\d{2}:)?\d{2}:\d{2}\s|\[\d+\]/g
  const cleaned = text.replace(timestampOrFootnote, '')
  return cleaned
}
/**
 * Removes every bracketed numeric footnote marker (e.g. `[1]`, `[42]`)
 * from `text`; brackets that do not contain only digits are left alone.
 */
const removeFootnoteReferences = (text: string): string => {
  // The `g` flag makes `replace` drop all occurrences, not just the first.
  const footnoteMarker = /\[\d+\]/g
  return text.replace(footnoteMarker, '')
}
export function parseTimestamp(text: string) {
const time = text.match(/^(\d{2}:)?\d{2}:\d{2}/g)
return time ? time[0] : ''
/**
 * Splits a raw transcription line into its leading timestamp and the
 * remaining transcript text.
 *
 * A timestamp is recognised only at the very start of `text`. It consists of
 * two or three groups of one or two digits separated by colons (`m:ss`,
 * `mm:ss`, `h:mm:ss`, `hh:mm:ss`) and may be followed by one trailing colon,
 * which is not included in the returned `time`.
 *
 * @param text Raw text of a transcription block.
 * @returns `time` — the timestamp without a trailing colon, or `''` when the
 *   line does not start with one; `transcript` — the rest of the line with
 *   `[n]` footnote markers removed, surrounding whitespace trimmed, and any
 *   leading run of `-`, `|` or whitespace separators stripped.
 */
export const parseTranscriptionText = (
  text: string,
): { time: string; transcript: string } => {
  // The colon between digit groups is mandatory. With optional colons a line
  // that merely starts with a number ("12 angry men", "2023 was…") would be
  // split into digit groups and misreported as a timestamp.
  const rawTime = text.match(/^\d{1,2}(?::\d{1,2}){1,2}:?/)?.[0] ?? ''

  // The match is anchored at index 0, so slicing by its length removes
  // exactly the matched timestamp and nothing else.
  const transcript = removeFootnoteReferences(
    text.slice(rawTime.length),
  ).trim()

  return {
    time: rawTime.endsWith(':') ? rawTime.slice(0, -1) : rawTime,
    // Drop the separator left between timestamp and text, e.g. " - " or " | ".
    transcript: transcript.replace(/^(-|\||\s)*/, ''),
  }
}
export function formatTagText(tag: string) {