From 7bbf5a65c144ab91c4694275e3f280e4afe54881 Mon Sep 17 00:00:00 2001
From: Doug Coleman
Date: Wed, 17 Dec 2014 16:44:21 -0800
Subject: [PATCH] subrip-subtitles: Add parser for .srt files. Kind of ugly.

---
 Review notes (free text between the three-dash line and the first
 "diff --git" header; not part of the commit message):

  * Adds the subrip-subtitles vocabulary: the srt-chunk tuple
    (id, begin-time, end-time, rect, text) and the words
    read-srt-timestamp, parse-srt-timestamp, parse-srt-chunk,
    parse-srt-lines, parse-srt-string and parse-srt-file.
  * A timestamp "HH:MM:SS,mmm" is read into a duration; the
    three-digit field is divided by 1000 and added to the seconds.
  * A chunk is a run of lines delimited by empty lines: line 1 is the
    id, line 2 the timing line, the remaining lines are joined with
    "\n" to form the text.
  * Trailing "X1:a X2:b Y1:c Y2:d" fields on the timing line become
    rect = { { a c } { b d } }; rect is f when that field list is
    empty.
  * NOTE(review): parse-srt-chunk splits the timing line with
    `" " split1` before searching for "-->". With a one-space
    separator that split would land between the begin timestamp and
    the arrow. Presumably the literal (and the separator in front of
    "X1:" in the second test) is wider than one space -- TODO confirm
    against the applied tree.

 extra/subrip-subtitles/authors.txt            |  1 +
 .../subrip-subtitles-tests.factor             | 60 +++++++++++++++++++
 .../subrip-subtitles/subrip-subtitles.factor  | 47 +++++++++++++++
 3 files changed, 108 insertions(+)
 create mode 100644 extra/subrip-subtitles/authors.txt
 create mode 100644 extra/subrip-subtitles/subrip-subtitles-tests.factor
 create mode 100644 extra/subrip-subtitles/subrip-subtitles.factor

diff --git a/extra/subrip-subtitles/authors.txt b/extra/subrip-subtitles/authors.txt
new file mode 100644
index 0000000000..7c1b2f2279
--- /dev/null
+++ b/extra/subrip-subtitles/authors.txt
@@ -0,0 +1 @@
+Doug Coleman
diff --git a/extra/subrip-subtitles/subrip-subtitles-tests.factor b/extra/subrip-subtitles/subrip-subtitles-tests.factor
new file mode 100644
index 0000000000..b90346f687
--- /dev/null
+++ b/extra/subrip-subtitles/subrip-subtitles-tests.factor
@@ -0,0 +1,60 @@
+! Copyright (C) 2014 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: calendar subrip-subtitles tools.test ;
+IN: subrip-subtitles.tests
+
+{
+    {
+        T{ srt-chunk
+            { id 1 }
+            { begin-time T{ duration { second 10.5 } } }
+            { end-time T{ duration { second 13.0 } } }
+            { text "Elephant's Dream" }
+        }
+        T{ srt-chunk
+            { id 2 }
+            { begin-time T{ duration { second 15.0 } } }
+            { end-time T{ duration { second 18.0 } } }
+            { text "At the left we can see..." }
+        }
+    }
+} [
+"""1
+00:00:10,500 --> 00:00:13,000
+Elephant's Dream
+
+2
+00:00:15,000 --> 00:00:18,000
+At the left we can see..."""
+    parse-srt-string
+] unit-test
+
+{
+    {
+        T{ srt-chunk
+            { id 1 }
+            { begin-time T{ duration { second 10.5 } } }
+            { end-time T{ duration { second 13.0 } } }
+            { rect { { 63 43 } { 223 58 } } }
+            { text "Elephant's Dream" }
+        }
+        T{ srt-chunk
+            { id 2 }
+            { begin-time T{ duration { second 15.0 } } }
+            { end-time T{ duration { second 18.0 } } }
+            { rect { { 53 438 } { 303 453 } } }
+            { text
+                "At the left we can see..."
+            }
+        }
+    }
+} [
+"""1
+00:00:10,500 --> 00:00:13,000 X1:63 X2:223 Y1:43 Y2:58
+Elephant's Dream
+
+2
+00:00:15,000 --> 00:00:18,000 X1:53 X2:303 Y1:438 Y2:453
+At the left we can see..."""
+    parse-srt-string
+] unit-test
diff --git a/extra/subrip-subtitles/subrip-subtitles.factor b/extra/subrip-subtitles/subrip-subtitles.factor
new file mode 100644
index 0000000000..e7d2cb8eb1
--- /dev/null
+++ b/extra/subrip-subtitles/subrip-subtitles.factor
@@ -0,0 +1,47 @@
+! Copyright (C) 2014 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors arrays calendar calendar.format
+io.encodings.utf8 io.files io.streams.string kernel math
+math.parser sequences splitting ascii ;
+IN: subrip-subtitles
+
+! http://en.wikipedia.org/wiki/SubRip
+! .srt
+
+TUPLE: srt-chunk id begin-time end-time rect text ;
+
+: read-srt-timestamp ( -- duration )
+    instant
+    read-00 >>hour ":" expect
+    read-00 >>minute ":" expect
+    read-00 "," expect
+    read-000 1000 /f + >>second ;
+
+: parse-srt-timestamp ( string -- duration )
+    [ read-srt-timestamp ] with-string-reader ;
+
+: parse-srt-chunk ( seq -- srt-chunk )
+    [ ?first string>number ]
+    [
+        ?second " " split1
+        [ "-->" split1 [ [ blank? ] trim parse-srt-timestamp ] bi@ ]
+        [
+            [ blank? ] trim " " split sift [
+                f
+            ] [
+                [ ":" split1 nip string>number ] map
+                first4 swapd [ 2array ] 2dip 2array 2array
+            ] if-empty
+        ] bi*
+    ]
+    [ 2 tail "\n" join ] tri srt-chunk boa ;
+
+: parse-srt-lines ( seq -- seq' )
+    { "" } split harvest
+    [ parse-srt-chunk ] { } map-as ;
+
+: parse-srt-string ( seq -- seq' )
+    string-lines parse-srt-lines ;
+
+: parse-srt-file ( path -- seq )
+    utf8 file-lines parse-srt-lines ;