commit 50a25c2a28bf050aa837fcc97131478f81e684f9 Author: Steve Ayerhart <steve@ayerh.art> Date: Fri Nov 20 19:16:38 2020 -0500 add mail diff --git a/mail/mail.factor b/mail/mail.factor new file mode 100644 index 0000000..e3b7784 --- /dev/null +++ b/mail/mail.factor @@ -0,0 +1,5 @@ +USING: kernel ; +USING: mail.parser ; + +IN: mail + diff --git a/mail/multipart/multipart.factor b/mail/multipart/multipart.factor new file mode 100644 index 0000000..ac222c7 --- /dev/null +++ b/mail/multipart/multipart.factor @@ -0,0 +1,161 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors ascii assocs byte-arrays combinators fry +hashtables http http.parsers io io.encodings.binary io.files +io.files.temp io.files.unique io.streams.string kernel math +quoting sequences splitting namespaces sequences.parser arrays regexp ; +USING: prettyprint ; +IN: mail.multipart + +CONSTANT: buffer-size 65536 +CONSTANT: separator-prefix "\r\n--" + +SYMBOL: current-boundary + +TUPLE: multipart +end-of-stream? +current-separator mime-separator +header +content-disposition bytes +filename temp-file +name name-content +mime-parts ; + +TUPLE: mime-file headers filename temporary-path ; +C: <mime-file> mime-file + +TUPLE: mime-variable headers key value ; +C: <mime-variable> mime-variable + +: <multipart> ( mime-separator -- multipart ) + multipart new + swap >>mime-separator + { } clone >>mime-parts ; + +: mime-write ( sequence -- ) + >byte-array write ; + +: parse-headers ( string -- hashtable ) + string-lines harvest [ parse-header-line ] map >hashtable ; + +: fill-bytes ( multipart -- multipart ) + buffer-size read + [ '[ _ B{ } append-as ] change-bytes ] + [ t >>end-of-stream? ] if* ; + +ERROR: mime-decoding-ran-out-of-bytes ; +: dump-until-separator ( multipart -- multipart ) + [ ] [ current-separator>> ] [ bytes>> ] tri + dup [ mime-decoding-ran-out-of-bytes ] unless + 2dup subseq-start [ + cut-slice + [ mime-write ] + [ swap length tail-slice >>bytes ] bi* + ] [ + tuck [ length ] bi@ - 1 - cut-slice + [ mime-write ] + [ >>bytes ] bi* fill-bytes + dup end-of-stream?>> [ dump-until-separator ] unless + ] if* ; + +: dump-string ( multipart separator -- multipart string ) + >>current-separator + [ dump-until-separator ] with-string-writer ; + +: read-header ( multipart -- multipart ) + dup bytes>> "--\r\n" sequence= [ + t >>end-of-stream? + ] [ + "\r\n\r\n" dump-string parse-headers >>header + ] if ; + +: empty-name? ( string -- ? ) + { "''" "\"\"" "" f } member? ; + +: save-uploaded-file ( multipart -- ) + dup filename>> empty-name? [ + drop + ] [ + [ [ header>> ] [ filename>> ] [ temp-file>> ] tri <mime-file> ] + [ content-disposition>> "name" of unquote ] + [ mime-parts>> set-at ] tri + ] if ; + +: save-mime-part ( multipart -- ) + dup name>> empty-name? [ + drop + ] [ + [ name-content>> ] + [ name>> unquote ] + [ mime-parts>> set-at ] tri + ] if ; + +: dump-mime-file ( multipart filename -- multipart ) + binary <file-writer> [ + dup mime-separator>> >>current-separator dump-until-separator + ] with-output-stream ; + +: dump-file ( multipart -- multipart ) + [ "factor-" "-upload" unique-file ] with-temp-directory + [ >>temp-file ] [ dump-mime-file ] bi ; + +: parse-content-disposition-form-data ( string -- hashtable ) + ";" split + [ "=" split1 [ [ blank? ] trim ] bi@ ] H{ } map>assoc ; + +: lookup-disposition ( multipart string -- multipart value/f ) + over content-disposition>> at ; + +ERROR: unknown-content-disposition multipart ; + +: parse-form-data ( multipart -- multipart ) + "filename" lookup-disposition [ + unquote + >>filename + [ dump-file ] [ save-uploaded-file ] bi + ] [ + "name" lookup-disposition [ + [ dup mime-separator>> dump-string >>name-content ] dip + >>name dup save-mime-part + ] [ + unknown-content-disposition + ] if* + ] if* ; + +ERROR: no-content-disposition multipart ; + +: process-header ( multipart -- multipart ) + dup "content-disposition" header ";" split1 swap { + { "form-data" [ + parse-content-disposition-form-data >>content-disposition + parse-form-data + ] } + [ no-content-disposition ] + } case ; + +: read-assert-sequence= ( sequence -- ) + [ length read ] keep assert-sequence= ; + +! : with-boundary ( boundary quote -- ) +! [] dip +! current-boundary swap with-variable ; inline + +: parse-beginning ( str boundary -- str' ) + [ <sequence-parser> ] dip + [ take-sequence drop ] curry + [ take-rest ] bi ; + +: parse-header ( str -- header str' ) + [ + [ + "\r\n\r\n" take-until-sequence* + string-lines harvest [ parse-header-line + parse-content-disposition-form-data ] map >hashtable + ] + [ take-rest ] bi + ] parse-sequence ; + +: parse-multipart ( str separator -- seq ) + "--" prepend "\r\n" append dup <regexp> [ parse-beginning ] dip re-split + [ [ from>> ] [ to>> ] [ seq>> ] tri subseq parse-header 2array ] map ; + diff --git a/mail/parser/parser.factor b/mail/parser/parser.factor new file mode 100644 index 0000000..d283f6a --- /dev/null +++ b/mail/parser/parser.factor @@ -0,0 +1,499 @@ +USING: kernel peg.parsers sequences sequences.parser make strings qw sequences.deep calendar calendar.english calendar.parser math math.parser math.order accessors combinators peg unicode parser arrays combinators.short-circuit splitting assocs vocabs.parser words slots.syntax quotations regexp documents.private ; +USING: prettyprint ; + +IN: mail.parser + +TUPLE: mailbox-field display-name local domain ; + +TUPLE: received-field tokens date-time ; + +TUPLE: content-type sub-type parameters ; + +TUPLE: content-type/text < content-type ; +TUPLE: content-type/image < content-type ; +TUPLE: content-type/video < content-type ; +TUPLE: content-type/audio < content-type ; +TUPLE: content-type/application < content-type ; +TUPLE: content-type/multipart < content-type ; +TUPLE: content-type/unknown < content-type ; + +TUPLE: mail-header + mime-version content-type + bcc cc comments date from in-reply-to keywords + message-id references reply-to sender subject to + { optional-fields initial: { } } ; + +TUPLE: multipart-header + content-type content-transfer-encoding content-id content-disposition ; + +TUPLE: multipart header message ; + +TUPLE: mail { header mail-header } body ; + +! Core rules + +: wsp-parser ( -- parser ) + " " token "\t" token 2choice ; + +: crlf-parser ( -- parser ) + "\r\n" token hide ; + +: special? ( ch -- ? ) + "()<>[]:;@\\,.\"" member? ; + +: except ( quot -- parser ) + [ not ] compose satisfy [ 1string ] action ; inline + +: except-these ( quots -- parser ) + [ 1|| ] curry except ; inline + +: digit-parser ( -- parser ) + "0-9" range-pattern [ 1string ] action ; + +: digit-parser2 ( -- parser ) + digit-parser 2 exactly-n ; + +: digit-parser4 ( -- parser ) + digit-parser 4 exactly-n ; + +: vchar-parser ( -- parser ) + CHAR: \x20 CHAR: \x7e range [ 1string ] action ; + +: quoted-pair ( -- parser ) + CHAR: \ 1token vchar-parser wsp-parser 2choice 2seq ; + +! Folding White Space and Comments + +: ctext-parser ( -- parser ) + { [ control? ] [ "()" member? ] } except-these ; + +: foldable-wsp-parser ( -- parser ) + wsp-parser repeat0 crlf-parser 2seq optional + wsp-parser repeat1 2seq ; + +: comment-parser ( -- parser ) + ctext-parser [ comment-parser ] delay 2choice repeat0 "(" ")" surrounded-by hide ; + +: comment-foldable-wsp-parser ( -- parser ) + foldable-wsp-parser optional + comment-parser 2seq repeat1 foldable-wsp-parser optional 2seq + foldable-wsp-parser 2choice ; + +! Atom + +: atext-parser ( -- parser ) + "a-zA-Z0-9!#$%&'*+-/=?^_`{}|~" range-pattern [ 1string ] action ; + +: atom-parser ( -- parser ) + comment-foldable-wsp-parser optional hide + atext-parser repeat1 + comment-foldable-wsp-parser optional hide 3seq [ flatten "" concat-as ] action ; + +: dot-atom-parser ( -- parser ) + atom-parser "." token atom-parser 2seq repeat0 optional 2seq sp [ flatten "" concat-as ] action ; + +! Quoted Strings + +: quoted-string-parser ( -- parser ) + string-parser ; + +! Miscellaneous Tokens + +: word-parser ( -- parser ) + atom-parser quoted-string-parser 2choice ; + +: phrase-parser ( -- parser ) + word-parser repeat1 [ "" concat-as ] action ; + +: unstructured-parser ( -- parser ) + foldable-wsp-parser optional vchar-parser 2seq repeat0 wsp-parser repeat0 2seq + [ flatten "" concat-as ] action sp ; + +! Date and Time Specification + +: date-time-zone-parser ( -- parser ) + foldable-wsp-parser hide "+" token "-" token 2choice digit-parser4 3seq + [ flatten "" concat-as parse-rfc822-gmt-offset ] action ; + +: date-time-second-parser ( -- parser ) + digit-parser2 [ "" concat-as string>number ] action + [ 0 60 between? ] semantic ; + +: date-time-hour-parser ( -- parser ) + digit-parser2 [ "" concat-as string>number ] action + [ 0 23 between? ] semantic ; + +: date-time-minute-parser ( -- parser ) + digit-parser2 [ "" concat-as string>number ] action + [ 0 59 between? ] semantic ; + +: time-of-day-parser ( -- parser ) + date-time-hour-parser ":" token hide + date-time-minute-parser ":" token hide + date-time-second-parser 2seq optional + [ [ first ] [ 0 ] if* ] action + 4seq ; + +: date-time-time-parser ( -- parser ) + time-of-day-parser date-time-zone-parser 2seq ; + +: date-time-year-parser ( -- parser ) + foldable-wsp-parser hide + digit-parser 4 at-least-n + foldable-wsp-parser hide 3seq + [ flatten "" concat-as string>number ] action + [ 1900 >= ] semantic ; + +: date-time-month-parser ( -- parse ) + qw{ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec } [ token ] map choice + [ month-abbreviations index 1 + ] action ; + +: date-time-day-parser ( -- parser ) + foldable-wsp-parser optional hide + digit-parser 1 2 from-m-to-n + foldable-wsp-parser hide 3seq + [ flatten "" concat-as string>number ] action ; + +: date-time-date-parser ( -- parser ) + date-time-day-parser + date-time-month-parser + date-time-year-parser 3seq ; + +: date-time-day-name-parser ( -- parser ) + qw{ Mon Tue Wed Thu Fri Sat Sun } [ token ] map choice ; + +: date-time-day-of-week-parser ( -- parser ) + foldable-wsp-parser optional + date-time-day-name-parser 2seq ; + +: date-time-parser ( -- parser ) + date-time-day-of-week-parser "," token 2seq optional hide + date-time-date-parser + date-time-time-parser + comment-foldable-wsp-parser optional hide 4seq + [ + flatten + [ timestamp new ] dip + [ ] each + { + [ >>day ] [ >>month ] [ >>year ] + [ >>hour ] [ >>minute ] [ >>second ] + [ >>gmt-offset ] + } spread + ] action ; + +! Addr-Spec Specification + +: dtext-parser ( -- parser ) + { [ " []\\" member? ] [ control? ] } except-these repeat0 ; + +: domain-literal-parser ( -- parser ) + [ + comment-foldable-wsp-parser optional , + foldable-wsp-parser optional + dtext-parser 2seq + foldable-wsp-parser optional 2seq + "[" "]" surrounded-by , + comment-foldable-wsp-parser optional , + ] seq* [ flatten "" concat-as ] action ; + +: domain-parser ( -- parser ) + dot-atom-parser domain-literal-parser 2choice ; + +: local-part-parser ( -- parser ) + dot-atom-parser quoted-string-parser 2choice ; + +: addr-spec-parser ( -- parser ) + local-part-parser "@" token hide domain-parser 3seq + [ f swap [ ] each mailbox-field boa ] action ; + +! Address Specification + +: display-name-parser ( -- parser ) + phrase-parser ; + +: angle-addr ( -- parser ) + comment-foldable-wsp-parser optional hide + addr-spec-parser "<" ">" surrounded-by + comment-foldable-wsp-parser optional hide 3seq [ first ] action ; + +: name-addr-parser ( -- parser ) + display-name-parser optional angle-addr 2seq [ [ ] each swap >>display-name ] action ; + +: mailbox-parser ( -- parser ) + name-addr-parser addr-spec-parser 2choice ; + +: mailbox-list-parser ( -- parser ) + mailbox-parser "," token list-of [ >array ] action ; + +DEFER: group-parser + +: address-parser ( -- parser ) + mailbox-parser group-parser 2choice [ ] action ; + +: group-list-parser ( -- parser ) + mailbox-list-parser comment-foldable-wsp-parser 2choice ; + +: group-parser ( -- parser ) + [ + display-name-parser , + ":" token , + group-list-parser optional , + ";" token , + comment-foldable-wsp-parser optional hide , + ] seq* ; + +: address-list-parser ( -- parser ) + address-parser "," token list-of [ >array ] action ; + +! Optional Fields + +: ftext ( -- parser ) + CHAR: \x21 CHAR: \x39 range + CHAR: \x3b CHAR: \x7e range + 2choice [ 1string ] action ; + +: known-field-name-parser ( str -- parser ) + [ ftext repeat1 [ "" concat-as >lower ] action ] dip [ = ] curry semantic ; + +: field-name-parser ( -- parser ) + ftext repeat1 [ "" concat-as >lower ] action ; + +: field-parser ( -- parser ) + [ + field-name-parser , + ":" token , + unstructured-parser , + crlf-parser , + ] seq* ; + +: known-field-parser ( str parser -- parser ) + [ + known-field-name-parser + ":" token hide + ] dip + crlf-parser 4seq [ >array ] action ; + +: no-fold-literal ( -- parser ) + dtext-parser repeat0 "[" "]" surrounded-by ; + +: id-left-parser ( -- parser ) + dot-atom-parser ; + +: id-right-parser ( -- parser ) + dot-atom-parser no-fold-literal 2choice ; + +: msg-id-parser ( -- parser ) + comment-foldable-wsp-parser optional hide + id-left-parser "@" token id-right-parser 3seq [ "" concat-as ] action + "<" ">" surrounded-by + comment-foldable-wsp-parser optional hide 3seq [ first ] action ; + +! The origination Date Field + +: orig-date-parser ( -- parser ) + "date" date-time-parser known-field-parser ; + +! Originator fields + +: from-parser ( -- parser ) + "from" mailbox-list-parser known-field-parser ; +: sender-parser ( -- parser ) + "sender" mailbox-parser known-field-parser ; +: reply-to-parser ( -- parser ) + "reply-to" address-list-parser known-field-parser ; + +! Destination Address fields + +: to-parser ( -- parser ) + "to" address-list-parser known-field-parser ; +: cc-parser ( -- parser ) + "cc" address-list-parser known-field-parser ; +: bcc-parser ( -- parser ) + "bcc" address-list-parser known-field-parser ; + +: message-id-parser ( -- parser ) + "message-id" msg-id-parser known-field-parser ; + +: in-reply-to-parser ( -- parser ) + "in-reply-to" msg-id-parser repeat1 [ >array ] action known-field-parser ; + +: references-parser ( -- parser ) + "references" msg-id-parser repeat1 [ >array ] action known-field-parser ; + +: subject-parser ( -- parser ) + "subject" unstructured-parser known-field-parser ; + +: comments-parser ( -- parser ) + "comments" unstructured-parser known-field-parser ; + +: keywords-parser ( -- parser ) + "keywords" phrase-parser sp "," token list-of [ >array ] action known-field-parser ; + +! Resent Fields + +: resent-date-parser ( -- parser ) + "resent-date" date-time-parser known-field-parser ; + +: resent-from-parser ( -- parser ) + "resent-from" mailbox-list-parser known-field-parser ; + +: resent-sender-parser ( -- parser ) + "resent-sender" mailbox-parser known-field-parser ; + +: resent-to-parser ( -- parser ) + "resent-to" address-list-parser known-field-parser ; + +: resent-cc-parser ( -- parser ) + "resent-cc" address-list-parser known-field-parser ; + +: resent-bcc-parser ( -- parser ) + "resent-bcc" address-list-parser known-field-parser ; + +: resent-message-id-parser ( -- parser ) + "resent-message-id" msg-id-parser known-field-parser ; + +! Trace Fields + +: received-token-parser ( -- parser ) + angle-addr addr-spec-parser domain-parser word-parser 4choice ; + +: path-parser ( -- parser ) + angle-addr comment-foldable-wsp-parser "<" ">" surrounded-by 2choice ; + +: return-path-parser ( -- parser ) + "return-path" path-parser known-field-parser ; + +: received-parser ( -- parser ) + "received" + received-token-parser repeat0 [ >array ] action + ";" token hide + date-time-parser 3seq + [ [ ] each received-field boa ] action + known-field-parser ; + +: trace-parser ( -- parser ) + return-path-parser optional + received-parser repeat1 2seq [ flatten >array ] action ; + +! Content-Type Field + +: trim-quotes ( str -- str' ) + [ CHAR: \" = ] trim ; +: trim-blanks ( str -- str' ) + [ blank? ] trim ; + +: parse-content-type ( str -- mime-type ) + [ + [ "/" take-until-sequence* >lower ] + [ ";" take-until-sequence* >lower ] + [ take-rest ";" split [ "=" split1 [ trim-blanks trim-quotes ] bi@ ] H{ } map>assoc ] tri rot + ] parse-sequence >lower + { + { "text" [ content-type/text boa ] } + { "image" [ content-type/image boa ] } + { "audio" [ content-type/audio boa ] } + { "video" [ content-type/video boa ] } + { "application" [ content-type/application boa ] } + { "multipart" [ content-type/multipart boa ] } + { "unknown" [ content-type/unknown boa ] } + [ drop content-type/unknown boa ] + } case ; + +: content-type-parser ( -- parser ) + "content-type" unstructured-parser [ parse-content-type ] action known-field-parser ; + +: parse-content-disposition ( str -- content-disposition ) + ";" split [ "=" split1 [ trim-blanks trim-quotes ] bi@ ] H{ } map>assoc ; + +: optional-field-parser ( -- parser ) + field-name-parser ":" token hide unstructured-parser crlf-parser 4seq [ >array ] action ; + +: known-multipart-field-parser ( str parser -- parser ) + [ + known-field-name-parser + ":" token hide + ] dip + 3seq [ >array ] action ; + +: multipart-header-field-parser ( -- parser ) + "content-type" unstructured-parser [ parse-content-type ] action known-multipart-field-parser + "content-disposition" unstructured-parser [ parse-content-disposition ] action known-multipart-field-parser + "content-id" unstructured-parser known-multipart-field-parser + "content-transfer-encoding" unstructured-parser known-multipart-field-parser + 4choice ; + +PEG: parse-multipart-field-header ( str -- header ) multipart-header-field-parser ; + +: parse-multipart-beginning ( str boundary -- str' ) + [ <sequence-parser> ] dip + [ take-sequence drop ] curry + [ take-rest ] bi ; + +: parse-multipart-header ( str -- header str' ) + [ + [ + "\r\n\r\n" take-until-sequence* + [ + string-lines + multipart-header new + [ + parse-multipart-field-header + [ second ] [ first ] bi >>writer-word + 1quotation call( multipart-header value -- multipart-header ) + ] reduce + ] + [ f ] if* + ] + [ take-rest ] bi + ] parse-sequence ; + + +: parse-multipart ( str separator -- seq ) + "--" prepend "-?-?\r\n" append dup <regexp> [ parse-multipart-beginning ] dip re-split + [ + [ from>> ] [ to>> ] [ seq>> ] tri subseq + parse-multipart-header [ multipart boa ] when* + ] map sift ; + +: fields-parser ( -- seq ) + [ + trace-parser , + resent-date-parser , + resent-from-parser , + resent-sender-parser , + resent-to-parser , + resent-cc-parser , + resent-bcc-parser , + resent-message-id-parser , + orig-date-parser , + from-parser , + sender-parser , + reply-to-parser , + to-parser , + cc-parser , + bcc-parser , + message-id-parser , + in-reply-to-parser , + references-parser , + subject-parser , + comments-parser , + keywords-parser , + content-type-parser , + optional-field-parser , + ] choice* repeat0 + [ + mail-header new + [ + [ second ] [ first ] bi dup >>writer-word + [ swap drop 1quotation call( mail-header value -- mail-header ) ] + [ + [ dup optional-fields>> ] 2dip + swap 2array 1array append >>optional-fields + ] if* + ] reduce + ] action ; + +PEG: parse-mail ( str -- mail ) + fields-parser crlf-parser 2seq + [ first input-slice [ seq>> ] [ from>> ] bi tail mail boa ] action ;