From 72d3c986b627e25d4bd3102d7fdf5310059a9c7e Mon Sep 17 00:00:00 2001
From: John Benediktsson
Date: Sat, 11 Apr 2015 10:06:00 -0700
Subject: [PATCH] english: words for working with english language text.

---
 extra/english/authors.txt          |   1 +
 extra/english/english-tests.factor |  18 +++
 extra/english/english.factor       | 174 +++++++++++++++++++++++++++++
 extra/english/summary.txt          |   1 +
 4 files changed, 194 insertions(+)
 create mode 100644 extra/english/authors.txt
 create mode 100644 extra/english/english-tests.factor
 create mode 100644 extra/english/english.factor
 create mode 100644 extra/english/summary.txt

diff --git a/extra/english/authors.txt b/extra/english/authors.txt
new file mode 100644
index 0000000000..e091bb8164
--- /dev/null
+++ b/extra/english/authors.txt
@@ -0,0 +1 @@
+John Benediktsson
diff --git a/extra/english/english-tests.factor b/extra/english/english-tests.factor
new file mode 100644
index 0000000000..93fa549322
--- /dev/null
+++ b/extra/english/english-tests.factor
@@ -0,0 +1,18 @@
+USING: english tools.test ;
+IN: english.tests
+
+{ "record" } [ "records" singularize ] unit-test
+{ "FOOT" } [ "FEET" singularize ] unit-test
+{ "name" } [ "names" singularize ] unit-test
+{ "box" } [ "boxes" singularize ] unit-test
+{ "hero" } [ "heroes" singularize ] unit-test
+{ "ie" } [ "ies" singularize ] unit-test
+
+{ "friends" } [ "friend" pluralize ] unit-test
+{ "enemies" } [ "enemy" pluralize ] unit-test
+{ "Sheep" } [ "Sheep" pluralize ] unit-test
+{ "boxes" } [ "box" pluralize ] unit-test
+{ "ys" } [ "y" pluralize ] unit-test
+
+{ "a10n" } [ "abbreviation" a10n ] unit-test
+{ "i18n" } [ "internationalization" a10n ] unit-test
diff --git a/extra/english/english.factor b/extra/english/english.factor
new file mode 100644
index 0000000000..eb642446e7
--- /dev/null
+++ b/extra/english/english.factor
@@ -0,0 +1,174 @@
+! Copyright (C) 2009 John Benediktsson
+! See http://factorcode.org/license.txt for BSD license
+
+USING: assocs assocs.extras combinators kernel literals locals
+math math.parser sequences splitting unicode.case
+unicode.categories ;
+
+IN: english
+
+<PRIVATE
+
+! Irregular nouns, singular -> plural. Wrapped in << >> so the table
+! exists at parse time, when plural-to-singular is computed from it.
+<<
+CONSTANT: singular-to-plural H{
+
+    ! us -> i
+    { "alumnus" "alumni" }
+    { "cactus" "cacti" }
+    { "focus" "foci" }
+    { "fungus" "fungi" }
+    { "nucleus" "nuclei" }
+    { "radius" "radii" }
+    { "stimulus" "stimuli" }
+
+    ! is -> es
+    { "analysis" "analyses" }
+    { "axis" "axes" }
+    { "basis" "bases" }
+    { "crisis" "crises" }
+    { "diagnosis" "diagnoses" }
+    { "ellipsis" "ellipses" }
+    { "hypothesis" "hypotheses" }
+    { "oasis" "oases" }
+    { "paralysis" "paralyses" }
+    { "parenthesis" "parentheses" }
+    { "synopsis" "synopses" }
+    { "synthesis" "syntheses" }
+    { "thesis" "theses" }
+
+    ! ix, ex -> ices
+    { "appendix" "appendices" }
+    { "index" "indices" }
+    { "matrix" "matrices" }
+
+    ! eau -> eaux
+    { "beau" "beaux" }
+    { "bureau" "bureaus" }
+    { "tableau" "tableaux" }
+
+    ! ? -> en
+    { "child" "children" }
+    { "man" "men" }
+    { "ox" "oxen" }
+    { "woman" "women" }
+
+    ! ? -> a
+    { "bacterium" "bacteria" }
+    { "corpus" "corpora" }
+    { "criterion" "criteria" }
+    { "curriculum" "curricula" }
+    { "datum" "data" }
+    { "genus" "genera" }
+    { "medium" "media" }
+    { "memorandum" "memoranda" }
+    { "phenomenon" "phenomena" }
+    { "stratum" "strata" }
+
+    ! no change
+    { "bison" "bison" }
+    { "deer" "deer" }
+    { "fish" "fish" }
+    { "means" "means" }
+    { "moose" "moose" }
+    { "offspring" "offspring" }
+    { "series" "series" }
+    { "sheep" "sheep" }
+    { "species" "species" }
+    { "swine" "swine" }
+
+    ! oo -> ee
+    { "foot" "feet" }
+    { "goose" "geese" }
+    { "tooth" "teeth" }
+
+    ! a -> ae
+    { "antenna" "antennae" }
+    { "formula" "formulae" }
+    { "nebula" "nebulae" }
+    { "vertebra" "vertebrae" }
+    { "vita" "vitae" }
+
+    ! ouse -> ice
+    { "louse" "lice" }
+    { "mouse" "mice" }
+}
+>>
+
+! Reverse table (plural -> singular), computed at parse time.
+CONSTANT: plural-to-singular $[ singular-to-plural assoc-invert ]
+
+! Endings whose regular plural takes "es" instead of "s".
+CONSTANT: es-endings { "s" "x" "z" "ch" "sh" }
+
+: es-ending? ( str -- ? )
+    es-endings [ tail? ] with any? ;
+
+! True when str is non-empty and ends in a non-vowel. The empty
+! check keeps bare suffixes such as "y" or "ies" from calling last
+! on an empty stem.
+: consonant-ending? ( str -- ? )
+    [ f ] [ last "aeiou" member? not ] if-empty ;
+
+! Re-case disciple like master: all lower, all upper or title case.
+! Any other casing of master leaves disciple untouched.
+:: match-case ( master disciple -- master' )
+    {
+        { [ master >lower master = ] [ disciple >lower ] }
+        { [ master >upper master = ] [ disciple >upper ] }
+        { [ master >title master = ] [ disciple >title ] }
+        [ disciple ]
+    } cond ;
+
+PRIVATE>
+
+! Singular form of word, keeping its casing. Irregular nouns come
+! from the table; otherwise suffix rules apply (ies -> y, es, s).
+: singularize ( word -- singular )
+    dup >lower {
+        { [ dup empty? ] [ ] }
+        { [ dup singular-to-plural key? ] [ ] }
+        { [ plural-to-singular ?at ] [ ] }
+        { [ dup "s" tail? not ] [ ] }
+        {
+            [ dup "ies" ?tail [ consonant-ending? ] [ drop f ] if ]
+            [ 3 head* "y" append ]
+        }
+        {
+            ! Strip "es" only where it was added as a suffix
+            ! ("boxes", "heroes"); "names" just loses its "s".
+            [
+                dup "es" ?tail [
+                    [ es-ending? ] [ "o" tail? ] bi or
+                ] [ drop f ] if
+            ] [ 2 head* ]
+        }
+        [ but-last ]
+    } cond match-case ;
+
+! Plural form of word, keeping its casing. Words already known as
+! plurals, or already ending in "es", are returned unchanged.
+: pluralize ( word -- plural )
+    dup >lower {
+        { [ dup empty? ] [ ] }
+        { [ dup plural-to-singular key? ] [ ] }
+        { [ singular-to-plural ?at ] [ ] }
+        {
+            [ dup "y" ?tail [ consonant-ending? ] [ drop f ] if ]
+            [ but-last "ies" append ]
+        }
+        {
+            [ dup es-ending? ]
+            [ dup "es" tail? [ "es" append ] unless ]
+        }
+        [ "s" append ]
+    } cond match-case ;
+
+! Numeronym: first letter, count of the letters in between, last
+! letter. Strings of three or fewer elements are left unchanged.
+: a10n ( str -- str' )
+    dup length 3 > [
+        [ 1 head ] [ length 2 - number>string ] [ 1 tail* ] tri
+        3append
+    ] when ;
diff --git a/extra/english/summary.txt b/extra/english/summary.txt
new file mode 100644
index 0000000000..078a45a74c
--- /dev/null
+++ b/extra/english/summary.txt
@@ -0,0 +1 @@
+English language