From 6b8d8029854f7e986f823b209641483ae8caf411 Mon Sep 17 00:00:00 2001
From: John Benediktsson
Date: Thu, 3 May 2012 10:57:30 -0700
Subject: [PATCH] math.similarity: adding a few similarity metrics.

---
 extra/math/similarity/authors.txt             |  1 +
 extra/math/similarity/similarity-tests.factor | 18 ++++++++++++++++++
 extra/math/similarity/similarity.factor       | 15 +++++++++++++++
 3 files changed, 34 insertions(+)
 create mode 100644 extra/math/similarity/authors.txt
 create mode 100644 extra/math/similarity/similarity-tests.factor
 create mode 100644 extra/math/similarity/similarity.factor

diff --git a/extra/math/similarity/authors.txt b/extra/math/similarity/authors.txt
new file mode 100644
index 0000000000..e091bb8164
--- /dev/null
+++ b/extra/math/similarity/authors.txt
@@ -0,0 +1 @@
+John Benediktsson
diff --git a/extra/math/similarity/similarity-tests.factor b/extra/math/similarity/similarity-tests.factor
new file mode 100644
index 0000000000..d8be58b6c3
--- /dev/null
+++ b/extra/math/similarity/similarity-tests.factor
@@ -0,0 +1,18 @@
+! Copyright (C) 2012 John Benediktsson
+! See http://factorcode.org/license.txt for BSD license
+
+USING: math.functions math.similarity tools.test ;
+
+IN: math.similarity.tests
+
+CONSTANT: a { 1 2 1 5 1 0 0 } ! a and b are the sample vectors used by every test below
+CONSTANT: b { 0 0 0 0 2 3 1 }
+
+{ t } [ a a euclidian-similarity 1.0 1e-10 ~ ] unit-test ! each check compares result and expected value via ~ with 1e-10
+{ t } [ a b euclidian-similarity 0.1336766024001917 1e-10 ~ ] unit-test
+
+{ t } [ a a pearson-similarity 1.0 1e-10 ~ ] unit-test
+{ t } [ a b pearson-similarity 0.2376861940759582 1e-10 ~ ] unit-test
+
+{ t } [ a a cosine-similarity 1.0 1e-10 ~ ] unit-test
+{ t } [ a b cosine-similarity 0.5472455591261534 1e-10 ~ ] unit-test
diff --git a/extra/math/similarity/similarity.factor b/extra/math/similarity/similarity.factor
new file mode 100644
index 0000000000..a8ae722282
--- /dev/null
+++ b/extra/math/similarity/similarity.factor
@@ -0,0 +1,15 @@
+! Copyright (C) 2012 John Benediktsson
+! See http://factorcode.org/license.txt for BSD license
+
+USING: kernel math math.statistics math.vectors sequences ;
+
+IN: math.similarity
+
+: euclidian-similarity ( a b -- n ) ! 1 / ( 1 + norm of a - b ); identical inputs give 1
+    v- norm 1 + recip ;
+
+: pearson-similarity ( a b -- n ) ! 1.0 when a has fewer than 3 elements ( b is not checked ), else corr * 0.5 + 0.5
+    over length 3 < [ 2drop 1.0 ] [ corr 0.5 * 0.5 + ] if ;
+
+: cosine-similarity ( a b -- n ) ! ( sum of a * b ) / ( norm a * norm b ), then * 0.5 + 0.5
+    [ v* sum ] [ [ norm ] bi@ * ] 2bi / 0.5 * 0.5 + ;