diff --git a/extra/machine-learning/decision-trees/decision-trees-tests.factor b/extra/machine-learning/decision-trees/decision-trees-tests.factor
new file mode 100644
index 0000000000..31d2d24970
--- /dev/null
+++ b/extra/machine-learning/decision-trees/decision-trees-tests.factor
@@ -0,0 +1,16 @@
+! Copyright (C) 2018 Björn Lindqvist
+! See http://factorcode.org/license.txt for BSD license
+USING: kernel machine-learning.data-sets
+machine-learning.decision-trees math.extras sequences tools.test ;
+IN: machine-learning.decision-trees.tests
+
+{ { 0.08 0.01 0.0 0.03 0.29 0.0 } } [
+    "monks-1.train" load-monks
+    6 <iota> [
+        average-gain 2 round-to-decimal
+    ] with map
+] unit-test
+
+{ 4 } [
+    "monks-1.train" load-monks highest-gain-index
+] unit-test
diff --git a/extra/machine-learning/decision-trees/decision-trees.factor b/extra/machine-learning/decision-trees/decision-trees.factor
new file mode 100644
index 0000000000..b843e7847f
--- /dev/null
+++ b/extra/machine-learning/decision-trees/decision-trees.factor
@@ -0,0 +1,38 @@
+! Copyright (C) 2018 Björn Lindqvist
+! See http://factorcode.org/license.txt for BSD license
+USING: accessors assocs fry grouping.extras kernel locals math
+math.functions math.statistics sequences sequences.extras sorting ;
+IN: machine-learning.decision-trees
+
+! Divide by log 2 to convert the entropy from nats to bits,
+! the customary unit when building decision trees.
+: entropy2 ( seq -- e )
+    normalized-histogram values entropy 2 log / ;
+
+! group-by only groups adjacent elements, so sort by the key first.
+: group-by-sorted ( seq quot: ( elt -- key ) -- groups )
+    [ sort-with ] keep group-by ; inline
+
+: subsets-weighted-entropy ( data-target idx -- e )
+    ! Group the data according to the value of the feature at idx.
+    '[ first _ swap nth ] group-by-sorted
+    ! Then unpack the partitioned groups of targets.
+    '[ [ second ] map ] assoc-map values
+    ! Finally, sum the entropy of each group weighted by its size.
+    [ [ entropy2 ] [ length ] bi * ] map-sum ; inline
+
+! Information gain of splitting the dataset on the feature at idx:
+! the entropy of the target minus the weighted entropy of the subsets.
+:: average-gain ( dataset idx -- gain )
+    dataset target>> :> target
+    dataset data>> :> data
+    data target zip :> data-target
+    data-target idx subsets-weighted-entropy :> weighted
+
+    target entropy2 weighted data length / - ;
+
+! Index of the feature whose split yields the highest gain.
+: highest-gain-index ( dataset -- idx )
+    dup feature-names>> length <iota> [
+        average-gain
+    ] with map arg-max ;
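
A quick sanity check of entropy2, as a minimal sketch assuming the vocabulary loads as above: a 50/50 two-class target carries exactly one bit of entropy, and a pure group carries none.

    USING: machine-learning.decision-trees prettyprint ;

    { 0 0 1 1 } entropy2 .  ! should print 1.0: a fair split is one bit
    { 0 0 0 0 } entropy2 .  ! should print 0.0: a pure group has no entropy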
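Tying the two public words together, a minimal usage sketch against the MONK's-1 training set bundled with machine-learning.data-sets; per the unit tests above, feature index 4 carries by far the most information about the target (gain of roughly 0.29 bits).

    USING: kernel machine-learning.data-sets
    machine-learning.decision-trees prettyprint sequences ;

    ! Load the training split, then print both the per-feature gains
    ! and the index of the best feature to split on first.
    "monks-1.train" load-monks
    [ 6 <iota> [ average-gain ] with map . ] ! ~{ 0.08 0.01 0.0 0.03 0.29 0.0 }
    [ highest-gain-index . ]                 ! 4
    bi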