diff --git a/lib/.DS_Store b/lib/.DS_Store
new file mode 100644
index 0000000..707d654
Binary files /dev/null and b/lib/.DS_Store differ
diff --git a/lib/core_extensions/array.rb b/lib/core_extensions/array.rb
index c726d5f..3e6e6cb 100644
--- a/lib/core_extensions/array.rb
+++ b/lib/core_extensions/array.rb
@@ -2,9 +2,10 @@ class Array
   def entropy
     each_with_object(Hash.new(0)) do |i, result|
       result[i] += 1
-    end.values.inject(0) do |sum, count|
+    end.values.sum do |count|
       percentage = count.to_f / length
-      sum + -percentage * Math.log2(percentage)
+
+      -percentage * Math.log2(percentage)
     end
   end
 end
diff --git a/lib/decisiontree/id3_tree.rb b/lib/decisiontree/id3_tree.rb
index 9434299..9097ebf 100755
--- a/lib/decisiontree/id3_tree.rb
+++ b/lib/decisiontree/id3_tree.rb
@@ -120,12 +120,12 @@ module DecisionTree
       index = attributes.index(attribute)
 
       values = data.map { |row| row[index] }.uniq
-      remainder = values.sort.inject(0) do |sum, val|
+      remainder = values.sort.sum do |val|
         classification = data.each_with_object([]) do |row, result|
           result << row.last if row[index] == val
         end
 
-        sum + ((classification.size.to_f / data.size) * classification.entropy)
+        ((classification.size.to_f / data.size) * classification.entropy)
       end
 
       [data.classification.entropy - remainder, index]