mirror of
https://github.com/dkam/decisiontree.git
synced 2025-12-28 07:04:53 +00:00
Changes for performance as well as some general refactoring
This commit is contained in:
@@ -1,29 +1,19 @@
|
||||
class Array
|
||||
# Returns the last element of every entry in this array.
#
# Each entry must respond to +last+ (for example, a nested array).
# NOTE(review): presumably each entry is a data row whose final column is
# its class label — confirm against callers.
#
# @return [Array] the +last+ value of each entry, in the original order
def classification
  map(&:last)
end
|
||||
|
||||
# calculate information entropy
|
||||
# Computes the information entropy, in bits (base-2 logarithm), of the
# distribution of values held in this array.
#
# Equal elements (as decided by Hash key equality) are counted together;
# each distinct value contributes -p * log2(p), where p is its share of
# the array length.
#
# @return [Integer, Float] 0 for an empty array, otherwise the entropy
def entropy
  return 0 if empty?

  # Occurrence count per distinct element.
  counts = each_with_object(Hash.new(0)) { |item, seen| seen[item] += 1 }

  counts.values.inject(0) do |sum, count|
    percentage = count.to_f / length
    sum - percentage * Math.log2(percentage)
  end
end
|
||||
|
||||
private
|
||||
|
||||
# Sums the base-2 entropy contribution of each count in +info+.
#
# @param info [Hash] maps a value to its occurrence count; any enumerable
#   yielding (value, count) pairs also works
# @param total [Numeric] the divisor used to turn a count into a share
# @return [Integer, Float] the summed -p * log2(p) terms; Integer 0 when no
#   count is positive
def result(info, total)
  info.inject(0) do |final, (_symbol, count)|
    # A non-positive count contributes nothing, and log2 is undefined at 0.
    next final unless count > 0

    percentage = count.to_f / total
    final - percentage * Math.log2(percentage)
  end
end
|
||||
end
|
||||
|
||||
# Refinement that adds +classification+ to Array without patching the core
# class globally. Activate it in a file or module with
# +using ArrayClassification+.
module ArrayClassification
  refine Array do
    # Returns the last element of every entry in this array.
    #
    # Each entry must respond to +last+ (for example, a nested array).
    #
    # @return [Array] the +last+ value of each entry, in the original order
    def classification
      map(&:last)
    end
  end
end
|
||||
|
||||
|
||||
Reference in New Issue
Block a user