number_attributes = $tokens[0];
					$this->number_cases = $tokens[1];
				}
				if ($line_counter==1){
					$this->number_attribute_values = explode(" ", $buffer);
				}
				if ($line_counter==2){
					$this->attribute_titles = explode(" ", $buffer);
				}
				//store the cases in a matrix
				if($line_counter>2 && $line_counter < $this->number_cases+3){
					$tokens = explode(",", $buffer);
					while($cases_j < $this->number_attributes){
						//print "[".$cases_i."]"."[".$cases_j."] = ".$tokens[$cases_j];
						$this->cases[$cases_i][$cases_j] = $tokens[$cases_j];
						$cases_j++;
					}
					$cases_i++;
					$cases_j = 0;
				}
				$line_counter++;
			}
			fclose($csv_file);
		}
	}

	/*-----------------------------------------------------------------------------------------------------
	Function   : calculate_entropy()
	Description: calculates the base-2 entropy of a set: the sum, over every distinct value in the set,
	             of -p * log2(p), where p is the fraction of the set holding that value
	parameters : $s => array, source set
	returns    : the entropy of $s (0 for an empty set or a set holding a single distinct value)
	------------------------------------------------------------------------------------------------------*/
	function calculate_entropy($s){
		$possible_values = $this->get_possible_values($s);
		$ratios = $this->get_occurance_ratios($s, $possible_values);

		$entropy = 0;
		foreach ($ratios as $p){
			//a value taken from the set itself always has p > 0; the guard only keeps log() away from 0
			if ($p > 0){
				$entropy += (-1*$p)*log($p, 2);
			}
		}
		return $entropy;
	}

	/*-----------------------------------------------------------------------------------------------------
	Function   : calculate_info_gain()
	Description: calculates the information gain of every attribute except the target attribute
	             (column 0): the entropy of the target column minus the weighted sum of the entropies
	             of the target values grouped by each possible value of the attribute.
	             The result is stored in $this->info_gain, keyed by attribute index.
	parameters : $set => accepted for backward compatibility, currently not used
	returns    : nothing
	------------------------------------------------------------------------------------------------------*/
	function calculate_info_gain($set=''){
		$info_gain = array();

		//entropy of the decision (target) attribute, which lives in column 0
		$entropy_s = $this->calculate_entropy($this->get_attribute(0));

		for($i=1;$i<$this->number_attributes;$i++){
			//read the column once and reuse it for the values, the ratios and the sub sets
			$s = $this->get_attribute($i);
			$possible_values = $this->get_possible_values($s);
			$ratios = $this->get_occurance_ratios($s,$possible_values);

			//NOTE(review): the head of this loop was lost when the file was extracted; it is
			//reconstructed here as a plain walk over the possible values - confirm against the original
			$sum_sub_entropy = 0;
			for($j=0; $j<count($possible_values); $j++){
				//target attribute values of the cases where attribute $i holds this possible value
				$sub_source_set = $this->target_attribute_values($s, $possible_values[$j]);
				$sum_sub_entropy += $ratios[$j]*$this->calculate_entropy($sub_source_set);
			}

			$info_gain[$i] = $entropy_s - $sum_sub_entropy;
		}

		$this->info_gain = $info_gain;
	}

	/*-----------------------------------------------------------------------------------------------------
	Function   : get_attribute()
	Description: gets one attribute (column) of the cases matrix
	parameters : $i => column index into $this->cases
	returns    : an array holding the value of column $i for each of the first $this->number_cases rows
	------------------------------------------------------------------------------------------------------*/
	function get_attribute($i){
		$attribute = array();
		for ($j=0;$j<$this->number_cases;$j++){
			$attribute[] = $this->cases[$j][$i];
		}
		return $attribute;
	}

	/*-----------------------------------------------------------------------------------------------------
	Function   : get_possible_values()
	Description: gets the distinct values of a given set
	parameters : $s => array, source set
	returns    : an array of the distinct values of $s, rekeyed from 0
	------------------------------------------------------------------------------------------------------*/
	function get_possible_values($s){
		//array_unique keeps the original keys, array_values rekeys the result from 0
		return array_values(array_unique($s));
	}

	/*-----------------------------------------------------------------------------------------------------
	Function   : get_occurance_ratios()
	Description: counts how often each possible value occurs in the set $s and divides by the set size
	parameters : $s => array, source set (indexed from 0)
	             $possible_values => array of possible values (indexed from 0)
	returns    : array $ratio, where $ratio[$i] is the share of $s equal to $possible_values[$i];
	             every ratio is 0 when $s is empty
	------------------------------------------------------------------------------------------------------*/
	function get_occurance_ratios($s, $possible_values){
		$c = count($possible_values);
		$total = count($s);

		$ratio = array();
		for($i=0;$i<$c;$i++){
			//NOTE(review): the head of this inner loop was lost when the file was extracted; it is
			//reconstructed as a walk over every case of $s - confirm against the original
			$occurrences = 0;
			for($j=0;$j<$total;$j++){
				if ($possible_values[$i] == $s[$j]){
					$occurrences++;
				}
			}
			//an empty set would otherwise divide by zero
			$ratio[$i] = ($total > 0) ? $occurrences / $total : 0;
		}
		return $ratio;
	}

	/*-----------------------------------------------------------------------------------------------------
	Function   : target_attribute_values()
	Description: returns the target attribute (column 0) values of the cases in which the given set
	             holds the given value
	parameters : $s => array, one value per case
	             $possible_value => the single value to look for in $s
	returns    : array of the target attribute values at the positions where $s equals $possible_value
	------------------------------------------------------------------------------------------------------*/
	function target_attribute_values( $s, $possible_value){
		$target_attribute_values = array();
		$target_attribute = $this->get_attribute(0);

		for($j=0;$j < count($s);$j++){
			if ($possible_value == $s[$j]){
				$target_attribute_values[] = $target_attribute[$j];
			}
		}
		return $target_attribute_values;
	}

	/*-----------------------------------------------------------------------------------------------------
	Function   : print_matrix()
	Description: prints a matrix one row per line, each cell preceded by a space; used for testing the
	             csv import
	parameters : $matrix => array of rows, $height => number of rows, $width => number of columns
	NOTE(review): the line break literal probably stood in for HTML markup that was lost when the file
	was extracted - confirm against the original
	------------------------------------------------------------------------------------------------------*/
	function print_matrix($matrix, $height, $width){
		for ($h=0;$h<$height;$h++){
			$print_line='';
			for($w=0;$w<$width;$w++){
				$print_line.=" ".$matrix[$h][$w];
			}
			print $print_line."\n";
		}
	}

	/*-----------------------------------------------------------------------------------------------------
	Function   : print_array()
	Description: dumps an array with print_r(), surrounded by line breaks
	parameters : $array => the array to dump
	NOTE(review): the two line break literals probably stood in for HTML markup that was lost when the
	file was extracted - confirm against the original
	------------------------------------------------------------------------------------------------------*/
	function print_array($array){
		print "\n";
		print_r($array);
		print "\n";
	}
	/*-----------------------------------------------------------------------------------------------------
	Function   : print_decision_tree()
	Description: prints the information gain of every non-target attribute, then the decision tree
	             nodes in the order they were built, then the training data matrix
	NOTE(review): the line break literals probably stood in for HTML markup that was lost when the
	file was extracted - confirm against the original
	------------------------------------------------------------------------------------------------------*/
	function print_decision_tree(){
		print "\nStarting Information Gain Values\n";
		//column 0 is the target attribute and has no information gain entry
		for($i=1;$i<$this->number_attributes;$i++){
			print $this->attribute_titles[$i].": ".$this->info_gain[$i]."\n";
		}

		print "\n\nDecision Tree\n";
		for($i=0;$i<count($this->decision_tree);$i++){
			print $this->decision_tree[$i]."\n";
		}

		print "\n\nTraining Data\n";
		//test print that the cases have been imported correctly
		$this->print_matrix($this->cases, $this->number_cases, $this->number_attributes);
	}

	/*-----------------------------------------------------------------------------------------------------
	Function   : build_decision_tree()
	Description: calculates the information gain of every attribute, orders the attribute titles by
	             gain and builds the tree starting from the attribute with the highest gain.
	             $this->info_gain_names ends up as a list of titles sorted by ascending gain, so that
	             array_pop() hands out the highest remaining gain first.
	------------------------------------------------------------------------------------------------------*/
	function build_decision_tree(){
		$this->calculate_info_gain();

		//key by title rather than by gain: flipping a gain => title map would silently drop every
		//attribute whose gain ties with another one
		$gain_by_title = array();
		for($i=1; $i<$this->number_attributes; $i++){
			$gain_by_title[$this->attribute_titles[$i]] = $this->info_gain[$i];
		}
		asort($gain_by_title);
		$this->info_gain_names = array_keys($gain_by_title);

		$this->tree_level = '';
		$this->build_attribute_node(true);
	}

	/*-----------------------------------------------------------------------------------------------------
	Function   : build_attribute_node()
	Description: takes the attribute with the highest remaining information gain, pushes its node
	             ("+" followed by the current tree level and the title) on the decision tree and builds
	             one branch per possible value of that attribute. Does nothing when no attribute is left.
	parameters : $root => kept for backward compatibility; the attribute column is always read in full
	             because info gain on sub sets is not implemented
	------------------------------------------------------------------------------------------------------*/
	function build_attribute_node($root=true){
		//nothing left to split on
		if (!$this->info_gain_names){
			return;
		}

		$attribute_title = array_pop($this->info_gain_names);
		array_push($this->decision_tree, "+".$this->tree_level.$attribute_title);

		//locate the column of that attribute and read all of its values
		$attribute_index = array_search($attribute_title, $this->attribute_titles);
		$set = $this->get_attribute($attribute_index);
		$possible_values = $this->get_possible_values($set);

		$tree_level_pnode = "--";
		for($j=0; $j<count($possible_values); $j++){
			$this->build_branch($possible_values[$j],$set,$tree_level_pnode);
		}
	}

	/*-----------------------------------------------------------------------------------------------------
	Function   : build_branch()
	Description: builds a possible value branch for a given attribute node. When the entropy of the
	             target values under that branch is exactly 0 or 1 the branch is a leaf and is labelled
	             with the majority answer; otherwise attribute nodes are built below it until no
	             attribute is left.
	parameters : $possible_value => the attribute value this branch stands for
	             $set => array, the attribute value of every case
	             $tree_level_pnode => prefix printed in front of the possible value
	------------------------------------------------------------------------------------------------------*/
	function build_branch($possible_value,$set,$tree_level_pnode){
		$pnode = $tree_level_pnode.$possible_value;

		//target attribute values of the cases on this branch, and how mixed they are
		$sub_set = $this->target_attribute_values($set,$possible_value);
		$entropy = $this->calculate_entropy($sub_set);

		//stop condition: bottom of the tree. The label has to be added before the node is pushed,
		//otherwise it never reaches the decision tree.
		if ($entropy == 0 || $entropy == 1){
			$pnode .= " : ".$this->get_set_pos_neg_values($sub_set);
			array_push($this->decision_tree,$pnode);
			return;
		}

		array_push($this->decision_tree,$pnode);

		//info gain for the sub set is not implemented, so the original info gain order is used
		while($this->info_gain_names){
			$this->tree_level .= "--";
			$this->build_attribute_node($this->tree_level);
		}
	}

	/*-----------------------------------------------------------------------------------------------------
	Function   : get_set_pos_neg_values()
	Description: tells whether a set of target values holds more positive than negative answers
	parameters : $set => array of target attribute values
	returns    : 'yes' when the positive values outnumber the rest, otherwise 'no'
	NOTE(review): the counting loop was lost when the file was extracted and has been reconstructed.
	It assumes the positive target value is the string 'yes' (compared trimmed and case-insensitively)
	and that every other value counts as negative - confirm against the original
	------------------------------------------------------------------------------------------------------*/
	function get_set_pos_neg_values($set){
		$yes = 0;
		$no = 0;
		for ($i=0; $i<count($set); $i++){
			if (strtolower(trim($set[$i])) == 'yes'){
				$yes++;
			}
			else{
				$no++;
			}
		}
		if ($yes > $no) return 'yes';
		else return 'no';
	}
}//end decision tree class