/*******************************************************************/
/* FUZZYIDT.PL            Last modification: Tue Dec  9 15:37:41 1997        */
/* Torgos ID3-like system based on the gain-ratio measure with fuzzy logic  */
/******************************************************************/
%
%    Copyright (c) 1989 Luis Torgo
%
%    This program is free software; you can redistribute it and/or 
%    modify it under the terms of the GNU General Public License 
%    Version 1 as published by the Free Software Foundation.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.
%
%    You should have received a copy of the GNU General Public 
%    License along with this program; if not, write to the Free 
%    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, 
%    USA.
%
/******************************************************************/
/* impl. by     : Luis Torgo, Laboratorio Inteligencia Artificial */
/*                e Ciencas de Computacao,                        */
/*                Universidade do Porto,                          */
/*                Rua Campo Alegre 823,                           */
/*                4100 Porto,                                     */
/*                Portugal                                        */
/*                1989                                            */
/*                                                                */
/*                Thomas Hoppe                                    */
/*                Mommsenstr. 50                                  */
/*                D-10629 Berlin                                  */
/*                F.R.G.                                          */
/*                E-Mail: hoppet@cs.tu-berlin.de                  */
/*                1990                                            */
/*                                                                */
/*  reference   : Learning Efficient Classification Procedures    */
/*                and Their Application to Chess End Games,       */
/*                Quinlan, J. R., in: Machine Learning,           */
/*                Michalski, R.S., Carbonell, J.G., Mitchell, T.M.*/
/*                (eds.), Tioga Publishing Company, Palo Alto,    */
/*                1983.                                           */
/*                                                                */
/*                Induction of Decision Trees, J. Ross Quinlan    */
/*                Machine Learning 1(1), 81-106, 1986             */
/*                                                                */
/*  call        : idt                                             */
/*                                                                */
/******************************************************************/

/******************************************************************/
/* SWI-, YAP-, C- and M-Prolog specific declaration of dynamical  */
/* clauses.                                                       */
/******************************************************************/
:- dynamic node/3.
:- dynamic decision_tree/1.
:- dynamic example/3.
:- dynamic attributes/1.
:- dynamic classes/1.
:- dynamic current_node/1.
:- dynamic table/3.
:- dynamic found/1.

% Comment this out if you use Quintus Prolog
log(X,Y) :- 
	Y is log(X).

/******************************************************************/
/* Quintus-Prolog specific declaration.                           */
/******************************************************************/
% :- ensure_loaded(library(math)).  
% :- ensure_loaded(library(basics)).  

/******************************************************************/
/*                                                                */
/*  call        : idt                                             */
/*                                                                */
/*  side effects: assertz and retracts clauses                    */
/*                                                                */
/******************************************************************/
/* idt reads a filename from the terminal, initializes the know-  */
/* base, consults the correponding file builds a decision tree    */
/* and displays the tree.                                         */
/* The program assertz the following predicates, which must be    */
/* declared as dynamic in some Prolog dialects:                   */
/* node/3, decision_tree/1, example/3, attributes/1, classes/1,   */
/* current_node/1 and table/3.                                    */
/******************************************************************/

idt :-
        initialize_kb,
        build_decision_tree, listing(node),
        show_decision_tree.
/*
idt :-
    repeat,
    	nl,
        write('Which file to use ? '),
        read(FileName),nl,
        initialize_kb,
        readfile(FileName),
        build_decision_tree, listing(node),
        show_decision_tree,
        nl,
        write('Quit (y/n) ? '),
        read(y).
*/
initialize_kb :-
    abolish(node,3),
    abolish(decision_tree,1),
   % abolish(example,3),
   % abolish(attributes,1),
   % abolish(classes,1),
    abolish(current_node,1), !.

readfile(FileName) :-
	concat(FileName,'.pl',File),
	see(File),
	repeat,
	read(Term),
	( Term = end_of_file ->
	      !, seen
	; assert(Term),
	  fail ).

build_decision_tree :-
    generate_node_id(_),
    clause(attributes(Attributes),true),
    findbag(Ex,clause(example(Ex,_,_),true),Exs),
    clause(pesi(W),true),			% Aggiunta per i pesi
    idt(Exs,Attributes,Node,W),
    assert(decision_tree(Node)), !.

generate_node_id(Y) :-
    clause(current_node(X),true), !,
    retract(current_node(X)),
    Y is X + 1,
    assert(current_node(Y)).
generate_node_id(0) :-
    assert(current_node(0)).

/******************************************************************/
/*                                                                */
/*  call        : idt(+Examples,+Attributes,-Class)               */
/*                                                                */
/*  arguments   : Examples   = List of Examples                   */
/*                Attributes = List of Attributes                 */
/*                Class      = Node ID of Class or leaf(Class)    */
/*                                                                */

/******************************************************************/
/* IDT determines an attribute-value pair which best splits the   */
/* examples according to the information-theoretical 'gain-ration'*/
/* measure. The attribute-value pair is deleted from the set of   */
/* all attribute-value pairs and the process of generating a sub- */
/* decision tree is called recursively with the according to the  */
/* attribute-value pair splitted examples. The recursion          */
/* terminates either if there is no more example to process or if */
/* all examples belong to the same class. In the last case        */
/* leaf(Class) is returned insteed of the SubtreeIDs.             */
/* In the end for every generated subtree an ID is generated and  */
/* the tree structure is asserted in the database.                */
/******************************************************************/
idt([],_,[],_).		% condizioni di termine della ricorsione 
idt(_,[],[],_). 	% o non vi sono pi esempi o non vi sono pi attributi da splittare 
idt(Exs,Attributes,ID,W) :-
    get_best_attribute(Attributes,Exs,BestAttribute,W),
    split_values(BestAttribute,DividedValues),
    delete(BestAttribute,Attributes,NewAttributes),
    generate_subtrees(DividedValues,Exs,NewAttributes,SubtreeIDs),
    generate_node_id(ID),
    assert(node(ID,BestAttribute,SubtreeIDs)).

preleva_classe([],[],[],0).

preleva_classe([T|_],[TClass|_],T,TClass).

termination_criterion([Ex|Exs],Class) :-
    clause(example(Ex,Classi,_),true),
    clause(classes(L),true),
    preleva_classe(L,Classi,Class,_),
     !,
    all_in_same_class(Exs,Class).

all_in_same_class([],_).
all_in_same_class([Ex|Exs],C) :-
    clause(example(Ex,C,_),true),
    !,
    all_in_same_class(Exs,C).
	

get_best_attribute(Attributes,Exs,BestAttribute,W) :-
    construct_contingency_table(Attributes,Exs,W),
    common_calculations(MC,N),
    calculate_parameter_classification(Attributes,MC,N,Values),
    get_best(Attributes,Values,BestAttribute).

construct_contingency_table(Attributes,Exs,W) :-
    clause(classes(Lc),true),
    length(Lc,NroColTab),
    abolish(table,3),
    create_list_of_zeros(NroColTab,List),
    initialize_contingency_tables(Attributes,List),
    construct_contingency_tables(Attributes,Exs,W).

initialize_contingency_tables([],_).
initialize_contingency_tables([A|As],List) :-
    assert(table(A,[],List)),
    initialize_contingency_tables(As,List).

create_list_of_zeros(0,[]).
create_list_of_zeros(N,[0|R]) :- 
     N > 0,
     N1 is N - 1,
     create_list_of_zeros(N1,R).

construct_contingency_tables([],_,_).
construct_contingency_tables([Attribute|Attributes],ExampleList,W) :-
    contingency_table(Attribute,ExampleList,W),
    !,
    construct_contingency_tables(Attributes,ExampleList,W).


per_ogni_classe(_,_,_,_,[],_,_).

per_ogni_classe(Attribute,Attr,L,Ex,[TClassi|CClassi], OldPc,[TW|CW]):-
     Pc is OldPc+1,
     my_update(Attribute,Attr,L,Ex,Pc,TClassi,TW),
     per_ogni_classe(Attribute,Attr,L,Ex,CClassi,Pc,CW). 


contingency_table(_,[],_).
contingency_table(Attribute,[Ex|Exs],W) :-
     clause(valori(Attribute = Attr),true),
     clause(example(Ex,Classi,L),true),
     per_ogni_classe(Attribute,Attr,L,Ex,Classi,0,W),
     !, 
     contingency_table(Attribute,Exs,W).


preleva_uscita(0,[],0).
%preleva_uscita(1,T,T).
preleva_uscita(1,[T|_],T).
preleva_uscita(P,[_|C],G):-
preleva_uscita(P1,C,G),
P is P1+1.

my_update(_,[],_,_,_,_,_).
my_update(_,_,_,_,0,_,_).

my_update(A,[Attr|Attrs],L,Ex,Pc,Guscita,W):-
    value3(A,L,Attr,G),
     %position_of_class(Ex,Pc,Classi,Guscita),     % position_of_class diventa Preleva uscita
    and(G,Guscita,NewG1),	   		     % adattamento
    and(NewG1,W,NewG),		   	    % CONSIDERO IL PESO PER QUELLA CLASSE	
    update_table(A,Attr,NewG,Pc),
    my_update(A,Attrs,L,Ex,Pc,Guscita,W).
     
avanza([],[],_,no,0).
avanza([TL|_],[TAttr|_],V,si,TL):-TAttr = V,!.          %not(TL=0.0),!.
avanza([_|CL],[TAttr|CAttr],V,OUT,G):-not(TAttr=V),avanza(CL,CAttr,V,OUT,G).

%value3(_,[],[],[]).
value3(A,[A = L|_],V,G) :- clause(valori(A = Attr),true),avanza(L,Attr,V,si,G), !. % write(A=V),write(' '),write(G),nl, 
value3(A,[_|Sels],V,G) :- value3(A,Sels,V,G).
value3(A,No,V,G) :-clause(example(No,_,Ex),true),value3(A,Ex,V,G).

value(A,[A = V|_],V) :- !.
value(A,[_|Sels],V) :- 
      value(A,Sels,V).
value(A,No,V) :-
      clause(example(No,_,Ex),true),
      value(A,Ex,V).

update_table(Attribute,V,G,Pc) :-
     retract(table(Attribute,TabLines,TotClass)),
     modify_table(TabLines,V,G,Pc,NewLines),
     increment_position_list(1,Pc,TotClass,NewTotal,G),
     assert(table(Attribute,NewLines,NewTotal)).

modify_table([],V,G,Pc,[(V,Values,G)]) :-
     clause(classes(Classes),true),
     length(Classes,NoOfColums),
     create_list_of_zeros(NoOfColums,L),
     increment_position_list(1,Pc,L,Values,G).
modify_table([(V,Nums,Tot)|Rest],V,G,Pc,[(V,NewNums,NewTot)|Rest]) :-
     NewTot is Tot + G,
     increment_position_list(1,Pc,Nums,NewNums,G).
modify_table([X|Rest1],V,G,Pc,[X|Rest2]) :-
     modify_table(Rest1,V,G,Pc,Rest2).

increment_position_list(N,N,[X|R],[Y|R],G) :-
     Y is X + G.
increment_position_list(N1,N,[X|R1],[X|R2],G) :-
     N2 is N1 + 1,
     increment_position_list(N2,N,R1,R2,G).

common_calculations(MC,N) :-
    clause(table(_,_,Xjs),true),
    common_calculations(Xjs,0,0,MC,N).

common_calculations([],TotalSum,N,MC,N) :-
    log(N,NLog),
    MC is (-1 / N) * ( TotalSum - N * NLog ).
common_calculations([0],S,N,S,N) :- !.
common_calculations([Xj|Xjs],Ac1,Ac2,MC,N) :-
    log(Xj,XjLog),
    NAc1 is Ac1 + Xj * XjLog,
    NAc2 is Ac2 + Xj,
    common_calculations(Xjs,NAc1,NAc2,MC,N).

calculate_parameter_classification([],_,_,[]).
calculate_parameter_classification([A|As],MC,N,[V|Vs]) :-
    gain_ratio(A,MC,N,V),
    calculate_parameter_classification(As,MC,N,Vs).

gain_ratio(A,MC,N,GR) :-
    clause(table(A,Lines,_),true),
    calculate_factors_B_and_IV(Lines,N,0,0,B,IV),
    IM is MC - B,
    ( IV > 0 -> 
	  GR is IM / IV 
    ; GR = 1 ).

calculate_factors_B_and_IV([],N,Sum1,Sum2,B,IV) :-
    log(N,NLog),
    B  is ( -1 / N ) * ( Sum1 - Sum2 ),
    IV is ( -1 / N ) * ( Sum2 - N * NLog ).
calculate_factors_B_and_IV([(_,L,TotL)|Rest],N,Ac1,Ac2,B,IV) :-
    sum_of_lines(L,0,SL),
    log(TotL,TotLog),
    NAc1 is Ac1 + SL,
    NAc2 is Ac2 + TotL * TotLog,
    calculate_factors_B_and_IV(Rest,N,NAc1,NAc2,B,IV).

sum_of_lines([],X,X).
sum_of_lines([0|Ns],Ac,Tot) :- 
    sum_of_lines(Ns,Ac,Tot).
sum_of_lines([N|Ns],Ac,Tot) :-
    log(N,NLog),
    Nac is Ac + N * NLog,
    sum_of_lines(Ns,Nac,Tot).

get_best([A|As],[V|Vs],Result) :-
    best_value(As,Vs,(A,V),Result).

best_value([],[],(A,_),A).
best_value([A|As],[V|Vs],(_,TV),Result) :-
    V > TV,
    best_value(As,Vs,(A,V),Result).
best_value([_|As],[_|Vs],(TA,TV),Result) :-
    best_value(As,Vs,(TA,TV),Result).

split_values(Attribute,Result) :-
      get_values(Attribute,Values),            %     listing(table),
     my_split_examples(Attribute,Values,Result).

get_values(Attribute,Vals) :-
    clause(valori(Attribute=Vals),true).

split_examples(_,[V],Exs,[(V,Exs)]).
split_examples(A,[V|Vs],Exs,[(V,VExs)|Rest]) :-
    findbag(Ex,(member(Ex,Exs),value3(A,Ex,V,G) ,not(G=0.0)),VExs),
    difference(VExs,Exs,RestEx),
    split_examples(A,Vs,RestEx,Rest).

dammi([],_,[]).
dammi(_,0,[]).
 
dammi([TV|CV],T,[Out|Outs]):-
Out is TV,
dammi(CV,T,Outs).


recupera1([],[]).
recupera1([Testa|Coda], [(V,Exit)|Rest]):-
recupera(Testa,V,Exit),
recupera1(Coda,Rest).

recupera([],_,[]).
recupera((V,LValClassi,Tot),V,Exit) :-
dammi(LValClassi,Tot,Exit).

recupera( [(_,_,_)|Coda], V, USCITA):-
recupera(Coda,V, USCITA).


my_split_examples(_,[],[]).
my_split_examples(A,_,Exits) :- 
    clause(table(A,Valori,_),true),
    recupera1(Valori,Exits).
    
generate_subtrees([],_,_,[]).

generate_subtrees([(_,[])|Rest1],Exs,Attributes,Rest2) :-
    % idt(Exs,Attributes,Id),
    !,
    generate_subtrees(Rest1,Exs,Attributes,Rest2).

generate_subtrees([(Value,Exits)|Rest1],Exs,Attributes,[(Value,Exits,Id)|Rest2]) :-
    idt(Exs,Attributes,Id,Exits),
    !,
    generate_subtrees(Rest1,Exs,Attributes,Rest2).

/******************************************************************/
/*                                                                */
/*  call        : show_decision_tree                              */
/*                                                                */
/******************************************************************/
/* A simple pretty-print procedure for displaying decision trees. */
/* In steed of this procedure, we can also generate rules from the*/
/* decision tree by traversing every path in the tree until a     */
/* leaf node was reached and collecting the attribute-value pairs */
/* of that path. Then the leaf node forms the head of a Horn-     */
/* formula and the set of attribute-value pairs of the path forms */
/* the body of the clause.                                        */
/******************************************************************/
show_decision_tree :-
	nl,			 %listing(node).
	clause(decision_tree(Node),true),
	show_subtree(Node,0), !.

show_subtree(NodeNo,Indent) :-
	clause(node(NodeNo,Attribute,SubtreeList),true),
	show_subtrees(SubtreeList,Attribute,Indent).

scrivi_grado([]).
scrivi_grado([46|C]):-write('.'),!, scrivi_grado(C).
scrivi_grado([T|C]):-N is T-48 ,write(N), scrivi_grado(C).

scrivivirgola([]).
scrivivirgola(_):-write(,).

scrivi_gradi([]).
scrivi_gradi([TG|CG]):-
name(TG,List3),
scrivi_grado(List3),
scrivivirgola(CG),
scrivi_gradi(CG).
	
show_subtrees([],_,_) :- nl.
show_subtrees([(Value,Gradi,[])|Brothers],Attribute,Indent) :- 
	write(Attribute=Value), 
	write(' ==> '), write('class = '), write('['), scrivi_gradi(Gradi), write(']'), nl,
	space(Indent),
	show_subtrees(Brothers,Attribute,Indent).	
show_subtrees([(Value,_,NodeNo)|Brothers],Attribute,Indent) :-
	name(Attribute,List1), length(List1,N1),
	name(Value,List2), length(List2,N2),
	% name(Grado,List3), length(List3,N3),
	write(Attribute=Value), %write('('),scrivi_grado(List3), write(')'),
	write(' and '), 
	Offset is Indent + N1 + 3 + N2 + 3  + 2 ,              %+N3
	show_subtree(NodeNo,Offset),
	space(Indent),
	show_subtrees(Brothers,Attribute,Indent).

/******************************************************************/
/* Utility predicates                                             */
/******************************************************************/
space(0).
space(N) :-
    	N > 0, write(' '), N1 is N - 1, space(N1).

remove_duplicates([],[]).
remove_duplicates([X|Xs],Ys) :-  
      member(X,Xs),
      remove_duplicates(Xs,Ys).
remove_duplicates([X|Xs],[X|Ys]) :- 
      remove_duplicates(Xs,Ys).

%length([],0).
%length([L|Ls],N) :- 
%      length(Ls,N1),
%      N is N1 + 1.

delete(X,[X|Xs],Xs).
delete(X,[Y|Ys],[Y|Zs]) :- 
    delete(X,Ys,Zs).

difference(L1,L2,L3) :-
    findbag(N,(member(N,L2),\+(member(N,L1))),L3).

findbag(X,G,_) :-                                              
     asserta(found(mark)), call(G),                         
       asserta(found(X)), fail .                            
findbag(_,_,L) :-                                              
     collect_found([],L) .                                     

collect_found(L,L1) :-                                       
     getnext(X), collect_found([X|L],L1) .                      
collect_found(L,L) .                                        
                                                              
getnext(X) :-                                               
     retract(found(X)), !, \+(X == mark) .                

help :- write('Start IDT with command: idt.'), nl.

and(A,B,C):-C is A*B.

:- help.



