数据挖掘——决策树
i:=0;
-- Compute populations and update query
OPEN curs FOR
'SELECT '||class||', COUNT('||class||') FROM ' || view_name || ' GROUP BY ' || class || ' ORDER BY ' || class; LOOP
FETCH curs INTO v, p;
EXIT WHEN curs%NOTFOUND; i := i + 1; -- Debug DBMS_OUTPUT.PUT_LINE('v='||v||' classval('||TO_CHAR(i)||')='||classval(i)); WHILE (v <> classval(i)) LOOP resq := resq || ', 0'; i := i + 1; END LOOP;
resq := resq || ', ' || TO_CHAR(p); END LOOP; CLOSE curs;
-- Complete and execute query
FOR j IN i+1..classval.COUNT LOOP resq := resq || ', 0'; END LOOP; resq := resq || ')';
-- Debug DBMS_OUTPUT.PUT_LINE(resq); EXECUTE IMMEDIATE resq; END; --
-- Main program code --
BEGIN
-- Echo parameters
DBMS_OUTPUT.PUT_LINE('table_name=' || table_name); DBMS_OUTPUT.PUT_LINE('class=' || class);
DBMS_OUTPUT.PUT_LINE('res_name=' || res_name);
DBMS_OUTPUT.PUT_LINE('min_gain=' || TO_CHAR(min_gain));
14
p='||TO_CHAR(p)||' 数据挖掘——决策树
DBMS_OUTPUT.PUT_LINE('root_view=' || root_view); IF del THEN
DBMS_OUTPUT.PUT_LINE('del=TRUE'); ELSE
DBMS_OUTPUT.PUT_LINE('del=FALSE'); END IF;
-- Create result table + list of class attribute values
resq := 'CREATE TABLE '||res_name||'(node NUMBER(2) PRIMARY KEY, parent NUMBER(2), rule VARCHAR(30)';
-- Fetch class attribute values OPEN att_values FOR
'SELECT DISTINCT '||class||' FROM '||table_name||' ORDER BY '||class; LOOP
FETCH att_values INTO cval;
EXIT WHEN att_values%NOTFOUND; -- Exit loop when last row is fetched classval.EXTEND;
classval(classval.COUNT) := cval;
resq := resq || ', '||class||'_' || cval || ' NUMBER(5)'; END LOOP; -- On att_values CLOSE att_values;
resq := resq || ', CONSTRAINT ' || res_name || '_H FOREIGN KEY(parent) REFERENCES ' || res_name || '(node))';
-- Debug DBMS_OUTPUT.PUT_LINE(resq); EXECUTE IMMEDIATE resq;
DBMS_OUTPUT.PUT_LINE('Creating result table ' || res_name);
-- Create starting node and stack it
EXECUTE IMMEDIATE
'CREATE VIEW ' || root_view || ' AS SELECT * FROM ' || table_name; -- Push
stack.EXTEND;
stack(stack.COUNT).num := nodenum; stack(stack.COUNT).nview := root_view;
Entropy(root_view, stack(stack.COUNT).entrop, stack(stack.COUNT).pop); DBMS_OUTPUT.PUT_LINE('Creating node view ' || root_view); Result(nodenum, 'NULL', 'NULL', root_view); -- Output result nodenum := nodenum + 1; -- Next node
-- Apply treatment while the stack is not empty
WHILE stack.COUNT > 0 LOOP
15
数据挖掘——决策树
-- Pop current node
cnode := stack(stack.COUNT); stack.TRIM;
/* Debug
DBMS_OUTPUT.PUT_LINE('cnode.nview=' || cnode.nview);
DBMS_OUTPUT.PUT_LINE('cnode.entrop=' || TO_CHAR(cnode.entrop)); DBMS_OUTPUT.PUT_LINE('cnode.pop=' || TO_CHAR(cnode.pop)); */
-- Clean candidate list
EXECUTE IMMEDIATE 'DELETE FROM BTCandidate';
-- For each attribute but the class attribute
OPEN att_names FOR 'SELECT COLUMN_NAME FROM USER_TAB_COLUMNS WHERE TABLE_NAME=UPPER('''||cnode.nview||''') AND COLUMN_NAME<>UPPER('''||class||''') ORDER BY COLUMN_NAME';
LOOP
-- Read current attribute name
FETCH att_names INTO catt;
EXIT WHEN att_names%NOTFOUND; -- Exit loop when last row is fetched
-- Debug DBMS_OUTPUT.PUT_LINE('catt=' || catt);
-- Create a candidate in list for current attribute
candq := '';
egain := cnode.entrop; -- Information gain is reset
-- For each distinct value of current attribute
OPEN att_values FOR
'SELECT DISTINCT '||catt||' FROM '||cnode.nview||' ORDER BY '||catt;
i := 0; LOOP
16
数据挖掘——决策树
-- Read current attribute value
FETCH att_values INTO cval;
EXIT WHEN att_values%NOTFOUND; -- Exit loop when last row is fetched
-- Debug DBMS_OUTPUT.PUT_LINE('cval=' || cval);
-- Build new node
i := i + 1;
nnode.nview := cnode.nview || '_' || catt || TO_CHAR(i); viewq := 'CREATE VIEW ' || nnode.nview || ' AS SELECT '; nnode.rule := catt || '=' || cval;
-- Select all attributes besides current attribute
OPEN att_names2 FOR 'SELECT COLUMN_NAME FROM USER_TAB_COLUMNS TABLE_NAME=UPPER('''||cnode.nview||''') AND COLUMN_NAME<>UPPER('''||catt||''')'; j := 0; LOOP
FETCH att_names2 INTO att_name;
EXIT WHEN att_names2%NOTFOUND; -- Exit loop when last row is fetched j := j + 1;
IF j > 1 THEN
viewq := viewq || ','; END IF;
viewq := viewq || att_name; END LOOP;
CLOSE att_names2;
-- Conclude query and execute it
viewq := viewq || ' FROM ' || cnode.nview || ' WHERE ' || catt || '=''' || cval || ''''; -- Debug DBMS_OUTPUT.PUT_LINE(viewq); EXECUTE IMMEDIATE viewq;
-- Compute entropy and update information gain Entropy(nnode.nview, nnode.entrop, nnode.pop);
egain := egain - (nnode.pop/cnode.pop)*nnode.entrop;
-- Insert node in list
17
WHERE
数据挖掘——决策树
IF i > 1 THEN candq := candq || ','; END IF; candq := candq || 'CNode('''||nnode.nview||''','''||nnode.rule||''','||REPLACE(TO_CHAR(nnode.entrop),',','.')||','||TO_CHAR(nnode.pop)||')'; END LOOP; -- On att_values
CLOSE att_values;
-- Actual candidate insertion
egain := ROUND(egain,8); -- Precision adaptation
-- Debug DBMS_OUTPUT.PUT_LINE('egain='||TO_CHAR(egain));
candq := 'INSERT INTO BTCandidate VALUES (''' || catt || REPLACE(TO_CHAR(egain),',','.') ||', CNodeList(' || candq || '))'; -- Debug DBMS_OUTPUT.PUT_LINE(candq); EXECUTE IMMEDIATE candq;
END LOOP; -- On att_names
CLOSE att_names;
-- Search for maximum information gain in candidate list
maxg := 0; max_var := '';
OPEN candidates FOR 'SELECT att_name, gain FROM BTCandidate'; LOOP
FETCH candidates INTO cvar, egain;
EXIT WHEN candidates%NOTFOUND; -- Exit loop when last row is fetched IF egain > maxg THEN maxg := egain; max_var := cvar; END IF; END LOOP;
CLOSE candidates;
-- Debug DBMS_OUTPUT.PUT_LINE('max_var='||max_var);
-- Stack nodes for attribute with maximum infortmation gain superior to the minimum -- Drop the others
18
''', '||