Skip to content

Commit

Permalink
Closes #266. Closes #237. BGM.bf was still using old implementation o…
Browse files Browse the repository at this point in the history
…f BGM class, since refactored as BayesianGraphicalModel. Now uses utility functions in bayesgraph.ibf. Also adding unit tests for BGM, see tests/hbltests/BayesianGraphicalModels. Good reference for how to use these models.
  • Loading branch information
ArtPoon committed Mar 14, 2015
1 parent 6de09fe commit f376178
Show file tree
Hide file tree
Showing 3 changed files with 666 additions and 72 deletions.
99 changes: 27 additions & 72 deletions res/TemplateBatchFiles/BGM.bf
Original file line number Diff line number Diff line change
@@ -1,38 +1,4 @@
RequireVersion ("0.9920060815");

/* ________________________________________________________________ */

/* Build the associative list that describes one discrete node.
   node_id     - stored under key "NodeID"
   sample_size - stored under key "PriorSize"
   max_parents - stored under key "MaxParents"
   Returns the populated associative list.
   NOTE(review): dnode is not declared local here; presumably it is visible
   to the caller after the call -- confirm before renaming it. */
function make_discrete_node (node_id, sample_size, max_parents)
{
dnode = {};
dnode["NodeID"] = node_id;
dnode["PriorSize"] = sample_size;
dnode["MaxParents"] = max_parents;
return dnode;
}

/* ________________________________________________________________ */

/* Build the associative list that describes one continuous node.
   node_id     - stored under key "NodeID"
   sample_size - stored under key "PriorSize"
   mean        - stored under key "PriorMean"
   precision   - stored under key "PriorVar"
   Returns the populated associative list.
   NOTE(review): max_parents is read below but is NOT a parameter of this
   function, so "MaxParents" takes whatever value that name has outside this
   function (presumably a global, possibly left over from an earlier
   make_discrete_node call) -- confirm and pass it explicitly if intended.
   NOTE(review): the argument is named "precision" but is stored as
   "PriorVar"; verify which quantity callers actually supply. */
function make_continuous_node (node_id, sample_size, mean, precision)
{
cnode = {};
cnode["NodeID"] = node_id;
cnode["PriorSize"] = sample_size;
cnode["MaxParents"] = max_parents;
cnode["PriorMean"] = mean;
cnode["PriorVar"] = precision;
return cnode;
}

/* ________________________________________________________________ */

/* Build the associative list that describes one banned edge.
   parent - stored under key "BanParent"
   child  - stored under key "BanChild"
   Returns the populated associative list. */
function make_banned_edge (parent, child)
{
a_rule = {};
a_rule["BanParent"] = parent;
a_rule["BanChild"] = child;
return a_rule;
}
ExecuteAFile("bayesgraph.ibf");

/* ________________________________________________________________ */

Expand Down Expand Up @@ -182,23 +148,6 @@ function obtainSubstitutionMatrix (_lfID, sample_flag, site_map, _filterMatrix)

/* ________________________________________________________________ */

/* Run one BGM job when no MPI worker nodes are available.
   _bgm_data - job description passed straight through to runBGM
   jobID     - index into _sample_results; negative values are ignored
   When MPI_NODE_COUNT <= 1 and jobID >= 0, the result of runBGM is stored in
   _sample_results[jobID]. The multi-node branch is empty, and the function
   has no return statement. */
function handleMPIBGM (_bgm_data, jobID)
{
if (MPI_NODE_COUNT <= 1)
{
if (jobID >= 0)
{
/* serial execution: compute the job in this process */
_sample_results [jobID] = runBGM(_bgm_data);
}
}
else
{
/* nothing is done here when more than one MPI node is present */

}
}

/* ________________________________________________________________ */

function handleMPIBGM (_bgm_data, jobID)
{
if (MPI_NODE_COUNT <= 1)
Expand Down Expand Up @@ -228,7 +177,6 @@ function handleMPIBGM (_bgm_data, jobID)
break;
}
}

}
doReceive = (jobID < 0) || (mpiNode == MPI_NODE_COUNT-1);
if (doReceive)
Expand All @@ -250,19 +198,20 @@ function handleMPIBGM (_bgm_data, jobID)
return 0;
}

/* ________________________________________________________________ */

function runBGM (_bgm_data)
function runBGM (_bgm_data)
{

num_nodes = Abs (_bgm_data["MAP"]);
num_nodes = Abs (_bgm_data["MAP"]);
num_parents = _bgm_data["PARENTS"];
num_parents = num_parents$1;

branches = Rows(_bgm_data["MATRIX"]);

BGM_MCMC_DURATION = _bgm_data ["BGM_MCMC_DURATION"];
BGM_MCMC_BURNIN = _bgm_data ["BGM_MCMC_BURNIN"];
BGM_MCMC_SAMPLES = _bgm_data ["BGM_MCMC_SAMPLES"];

/* convert data to matrix form */
bgm_data_matrix = {branches,num_nodes};

for (k = 0; k < num_nodes; k=k+1)
Expand All @@ -273,21 +222,27 @@ function runBGM (_bgm_data)
bgm_data_matrix[j][k] = (_bgm_data["MATRIX"])[j][i];
}
}

discreteNodes = {};
continuousNodes = {};

/* BGM_NTHREADS = 2; */
num_parents = num_parents$1;


nodes = {};
for (k = 0; k < num_nodes; k = k+1)
{
discreteNodes[Abs(discreteNodes)] = make_discrete_node (k, 0, num_parents);
}

BGM gen_bgm = (discreteNodes, continuousNodes);
SetParameter (gen_bgm, BGM_DATA_MATRIX, bgm_data_matrix);
SetParameter (gen_bgm, BGM_WEIGHT_MATRIX, num_nodes);
CovarianceMatrix (postp, gen_bgm);
return postp;
/* Arguments:
1. node name, must be a string
2. maximum number of parents
3. prior sample size - always uninformative (count split evenly across levels)
- if we were truly Bayesian, we would let the user set informative priors..
4. number of levels - always binary in this case (substitution mapped to branch)
*/
node_name = ""+k;
nodes[Abs(nodes)] = add_discrete_node (node_name, num_parents, 0, 2);
}

BayesianGraphicalModel gen_bgm = (nodes);

// no imputation of missing data (setting args to 0)
attach_data("gen_bgm", bgm_data_matrix, 0, 0, 0);

bgm_result = order_MCMC("gen_bgm", BGM_MCMC_DURATION, BGM_MCMC_BURNIN, BGM_MCMC_SAMPLES);

return bgm_result;
}
167 changes: 167 additions & 0 deletions tests/hbltests/BayesianGraphicalModels/TestBGM.bf
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
/* First section of the BayesianGraphicalModel test script: load the helper
   library, import the ALARM network, and simulate a data set from it. */

/* number of cases requested from simulate_data below */
ncases = 500;

fprintf (stdout, "\nTesting BayesianGraphicalModel discrete graph functionality\n\n");


/* pull in the bayesgraph helpers used throughout the rest of this script */
ExecuteAFile (HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"bayesgraph.ibf");

fprintf (stdout, "Loaded bayesgraph include file\n");


/* import Bayesian network structure and parameters
from XMLBIF (XML Bayesian Interchange Format)
as an associative list
*/

fprintf (stdout, "Import ALARM network from XMLBIF file...");

/* the imported network is read back below through the variable Alarm */
import_xmlbif ("alarm.xml", "Alarm");

/* sanity check: the first key of the imported list must be "Hypovolemia" */
if ( (Rows(Alarm))[0] == "Hypovolemia") {
fprintf (stdout, "[PASSED]\n");
} else {
fprintf (stdout, "[FAILED]\n");
}


// adjacency matrix of network
/* kept as the reference ("true") structure for check_edgelist later on */
adjmat = list2adjmat(Alarm);



/* this object contains all the info we need to simulate data */
fprintf (stdout, "Simulate ", ncases, " cases from network object...");
sim = simulate_data (Alarm, ncases);
/* the simulated matrix must have exactly one row per requested case */
if (Rows(sim) == ncases) {
fprintf (stdout, "[PASSED]\n");
} else {
fprintf (stdout, "[FAILED]\n");
}


/* keys of associative list are variable (node) names */
names = Rows(Alarm);

/* a Bayesian Graphical Model object in HyPhy is constructed with
a single (associative list) argument
*/
nodes={};
/* one discrete node per network variable, appended in key order */
for (i = 0; i < Abs(Alarm); i=i+1)
{
/* add_discrete_node ( node name,
max. # parents,
prior sample size,
# levels)
*/
/* max. 2 parents, prior sample size 0, level count taken from the imported network */
nodes[Abs(nodes)] = add_discrete_node (names[i], 2, 0, (Alarm[names[i]])["Levels"]);
}


/* node count, passed to write_edgelist further down in this script */
num_nodes = Abs(nodes);

/* construct BGM */
/* no [PASSED]/[FAILED] verdict follows this message: the check is disabled below */
fprintf (stdout, "Create BGM object...\n");

BayesianGraphicalModel alarm_bgm = (nodes);

GetString (bgm_names_list, BayesianGraphicalModel, -1); // returns names of all BGMs

// THIS TEST IS BROKEN
/* The disabled check below looks for "alarm_bgm" in bgm_names_list and
   reports [FAILED] if it is not found. */
/*
lLength = Rows(bgm_names_list) * Columns(bgm_names_list);
for (_i = 0; _i < lLength; _i += 1) {
if (bgm_names_list[_i] == "alarm_bgm") {
fprintf (stdout, "[PASSED]\n");
break;
}
}
if (_i == lLength) {
fprintf (stdout, "[FAILED]\n");
}
*/

/*
Assign data set to BGM.
attach_data ( BGM identifier,
data matrix,
Gibbs imputation #steps,
" " burnin,
" " #samples)
*/

/* Attach the simulated data to the model (all three Gibbs imputation
   arguments set to 0) and verify the size of the node score cache. */
fprintf (stdout, "Attaching data and caching node scores...");
attach_data ("alarm_bgm", sim, 0, 0, 0);
cache = get_node_score_cache("alarm_bgm");
/* NOTE(review): 111 is a hard-coded expected cache size; its derivation is
   not shown in this script -- confirm it if the network or the
   max-parents setting changes. */
if (Abs(cache) == 111) {
fprintf (stdout, "[PASSED]\n");
} else {
fprintf (stdout, "[FAILED]\n");
}


/* Graph structural MCMC.
   Runs graph_MCMC on alarm_bgm, scores the sampled edges against the
   reference adjacency matrix at a cutoff of 0.8, prints sensitivity and
   specificity, and then writes the edge list and a DOT file.
   NOTE(review): the meaning of the numeric graph_MCMC arguments
   (100000, 100000, 100, 0) is defined in bayesgraph.ibf and is not visible
   here -- confirm before changing them. */
fprintf (stdout, "RUNNING GRAPH-MCMC\n");

result0 = graph_MCMC ("alarm_bgm", 100000, 100000, 100, 0);


/* check_edgelist result, as labelled by the output below:
   [0] true positives, [1] false negatives,
   [2] false positives, [3] true negatives */
temp = check_edgelist (result0, adjmat, 0.8);
fprintf (stdout, "\tTrue positives = ", temp[0], "\n");
fprintf (stdout, "\tFalse negatives = ", temp[1], "\n");
fprintf (stdout, "\tFalse positives = ", temp[2], "\n");
fprintf (stdout, "\tTrue negatives = ", temp[3], "\n");

sens = temp[0]/(temp[0]+temp[1]);
spec = temp[3]/(temp[2]+temp[3]);

fprintf (stdout, "\tSensitivity (TP/TP+FN) = ", sens, "\n");
fprintf (stdout, "\tSpecificity (TN/TN+FP) = ", spec, "\n");

/* The 75% threshold applies to sensitivity (sens), so the label names it
   as such; it previously said "Specificity" twice. */
fprintf (stdout, "Sensitivity > 75% and specificity > 90% for cutoff = 0.8 ... ");
if (sens > 0.75 && spec > 0.9) {
    fprintf (stdout, "[PASSED]\n");
} else {
    fprintf (stdout, "[FAILED]\n");
}



/* report the chain and write the results to files in the working directory */
display_MCMC_chain (result0);
write_edgelist("TestBGM.graphMCMC.edges", result0, num_nodes, 1);
mcmc_graph_to_dotfile("TestBGM.graphMCMC.dot", 0.6, result0, nodes);



/* Node order permutation MCMC.
   Runs order_MCMC on alarm_bgm, scores the sampled edges against the
   reference adjacency matrix at a cutoff of 0.8, prints sensitivity and
   specificity, and then writes the edge list and a DOT file.
   NOTE(review): the numeric arguments (10000, 10000, 100) are presumably
   chain duration, burn-in and number of samples, by analogy with the
   BGM_MCMC_DURATION / BGM_MCMC_BURNIN / BGM_MCMC_SAMPLES call in BGM.bf --
   confirm against bayesgraph.ibf. */

fprintf (stdout, "RUNNING ORDER-MCMC\n");

result1 = order_MCMC ("alarm_bgm", 10000, 10000, 100);

/* check_edgelist result, as labelled by the output below:
   [0] true positives, [1] false negatives,
   [2] false positives, [3] true negatives */
temp = check_edgelist (result1, adjmat, 0.8);
fprintf (stdout, "\tTrue positives = ", temp[0], "\n");
fprintf (stdout, "\tFalse negatives = ", temp[1], "\n");
fprintf (stdout, "\tFalse positives = ", temp[2], "\n");
fprintf (stdout, "\tTrue negatives = ", temp[3], "\n");

sens = temp[0]/(temp[0]+temp[1]);
spec = temp[3]/(temp[2]+temp[3]);

fprintf (stdout, "\tSensitivity (TP/TP+FN) = ", sens, "\n");
fprintf (stdout, "\tSpecificity (TN/TN+FP) = ", spec, "\n");

/* The 75% threshold applies to sensitivity (sens), so the label names it
   as such; it previously said "Specificity" twice. */
fprintf (stdout, "Sensitivity > 75% and specificity > 90% for cutoff = 0.8 ... ");
if (sens > 0.75 && spec > 0.9) {
    fprintf (stdout, "[PASSED]\n");
} else {
    fprintf (stdout, "[FAILED]\n");
}


/* write the results to files in the working directory */
write_edgelist("TestBGM.orderMCMC.edges", result1, num_nodes, 1);
mcmc_graph_to_dotfile("TestBGM.orderMCMC.dot", 0.6, result1, nodes);





Loading

0 comments on commit f376178

Please sign in to comment.