diff --git a/README.md b/README.md index 6d5fe81..da3af56 100644 --- a/README.md +++ b/README.md @@ -273,7 +273,7 @@ import netrem_evaluation_functions as nm_eval dummy_data = generate_dummy_data(corrVals = [0.9, 0.5, 0.4, -0.3, -0.8], # the # of elements in corrVals is the # of predictors (X) num_samples_M = 100000, # the number of samples M - train_data_percent = 70) # the remainder out of 100 will be kept for testing. If 100, all data is used for training and testing. + train_data_percent = 70) # 70% of the 100,000 samples are used for training; the remaining 30% are kept for testing. If 100, then ALL data is used for both training and testing. ``` The Python console or Jupyter notebook will print out the following: @@ -403,21 +403,161 @@ y_test = dummy_data.view_y_test_df() Our generated single-cell gene expression data (X, y) looks like this: +```python +X_train.corr() # pairwise correlations among the predictors (TFs) in the training data +``` + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TF1TF2TF3TF4TF5
TF11.0000000.4448510.360777-0.274352-0.719915
TF20.4448511.0000000.195290-0.152394-0.398306
TF30.3607770.1952901.000000-0.125259-0.320436
TF4-0.274352-0.152394-0.1252591.0000000.242985
TF5-0.719915-0.398306-0.3204360.2429851.000000
+
+ + + + +```python +X_test.corr() # pairwise correlations among the predictors (TFs) in the testing data +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TF1TF2TF3TF4TF5
TF11.0000000.4403700.359511-0.269646-0.721168
TF20.4403701.0000000.189575-0.146711-0.389451
TF30.3595110.1895751.000000-0.119603-0.312932
TF4-0.269646-0.146711-0.1196031.0000000.241384
TF5-0.721168-0.389451-0.3129320.2413841.000000
+
+ ```python # prior network edge_list (missing edges or edges with no edge weight will be added with the default_edge_list so the network is fully-connected): -edge_list = [["TF1", "TF2", 0.9], ["TF4", "TF5", 0.75], ["TF1", "TF3"], ["TF1", "TF4"], ["TF1", "TF5"], +edge_list = [["TF1", "TF2", 0.8], ["TF4", "TF5", 0.95], ["TF1", "TF3"], ["TF1", "TF4"], ["TF1", "TF5"], ["TF2", "TF3"], ["TF2", "TF4"], ["TF2", "TF5"], ["TF3", "TF4"], ["TF3", "TF5"]] -beta_network_val = 3 -# by default, model_type is Lasso, so alpha_lasso_val will be specified for the alpha_lasso parameter. (Otherwise, if model_type is LassoCV, alpha_lasso is determined by cross-validation on training data). -alpha_lasso_val = 0.01 +beta_network_val = 1 +# By default, model_type is Lasso, in which case a value must be specified for the alpha_lasso parameter. +# However, we specify model_type = LassoCV here, so alpha_lasso is determined by cross-validation on the training data. # Building the network regularized regression model: -# Please note: To include nodes found in the gene expression data that are not found in the PPI Network (e.g. TF6 in our case), we use False for the overlapped_nodes_only argument (otherwise, we would only use TFs 1 to 5). # By default, edges are constructed between all of the nodes; nodes with a missing edge are assigned the default_edge_weight. netrem_demo = netrem(edge_list = edge_list, beta_net = beta_network_val, - alpha_lasso = alpha_lasso_val, + model_type = "LassoCV", view_network = True) # Fitting the NetREm model on training data: X_train and y_train: @@ -428,25 +568,12 @@ netrem_demo.fit(X_train, y_train) ![png](output_3_1.png) - - - -![png](output_3_2.png) - - - - 1 new node(s) added to network based on gene expression data ['TF6'] - - - -![png](output_3_5.png) - ![png](netrem_estimator.PNG) -To view and extract the predicted model coefficients for the predictors: +To view and extract the predicted model coefficients *c* for the predictors: