diff --git a/README.md b/README.md
index 6d5fe81..da3af56 100644
--- a/README.md
+++ b/README.md
@@ -273,7 +273,7 @@ import netrem_evaluation_functions as nm_eval
dummy_data = generate_dummy_data(corrVals = [0.9, 0.5, 0.4, -0.3, -0.8], # the # of elements in corrVals is the # of predictors (X)
num_samples_M = 100000, # the number of samples M
- train_data_percent = 70) # the remainder out of 100 will be kept for testing. If 100, all data is used for training and testing.
+ train_data_percent = 70) # percentage of the M samples used for training; the remaining 30% (30,000 of the 100,000 samples) is kept for testing. If 100, then ALL data is used for both training and testing.
```
The Python console or Jupyter notebook will print out the following:
@@ -403,21 +403,161 @@ y_test = dummy_data.view_y_test_df()
Our generated single-cell gene expression data (X, y) looks like this:
+```python
+X_train.corr() # pairwise correlations among the predictors (TFs), computed over the training samples
+```
+
+
+
+
+
+ |
+ TF1 |
+ TF2 |
+ TF3 |
+ TF4 |
+ TF5 |
+
+
+
+
+ TF1 |
+ 1.000000 |
+ 0.444851 |
+ 0.360777 |
+ -0.274352 |
+ -0.719915 |
+
+
+ TF2 |
+ 0.444851 |
+ 1.000000 |
+ 0.195290 |
+ -0.152394 |
+ -0.398306 |
+
+
+ TF3 |
+ 0.360777 |
+ 0.195290 |
+ 1.000000 |
+ -0.125259 |
+ -0.320436 |
+
+
+ TF4 |
+ -0.274352 |
+ -0.152394 |
+ -0.125259 |
+ 1.000000 |
+ 0.242985 |
+
+
+ TF5 |
+ -0.719915 |
+ -0.398306 |
+ -0.320436 |
+ 0.242985 |
+ 1.000000 |
+
+
+
+
+
+
+
+
+```python
+X_test.corr() # pairwise correlations among the predictors (TFs), computed over the testing samples
+```
+
+
+
+
+
+
+
+
+
+ |
+ TF1 |
+ TF2 |
+ TF3 |
+ TF4 |
+ TF5 |
+
+
+
+
+ TF1 |
+ 1.000000 |
+ 0.440370 |
+ 0.359511 |
+ -0.269646 |
+ -0.721168 |
+
+
+ TF2 |
+ 0.440370 |
+ 1.000000 |
+ 0.189575 |
+ -0.146711 |
+ -0.389451 |
+
+
+ TF3 |
+ 0.359511 |
+ 0.189575 |
+ 1.000000 |
+ -0.119603 |
+ -0.312932 |
+
+
+ TF4 |
+ -0.269646 |
+ -0.146711 |
+ -0.119603 |
+ 1.000000 |
+ 0.241384 |
+
+
+ TF5 |
+ -0.721168 |
+ -0.389451 |
+ -0.312932 |
+ 0.241384 |
+ 1.000000 |
+
+
+
+
+
```python
# prior network edge_list (missing edges or edges with no edge weight will be added with the default_edge_list so the network is fully-connected):
-edge_list = [["TF1", "TF2", 0.9], ["TF4", "TF5", 0.75], ["TF1", "TF3"], ["TF1", "TF4"], ["TF1", "TF5"],
+edge_list = [["TF1", "TF2", 0.8], ["TF4", "TF5", 0.95], ["TF1", "TF3"], ["TF1", "TF4"], ["TF1", "TF5"],
["TF2", "TF3"], ["TF2", "TF4"], ["TF2", "TF5"], ["TF3", "TF4"], ["TF3", "TF5"]]
-beta_network_val = 3
-# by default, model_type is Lasso, so alpha_lasso_val will be specified for the alpha_lasso parameter. (Otherwise, if model_type is LassoCV, alpha_lasso is determined by cross-validation on training data).
-alpha_lasso_val = 0.01
+beta_network_val = 1
+# by default, model_type is Lasso, so alpha_lasso_val will be specified for the alpha_lasso parameter.
+# However, we will specify model_type = LassoCV, so alpha_lasso is instead determined by cross-validation on the training data.
# Building the network regularized regression model:
-# Please note: To include nodes found in the gene expression data that are not found in the PPI Network (e.g. TF6 in our case), we use False for the overlapped_nodes_only argument (otherwise, we would only use TFs 1 to 5).
# By default, edges are constructed between all of the nodes; nodes with a missing edge are assigned the default_edge_weight.
netrem_demo = netrem(edge_list = edge_list,
beta_net = beta_network_val,
- alpha_lasso = alpha_lasso_val,
+ model_type = "LassoCV",
view_network = True)
# Fitting the NetREm model on training data: X_train and y_train:
@@ -428,25 +568,12 @@ netrem_demo.fit(X_train, y_train)
![png](output_3_1.png)
-
-
-
-![png](output_3_2.png)
-
-
-
- 1 new node(s) added to network based on gene expression data ['TF6']
-
-
-
-![png](output_3_5.png)
-
![png](netrem_estimator.PNG)
-To view and extract the predicted model coefficients for the predictors:
+To view and extract the predicted model coefficients *c* for the predictors: