forked from girirajanlab/CN_Learn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.params
executable file
·93 lines (86 loc) · 3.94 KB
/
config.params
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
################################################################################
# Script : config.params #
# Author : Vijay Kumar #
# Date : 4/5/2019 #
# This file is the master repository for the list of directories, files and #
# other global values and parameters used throughout the project. i.e., they #
# are shared across multiple scripts. The intent is to maintain such shared #
# variables in a single place and source this file within each bash script #
# #
# (c) 2019 - Vijay Kumar #
# Licensed under the GNU General Public License 3.0. #
################################################################################
# 1) Paths that must be reviewed and/or updated prior to running CN-Learn
#########################################################################
# 'TBD' values are presumably placeholders to be replaced by the user.
BAM_FILE_DIR='TBD'
REF_GENOME='TBD'
# Root directory of the software tools; tool sub-paths are appended to it
# directly, hence the trailing slash.
SW_DIR="/opt/tools/CN_Learn/software/"
# 'Y' or 'y' makes this file build a "docker run" command prefix; any other
# value leaves that prefix empty.
DOCKER_INDICATOR="Y"
#####################
# 2) Main Directories
#####################
# PROJ_DIR must be replaced with the project root. The sub-paths below are
# appended to it without a separator, so the value has to end with a '/'.
PROJ_DIR=TBD
SCRIPTS_DIR=${PROJ_DIR}'scripts/'
SOURCE_DIR=${PROJ_DIR}'source/'
DATA_DIR=${PROJ_DIR}'data/'
# Directory that contains the reference genome file. The operand is quoted and
# preceded by '--' so that a path containing blanks, glob characters or a
# leading '-' reaches dirname as one single argument.
REF_GENOME_DIR=$(dirname -- "${REF_GENOME}")
###################
# 3) Subdirectories
###################
# R scripts live below the scripts directory.
RSCRIPTS_DIR="${SCRIPTS_DIR}rscripts/"
# One data sub-directory per caller.
DATA_CANOES_DIR="${DATA_DIR}canoes/"
DATA_CODEX_DIR="${DATA_DIR}codex/"
DATA_CLAMMS_DIR="${DATA_DIR}clamms/"
DATA_XHMM_DIR="${DATA_DIR}xhmm/"
# Remaining sub-directories of the data directory.
DATA_BPCOV_DIR="${DATA_DIR}bp_coverage_dir/"
DATA_LOGS_DIR="${DATA_DIR}logs/"
PRED_DIR="${DATA_DIR}PRED_DIR/"
VALD_DIR="${DATA_DIR}VALD_DIR/"
###################
# 4) Software Tools
###################
# Each location is SW_DIR with a fixed sub-path appended.
GATK_SW_DIR="${SW_DIR}gatk-3.5/"
BEDTOOLS_DIR="${SW_DIR}bedtools2/bin/"
SAMTOOLS_DIR="${SW_DIR}samtools-1.3.1/"
PLINK_DIR="${SW_DIR}plinkseq-0.10/"
CLAMMS_DIR="${SW_DIR}clamms/"
XHMM_DIR="${SW_DIR}xhmm/"
###############
# 5) File Names
###############
# All files below are located in SOURCE_DIR.
ORIG_PROBES=${SOURCE_DIR}'exome_capture_targets.bed'
# TARGET_PROBES is derived from TARGET_PROBE_FILE and assigned exactly once,
# so changing the file name here is sufficient to change the full path too.
TARGET_PROBE_FILE='targets_auto_no_chr.bed'
TARGET_PROBES=${SOURCE_DIR}${TARGET_PROBE_FILE}
BAM_FILE_LIST_W_PATH=${SOURCE_DIR}'bam_file_list_w_full_path.txt'
BAM_FILE_LIST=${SOURCE_DIR}'bam_file_list.txt'
SAMPLE_LIST=${SOURCE_DIR}'sample_list.txt'
SAMPLE_LIST_TRAIN=${SOURCE_DIR}'sample_list_train.txt'
SAMPLE_LIST_TEST=${SOURCE_DIR}'sample_list_test.txt'
VAL_DATA_FILE=${SOURCE_DIR}'validated_cnvs.txt'
################################
# 6) Global Variables/Parameters
################################
# Space-separated list of the callers.
CALLER_LIST="CANOES CLAMMS CODEX XHMM"
# Number of words in CALLER_LIST. Some wc implementations pad the count with
# leading blanks; passing the output through arithmetic expansion reduces it
# to a bare integer. The list is quoted so it is never glob-expanded.
CALLER_COUNT=$(( $(printf '%s\n' "${CALLER_LIST}" | wc -w) ))
##########################################################################
# This threshold is used to label CNVs in training data as True vs. False
##########################################################################
VALDATA_OV_THRESHOLD=0.1
####################################################################################
# The parameter 'CLASSIFIER' is used to choose the classifier to be built by
# CN-Learn. Default = RF (Random Forest)
# Other possible values = LR (Logistic Regression); SVM (Support Vector Machine)
# NUM_TREES can be changed to increase or decrease the number of trees in the forest
####################################################################################
CLASSIFIER='RF'
NUM_TREES=100
#################################################################
# Set the docker command based on the indicator set by the user #
#################################################################
# When DOCKER_INDICATOR is 'Y' or 'y', DOCKER_COMMAND becomes a "docker run"
# prefix that bind-mounts the project, BAM and reference genome directories
# at the same paths inside the container and runs as the invoking user's
# uid:gid. For any other value (including empty or unset) it is the empty
# string. The selector is quoted, so an empty value or one containing blanks
# no longer makes the comparison itself fail.
case "${DOCKER_INDICATOR:-}" in
  Y | y)
    # The trailing blank is kept as-is; presumably callers prepend this
    # string directly to the command they run -- confirm against callers.
    DOCKER_COMMAND="docker run --rm -v ${PROJ_DIR}:${PROJ_DIR} -v ${BAM_FILE_DIR}:${BAM_FILE_DIR} -v ${REF_GENOME_DIR}:${REF_GENOME_DIR} --user $(id -u):$(id -g) girirajanlab/cnlearn "
    ;;
  *)
    DOCKER_COMMAND=''
    ;;
esac