Skip to content

Commit

Permalink
Corrected the outlier removal: now removes outliers for DV and IV ins…
Browse files Browse the repository at this point in the history
…tead of DV and a CV
  • Loading branch information
Bvlampe committed Oct 26, 2022
1 parent 9edd03b commit d06bd2d
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 19 deletions.
14 changes: 7 additions & 7 deletions desc_BRD.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
,conflict_id,start_year,end_year,duration,deaths,avg_deaths,HR_before,HR_after,HR_rel_change,CV_global_homicides,CV_pop
count,82.0,82.0,82.0,82.0,82.0,82.0,82.0,82.0,82.0,82.0,82.0
mean,3429.6341463414633,2007.5731707317073,2010.3658536585365,3.792682926829268,545.6646341463414,161.45398581571465,4.993741631865853,3.796701454662195,0.8243623626040427,6.154542580390244,287548547.0243902
std,5475.837798263724,6.6537476450175985,5.52996817601173,4.238966228733944,1466.4197869036102,365.802120773957,4.599610917987891,2.839624253536525,0.24838033370845905,0.31891501001459815,470918932.00473624
count,86.0,86.0,86.0,86.0,86.0,86.0,86.0,86.0,86.0,86.0,86.0
mean,3546.6744186046512,2007.2441860465117,2009.9302325581396,3.686046511627907,522.343023255814,155.3921725219605,4.912878222727906,3.73790942480407,0.8241140583590874,6.191248665860466,287334237.0232558
std,5504.221326436349,6.66674988548151,5.7512979561638575,4.167836589059764,1435.4439030922135,358.166319192644,4.510738521083326,2.792703375372585,0.25072179793298166,0.35336373683444416,470302912.8998919
min,205.0,1993.0,2000.0,1.0,12.5,5.761904761904762,0.674355823,0.409874201,0.306402839995387,5.564134104,1218441.0
25%,329.0,2001.5,2005.25,1.0,35.25,27.0,2.4123509888,2.194371056,0.6420488166896541,5.947845376,9930533.25
50%,401.0,2008.0,2012.0,2.0,152.5,56.57142857142857,3.8570083267,2.89152676285,0.8258187917717807,6.032096376,51852464.0
75%,439.0,2014.0,2015.0,4.0,483.75,116.04411764705883,5.8443891063125,4.52199352505,0.9497882980289541,6.2650332495,181386528.5
max,14268.0,2017.0,2019.0,17.0,12254.0,2370.0,27.132408252,18.45118064,1.4755054390329303,6.906000623,1379860000.0
25%,335.75,2001.0,2005.0,1.0,35.25,27.0,2.40308599245,2.1407802002,0.6420488166896541,5.947845376,9629222.75
50%,407.0,2007.5,2011.0,2.0,124.5,46.5625,3.7921992399,2.8169179908,0.8258187917717807,6.032096376,51421538.0
75%,8616.25,2014.0,2015.0,4.0,443.75,113.9375,5.393203677,4.1069368802,0.9497882980289541,6.293981484,181386528.5
max,14268.0,2017.0,2019.0,17.0,12254.0,2370.0,27.132408252,18.45118064,1.4755054390329303,6.943723418,1379860000.0
16 changes: 8 additions & 8 deletions desc_GED.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
,conflict_id,start_year,end_year,duration,best,avg_deaths,HR_before,HR_after,HR_rel_change,CV_global_homicides,CV_pop
count,75.0,75.0,75.0,75.0,75.0,75.0,75.0,75.0,75.0,75.0,75.0
mean,3569.9733333333334,2008.28,2010.7466666666667,3.466666666666667,536.2533333333333,162.10019943019944,4.677040203503111,3.5202213017524446,0.8321387924947842,6.130849106120001,309040903.41333336
std,5580.853153255172,6.204967495613626,5.48485219523768,4.1859912234168055,1673.1973687987186,396.71902425764586,4.324501652295224,2.2568534599127,0.25021897715028246,0.2950419498644734,486719259.8903954
min,205.0,1994.0,2000.0,1.0,1.0,0.3333333333333333,0.874174528,0.566649714,0.3086034947899748,5.564134104,2884239.0
25%,341.0,2004.0,2006.0,1.0,27.0,25.5,2.3418648710500003,2.194371056,0.6453805976405484,5.911968893,10354851.0
50%,413.0,2009.0,2012.0,2.0,83.0,44.0,3.773985048666667,2.8048488914,0.8265738478727482,6.032096376,51852464.0
75%,5890.5,2014.0,2015.0,4.0,346.0,91.70833333333334,5.107960423950001,3.993503397875,0.9514634425668795,6.178188546,205768783.0
max,14268.0,2017.0,2019.0,17.0,13474.0,2370.0,27.132408252,11.837778168,1.4755054390329303,6.854248582,1379860000.0
count,81.0,81.0,81.0,81.0,81.0,81.0,81.0,81.0,81.0,81.0,81.0
mean,3604.567901234568,2007.4814814814815,2010.0617283950617,3.580246913580247,517.1111111111111,153.54709823783898,4.905121342925927,3.7016646412133745,0.8256084900614581,6.189664877555555,301431489.20987654
std,5558.773709052815,6.671040232061097,5.812369738351875,4.1739196132977465,1616.5676215364456,383.0011930297612,4.588468146179352,2.7885591454396397,0.25625369874873494,0.3526037494816309,480855424.9597165
min,205.0,1993.0,2000.0,1.0,1.0,0.3333333333333333,0.874174528,0.566649714,0.306402839995387,5.564134104,1851519.0
25%,327.0,2001.0,2005.0,1.0,27.0,24.0,2.4030700946,2.194371056,0.6387170357387597,5.947845376,10160034.0
50%,413.0,2008.0,2012.0,2.0,78.0,44.0,3.7839070714,2.8048488914,0.8265738478727482,6.032096376,51852464.0
75%,11342.0,2014.0,2015.0,4.0,345.0,89.41666666666667,5.1830775074,4.07348155075,0.9508719171450704,6.293981484,203631356.0
max,14268.0,2017.0,2019.0,17.0,13474.0,2370.0,27.132408252,18.45118064,1.4755054390329303,6.943723418,1379860000.0
8 changes: 4 additions & 4 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ def prepGED():
# Output "dirty" dataset
cc_ivdv.to_csv("output_GED_dirty.csv", index=False)

# Determine SD for important variables, set outliers to none (beyond 2SD)
for variable in ["HR_rel_change", "CV_global_homicides"]:
# Determine SD for DV and IV, set outliers to none (beyond 2SD)
for variable in ["HR_rel_change", "avg_deaths"]:
mean = cc_ivdv[variable].mean()
sd = cc_ivdv[variable].std()
for i in cc_ivdv.index:
Expand Down Expand Up @@ -286,8 +286,8 @@ def prepBRD():
# Output "dirty" dataset
cc_ivdv.to_csv("output_BRD_dirty.csv", index=False)

# Determine SD for important variables, set outliers to none (beyond 2SD)
for variable in ["HR_rel_change", "CV_global_homicides"]:
# Determine SD for DV and IV, set outliers to none (beyond 2SD)
for variable in ["HR_rel_change", "avg_deaths"]:
mean = cc_ivdv[variable].mean()
sd = cc_ivdv[variable].std()
for i in cc_ivdv.index:
Expand Down
4 changes: 4 additions & 0 deletions output_BRD.csv
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ conflict_id,country,start_year,end_year,duration,deaths,avg_deaths,HR_before,HR_
415,Tajikistan,1999,2000,2,22.333333333333332,11.166666666666666,8.7096859322,2.6878395171999996,0.30860349478997484,6.854248582,6216329.0
415,Uzbekistan,1999,2000,2,257.8333333333333,128.91666666666666,4.877556530999999,3.8675664538,0.7929311386181043,6.854248582,24650400.0
415,Uzbekistan,2004,2004,1,37.0,37.0,4.1341720318,3.16481964175,0.7655268376367133,6.504204151,25864350.0
417,North Macedonia,2001,2001,1,36.0,36.0,2.190837772,2.73724338225,1.2494048702434002,6.943723418,2034882.0
417,Serbia,2001,2001,1,36.0,36.0,2.403133686,1.8225732922,0.7584152736977613,6.943723418,7503433.0
418,Pakistan,2001,2017,17,1978.6666666666667,116.3921568627451,6.960406795199999,3.83042276,0.5503159330632107,5.906905873,207906210.0
418,Saudi Arabia,2001,2017,17,287.5,16.91176470588235,0.874174528,0.7481504585000001,0.8558364886376558,5.906905873,33101183.0
418,United States,2001,2017,17,527.6666666666666,31.039215686274506,6.259152821600001,5.529331947999999,0.883399416118835,5.906905873,325122128.0
Expand All @@ -65,6 +67,8 @@ conflict_id,country,start_year,end_year,duration,deaths,avg_deaths,HR_before,HR_
11342,India,2014,2014,1,26.0,26.0,3.7340571716,3.127678628,0.8376086611067677,6.032096376,1295600768.0
11349,China,2008,2008,1,36.0,36.0,1.5646788195999999,0.9019704371999999,0.576457242151832,6.010090245,1324655000.0
11350,Bangladesh,2005,2006,2,119.0,59.5,2.7027549398,2.7597588348000004,1.0210910335082835,6.178188546,140921154.0
11475,India,2000,2001,2,21.5,10.75,4.7526159436,4.014849655400001,0.8447662725212427,6.943723418,1075000094.0
11475,Myanmar,2000,2001,2,83.5,41.75,3.67412594,1.5560249210000001,0.4235088688876027,6.943723418,47225119.0
11475,Myanmar,2005,2007,3,345.0,115.0,1.798183639,1.9556972714,1.0875959657199397,6.012406023,49621479.0
11487,Philippines,2013,2013,1,35.0,35.0,8.609260416200001,7.973552257666667,0.926159957092583,6.032025006,98871558.0
13219,Ukraine,2014,2014,1,88.0,88.0,4.784429494,6.183730571,1.2924697874124425,6.032096376,45272155.0
Expand Down
6 changes: 6 additions & 0 deletions output_GED.csv
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ India,421,2013,2014,2,67,33.5,3.7839070714000003,3.127678628,0.8265738478727482,
India,434,2008,2008,1,25,25.0,3.9449651024000003,3.7340571716,0.9465374406806057,6.010090245,1200669762.0
India,11342,2012,2012,1,27,27.0,3.8004914084,3.3765961204,0.8884630321586598,6.113175523,1265780243.0
India,11342,2014,2014,1,26,26.0,3.7340571716,3.127678628,0.8376086611067677,6.032096376,1295600768.0
India,11475,2000,2001,2,3,1.5,4.7526159436,4.014849655400001,0.8447662725212427,6.943723418,1075000094.0
India,13653,2015,2018,4,274,68.5,3.7124690259999995,2.9540664355,0.7957147695540129,5.774733653,1352642283.0
"Iran, Islamic Rep.",205,2016,2016,1,22,22.0,2.5812707185,2.194371056,0.8501127139718104,5.947845376,79563991.0
"Iran, Islamic Rep.",14268,2017,2017,1,27,27.0,2.5812707185,2.194371056,0.8501127139718104,5.906905873,80673888.0
Expand All @@ -39,16 +40,20 @@ Kenya,413,2015,2016,2,1,0.5,5.0328433404999995,4.07348155075,0.8093797631192134,
Kyrgyz Republic,415,1999,2000,2,188,94.0,10.6436834502,8.475707348,0.7963133615967072,6.854248582,4898400.0
Lebanon,13675,2014,2015,2,202,101.0,3.4055137095999997,2.8289870901999996,0.8307078847532471,5.911968893,6532681.0
Lebanon,13675,2017,2017,1,70,70.0,3.6727600445999995,2.3458544086666664,0.6387170357387597,5.906905873,6819373.0
North Macedonia,417,2001,2001,1,72,72.0,2.190837772,2.73724338225,1.2494048702434002,6.943723418,2034882.0
Myanmar,221,2000,2011,12,1073,89.41666666666667,3.67412594,2.3956923606,0.6520441595423373,6.083969712,50990612.0
Myanmar,221,2013,2013,1,41,41.0,1.9556972714,2.2659629503333334,1.15864708892866,6.032025006,51852464.0
Myanmar,222,2013,2015,3,172,57.333333333333336,1.9556972714,1.1338204239999998,0.5797525213032313,5.911968893,52680724.0
Myanmar,253,2005,2005,1,32,32.0,1.798183639,1.6003813411999999,0.8899988335396037,6.293981484,48949931.0
Myanmar,264,1993,2002,10,1402,140.2,4.949777899,1.5166260056,0.30640283999538703,6.906000623,47702163.0
Myanmar,264,2005,2011,7,393,56.142857142857146,1.798183639,2.3956923606,1.3322845946547954,6.083969712,50990612.0
Myanmar,264,2013,2013,1,83,83.0,1.9556972714,2.2659629503333334,1.15864708892866,6.032025006,51852464.0
Myanmar,264,2015,2015,1,77,77.0,2.2370477533999997,1.1338204239999998,0.5068378277918973,5.911968893,52680724.0
Myanmar,439,2009,2009,1,78,78.0,1.5161230140000002,2.2370477533999997,1.4755054390329303,6.066836418,50250366.0
Myanmar,439,2014,2015,2,371,185.5,2.116360067,1.1338204239999998,0.5357407946215986,5.911968893,52680724.0
Myanmar,11475,2000,2001,2,102,51.0,3.67412594,1.5560249210000001,0.4235088688876027,6.943723418,47225119.0
Myanmar,11475,2005,2007,3,345,115.0,1.798183639,1.9556972714,1.0875959657199397,6.012406023,49621479.0
Namibia,327,1998,2002,5,63,12.6,21.130976846666666,18.45118064,0.8731816221222449,6.906000623,1851519.0
Pakistan,325,2004,2004,1,43,43.0,6.398900105800001,6.5320765162,1.0208123909106328,6.504204151,156664698.0
Pakistan,325,2006,2009,4,561,140.25,6.2489576067999995,7.2777356564,1.1646319457313175,6.066836418,175525610.0
Pakistan,325,2011,2016,6,496,82.66666666666667,6.781900508600001,3.863047232,0.5696113098535348,5.947845376,203631356.0
Expand All @@ -64,6 +69,7 @@ Sri Lanka,352,2003,2003,1,29,29.0,10.28221547,8.877556426333333,0.86338945650721
Tajikistan,395,2000,2000,1,46,46.0,7.6366523506,2.6878395171999996,0.35196567734143613,6.854248582,6216329.0
Tajikistan,415,1999,2000,2,3,1.5,8.7096859322,2.6878395171999996,0.30860349478997484,6.854248582,6216329.0
Thailand,253,2005,2005,1,3,3.0,7.9386936422000005,6.1543772198,0.775238029980771,6.293981484,65416189.0
Thailand,264,1993,2002,10,25,2.5,9.838479113,7.5423137026000004,0.7666137840994165,6.906000623,64069093.0
Turkiye,383,2005,2005,1,30,30.0,4.320607217999999,4.66034574,1.078632123879399,6.293981484,67903461.0
Turkiye,383,2016,2016,1,434,434.0,3.773985048666667,2.6592390495,0.7046236313096943,5.947845376,79827868.0
Turkiye,432,2007,2015,9,3,0.3333333333333333,4.5289905372499994,2.8048488914,0.6193099474001336,5.911968893,78529413.0
Expand Down

0 comments on commit d06bd2d

Please sign in to comment.