Skip to content

Commit

Permalink
Build target url list
Browse files Browse the repository at this point in the history
  • Loading branch information
luke-at-flexion authored and github-actions[bot] committed Oct 19, 2024
1 parent 84efacd commit cc16cd9
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 8 deletions.
16 changes: 8 additions & 8 deletions data/site-scanning-target-url-list-analysis.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
question,answer
gov url list length,1369
gov url list length,1373
pulse url list length,24636
dap url list length,10000
omb idea url list length,9654
Expand All @@ -11,15 +11,15 @@ oira url list length,7539
.mil first url list length,1034
.mil second url list length,275
other website url list length,8
combined url list length,53535
deduped url list length,37163
url list length after ignore list checking beginnning of urls processed,33541
url list length after ignore list checking entire url,32551
number of .gov base domains,1369
combined url list length,53543
deduped url list length,37171
url list length after ignore list checking beginnning of urls processed,33549
url list length after ignore list checking entire url,32559
number of .gov base domains,1373
number of .mil base domains,77
number of urls with non-.gov or non-.mil base domains removed,3796
url list length after non-federal urls removed,29075
url list length after non-federal urls removed,29083
Number of omb_idea_public fields = TRUE,6809
Number of omb_idea_public fields = FALSE,2610
Number of omb_idea_public fields = blank,19656
Number of omb_idea_public fields = blank,19664
Number of omb_idea_public fields that != TRUE FALSE or blank,0
8 changes: 8 additions & 0 deletions data/site-scanning-target-url-list.csv
Original file line number Diff line number Diff line change
Expand Up @@ -5150,6 +5150,8 @@ www.dod.mil,dod.mil,.mil,Executive,Department of Defense,,FALSE,FALSE,FALSE,FALS
www.travel.dod.mil,dod.mil,.mil,Executive,Department of Defense,,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,
dodcui.mil,dodcui.mil,.mil,Executive,Department of Defense,,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE
www.dodcui.mil,dodcui.mil,.mil,Executive,Department of Defense,,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,
doddns.gov,doddns.gov,.gov,Executive,Department of Defense,Office of the Department of Defense Chief Information Officer,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
www.doddns.gov,doddns.gov,.gov,Executive,Department of Defense,Office of the Department of Defense Chief Information Officer,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
dodig.mil,dodig.mil,.mil,Executive,Department of Defense,,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE
www.dodig.mil,dodig.mil,.mil,Executive,Department of Defense,,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,
greenfleet.dodlive.mil,dodlive.mil,.mil,Executive,Department of Defense,,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,
Expand Down Expand Up @@ -6109,6 +6111,8 @@ projects.ecr.gov,ecr.gov,.gov,Executive,Morris K. Udall and Stewart L. Udall Fou
roster.ecr.gov,ecr.gov,.gov,Executive,Morris K. Udall and Stewart L. Udall Foundation,Morris K. Udall and Stewart L. Udall Foundation,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
training.ecr.gov,ecr.gov,.gov,Executive,Morris K. Udall and Stewart L. Udall Foundation,Morris K. Udall and Stewart L. Udall Foundation,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
www.ecr.gov,ecr.gov,.gov,Executive,Morris K. Udall and Stewart L. Udall Foundation,Morris K. Udall and Stewart L. Udall Foundation,TRUE,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
ecs.gov,ecs.gov,.gov,Executive,Department of Defense,Office of the Department of Defense Chief Information Officer,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
www.ecs.gov,ecs.gov,.gov,Executive,Department of Defense,Office of the Department of Defense Chief Information Officer,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
1edstg.ed.gov,ed.gov,.gov,Executive,Department of Education,Office of Chief Information Officer,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
1eduat.ed.gov,ed.gov,.gov,Executive,Department of Education,Office of Chief Information Officer,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
21apr.ed.gov,ed.gov,.gov,Executive,Department of Education,Office of Chief Information Officer,FALSE,FALSE,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
Expand Down Expand Up @@ -13585,6 +13589,8 @@ www.mcc.gov,mcc.gov,.gov,Executive,Millennium Challenge Corporation,Millennium C
mcctest.gov,mcctest.gov,.gov,Executive,Millennium Challenge Corporation,Millennium Challenge Corporation,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
www.mcctest.gov,mcctest.gov,.gov,Executive,Millennium Challenge Corporation,Millennium Challenge Corporation,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
www.mda.mil,mda.mil,.mil,Executive,Department of Defense,,FALSE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE,TRUE,
mds.gov,mds.gov,.gov,Executive,Department of Defense,Office of the Department of Defense Chief Information Officer,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
www.mds.gov,mds.gov,.gov,Executive,Department of Defense,Office of the Department of Defense Chief Information Officer,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
medalofvalor.gov,medalofvalor.gov,.gov,Executive,Department of Justice,Department of Justice,TRUE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
www.medalofvalor.gov,medalofvalor.gov,.gov,Executive,Department of Justice,Department of Justice,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
carts.medicaid.gov,medicaid.gov,.gov,Executive,Department of Health and Human Services,Centers for Medicare and Medicaid Services,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
Expand Down Expand Up @@ -27221,6 +27227,8 @@ usgcrp.gov,usgcrp.gov,.gov,Executive,United States Global Change Research Progra
www.usgcrp.gov,usgcrp.gov,.gov,Executive,United States Global Change Research Program,U.S. Global Change Research Program,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
usgeo.gov,usgeo.gov,.gov,Executive,National Aeronautics and Space Administration,NASA Langley Research Center,TRUE,TRUE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE
www.usgeo.gov,usgeo.gov,.gov,Executive,National Aeronautics and Space Administration,NASA Langley Research Center,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
usgovdod.gov,usgovdod.gov,.gov,Executive,Department of Defense,Office of the Department of Defense Chief Information Officer,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
www.usgovdod.gov,usgovdod.gov,.gov,Executive,Department of Defense,Office of the Department of Defense Chief Information Officer,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
usgovernmentmanual.gov,usgovernmentmanual.gov,.gov,Legislative,Government Publishing Office,Office of Federal Register (NF),TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
www.usgovernmentmanual.gov,usgovernmentmanual.gov,.gov,Legislative,Government Publishing Office,Office of Federal Register (NF),TRUE,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
3d.wr.usgs.gov,usgs.gov,.gov,Executive,Department of the Interior,U.S. Geological Survey,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,
Expand Down
8 changes: 8 additions & 0 deletions data/snapshots/combined-dedup.csv
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ ctoc.gov
cttso.gov
dc3on.gov
defense.gov
doddns.gov
ecs.gov
ehr.gov
fehrm.gov
fvap.gov
Expand All @@ -160,6 +162,7 @@ itc.gov
iwtsd.gov
jccs.gov
lps.gov
mds.gov
mtmc.gov
mypay.gov
nationalresourcedirectory.gov
Expand All @@ -175,6 +178,7 @@ tak.gov
tswg.gov
ukraineoversight.gov
usandc.gov
usgovdod.gov
budgetlob.gov
childstats.gov
collegenavigator.gov
Expand Down Expand Up @@ -1518,6 +1522,8 @@ www.ctoc.gov
www.cttso.gov
www.dc3on.gov
www.defense.gov
www.doddns.gov
www.ecs.gov
www.ehr.gov
www.fehrm.gov
www.fvap.gov
Expand All @@ -1529,6 +1535,7 @@ www.itc.gov
www.iwtsd.gov
www.jccs.gov
www.lps.gov
www.mds.gov
www.mtmc.gov
www.mypay.gov
www.nationalresourcedirectory.gov
Expand All @@ -1544,6 +1551,7 @@ www.tak.gov
www.tswg.gov
www.ukraineoversight.gov
www.usandc.gov
www.usgovdod.gov
www.budgetlob.gov
www.childstats.gov
www.collegenavigator.gov
Expand Down
8 changes: 8 additions & 0 deletions data/snapshots/combined.csv
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ ctoc.gov
cttso.gov
dc3on.gov
defense.gov
doddns.gov
ecs.gov
ehr.gov
fehrm.gov
fvap.gov
Expand All @@ -160,6 +162,7 @@ itc.gov
iwtsd.gov
jccs.gov
lps.gov
mds.gov
mtmc.gov
mypay.gov
nationalresourcedirectory.gov
Expand All @@ -175,6 +178,7 @@ tak.gov
tswg.gov
ukraineoversight.gov
usandc.gov
usgovdod.gov
budgetlob.gov
childstats.gov
collegenavigator.gov
Expand Down Expand Up @@ -1518,6 +1522,8 @@ www.ctoc.gov
www.cttso.gov
www.dc3on.gov
www.defense.gov
www.doddns.gov
www.ecs.gov
www.ehr.gov
www.fehrm.gov
www.fvap.gov
Expand All @@ -1529,6 +1535,7 @@ www.itc.gov
www.iwtsd.gov
www.jccs.gov
www.lps.gov
www.mds.gov
www.mtmc.gov
www.mypay.gov
www.nationalresourcedirectory.gov
Expand All @@ -1544,6 +1551,7 @@ www.tak.gov
www.tswg.gov
www.ukraineoversight.gov
www.usandc.gov
www.usgovdod.gov
www.budgetlob.gov
www.childstats.gov
www.collegenavigator.gov
Expand Down
4 changes: 4 additions & 0 deletions data/snapshots/gov.csv
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ ctoc.gov,Federal - Executive,Department of Defense,MCTFT,Starke,FL,(blank)
cttso.gov,Federal - Executive,Department of Defense,Irregular Warfare Technical Support Directorate,Alexandria,VA,[email protected]
dc3on.gov,Federal - Executive,Department of Defense,Defense Cyber Crime Center,Linthicum,MD,(blank)
defense.gov,Federal - Executive,Department of Defense,Defense Media Activity,Fort Meade,MD,[email protected]
doddns.gov,Federal - Executive,Department of Defense,Office of the Department of Defense Chief Information Officer,Washington,DC,(blank)
ecs.gov,Federal - Executive,Department of Defense,Office of the Department of Defense Chief Information Officer,Washington,DC,(blank)
ehr.gov,Federal - Executive,Department of Defense,Federal Electronic Health Record Modernization,Rosslyn,VA,[email protected]
fehrm.gov,Federal - Executive,Department of Defense,FEHRM,Rosslyn,VA,[email protected]
fvap.gov,Federal - Executive,Department of Defense,Federal Voting Assistance Program,Alexandria,VA,[email protected]
Expand All @@ -160,6 +162,7 @@ itc.gov,Federal - Executive,Department of Defense,Interagency Training Center,Fo
iwtsd.gov,Federal - Executive,Department of Defense,Irregular Warfare Technical Support Directorate,Alexandria,VA,[email protected]
jccs.gov,Federal - Executive,Department of Defense,Defense Logistics Agency,Alexandria,VA,[email protected]
lps.gov,Federal - Executive,Department of Defense,"National Security Agency, Laboratory for Physical Sciences",College Park,MD,(blank)
mds.gov,Federal - Executive,Department of Defense,Office of the Department of Defense Chief Information Officer,Washington,DC,(blank)
mtmc.gov,Federal - Executive,Department of Defense,Department of Defense,Alexandria,VA,(blank)
mypay.gov,Federal - Executive,Department of Defense,Defense Finance and Accounting Service,Indianapolis,IN,(blank)
nationalresourcedirectory.gov,Federal - Executive,Department of Defense,Office of Warrior Care Policy,Arlington,VA,(blank)
Expand All @@ -175,6 +178,7 @@ tak.gov,Federal - Executive,Department of Defense,TAK Product Center,Fort Belvoi
tswg.gov,Federal - Executive,Department of Defense,Irregular Warfare Technical Support Directorate,Alexandria,VA,[email protected]
ukraineoversight.gov,Federal - Executive,Department of Defense,Department of Defense Office of Inspector General,Alexandria,VA,(blank)
usandc.gov,Federal - Executive,Department of Defense,AFTAC/LSCSS,Patrick AFB,FL,[email protected]
usgovdod.gov,Federal - Executive,Department of Defense,Office of the Department of Defense Chief Information Officer,Washington,DC,(blank)
budgetlob.gov,Federal - Executive,Department of Education,Office of Chief Information Officer,Washington,DC,[email protected]
childstats.gov,Federal - Executive,Department of Education,Office of Chief Information Officer,Washington,DC,[email protected]
collegenavigator.gov,Federal - Executive,Department of Education,Office of Chief Information Officer,Washington,DC,[email protected]
Expand Down
8 changes: 8 additions & 0 deletions data/snapshots/remove-ignore-begins.csv
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ ctoc.gov
cttso.gov
dc3on.gov
defense.gov
doddns.gov
ecs.gov
ehr.gov
fehrm.gov
fvap.gov
Expand All @@ -160,6 +162,7 @@ itc.gov
iwtsd.gov
jccs.gov
lps.gov
mds.gov
mtmc.gov
mypay.gov
nationalresourcedirectory.gov
Expand All @@ -175,6 +178,7 @@ tak.gov
tswg.gov
ukraineoversight.gov
usandc.gov
usgovdod.gov
budgetlob.gov
childstats.gov
collegenavigator.gov
Expand Down Expand Up @@ -1513,6 +1517,8 @@ www.ctoc.gov
www.cttso.gov
www.dc3on.gov
www.defense.gov
www.doddns.gov
www.ecs.gov
www.ehr.gov
www.fehrm.gov
www.fvap.gov
Expand All @@ -1524,6 +1530,7 @@ www.itc.gov
www.iwtsd.gov
www.jccs.gov
www.lps.gov
www.mds.gov
www.mtmc.gov
www.mypay.gov
www.nationalresourcedirectory.gov
Expand All @@ -1539,6 +1546,7 @@ www.tak.gov
www.tswg.gov
www.ukraineoversight.gov
www.usandc.gov
www.usgovdod.gov
www.budgetlob.gov
www.childstats.gov
www.collegenavigator.gov
Expand Down
8 changes: 8 additions & 0 deletions data/snapshots/remove-ignore-contains.csv
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ ctoc.gov
cttso.gov
dc3on.gov
defense.gov
doddns.gov
ecs.gov
ehr.gov
fehrm.gov
fvap.gov
Expand All @@ -160,6 +162,7 @@ itc.gov
iwtsd.gov
jccs.gov
lps.gov
mds.gov
mtmc.gov
mypay.gov
nationalresourcedirectory.gov
Expand All @@ -175,6 +178,7 @@ tak.gov
tswg.gov
ukraineoversight.gov
usandc.gov
usgovdod.gov
budgetlob.gov
childstats.gov
collegenavigator.gov
Expand Down Expand Up @@ -1512,6 +1516,8 @@ www.ctoc.gov
www.cttso.gov
www.dc3on.gov
www.defense.gov
www.doddns.gov
www.ecs.gov
www.ehr.gov
www.fehrm.gov
www.fvap.gov
Expand All @@ -1523,6 +1529,7 @@ www.itc.gov
www.iwtsd.gov
www.jccs.gov
www.lps.gov
www.mds.gov
www.mtmc.gov
www.mypay.gov
www.nationalresourcedirectory.gov
Expand All @@ -1538,6 +1545,7 @@ www.tak.gov
www.tswg.gov
www.ukraineoversight.gov
www.usandc.gov
www.usgovdod.gov
www.budgetlob.gov
www.childstats.gov
www.collegenavigator.gov
Expand Down

0 comments on commit cc16cd9

Please sign in to comment.