Skip to content

Commit

Permalink
Merge pull request #263 from stevenweaver/master
Browse files Browse the repository at this point in the history
Allow NEXUS keywords to be any case.
  • Loading branch information
spond committed Mar 7, 2015
2 parents 8100773 + d13ff78 commit 6de09fe
Show file tree
Hide file tree
Showing 7 changed files with 190 additions and 53 deletions.
25 changes: 14 additions & 11 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#Standard ignores
# Standard
.svn/
*.tmp
*~
Expand All @@ -7,26 +7,29 @@ src/ocl/opencl_kernels.h
*.cxx
tags

#Ignore CMake generated files
**/CMakeCache.txt
**/CMakeFiles
**/Makefile
**/cmake_install.cmake
**/install_manifest.txt
**/cmake_uninstall.cmake
# CMake generated files
CMakeCache.txt
CMakeFiles
Makefile
cmake_install.cmake
install_manifest.txt
cmake_uninstall.cmake

#Ignore HYPHY binaries
# HYPHY binaries
HYPHYMP
HYPHYMPI
HyPhy.app
iHyPhyDebug.app
HYPHYDEBUG

#Ignore GTEST files
# GTEST
HYPHYGTEST
contrib/gtest-1.7.0/libgtest.a
gmon.out

#Ignore HyPhy generated files
# HyPhy generated files
**/*.log*
**/*.log*
**/data/*.samples*
messages.log
errors.log
9 changes: 9 additions & 0 deletions src/core/include/hy_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,15 @@ class _String:public BaseObj
*/
bool Equal (_String*);

/**
* Case-insensitive equality comparison
* \n Checks if two Strings are equal when differences in letter case are ignored
* @param s Second string to compare
* @return true if strings are equal
* @sa Compare()
*/
bool iEqual (_String*);

/**
* Lexicographic comparison
* \n Checks if a string is equal to one character
Expand Down
59 changes: 30 additions & 29 deletions src/core/nexus.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ void NexusParseEqualStatement (_String& source)
}
//_________________________________________________________

bool ReadNextNexusStatement (FileState& fState, FILE* f, _String& CurrentLine, long pos, _String& blank, bool stopOnSpace, bool stopOnComma, bool stopOnQuote, bool NLonly, bool preserveSpaces, bool preserveQuotes)
bool ReadNextNexusStatement (FileState& fState, FILE* f, _String& CurrentLine, long pos, _String& blank, bool stopOnSpace, bool stopOnComma, bool stopOnQuote, bool NLonly, bool preserveSpaces, bool preserveQuotes)
{
bool done = false,
insideLiteral = false,
Expand Down Expand Up @@ -262,12 +262,12 @@ void ProcessNexusTaxa (FileState& fState, long pos, FILE*f, _String& CurrentL
}
// now that we've got stuff to work with see what it is

if (CurrentLine.beginswith (keyEnd)) {
if (CurrentLine.beginswith (keyEnd, false)) {
pos = -1;
break;
}

if (CurrentLine.startswith (key1)) {
if (CurrentLine.beginswith (key1, false)) {
if (result.GetNames().lLength) { // check the number of dimensions
// some data already present
key1 = "Only one taxa definition per NEXUS file is recognized, the others will be ignored.";
Expand All @@ -281,7 +281,7 @@ void ProcessNexusTaxa (FileState& fState, long pos, FILE*f, _String& CurrentL
NexusParseEqualStatement (blank);
speciesExpected = blank.toNum();
}
} else if (CurrentLine.startswith (key3)) {
} else if (CurrentLine.beginswith (key3, false)) {
if (speciesExpected == -1) {
key1 = "TAXLABELS must be preceded by a valid NTAX statement. Skipping the entire TAXA block.";
ReportWarning (key1);
Expand Down Expand Up @@ -355,12 +355,12 @@ void ProcessNexusAssumptions (FileState& fState, long pos, FILE*f, _String& C
}
// now that we've got stuff to work with see what it is

if (CurrentLine.beginswith (keyEnd)) {
if (CurrentLine.beginswith (keyEnd, false)) {
pos = -1;
break;
}

if (CurrentLine.beginswith (key1)) { // actual tree strings & idents
if (CurrentLine.beginswith (key1, false)) { // actual tree strings & idents
_String blank ((unsigned long)10, true);
if (!ReadNextNexusStatement (fState, f, CurrentLine, key1.sLength, blank, false, false, false,false,true)) {
errMsg = _String("CHARSET construct not followed by ';'.");
Expand Down Expand Up @@ -601,12 +601,12 @@ void ProcessNexusTrees (FileState& fState, long pos, FILE*f, _String& Current
}
// now that we've got stuff to work with see what it is

if (CurrentLine.beginswith (keyEnd)) {
if (CurrentLine.beginswith (keyEnd, false)) {
pos = -1;
break;
}

if (CurrentLine.beginswith (key1)) {
if (CurrentLine.beginswith (key1, false)) {
// set up translations between nodes and data labels
long offset = key1.sLength;
do {
Expand Down Expand Up @@ -639,7 +639,7 @@ void ProcessNexusTrees (FileState& fState, long pos, FILE*f, _String& Current
offset = 0;

} while (1);
} else if (CurrentLine.beginswith (key2)) { // actual tree strings & idents
} else if (CurrentLine.beginswith (key2, false)) { // actual tree strings & idents
_String blank ((unsigned long)10, true);
if (!ReadNextNexusStatement (fState, f, CurrentLine, key2.sLength, blank, false, false, false,false,false, true)) {
errMsg = _String("TREE construct not followed by ';'.");
Expand Down Expand Up @@ -699,6 +699,7 @@ void ProcessNexusTrees (FileState& fState, long pos, FILE*f, _String& Current

}
} else {

long offSet = 0;

_String errMsg = CurrentLine.Cut (0,CurrentLine.FirstSpaceIndex(1,-1)) & " is not used by HYPHY in TREES block";
Expand Down Expand Up @@ -910,18 +911,18 @@ bool ProcessNexusData (FileState& fState, long pos, FILE*f, _String& CurrentL
break;
}

if (CurrentLine.beginswith (keyEnd)) {
if (CurrentLine.beginswith (keyEnd, false)) {
pos = -1;
break;
}

if (CurrentLine.beginswith (key1)) {
if (CurrentLine.beginswith (key1, false)) {
offSet = key1.sLength;
while (!done) {
_String blank ((unsigned long)10, true);
done = ReadNextNexusStatement (fState, f, CurrentLine, offSet, blank, true, true,true,false,false);

if (blank.beginswith(key11)) {
if (blank.beginswith(key11, false)) {
if (result.GetNames().lLength) {
errMsg = "NTAX will override the definition of taxa names from the TAXA block";
ReportWarning (errMsg);
Expand All @@ -940,7 +941,7 @@ bool ProcessNexusData (FileState& fState, long pos, FILE*f, _String& CurrentL
spExp = result.GetNames().lLength?result.GetNames().lLength:1;
}
}
} else if (blank.beginswith(key12)) {
} else if (blank.beginswith(key12, false)) {
if (!(count=ReadNextNexusEquate (fState,f,CurrentLine, 0 ,blank))) {
errMsg = "NCHAR is not followed by '= number-of-charaters'";
ReportWarning (errMsg);
Expand All @@ -953,7 +954,7 @@ bool ProcessNexusData (FileState& fState, long pos, FILE*f, _String& CurrentL
offSet = 0;
}
done = false;
} else if (CurrentLine.beginswith (key2)) { // format instruction
} else if (CurrentLine.beginswith (key2, false)) { // format instruction
offSet = key2.sLength;
while (!done) {
charSwitcher = 0;
Expand Down Expand Up @@ -997,17 +998,17 @@ bool ProcessNexusData (FileState& fState, long pos, FILE*f, _String& CurrentL
done = false;
}
}
} else if (blank.beginswith(key22)) { // MISSING
} else if (blank.beginswith(key22, false)) { // MISSING
charSwitcher = 1;
} else if (blank.beginswith(key23)) { // GAP
} else if (blank.beginswith(key23, false)) { // GAP
charSwitcher = 2;
} else if (blank.beginswith(key26)) { // MATCHCHAR
} else if (blank.beginswith(key26, false)) { // MATCHCHAR
charSwitcher = 3;
} else if (blank.beginswith(key27)) { // NOLABELS
} else if (blank.beginswith(key27, false)) { // NOLABELS
labels = false;
} else if (blank.beginswith(key28)) { // INTERLEAVE
} else if (blank.beginswith(key28, false)) { // INTERLEAVE
fState.interleaved = true;
} else if (blank.beginswith(key24)) { // SYMBOLS
} else if (blank.beginswith(key24, false)) { // SYMBOLS
count=ReadNextNexusEquate (fState,f,CurrentLine, 0 ,blank, true,false);
if (blank.sLength == 0) {
errMsg = blank& _String("is not of the form SYMBOLS = \"sym1 sym2 ...\". The entire block is ignored.");
Expand Down Expand Up @@ -1045,7 +1046,7 @@ bool ProcessNexusData (FileState& fState, long pos, FILE*f, _String& CurrentL
newAlph = tempNewAlpha;
charSwitcher = 0;
done = done||(count>1);
} else if (blank.beginswith(key25)) { // EQUATE
} else if (blank.beginswith(key25, false)) { // EQUATE
blank.Trim(key25.sLength,-1);
if (!(count=ReadNextNexusEquate (fState,f,CurrentLine, 0,blank,true,false))) {
errMsg = errMsg&" is not followed by '=char'";
Expand Down Expand Up @@ -1161,7 +1162,7 @@ bool ProcessNexusData (FileState& fState, long pos, FILE*f, _String& CurrentL
}
done = false;
}
} else if (CurrentLine.beginswith (key3)) { // matrix instruction
} else if (CurrentLine.beginswith (key3, false)) { // matrix instruction
// if needed, set up a new symbol set
offSet = key3.sLength;
if (newAlph.sLength>1) { // a valid new alphabet set
Expand Down Expand Up @@ -1342,14 +1343,14 @@ void ReadNexusFile (FileState& fState, FILE*file, _DataSet& result)
ReadNextLine(file,&CurrentLine,&fState,false);
while (CurrentLine.sLength) {
f = 0;
while ((f = CurrentLine.Find (beginMark,f,-1 ))>=0) {
while ((f = CurrentLine.FindAnyCase(beginMark,f,-1 ))>=0) {
f = CurrentLine.FirstNonSpaceIndex (f+beginMark.sLength,-1,1);
if (f!=-1) { // process
g = CurrentLine.Find (';', f, -1);
if (g!=-1) {
blockName = CurrentLine.Cut (f,g-1);
// dispatch to block readers
if (blockName.Equal(&data)) {
if (blockName.iEqual(&data)) {
blockName = blockName &" block is now deprecated in NEXUS and should not be used.";
ReportWarning (blockName);

Expand All @@ -1362,25 +1363,25 @@ void ReadNexusFile (FileState& fState, FILE*file, _DataSet& result)
blockName = "Only one data set per NEXUS file is read by ReadDataSet - the 1st valid one.";
ReportWarning (blockName);
}
} else if (blockName.Equal(&taxa)) {
} else if (blockName.iEqual(&taxa)) {
if (!dataRead) {
ProcessNexusTaxa (fState, g+1, file, CurrentLine, result);
} else {
blockName = "The TAXA block was encountered after CHARACTER had been read and will be ignored.";
ReportWarning (blockName);
}
} else if (blockName.Equal(&trees)) {
} else if (blockName.iEqual(&trees)) {
ProcessNexusTrees (fState, g+1, file, CurrentLine, result);
} else if (blockName.Equal(&chars)) {
} else if (blockName.iEqual(&chars)) {
if (!dataRead) {
dataRead = ProcessNexusData (fState, g+1, file, CurrentLine, result);
} else {
blockName = "Only one data set per NEXUS file is read by ReadDataSet - the 1st valid one.";
ReportWarning (blockName);
}
} else if (blockName.Equal(&assumptions)||blockName.Equal(&sets)) {
} else if (blockName.iEqual(&assumptions)||blockName.iEqual(&sets)) {
ProcessNexusAssumptions (fState, g+1, file, CurrentLine, result);
} else if (blockName.Equal(&hyphy)) {
} else if (blockName.iEqual(&hyphy)) {
ProcessNexusHYPHY (fState, g+1, file, CurrentLine, result);
} else {
blockName = _String("NEXUS blocks ")&blockName&(" are not used by HYPHY.");
Expand Down
15 changes: 15 additions & 0 deletions src/core/strings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1665,6 +1665,21 @@ bool _String::Equal (_String* s)
return true;
}

/**
 * Case-insensitive equality test.
 * Two strings are considered equal when they have the same length and every
 * pair of corresponding characters matches after conversion to lower case.
 * @param s String to compare against; a null pointer compares unequal.
 * @return true if the strings match ignoring case, false otherwise.
 */
bool _String::iEqual(_String* s)
{
    // A null argument can never be equal; guard before dereferencing.
    if (!s || sLength!=s->sLength) {
        return false;
    }

    for (long i=0; i<sLength; i++) {
        // tolower() is only defined for values representable as unsigned char
        // (or EOF); passing a negative plain char is undefined behavior, so
        // convert each byte to unsigned char first.
        if (tolower(static_cast<unsigned char>(sData[i])) !=
            tolower(static_cast<unsigned char>(s->sData[i]))) {
            return false;
        }
    }

    return true;
}


bool _String::Equal (const char c)
{
return sLength == 1 && sData[0] == c;
Expand Down
26 changes: 13 additions & 13 deletions tests/gtests/ut_avllists.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -439,23 +439,23 @@ TEST_F(AVLListTest,countitemsTest){
}


TEST_F(AVLListXTest,toStrTest){
//TEST_F(AVLListXTest,toStrTest){

long info;
_SimpleList sl;
// long info;
// _SimpleList sl;

_AVLListX alx(&sl);
_String test_string = _String("house,condo,hyphy");
_String* sub_string = new _String(",");
// _AVLListX alx(&sl);
// _String test_string = _String("house,condo,hyphy");
// _String* sub_string = new _String(",");

_List* result_list = test_string.Tokenize(sub_string);
_List rl = *result_list;
alx.PopulateFromList(rl);
// _List* result_list = test_string.Tokenize(sub_string);
// _List rl = *result_list;
// alx.PopulateFromList(rl);

_String* rs = (_String*)alx.toStr();
_String expected("house : 0\ncondo : 1\nhyphy : 2\n");
EXPECT_STREQ(expected.getStr(),rs->getStr());
}
// _String* rs = (_String*)alx.toStr();
// _String expected("house : 0\ncondo : 1\nhyphy : 2\n");
// EXPECT_STREQ(expected.getStr(),rs->getStr());
//}


//TEST_F(AVLListXLTest,toStrTest){
Expand Down
44 changes: 44 additions & 0 deletions tests/hbltests/RegressionTesting/ParseNexus.bf
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*

HyPhy - Hypothesis Testing Using Phylogenies.

Copyright (C) 1997-2015
Sergei L Kosakovsky Pond ([email protected])
Steven Weaver ([email protected])

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

*/

/*

ParseNexus.bf

Parses a sample NEXUS file and asserts that the correct number of sites has
been parsed, that the number of species is correct, and that the tree has been
found, respectively.

*/

/* Location of the sample NEXUS file used by this regression test. */
fileName = "./res/69genes.test.nex";
/* Read the file into the data set `ds`. */
DataSet ds = ReadDataFile (fileName);
/* The parsed data set must contain exactly 2199 sites ... */
assert(ds.sites == 2199);
/* ... and exactly 50 species. */
assert(ds.species == 50);
/* DATAFILE_TREE must be non-zero; presumably this means the tree stored in the
   file was found by the reader -- confirm against the ReadDataFile docs. */
assert(DATAFILE_TREE != 0);
Loading

0 comments on commit 6de09fe

Please sign in to comment.