Blame | Last modification | View Log | RSS feed
/************************************************************************
The zlib/libpng License

Copyright (c) 2006 Joerg Wiedenmann

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from
the use of this software.

Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:

1. The origin of this software must not be misrepresented;
you must not claim that you wrote the original software.
If you use this software in a product, an acknowledgment
in the product documentation would be appreciated but is
not required.

2. Altered source versions must be plainly marked as such,
and must not be misrepresented as being the original software.

3. This notice may not be removed or altered from any source distribution.

***********************************************************************/

/********************************************************************
created: 2006-01-28
filename: tokenizer.cpp
author: Jörg Wiedenmann
purpose: A tokenizer function which provides a very
customizable way of breaking up strings.
history: 2006-01-28, Original version
2006-03-04, Fixed a small parsing bug, thanks Elias.
*********************************************************************/

/*
 * NOTE: this is an ALTERED version of the original source. The line
 * structure was restored, redundant per-character state was removed and
 * the documentation was extended. The tokenizing behaviour is unchanged.
 */

// The project's own header (presumably the public declaration of
// tokenize() -- not visible from this file). The __has_include guard only
// allows this translation unit to be compiled where the header is not on
// the include path; compilers without __has_include include it
// unconditionally, exactly as before.
#if defined(__has_include)
#  if __has_include("tokenizer.h")
#    include "tokenizer.h"
#  endif
#else
#  include "tokenizer.h"
#endif

#include <string>
#include <vector>

// Kept from the original file: code outside this block may rely on it.
using namespace std;

namespace
{

// Returns true when `ch` occurs in `set`; an empty set never matches.
inline bool is_one_of( char ch, const std::string& set )
{
    return set.find( ch ) != std::string::npos;
}

} // namespace

/*
 * Splits `str` into tokens and stores them in `result`.
 *
 * str                  the text to split
 * result               receives the tokens; any previous content is
 *                      discarded first
 * delimiters           characters that end the current token and are
 *                      themselves thrown away
 * delimiters_preserve  characters that end the current token and are then
 *                      appended to `result` as a one-character token
 * quote                characters that open a quoted section; the section
 *                      is closed by the same character that opened it.
 *                      The opening/closing characters are not part of the
 *                      token. Inside a quoted section delimiters are
 *                      ordinary text, and so is any other quote character.
 * esc                  characters that make the following character
 *                      ordinary text (also inside a quoted section). The
 *                      escape character itself is dropped; an escape
 *                      character at the very end of `str` is dropped too.
 *
 * Any of the four character sets may be empty, which disables that feature.
 *
 * Empty tokens are never emitted: consecutive delimiters do not produce
 * empty strings and an empty quoted section ("") produces nothing.
 * An unterminated quoted section runs to the end of `str`.
 */
void tokenize ( const std::string& str, std::vector<std::string>& result,
                const std::string& delimiters,
                const std::string& delimiters_preserve,
                const std::string& quote, const std::string& esc )
{
    result.clear();

    std::string token;          // the token currently being assembled
    bool quoted = false;        // true while inside a quoted section
    char current_quote = 0;     // the character that opened that section

    const std::string::size_type len = str.length();

    for ( std::string::size_type pos = 0; pos < len; ++pos )
    {
        char ch = str[pos];
        bool add_char = true;   // assume ch is ordinary text
        bool escaped = false;   // true if ch is protected by an escape

        // Escape character: take the following character literally.
        if ( is_one_of( ch, esc ) )
        {
            escaped = true;
            ++pos;
            if ( pos < len )
            {
                ch = str[pos];
            }
            else
            {
                // Nothing left to escape: drop the dangling escape char.
                // (The loop's ++pos then ends the loop.)
                add_char = false;
            }
        }

        // Quote character: open a section, or close the matching one.
        if ( !escaped && is_one_of( ch, quote ) )
        {
            if ( !quoted )
            {
                quoted = true;
                current_quote = ch;
                add_char = false;
            }
            else if ( ch == current_quote )
            {
                quoted = false;
                current_quote = 0;
                add_char = false;
            }
            // otherwise: a different quote char inside an open section is
            // ordinary text and stays in the token
        }

        // Delimiters only count outside quotes and when not escaped.
        // `quoted` is the state *after* the quote handling above, so a
        // closing quote character is still tested against both sets.
        const bool unprotected = !escaped && !quoted;
        const bool is_dropped = unprotected && is_one_of( ch, delimiters );
        const bool is_kept = unprotected
                             && is_one_of( ch, delimiters_preserve );

        if ( is_dropped || is_kept )
        {
            // A delimiter finishes the current token (if there is one) ...
            if ( !token.empty() )
            {
                result.push_back( token );
                token.clear();
            }
            // ... and a preserved delimiter becomes a token of its own.
            if ( is_kept )
            {
                result.push_back( std::string( 1, ch ) );
            }
        }
        else if ( add_char )
        {
            token.push_back( ch );
        }
    }

    // Whatever is left after the last delimiter is the final token.
    if ( !token.empty() )
    {
        result.push_back( token );
    }
}