Subversion Repositories SmartDukaan

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
15747 anikendra 1
/************************************************************************
2
The zlib/libpng License
3
 
4
Copyright (c) 2006 Joerg Wiedenmann
5
 
6
This software is provided 'as-is', without any express or implied warranty.
7
In no event will the authors be held liable for any damages arising from
8
the use of this software.
9
 
10
Permission is granted to anyone to use this software for any purpose,
11
including commercial applications, and to alter it and redistribute it
12
freely, subject to the following restrictions:
13
 
14
1. The origin of this software must not be misrepresented;
15
you must not claim that you wrote the original software.
16
If you use this software in a product, an acknowledgment
17
in the product documentation would be appreciated but is
18
not required.
19
 
20
2. Altered source versions must be plainly marked as such,
21
and must not be misrepresented as being the original software.
22
 
23
3. This notice may not be removed or altered from any source distribution.
24
 
25
***********************************************************************/
26
 
27
/********************************************************************
28
	created:	2006-01-28
29
	filename: 	tokenizer.cpp
30
	author:		Jörg Wiedenmann
31
 
32
	purpose:	A tokenizer function which provides a very
33
				customizable way of breaking up strings.
34
 
35
	history:	2006-01-28, Original version
36
				2006-03-04, Fixed a small parsing bug, thanks Elias.
37
*********************************************************************/
38
 
39
#include "tokenizer.h"
40
 
41
using namespace std;
42
 
43
// Splits `str` into tokens and stores them in `result`; whatever `result`
// held before the call is discarded.
//
//   str                  the input string to break up
//   result               receives the tokens, in order of appearance
//   delimiters           characters that end a token and are dropped
//   delimiters_preserve  characters that end a token and are additionally
//                        emitted as one-character tokens of their own
//   quote                characters that open a quoted section; a section is
//                        closed only by the same character that opened it.
//                        Delimiters inside a section are taken literally and
//                        the opening/closing quote chars are not stored.
//   esc                  escape characters; the character following one is
//                        appended to the token verbatim and the escape char
//                        itself is dropped. An escape char that is the last
//                        character of the input is discarded.
//
// Empty tokens are never emitted (e.g. two adjacent dropped delimiters yield
// nothing between them).
void tokenize ( const std::string& str, std::vector<std::string>& result,
			   const std::string& delimiters, const std::string& delimiters_preserve,
			   const std::string& quote, const std::string& esc )
{
	result.clear();

	std::string current;      // token that is being assembled
	bool in_quotes = false;   // true while a quoted section is open
	char open_quote = 0;      // the char which opened the current section

	const std::string::size_type total = str.length();

	for ( std::string::size_type i = 0; i < total; ++i )
	{
		const char c = str[i];

		// An escape char protects its successor from every other check:
		// take the successor literally (if there is one) and move on.
		if ( std::string::npos != esc.find( c ) )
		{
			++i;
			if ( i < total )
			{
				current.push_back( str[i] );
			}
			continue;
		}

		bool keep = true;        // does c become part of the token?
		bool boundary = false;   // does c terminate the token?
		bool emit_delim = false; // is c emitted as a token of its own?

		// Quote handling. Only the char that opened a section can close it;
		// any other quote char inside the section is ordinary text.
		if ( std::string::npos != quote.find( c ) )
		{
			if ( !in_quotes )
			{
				in_quotes = true;
				open_quote = c;
				keep = false;
			}
			else if ( c == open_quote )
			{
				in_quotes = false;
				open_quote = 0;
				keep = false;
			}
		}

		// Delimiter handling, based on the quote state *after* the step
		// above (so a closing quote char is itself still checked here).
		if ( !in_quotes )
		{
			if ( std::string::npos != delimiters.find( c ) )
			{
				boundary = true;
				keep = false;
			}

			if ( std::string::npos != delimiters_preserve.find( c ) )
			{
				boundary = true;
				keep = false;
				emit_delim = true;
			}
		}

		if ( keep )
		{
			current.push_back( c );
		}

		// flush the finished token, skipping empty ones
		if ( boundary && !current.empty() )
		{
			result.push_back( current );
			current.clear();
		}

		// a preserved delimiter follows the token it terminated
		if ( emit_delim )
		{
			result.push_back( std::string( 1, c ) );
		}
	}

	// whatever is left over forms the last token
	if ( !current.empty() )
	{
		result.push_back( current );
	}
}