]> git.tdb.fi Git - libs/core.git/blob - source/strings/utils.cpp
Add wrappers for lower_bound and upper_bound
[libs/core.git] / source / strings / utils.cpp
1 #include <algorithm>
2 #include <list>
3 #include <stdexcept>
4 #include <cctype>
5 #include "utils.h"
6
7 using namespace std;
8
9 namespace {
10
/**
Splits str into parts at occurrences of sep.

long_sep selects whether sep is matched as a whole substring (true) or as a
set of single-character delimiters (false).  allow_empty selects whether empty
parts between adjacent separators are kept (true) or dropped (false).

If max_split is non-negative, at most that many parts are produced normally
and the remainder of the string, any later separators included, becomes one
final part.  An empty whole-string separator never matches, so the input is
returned as a single part.  An empty input yields an empty vector.
*/
template<bool long_sep, bool allow_empty>
vector<string> do_split(const string &str, const string &sep, int max_split)
{
	vector<string> result;

	// An empty whole-string separator matches at every position without
	// consuming any input, which would stall the scan; treat it as absent.
	const bool can_match = !(long_sep && sep.empty());

	// Positions use the string's own size type so npos is never truncated.
	string::size_type start = 0;
	while(start<str.size())
	{
		string::size_type end = string::npos;
		if(can_match)
			end = (long_sep ? str.find(sep, start) : str.find_first_of(sep, start));

		if(end!=start || allow_empty)
		{
			if(max_split>=0 && result.size()==static_cast<vector<string>::size_type>(max_split))
			{
				// Split budget used up: the rest goes in verbatim.
				result.push_back(str.substr(start));
				break;
			}
			else
				result.push_back(str.substr(start, end-start));
		}

		if(end==string::npos)
			break;

		start = end+(long_sep ? sep.size() : 1);

		// A separator at the very end leaves one trailing empty field.
		if(allow_empty && start==str.size())
			result.push_back(string());
	}

	return result;
}
42
/**
Tests whether every character of str satisfies pred, a <cctype>-style
classification function.  Returns true for an empty string.
*/
bool check_str(const std::string &str, int (*pred)(int))
{
	for(std::string::const_iterator i=str.begin(); i!=str.end(); ++i)
	{
		// <cctype> functions require a value representable as unsigned char;
		// passing a negative plain char is undefined behaviour.
		if(!pred(static_cast<unsigned char>(*i)))
			return false;
	}
	return true;
}
50
51 }
52
53
54 namespace Msp {
55
/**
Compares two strings ignoring case, as determined by ::tolower.

Characters are compared as unsigned char values, so the ordering does not
depend on whether plain char is signed.  Returns a negative value if s1 sorts
before s2, a positive value if it sorts after, and zero if they are equal.
When one string is a proper prefix of the other, the shorter one sorts first.
*/
int strcasecmp(const string &s1, const string &s2)
{
	string::const_iterator i1 = s1.begin();
	string::const_iterator i2 = s2.begin();
	for(; (i1!=s1.end() && i2!=s2.end()); ++i1, ++i2)
	{
		// ::tolower is only defined for values representable as unsigned char.
		const int c1 = ::tolower(static_cast<unsigned char>(*i1));
		const int c2 = ::tolower(static_cast<unsigned char>(*i2));
		if(c1!=c2)
			return c1-c2;
	}

	// Fixed signs for the tail: the leftover character's own value may be
	// zero or negative and must not decide the result.
	if(i1!=s1.end())
		return 1;
	if(i2!=s2.end())
		return -1;
	return 0;
}
70
/**
Returns a copy of str with every character converted to lower case by
::tolower.
*/
string tolower(const string &str)
{
	string result(str);
	for(string::iterator i=result.begin(); i!=result.end(); ++i)
	{
		// Go through unsigned char: ::tolower is undefined for negative values.
		*i = static_cast<char>(::tolower(static_cast<unsigned char>(*i)));
	}
	return result;
}
77
/**
Returns a copy of str with every character converted to upper case by
::toupper.
*/
string toupper(const string &str)
{
	string result(str);
	for(string::iterator i=result.begin(); i!=result.end(); ++i)
	{
		// Go through unsigned char: ::toupper is undefined for negative values.
		*i = static_cast<char>(::toupper(static_cast<unsigned char>(*i)));
	}
	return result;
}
84
85 bool isnumrc(const string &str)
86 {
87         return check_str(str, isdigit);
88 }
89
90 bool isalpha(const string &str)
91 {
92         return check_str(str, isalpha);
93 }
94
95 bool isalnum(const string &str)
96 {
97         return check_str(str, isalnum);
98 }
99
100 vector<string> split(const string &str, const string &sep, int max_split)
101 {
102         return do_split<false, false>(str, sep, max_split);
103 }
104
105 vector<string> split(const string &str, char sep, int max_split)
106 {
107         return split(str, string(1, sep), max_split);
108 }
109
110 vector<string> split_long(const string &str, const string &sep, int max_split)
111 {
112         return do_split<true, false>(str, sep, max_split);
113 }
114
115 vector<string> split_fields(const string &str, const string &sep, int max_split)
116 {
117         return do_split<true, true>(str, sep, max_split);
118 }
119
120 vector<string> split_fields(const string &str, char sep, int max_split)
121 {
122         return split_fields(str, string(1, sep), max_split);
123 }
124
/**
Returns a copy of s without leading and trailing spaces, tabs, carriage
returns and newlines.  A string made up only of those characters yields an
empty string.
*/
string strip(const string &s)
{
	static const char whitespace[] = " \t\r\n";

	const string::size_type first = s.find_first_not_of(whitespace);
	if(first==string::npos)
		return string();

	// At least one character survives, so the reverse search cannot fail.
	const string::size_type last = s.find_last_not_of(whitespace);
	return s.substr(first, last-first+1);
}
132
/**
Appends other to str in place and returns str.  sep is inserted between them
only when both str and other are non-empty.
*/
string &append(string &str, const string &sep, const string &other)
{
	const bool need_separator = !(str.empty() || other.empty());
	if(need_separator)
		str.append(sep);
	return str.append(other);
}
140
141 string join(const string &str1, const string &sep, const string &str2)
142 {
143         string result = str1;
144         return append(result, sep, str2);
145 }
146
/**
Decodes C-style backslash escapes in str.

Recognised escapes are \n \t \r \b \v \a \f \" \' \\, \x followed by exactly
two hexadecimal digits, and an octal escape of exactly three digits whose
first digit is in the range 0-3.

Throws invalid_argument for an unknown escape, an invalid digit inside a
numeric escape, or an escape sequence that is cut short by the end of the
string.
*/
string c_unescape(const std::string &str)
{
	bool escape = false;
	// Radix of the numeric escape being read (8 or 16), or 0 when none is.
	unsigned numeric_type = 0;
	unsigned numeric_pos = 0;
	unsigned numeric_value = 0;
	string result;
	for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
	{
		const char c = *i;
		if(numeric_type==16)
		{
			unsigned digit = 0;
			if(c>='0' && c<='9')
				digit = c-'0';
			else if(c>='a' && c<='f')
				digit = c-'a'+10;
			else if(c>='A' && c<='F')
				digit = c-'A'+10;
			else
				throw invalid_argument("c_unescape");

			numeric_value = (numeric_value<<4 | digit);
			++numeric_pos;
			if(numeric_pos==2)
			{
				result += static_cast<char>(numeric_value);
				numeric_type = 0;
			}
		}
		else if(numeric_type==8)
		{
			unsigned digit = 0;
			if(c>='0' && c<='7')
				digit = c-'0';
			else
				throw invalid_argument("c_unescape");

			numeric_value = (numeric_value<<3 | digit);
			++numeric_pos;
			if(numeric_pos==3)
			{
				result += static_cast<char>(numeric_value);
				numeric_type = 0;
			}
		}
		else if(escape)
		{
			if(c=='x')
			{
				numeric_type = 16;
				numeric_pos = 0;
				numeric_value = 0;
			}
			else if(c>='0' && c<='3')
			{
				// The leading octal digit is consumed here; limiting it to 0-3
				// keeps the three-digit value within one byte.
				numeric_type = 8;
				numeric_pos = 1;
				numeric_value = c-'0';
			}
			else if(c=='n')
				result += '\n';
			else if(c=='t')
				result += '\t';
			else if(c=='r')
				result += '\r';
			else if(c=='b')
				result += '\b';
			else if(c=='v')
				result += '\v';
			else if(c=='a')
				result += '\a';
			else if(c=='f')
				result += '\f';
			else if(c=='\"')
				result += '\"';
			else if(c=='\'')
				result += '\'';
			else if(c=='\\')
				result += '\\';
			else
				throw invalid_argument("c_unescape");

			escape = false;
		}
		else if(c=='\\')
			escape = true;
		else
			result += c;
	}

	// Input ended in the middle of an escape: either a lone backslash or a
	// numeric escape that is still missing digits.
	if(escape || numeric_type)
		throw invalid_argument("c_unescape");

	return result;
}
242
/**
Encodes str with C-style backslash escapes.

Newline, tab, carriage return, backspace, vertical tab, bell, form feed, both
quote characters and the backslash get their symbolic escapes.  Any other
character below the space character is written as a backslash followed by
three octal digits, and so is any character with the high bit set when
escape_8bit is true.  Everything else is copied as-is.
*/
string c_escape(const string &str, bool escape_8bit)
{
	string result;

	for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
	{
		const char c = *i;
		switch(c)
		{
		case '\n': result += "\\n"; break;
		case '\t': result += "\\t"; break;
		case '\r': result += "\\r"; break;
		case '\b': result += "\\b"; break;
		case '\v': result += "\\v"; break;
		case '\a': result += "\\a"; break;
		case '\f': result += "\\f"; break;
		case '\"': result += "\\\""; break;
		case '\'': result += "\\\'"; break;
		case '\\': result += "\\\\"; break;
		default:
			{
				const unsigned char byte = static_cast<unsigned char>(c);
				const bool is_control = (byte<' ');
				const bool is_high = (escape_8bit && (byte&0x80));
				if(is_control || is_high)
				{
					// Three octal digits, most significant first.
					result += '\\';
					result += static_cast<char>('0'+((byte>>6)&7));
					result += static_cast<char>('0'+((byte>>3)&7));
					result += static_cast<char>('0'+(byte&7));
				}
				else
					result += c;
			}
			break;
		}
	}

	return result;
}
282
283 } // namespace Msp