Merge branch 'io-master'
[libs/core.git] / source / strings / utils.cpp
1 #include <algorithm>
2 #include <list>
3 #include <stdexcept>
4 #include "utils.h"
5
6 using namespace std;
7
8 namespace {
9
/**
Shared implementation behind the public split functions.

Template parameters:
  long_sep     If true, sep is matched as a whole substring (string::find);
               otherwise any single character contained in sep acts as a
               separator (string::find_first_of).
  allow_empty  If true, empty tokens between adjacent separators and after a
               trailing separator are kept; otherwise they are skipped.

Parameters:
  str        The string to split.
  sep        Separator string or set of separator characters, see long_sep.
  max_split  Maximum number of splits to perform.  Once that many tokens have
             been produced, the rest of the string is returned as the final
             token.  A negative value means no limit.

An empty str always yields an empty vector.  An empty separator in long_sep
mode can never advance the scan position, so it is treated as a separator
that is never found instead of looping forever.
*/
template<bool long_sep, bool allow_empty>
std::vector<std::string> do_split(const std::string &str, const std::string &sep, int max_split)
{
        typedef std::string::size_type size_type;

        std::vector<std::string> result;

        // Guard against the zero-length match that would never consume input.
        const bool unmatchable = (long_sep && sep.empty());

        // Positions are kept in size_type so that npos is not truncated.
        size_type start = 0;
        while(start<str.size())
        {
                size_type end = std::string::npos;
                if(!unmatchable)
                        end = (long_sep ? str.find(sep, start) : str.find_first_of(sep, start));

                if(end!=start || allow_empty)
                {
                        if(max_split>=0 && result.size()==static_cast<size_type>(max_split))
                        {
                                // Split limit reached: everything left is one token.
                                result.push_back(str.substr(start));
                                break;
                        }

                        // When end is npos, substr clamps the length to the end of str.
                        result.push_back(str.substr(start, end-start));
                }

                if(end==std::string::npos)
                        break;

                start = end+(long_sep ? sep.size() : 1);

                // A trailing separator is followed by one final empty field.
                if(allow_empty && start==str.size())
                        result.push_back(std::string());
        }

        return result;
}
41
/**
Returns true if pred returns non-zero for every character of str.  An empty
string yields true.

Each character is converted to unsigned char before being passed to pred,
because the <cctype> classification functions have undefined behaviour for
negative arguments other than EOF.
*/
bool check_str(const std::string &str, int (*pred)(int))
{
        for(std::string::const_iterator i=str.begin(); i!=str.end(); ++i)
                if(!pred(static_cast<unsigned char>(*i)))
                        return false;
        return true;
}
49
50 }
51
52
53 namespace Msp {
54
/**
Compares two strings without regard to character case.

Characters are lowercased with ::tolower and compared as unsigned char
values.  Returns zero if the strings are equal, a negative value if s1 sorts
before s2 and a positive value if s1 sorts after s2.  If one string is a
prefix of the other, the shorter one sorts first.
*/
int strcasecmp(const std::string &s1, const std::string &s2)
{
        std::string::const_iterator i1 = s1.begin();
        std::string::const_iterator i2 = s2.begin();
        for(; (i1!=s1.end() && i2!=s2.end()); ++i1, ++i2)
        {
                // tolower requires a value representable as unsigned char.
                const int c1 = ::tolower(static_cast<unsigned char>(*i1));
                const int c2 = ::tolower(static_cast<unsigned char>(*i2));
                if(c1!=c2)
                        return c1-c2;
        }

        // One string ran out.  Use the first leftover character as the
        // magnitude, but never let it produce zero or the wrong sign.
        if(i1!=s1.end())
        {
                const int rest = static_cast<unsigned char>(*i1);
                return rest ? rest : 1;
        }
        if(i2!=s2.end())
        {
                const int rest = static_cast<unsigned char>(*i2);
                return rest ? -rest : -1;
        }
        return 0;
}
69
/**
Returns a copy of str with every character converted to lowercase using
::tolower.  Characters are passed as unsigned char, since ::tolower has
undefined behaviour for negative arguments other than EOF.
*/
std::string tolower(const std::string &str)
{
        std::string result(str);
        for(std::string::iterator i=result.begin(); i!=result.end(); ++i)
                *i = static_cast<char>(::tolower(static_cast<unsigned char>(*i)));
        return result;
}
76
/**
Returns a copy of str with every character converted to uppercase using
::toupper.  Characters are passed as unsigned char, since ::toupper has
undefined behaviour for negative arguments other than EOF.
*/
std::string toupper(const std::string &str)
{
        std::string result(str);
        for(std::string::iterator i=result.begin(); i!=result.end(); ++i)
                *i = static_cast<char>(::toupper(static_cast<unsigned char>(*i)));
        return result;
}
83
84 bool isnumrc(const string &str)
85 {
86         return check_str(str, isdigit);
87 }
88
89 bool isalpha(const string &str)
90 {
91         return check_str(str, isalpha);
92 }
93
94 bool isalnum(const string &str)
95 {
96         return check_str(str, isalnum);
97 }
98
99 vector<string> split(const string &str, const string &sep, int max_split)
100 {
101         return do_split<false, false>(str, sep, max_split);
102 }
103
104 vector<string> split(const string &str, char sep, int max_split)
105 {
106         return split(str, string(1, sep), max_split);
107 }
108
109 vector<string> split_long(const string &str, const string &sep, int max_split)
110 {
111         return do_split<true, false>(str, sep, max_split);
112 }
113
114 vector<string> split_fields(const string &str, const string &sep, int max_split)
115 {
116         return do_split<true, true>(str, sep, max_split);
117 }
118
119 vector<string> split_fields(const string &str, char sep, int max_split)
120 {
121         return split_fields(str, string(1, sep), max_split);
122 }
123
/**
Returns a copy of s with leading and trailing whitespace removed.  Space,
tab, carriage return and newline count as whitespace.  A string consisting
only of whitespace yields an empty string.
*/
std::string strip(const std::string &s)
{
        static const char whitespace[] = " \t\r\n";

        const std::string::size_type first = s.find_first_not_of(whitespace);
        if(first==std::string::npos)
                return std::string();

        // A non-whitespace character exists, so this search cannot fail.
        const std::string::size_type last = s.find_last_not_of(whitespace);
        return s.substr(first, last-first+1);
}
131
/**
Decodes C-style backslash escape sequences in str.

Recognised sequences:
  \xHH   exactly two hexadecimal digits
  \OOO   exactly three octal digits, the first of which must be 0-3
  \n \t \r \b \v \a \f \" \' \\

Throws std::invalid_argument("c_unescape") for an unknown escape character,
an invalid digit inside a numeric escape, or an escape sequence cut short by
the end of the string (including a lone trailing backslash).
*/
std::string c_unescape(const std::string &str)
{
        bool escape = false;           // previous character was a backslash
        unsigned numeric_type = 0;     // 16 or 8 while inside a numeric escape, otherwise 0
        unsigned numeric_pos = 0;      // digits consumed so far in the numeric escape
        unsigned numeric_value = 0;    // value accumulated so far
        std::string result;

        for(std::string::const_iterator i=str.begin(); i!=str.end(); ++i)
        {
                const char c = *i;

                if(numeric_type==16)
                {
                        unsigned digit = 0;
                        if(c>='0' && c<='9')
                                digit = c-'0';
                        else if(c>='a' && c<='f')
                                digit = c-'a'+10;
                        else if(c>='A' && c<='F')
                                digit = c-'A'+10;
                        else
                                throw std::invalid_argument("c_unescape");

                        numeric_value = (numeric_value<<4 | digit);
                        if(++numeric_pos==2)
                        {
                                result += static_cast<char>(numeric_value);
                                numeric_type = 0;
                        }
                }
                else if(numeric_type==8)
                {
                        if(c<'0' || c>'7')
                                throw std::invalid_argument("c_unescape");

                        numeric_value = (numeric_value<<3 | static_cast<unsigned>(c-'0'));
                        if(++numeric_pos==3)
                        {
                                result += static_cast<char>(numeric_value);
                                numeric_type = 0;
                        }
                }
                else if(escape)
                {
                        switch(c)
                        {
                        case 'x':
                                numeric_type = 16;
                                numeric_pos = 0;
                                numeric_value = 0;
                                break;
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                                // The first octal digit is part of the value; it is
                                // limited to 0-3 so that three digits fit in 8 bits.
                                numeric_type = 8;
                                numeric_pos = 1;
                                numeric_value = c-'0';
                                break;
                        case 'n':  result += '\n'; break;
                        case 't':  result += '\t'; break;
                        case 'r':  result += '\r'; break;
                        case 'b':  result += '\b'; break;
                        case 'v':  result += '\v'; break;
                        case 'a':  result += '\a'; break;
                        case 'f':  result += '\f'; break;
                        case '\"': result += '\"'; break;
                        case '\'': result += '\''; break;
                        case '\\': result += '\\'; break;
                        default:
                                throw std::invalid_argument("c_unescape");
                        }

                        escape = false;
                }
                else if(c=='\\')
                        escape = true;
                else
                        result += c;
        }

        // Input ended after a backslash or in the middle of a numeric escape.
        if(escape || numeric_type!=0)
                throw std::invalid_argument("c_unescape");

        return result;
}
227
/**
Encodes str using C-style backslash escape sequences.

Newline, tab, carriage return, backspace, vertical tab, bell, form feed,
double quote, single quote and backslash are written as their two-character
escapes.  Any other character below the space character is written as a
backslash followed by three octal digits.  If escape_8bit is true,
characters with the high bit set are written in the same octal form;
otherwise they are copied through unchanged.
*/
std::string c_escape(const std::string &str, bool escape_8bit)
{
        std::string result;

        for(std::string::const_iterator i=str.begin(); i!=str.end(); ++i)
        {
                // Work on the unsigned value so shifts and comparisons are well defined.
                const unsigned char c = static_cast<unsigned char>(*i);

                switch(c)
                {
                case '\n': result += "\\n"; break;
                case '\t': result += "\\t"; break;
                case '\r': result += "\\r"; break;
                case '\b': result += "\\b"; break;
                case '\v': result += "\\v"; break;
                case '\a': result += "\\a"; break;
                case '\f': result += "\\f"; break;
                case '\"': result += "\\\""; break;
                case '\'': result += "\\\'"; break;
                case '\\': result += "\\\\"; break;
                default:
                        if(c<' ' || (escape_8bit && (c&0x80)))
                        {
                                // Explicit casts: brace initialisation from int would be
                                // a narrowing conversion.
                                const char buf[4] =
                                {
                                        '\\',
                                        static_cast<char>('0'+((c>>6)&3)),
                                        static_cast<char>('0'+((c>>3)&7)),
                                        static_cast<char>('0'+(c&7))
                                };
                                result.append(buf, 4);
                        }
                        else
                                result += *i;
                        break;
                }
        }

        return result;
}
265
266 } // namespace Msp