]> git.tdb.fi Git - libs/core.git/blob - source/strings/utils.cpp
Add move semantics to Variant
[libs/core.git] / source / strings / utils.cpp
1 #include <list>
2 #include <stdexcept>
3 #include <cctype>
4 #include <msp/core/algorithm.h>
5 #include "utils.h"
6
7 using namespace std;
8
9 namespace {
10
/**
Splits str into pieces at occurrences of sep.

long_sep: when true, sep is matched as a whole substring; when false, every
character of sep acts as an individual separator.
allow_empty: when true, empty pieces (adjacent separators, or a separator at
the very start or end of str) are kept; when false they are dropped.
max_split: if non-negative, at most that many splits are made and the
remainder of str is stored unsplit as the last piece.

An empty str always yields an empty result.  An empty sep never matches, so
a non-empty str is returned as a single piece.
*/
template<bool long_sep, bool allow_empty>
vector<string> do_split(const string &str, const string &sep, int max_split)
{
        vector<string> result;
        if(str.empty())
                return result;

        // find() with an empty needle matches at every position without
        // consuming any input, so the scan below would never advance.  Treat it
        // the same way find_first_of() treats an empty set: no match at all.
        if(long_sep && sep.empty())
        {
                result.push_back(str);
                return result;
        }

        // size_type rather than unsigned, so that npos and large offsets are
        // not truncated on platforms where size_t is wider than unsigned.
        string::size_type start = 0;
        while(start<str.size())
        {
                const string::size_type end = long_sep ? str.find(sep, start) : str.find_first_of(sep, start);
                if(end!=start || allow_empty)
                {
                        if(max_split>=0 && result.size()==static_cast<vector<string>::size_type>(max_split))
                        {
                                // Split budget used up: the rest goes in as one piece
                                result.push_back(str.substr(start));
                                break;
                        }

                        // When end is npos, substr clamps the count to the end of str
                        result.push_back(str.substr(start, end-start));
                }

                if(end==string::npos)
                        break;

                start = end+(long_sep ? sep.size() : 1);

                // A separator at the very end is followed by one last empty field
                if(allow_empty && start==str.size())
                        result.push_back(string());
        }

        return result;
}
42
43 }
44
45
46 namespace Msp {
47
/**
Compares two strings, ignoring case as determined by std::tolower.

Returns zero if the strings are equal, a negative value if s1 sorts before
s2 and a positive value if it sorts after.  Only the sign of a non-zero
result is meaningful.  A string that is a proper prefix of the other sorts
first.
*/
int strcasecmp(const string &s1, const string &s2)
{
        auto i1 = s1.begin();
        auto i2 = s2.begin();
        for(; (i1!=s1.end() && i2!=s2.end()); ++i1, ++i2)
        {
                // std::tolower requires a value representable as unsigned char;
                // passing a negative plain char is undefined behavior.
                const int c1 = std::tolower(static_cast<unsigned char>(*i1));
                const int c2 = std::tolower(static_cast<unsigned char>(*i2));
                if(c1!=c2)
                        return c1-c2;
        }

        // One string ran out first.  Use a fixed sign rather than the value of
        // the next character, which may be zero or negative.
        if(i1!=s1.end())
                return 1;
        if(i2!=s2.end())
                return -1;
        return 0;
}
62
/**
Returns a copy of str with every character converted to lowercase using
std::tolower.
*/
string tolower(const string &str)
{
        string result(str);
        // Go through unsigned char: std::tolower is undefined for negative values
        for(char &c: result)
                c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        return result;
}
69
/**
Returns a copy of str with every character converted to uppercase using
std::toupper.
*/
string toupper(const string &str)
{
        string result(str);
        // Go through unsigned char: std::toupper is undefined for negative values
        for(char &c: result)
                c = static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
        return result;
}
76
/**
Checks whether every character of str is a decimal digit according to
std::isdigit.  An empty string returns true.
*/
bool isnumrc(const string &str)
{
        // Cast through unsigned char: std::isdigit is undefined for negative values
        return std::all_of(str.begin(), str.end(), [](char c){ return std::isdigit(static_cast<unsigned char>(c))!=0; });
}
81
/**
Checks whether every character of str is alphabetic according to
std::isalpha.  An empty string returns true.
*/
bool isalpha(const string &str)
{
        // Cast through unsigned char: std::isalpha is undefined for negative values
        return std::all_of(str.begin(), str.end(), [](char c){ return std::isalpha(static_cast<unsigned char>(c))!=0; });
}
86
/**
Checks whether every character of str is alphanumeric according to
std::isalnum.  An empty string returns true.
*/
bool isalnum(const string &str)
{
        // Cast through unsigned char: std::isalnum is undefined for negative values
        return std::all_of(str.begin(), str.end(), [](char c){ return std::isalnum(static_cast<unsigned char>(c))!=0; });
}
91
92 vector<string> split(const string &str, const string &sep, int max_split)
93 {
94         return do_split<false, false>(str, sep, max_split);
95 }
96
97 vector<string> split(const string &str, char sep, int max_split)
98 {
99         return split(str, string(1, sep), max_split);
100 }
101
102 vector<string> split_long(const string &str, const string &sep, int max_split)
103 {
104         return do_split<true, false>(str, sep, max_split);
105 }
106
107 vector<string> split_fields(const string &str, const string &sep, int max_split)
108 {
109         return do_split<true, true>(str, sep, max_split);
110 }
111
112 vector<string> split_fields(const string &str, char sep, int max_split)
113 {
114         return split_fields(str, string(1, sep), max_split);
115 }
116
/**
Returns a copy of s with leading and trailing spaces, tabs, carriage returns
and newlines removed.
*/
string strip(const string &s)
{
        static const char whitespace[] = " \t\r\n";

        const string::size_type first = s.find_first_not_of(whitespace);
        if(first==string::npos)
                return string();   // Nothing but whitespace (or empty)

        const string::size_type last = s.find_last_not_of(whitespace);
        return s.substr(first, last-first+1);
}
124
/**
Appends other to str in place and returns a reference to str.  The separator
is inserted between them only if both str and other are non-empty.
*/
string &append(string &str, const string &sep, const string &other)
{
        const bool need_separator = !(str.empty() || other.empty());
        if(need_separator)
                str.append(sep);
        str.append(other);
        return str;
}
132
133 string join(const string &str1, const string &sep, const string &str2)
134 {
135         string result = str1;
136         return append(result, sep, str2);
137 }
138
/**
Decodes C-style backslash escapes in str.

Recognized escapes are \n \t \r \b \v \a \f \" \' \\, \xHH with exactly two
hexadecimal digits, and \NNN with exactly three octal digits of which the
first is 0-3.

Throws invalid_argument for an unknown escape, an invalid digit inside a
numeric escape, or an escape sequence cut short by the end of the string.
*/
string c_unescape(const string &str)
{
        bool escape = false;          // Previous character was an unconsumed backslash
        unsigned numeric_type = 0;    // 0 = not in a numeric escape, 8 = octal, 16 = hex
        unsigned numeric_pos = 0;     // Digits consumed so far
        unsigned numeric_value = 0;   // Value accumulated so far
        string result;
        for(char c: str)
        {
                if(numeric_type==16)
                {
                        unsigned digit = 0;
                        if(c>='0' && c<='9')
                                digit = c-'0';
                        else if(c>='a' && c<='f')
                                digit = c-'a'+10;
                        else if(c>='A' && c<='F')
                                digit = c-'A'+10;
                        else
                                throw invalid_argument("c_unescape");

                        numeric_value = (numeric_value<<4 | digit);
                        ++numeric_pos;
                        if(numeric_pos==2)
                        {
                                result += static_cast<char>(numeric_value);
                                numeric_type = 0;
                        }
                }
                else if(numeric_type==8)
                {
                        unsigned digit = 0;
                        if(c>='0' && c<='7')
                                digit = c-'0';
                        else
                                throw invalid_argument("c_unescape");

                        numeric_value = (numeric_value<<3 | digit);
                        ++numeric_pos;
                        if(numeric_pos==3)
                        {
                                // First digit is at most 3, so the value fits in one byte
                                result += static_cast<char>(numeric_value);
                                numeric_type = 0;
                        }
                }
                else if(escape)
                {
                        escape = false;

                        if(c=='x')
                        {
                                numeric_type = 16;
                                numeric_pos = 0;
                                numeric_value = 0;
                        }
                        else if(c>='0' && c<='3')
                        {
                                // The introducing digit is already the first octal digit
                                numeric_type = 8;
                                numeric_pos = 1;
                                numeric_value = c-'0';
                        }
                        else
                        {
                                switch(c)
                                {
                                case 'n': result += '\n'; break;
                                case 't': result += '\t'; break;
                                case 'r': result += '\r'; break;
                                case 'b': result += '\b'; break;
                                case 'v': result += '\v'; break;
                                case 'a': result += '\a'; break;
                                case 'f': result += '\f'; break;
                                case '\"': result += '\"'; break;
                                case '\'': result += '\''; break;
                                case '\\': result += '\\'; break;
                                default: throw invalid_argument("c_unescape");
                                }
                        }
                }
                else if(c=='\\')
                        escape = true;
                else
                        result += c;
        }

        // A dangling backslash or an unfinished numeric escape is an error;
        // without the numeric_type check the partial escape would be silently
        // dropped from the output.
        if(escape || numeric_type)
                throw invalid_argument("c_unescape");

        return result;
}
234
/**
Encodes str using C-style backslash escapes.

Newline, tab, carriage return, backspace, vertical tab, bell, form feed,
both quote characters and the backslash get their symbolic escapes.  Other
characters below space, the character 0x7F and, if escape_8bit is set,
characters with the high bit set are written as a backslash followed by
three octal digits.  Everything else is copied unchanged.
*/
string c_escape(const string &str, bool escape_8bit)
{
        string result;

        for(const char c: str)
        {
                switch(c)
                {
                case '\n': result += "\\n"; break;
                case '\t': result += "\\t"; break;
                case '\r': result += "\\r"; break;
                case '\b': result += "\\b"; break;
                case '\v': result += "\\v"; break;
                case '\a': result += "\\a"; break;
                case '\f': result += "\\f"; break;
                case '\"': result += "\\\""; break;
                case '\'': result += "\\\'"; break;
                case '\\': result += "\\\\"; break;
                default:
                        {
                                const unsigned char byte = static_cast<unsigned char>(c);
                                const bool high_bit = (byte&0x80)!=0;
                                if(byte<' ' || byte==0x7F || (escape_8bit && high_bit))
                                {
                                        // Always three octal digits, most significant first
                                        result += '\\';
                                        result += static_cast<char>('0'+((byte>>6)&7));
                                        result += static_cast<char>('0'+((byte>>3)&7));
                                        result += static_cast<char>('0'+(byte&7));
                                }
                                else
                                        result += c;
                        }
                        break;
                }
        }

        return result;
}
274
275 } // namespace Msp