From: Mikko Rasa Date: Tue, 17 Sep 2019 16:06:34 +0000 (+0300) Subject: Properly handle multibyte UTF-8 in Entry and Text X-Git-Url: http://git.tdb.fi/?p=libs%2Fgltk.git;a=commitdiff_plain;h=2aa99730d8c3106deeac1186e057055604835752 Properly handle multibyte UTF-8 in Entry and Text --- diff --git a/source/entry.cpp b/source/entry.cpp index 8b638de..6554f7c 100644 --- a/source/entry.cpp +++ b/source/entry.cpp @@ -269,14 +269,20 @@ bool Entry::key_press(unsigned key, unsigned mod) if(selection_active) erase_selection(true); else if(edit_pos>0) - erase(edit_pos-1, 1); + { + unsigned start_pos = text.move_offset(edit_pos, -1); + erase(start_pos, edit_pos-start_pos); + } } else if(key==Input::KEY_DELETE) { if(selection_active) erase_selection(true); else - erase(edit_pos, 1); + { + unsigned end_pos = text.move_offset(edit_pos, 1); + erase(edit_pos, end_pos-edit_pos); + } } else if(key==Input::KEY_ENTER && multiline) insert(edit_pos, "\n"); @@ -378,15 +384,9 @@ void Entry::on_style_change() void Entry::move_edit_position(Navigation nav, bool select) { if(nav==NAV_LEFT) - { - if(edit_pos>0) - set_edit_position(edit_pos-1, select); - } + set_edit_position(text.move_offset(edit_pos, -1), select); else if(nav==NAV_RIGHT) - { - if(edit_pos0 ? text.coords_to_offset(row-1, col) : 0), select); } + else + throw invalid_argument("Entry::move_edit_position"); } void Entry::set_edit_position(unsigned ep, bool select) diff --git a/source/text.cpp b/source/text.cpp index 45b6567..a3ed762 100644 --- a/source/text.cpp +++ b/source/text.cpp @@ -42,7 +42,7 @@ void Text::set_style(const Style *s) const GL::Font &font = style->get_font(); float font_size = style->get_font_size(); for(vector::iterator i=lines.begin(); i!=lines.end(); ++i) - i->width = static_cast(font.get_string_width(text.substr(i->start, i->length))*font_size); + i->width = static_cast(font.get_string_width(text.substr(i->start, i->bytes))*font_size); } } @@ -81,16 +81,19 @@ void Text::set(const string &t) void Text::erase(unsigned pos, unsigned len) { + check_alignment(pos); + check_alignment(pos+len); text.erase(pos, len); vector::iterator i; - for(i=lines.begin(); (i!=lines.end() && i->start+i->lengthstart+i->bytesi->start+i->length) + if(pos+len>i->start+i->bytes) find_lines(); else { - i->length -= len; + i->bytes -= len; + i->length = count_characters(i->start, i->bytes); for(++i; i!=lines.end(); ++i) i->start -= len; @@ -99,6 +102,7 @@ void Text::erase(unsigned pos, unsigned len) void Text::insert(unsigned pos, const string &s) { + check_alignment(pos); text.insert(pos, s); if(s.find('\n')!=string::npos) @@ -106,9 +110,10 @@ void Text::insert(unsigned pos, const string &s) else { vector::iterator i; - for(i=lines.begin(); (i!=lines.end() && i->start+i->lengthstart+i->byteslength += s.size(); + i->bytes += s.size(); + i->length = count_characters(i->start, i->bytes); for(++i; i!=lines.end(); ++i) i->start += s.size(); @@ -139,6 +144,32 @@ unsigned Text::get_line_length(unsigned i) const return lines[i].length; } +unsigned Text::move_offset(unsigned offs, int change) const +{ + check_alignment(offs); + if(!change) + return offs; + + StringCodec::Utf8::Decoder dec(StringCodec::IGNORE_ERRORS); + string::const_iterator i = text.begin()+offs; + if(change>0) + { + for(; change>0; --change) + dec.decode_char(text, i); + } + else + { + while(change<0 && i!=text.begin()) + { + --i; + string::const_iterator j = i; + if(dec.decode_char(text, j)!=-1) + ++change; + } + } + return i-text.begin(); +} + void Text::offset_to_coords(unsigned offs, unsigned &row, unsigned &col) const { if(lines.empty()) @@ -149,10 +180,13 @@ void Text::offset_to_coords(unsigned offs, unsigned &row, unsigned &col) const } for(unsigned i=0; i=lines[i].start && offs<=lines[i].start+lines[i].length) + if(offs>=lines[i].start && offs<=lines[i].start+lines[i].bytes) { row = i; - col = offs-lines[i].start; + if(lines[i].length==lines[i].bytes) + col = offs-lines[i].start; + else + col = count_characters(lines[i].start, offs-lines[i].start); return; } } @@ -161,8 +195,20 @@ unsigned Text::coords_to_offset(unsigned row, unsigned col) const { if(row>=lines.size()) return text.size(); + const Line &line = lines[row]; + if(col>line.length) + col = line.length; - return lines[row].start+min(col, lines[row].length); + if(line.length==line.bytes) + return line.start+col; + else + { + StringCodec::Utf8::Decoder dec; + string::const_iterator i = text.begin()+line.start; + for(col=min(col, line.length); col; --col) + dec.decode_char(text, i); + return i-text.begin(); + } } Geometry Text::coords_to_geometry(const Part &part, const Geometry &parent, unsigned first_row, unsigned row, unsigned col) const @@ -219,11 +265,12 @@ void Text::find_lines() Line line; line.start = start; - line.length = (newline==string::npos ? text.size() : newline)-start; + line.bytes = (newline==string::npos ? text.size() : newline)-start; + line.length = count_characters(line.start, line.bytes); line.width = line.length; if(style) { - string str = text.substr(line.start, line.length); + string str = text.substr(line.start, line.bytes); line.width = static_cast(style->get_font().get_string_width(str)*font_size); } lines.push_back(line); @@ -234,6 +281,24 @@ void Text::find_lines() } } +unsigned Text::count_characters(unsigned start, unsigned bytes) const +{ + StringCodec::Utf8::Decoder dec; + string::const_iterator i = text.begin()+start; + string::const_iterator end = i+bytes; + unsigned count = 0; + for(; i void Text::process_lines(const Part &part, const Geometry &parent, unsigned first_row, void (Text::*func)(unsigned, const Geometry &, T &) const, T &data) const { @@ -271,14 +336,24 @@ void Text::build_line(unsigned i, const Geometry &rgeom, RenderData &data) const data.bld->matrix() *= GL::Matrix::translation(rgeom.x, rgeom.y, 0); data.bld->matrix() *= GL::Matrix::scaling(style->get_font_size()); - style->get_font().build_string(text.substr(line.start, line.length), *data.bld); + style->get_font().build_string(text.substr(line.start, line.bytes), *data.bld); } void Text::coords_to_geom_line(unsigned i, const Geometry &rgeom, CoordsToGeomData &data) const { if(i==data.row) { - float w = style->get_font().get_string_width(text.substr(lines[i].start, data.col)); + string::const_iterator begin = text.begin()+lines[i].start; + string::const_iterator j = begin; + if(lines[i].length==lines[i].bytes) + j += data.col; + else + { + StringCodec::Utf8::Decoder dec; + for(unsigned c=data.col; c; --c) + dec.decode_char(text, j); + } + float w = style->get_font().get_string_width(string(begin, j)); data.result = rgeom; data.result.x += static_cast(w*style->get_font_size()); } diff --git a/source/text.h b/source/text.h index 243c3a6..6ddbb23 100644 --- a/source/text.h +++ b/source/text.h @@ -22,6 +22,7 @@ private: struct Line { unsigned start; + unsigned bytes; unsigned length; unsigned width; }; @@ -51,6 +52,7 @@ public: unsigned get_n_lines() const { return lines.size(); } unsigned get_visible_lines(const Part &, const Geometry &, unsigned *) const; unsigned get_line_length(unsigned) const; + unsigned move_offset(unsigned, int) const; void offset_to_coords(unsigned, unsigned &, unsigned &) const; unsigned coords_to_offset(unsigned, unsigned) const; Geometry coords_to_geometry(const Part &, const Geometry &, unsigned, unsigned, unsigned) const; @@ -61,6 +63,8 @@ public: Text &operator=(const std::string &); private: void find_lines(); + unsigned count_characters(unsigned, unsigned) const; + void check_alignment(unsigned) const; template void process_lines(const Part &, const Geometry &, unsigned, void (Text::*)(unsigned, const Geometry &, T &) const, T &) const;