[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [43427] trunk/blender/source/blender: patch [#29859] UTF-8 support for text editor.
Lockal S
lockalsash at gmail.com
Mon Jan 16 17:23:36 CET 2012
Revision: 43427
http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=43427
Author: lockal
Date: 2012-01-16 16:23:25 +0000 (Mon, 16 Jan 2012)
Log Message:
-----------
patch [#29859] UTF-8 support for text editor.
This also fixes cursor movement in the beginning of line and adds do_versions block for converting text files with old extended ascii encoding into UTF-8.
Modified Paths:
--------------
trunk/blender/source/blender/blenkernel/BKE_text.h
trunk/blender/source/blender/blenkernel/intern/text.c
trunk/blender/source/blender/blenloader/intern/readfile.c
trunk/blender/source/blender/editors/space_text/text_draw.c
trunk/blender/source/blender/editors/space_text/text_intern.h
trunk/blender/source/blender/editors/space_text/text_ops.c
Modified: trunk/blender/source/blender/blenkernel/BKE_text.h
===================================================================
--- trunk/blender/source/blender/blenkernel/BKE_text.h 2012-01-16 16:16:55 UTC (rev 43426)
+++ trunk/blender/source/blender/blenkernel/BKE_text.h 2012-01-16 16:23:25 UTC (rev 43427)
@@ -46,6 +46,7 @@
void txt_set_undostate (int u);
int txt_get_undostate (void);
struct Text* add_empty_text (const char *name);
+int txt_extended_ascii_as_utf8(char **str);
int reopen_text (struct Text *text);
struct Text* add_text (const char *file, const char *relpath);
struct Text* copy_text (struct Text *ta);
@@ -59,6 +60,8 @@
int txt_find_string (struct Text *text, const char *findstr, int wrap, int match_case);
int txt_has_sel (struct Text *text);
int txt_get_span (struct TextLine *from, struct TextLine *to);
+int txt_utf8_offset_to_index(char *str, int offset);
+int txt_utf8_index_to_offset(char *str, int index);
void txt_move_up (struct Text *text, short sel);
void txt_move_down (struct Text *text, short sel);
void txt_move_left (struct Text *text, short sel);
@@ -86,9 +89,9 @@
void txt_split_curline (struct Text *text);
void txt_backspace_char (struct Text *text);
void txt_backspace_word (struct Text *text);
-int txt_add_char (struct Text *text, char add);
-int txt_add_raw_char (struct Text *text, char add);
-int txt_replace_char (struct Text *text, char add);
+int txt_add_char (struct Text *text, unsigned int add);
+int txt_add_raw_char (struct Text *text, unsigned int add);
+int txt_replace_char (struct Text *text, unsigned int add);
void txt_export_to_object(struct Text *text);
void txt_export_to_objects(struct Text *text);
void txt_unindent (struct Text *text);
@@ -127,34 +130,48 @@
#define UNDO_SLEFT 005
#define UNDO_SRIGHT 006
#define UNDO_SUP 007
-#define UNDO_SDOWN 021
+#define UNDO_SDOWN 010
/* Complex movement (opcode is followed
* by 4 character line ID + a 2 character
* position ID and opcode (repeat)) */
-#define UNDO_CTO 022
-#define UNDO_STO 023
+#define UNDO_CTO 011
+#define UNDO_STO 012
-/* Complex editing (opcode is followed
- * by 1 character ID and opcode (repeat)) */
-#define UNDO_INSERT 024
-#define UNDO_BS 025
-#define UNDO_DEL 026
+/* Complex editing */
+/* 1 - opcode is followed by 1 byte for ascii character and opcode (repeat)) */
+/* 2 - opcode is followed by 2 bytes for utf-8 character and opcode (repeat)) */
+/* 3 - opcode is followed by 3 bytes for utf-8 character and opcode (repeat)) */
+/* 4 - opcode is followed by 4 bytes for unicode character and opcode (repeat)) */
+#define UNDO_INSERT_1 013
+#define UNDO_INSERT_2 014
+#define UNDO_INSERT_3 015
+#define UNDO_INSERT_4 016
+#define UNDO_BS_1 017
+#define UNDO_BS_2 020
+#define UNDO_BS_3 021
+#define UNDO_BS_4 022
+
+#define UNDO_DEL_1 023
+#define UNDO_DEL_2 024
+#define UNDO_DEL_3 025
+#define UNDO_DEL_4 026
+
/* Text block (opcode is followed
* by 4 character length ID + the text
* block itself + the 4 character length
* ID (repeat) and opcode (repeat)) */
-#define UNDO_DBLOCK 027 /* Delete block */
-#define UNDO_IBLOCK 030 /* Insert block */
+#define UNDO_DBLOCK 027 /* Delete block */
+#define UNDO_IBLOCK 030 /* Insert block */
/* Misc */
-#define UNDO_SWAP 031 /* Swap cursors */
+#define UNDO_SWAP 031 /* Swap cursors */
-#define UNDO_INDENT 032
-#define UNDO_UNINDENT 033
-#define UNDO_COMMENT 034
-#define UNDO_UNCOMMENT 035
+#define UNDO_INDENT 032
+#define UNDO_UNINDENT 033
+#define UNDO_COMMENT 034
+#define UNDO_UNCOMMENT 035
/* Marker flags */
#define TMARK_TEMP 0x01 /* Remove on non-editing events, don't save */
Modified: trunk/blender/source/blender/blenkernel/intern/text.c
===================================================================
--- trunk/blender/source/blender/blenkernel/intern/text.c 2012-01-16 16:16:55 UTC (rev 43426)
+++ trunk/blender/source/blender/blenkernel/intern/text.c 2012-01-16 16:23:25 UTC (rev 43427)
@@ -33,6 +33,8 @@
#include <string.h> /* strstr */
#include <sys/types.h>
#include <sys/stat.h>
+#include <wchar.h>
+#include <wctype.h>
#include "MEM_guardedalloc.h"
@@ -215,8 +217,48 @@
return ta;
}
+/* this function replaces extended ascii characters */
+/* to a valid utf-8 sequences */
+int txt_extended_ascii_as_utf8(char **str)
+{
+ int bad_char, added= 0, i= 0;
+ int length = strlen(*str);
+
+ while ((*str)[i]) {
+ if((bad_char= BLI_utf8_invalid_byte(*str+i, length)) == -1)
+ break;
+
+ added++;
+ i+= bad_char + 1;
+ }
+
+ if (added != 0) {
+ char *newstr = MEM_mallocN(length+added+1, "text_line");
+ int mi = 0;
+ i= 0;
+
+ while ((*str)[i]) {
+ if((bad_char= BLI_utf8_invalid_byte((*str)+i, length)) == -1) {
+ memcpy(newstr+mi, (*str)+i, length - i + 1);
+ break;
+ }
+
+ memcpy(newstr+mi, (*str)+i, bad_char);
+
+ BLI_str_utf8_from_unicode((*str)[i+bad_char], newstr+mi+bad_char);
+ i+= bad_char+1;
+ mi+= bad_char+2;
+ }
+ newstr[length+added] = '\0';
+ MEM_freeN(*str);
+ *str = newstr;
+ }
+
+ return added;
+}
+
// this function removes any control characters from
-// a textline
+// a textline and fixes invalid utf-8 sequences
static void cleanup_textline(TextLine * tl)
{
@@ -229,6 +271,7 @@
i--;
}
}
+ tl->len+= txt_extended_ascii_as_utf8(&tl->line);
}
int reopen_text(Text *text)
@@ -689,16 +732,10 @@
}
/* 0:whitespace, 1:punct, 2:alphanumeric */
-static short txt_char_type (char ch)
+static short txt_char_type(unsigned int ch)
{
- if (ch <= ' ') return 0; /* 32 */
- if (ch <= '/') return 1; /* 47 */
- if (ch <= '9') return 2; /* 57 */
- if (ch <= '@') return 1; /* 64 */
- if (ch <= 'Z') return 2; /* 90 */
- if (ch == '_') return 2; /* 95, dont delimit '_' */
- if (ch <= '`') return 1; /* 96 */
- if (ch <= 'z') return 2; /* 122 */
+ if (iswspace(ch)) return 0;
+ if (iswalpha(ch) || iswdigit(ch)) return 2;
return 1;
}
@@ -731,10 +768,43 @@
}
}
-/****************************/
+/*****************************/
/* Cursor movement functions */
-/****************************/
+/*****************************/
+int txt_utf8_offset_to_index(char *str, int offset)
+{
+ int index= 0, pos= 0;
+ while (pos != offset) {
+ pos += BLI_str_utf8_size(str + pos);
+ index++;
+ }
+ return index;
+}
+
+int txt_utf8_index_to_offset(char *str, int index)
+{
+ int offset= 0, pos= 0;
+ while (pos != index) {
+ offset += BLI_str_utf8_size(str + offset);
+ pos++;
+ }
+ return offset;
+}
+
+/* returns the real number of characters in string */
+/* not the same as BLI_strlen_utf8, which returns length for wide characters */
+static int txt_utf8_len(const char *src)
+{
+ int len;
+
+ for (len=0; *src; len++) {
+ src += BLI_str_utf8_size(src);
+ }
+
+ return len;
+}
+
void txt_move_up(Text *text, short sel)
{
TextLine **linep;
@@ -747,13 +817,13 @@
old= *charp;
if((*linep)->prev) {
+ int index = txt_utf8_offset_to_index((*linep)->line, *charp);
*linep= (*linep)->prev;
- if (*charp > (*linep)->len) {
- *charp= (*linep)->len;
- if(!undoing) txt_undo_add_toop(text, sel?UNDO_STO:UNDO_CTO, txt_get_span(text->lines.first, (*linep)->next), old, txt_get_span(text->lines.first, *linep), (unsigned short) *charp);
- } else {
- if(!undoing) txt_undo_add_op(text, sel?UNDO_SUP:UNDO_CUP);
- }
+ if (index > txt_utf8_len((*linep)->line)) *charp= (*linep)->len;
+ else *charp= txt_utf8_index_to_offset((*linep)->line, index);
+
+ if(!undoing)
+ txt_undo_add_op(text, sel?UNDO_SUP:UNDO_CUP);
} else {
txt_move_bol(text, sel);
}
@@ -773,12 +843,13 @@
old= *charp;
if((*linep)->next) {
+ int index = txt_utf8_offset_to_index((*linep)->line, *charp);
*linep= (*linep)->next;
- if (*charp > (*linep)->len) {
- *charp= (*linep)->len;
- if(!undoing) txt_undo_add_toop(text, sel?UNDO_STO:UNDO_CTO, txt_get_span(text->lines.first, (*linep)->prev), old, txt_get_span(text->lines.first, *linep), (unsigned short)*charp);
- } else
- if(!undoing) txt_undo_add_op(text, sel?UNDO_SDOWN:UNDO_CDOWN);
+ if (index > txt_utf8_len((*linep)->line)) *charp= (*linep)->len;
+ else *charp= txt_utf8_index_to_offset((*linep)->line, index);
+
+ if(!undoing)
+ txt_undo_add_op(text, sel?UNDO_SDOWN:UNDO_CDOWN);
} else {
txt_move_eol(text, sel);
}
@@ -790,7 +861,7 @@
{
TextLine **linep;
int *charp, oundoing= undoing;
- int tabsize = 1, i=0;
+ int tabsize= 0, i= 0;
if (!text) return;
if(sel) txt_curs_sel(text, &linep, &charp);
@@ -799,32 +870,36 @@
undoing= 1;
- // do nice left only if there are only spaces
- // TXT_TABSIZE hardcoded in DNA_text_types.h
- if (text->flags & TXT_TABSTOSPACES) {
- tabsize = TXT_TABSIZE;
-
- if (*charp < tabsize)
- tabsize = *charp;
- else {
- for (i=0;i<(*charp);i++)
+ if (*charp== 0) {
+ if ((*linep)->prev) {
+ txt_move_up(text, sel);
+ *charp= (*linep)->len;
+ }
+ }
+ else {
+ // do nice left only if there are only spaces
+ // TXT_TABSIZE hardcoded in DNA_text_types.h
+ if (text->flags & TXT_TABSTOSPACES) {
+ tabsize= (*charp < TXT_TABSIZE) ? *charp : TXT_TABSIZE;
+
+ for (i=0; i<(*charp); i++)
if ((*linep)->line[i] != ' ') {
- tabsize = 1;
+ tabsize= 0;
break;
}
+
// if in the middle of the space-tab
- if ((*charp) % tabsize != 0)
- tabsize = ((*charp) % tabsize);
+ if (tabsize && (*charp) % TXT_TABSIZE != 0)
+ tabsize= ((*charp) % TXT_TABSIZE);
}
- }
-
- if (*charp== 0) {
- if ((*linep)->prev) {
- txt_move_up(text, sel);
- *charp= (*linep)->len;
+
+ if (tabsize)
+ (*charp)-= tabsize;
+ else {
+ const char *prev= BLI_str_prev_char_utf8((*linep)->line + *charp);
+ *charp= prev - (*linep)->line;
}
}
- else (*charp)-= tabsize;
undoing= oundoing;
if(!undoing) txt_undo_add_op(text, sel?UNDO_SLEFT:UNDO_CLEFT);
@@ -835,8 +910,7 @@
void txt_move_right(Text *text, short sel)
{
TextLine **linep;
- int *charp, oundoing= undoing;
- int tabsize=1, i=0;
+ int *charp, oundoing= undoing, do_tab= 0, i;
if (!text) return;
if(sel) txt_curs_sel(text, &linep, &charp);
@@ -845,32 +919,33 @@
undoing= 1;
- // do nice right only if there are only spaces
- // spaces hardcoded in DNA_text_types.h
- if (text->flags & TXT_TABSTOSPACES) {
- tabsize = TXT_TABSIZE;
-
- if ((*charp) + tabsize > (*linep)->len)
- tabsize = 1;
- else {
- for (i=0;i<(*charp) + tabsize - ((*charp) % tabsize);i++)
- if ((*linep)->line[i] != ' ') {
- tabsize = 1;
- break;
- }
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list