[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [43427] trunk/blender/source/blender: patch [#29859] UTF-8 support for text editor.

Lockal S lockalsash at gmail.com
Mon Jan 16 17:23:36 CET 2012


Revision: 43427
          http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=43427
Author:   lockal
Date:     2012-01-16 16:23:25 +0000 (Mon, 16 Jan 2012)
Log Message:
-----------
patch [#29859] UTF-8 support for text editor.
This also fixes cursor movement at the beginning of a line and adds a do_versions block for converting text files with the old extended ASCII encoding into UTF-8.

Modified Paths:
--------------
    trunk/blender/source/blender/blenkernel/BKE_text.h
    trunk/blender/source/blender/blenkernel/intern/text.c
    trunk/blender/source/blender/blenloader/intern/readfile.c
    trunk/blender/source/blender/editors/space_text/text_draw.c
    trunk/blender/source/blender/editors/space_text/text_intern.h
    trunk/blender/source/blender/editors/space_text/text_ops.c

Modified: trunk/blender/source/blender/blenkernel/BKE_text.h
===================================================================
--- trunk/blender/source/blender/blenkernel/BKE_text.h	2012-01-16 16:16:55 UTC (rev 43426)
+++ trunk/blender/source/blender/blenkernel/BKE_text.h	2012-01-16 16:23:25 UTC (rev 43427)
@@ -46,6 +46,7 @@
 void 			txt_set_undostate	(int u);
 int 			txt_get_undostate	(void);
 struct Text*	add_empty_text	(const char *name);
+int             txt_extended_ascii_as_utf8(char **str);
 int	            reopen_text		(struct Text *text);
 struct Text*	add_text		(const char *file, const char *relpath); 
 struct Text*	copy_text		(struct Text *ta);
@@ -59,6 +60,8 @@
 int		txt_find_string		(struct Text *text, const char *findstr, int wrap, int match_case);
 int		txt_has_sel			(struct Text *text);
 int		txt_get_span		(struct TextLine *from, struct TextLine *to);
+int     txt_utf8_offset_to_index(char *str, int offset);
+int     txt_utf8_index_to_offset(char *str, int index);
 void	txt_move_up			(struct Text *text, short sel);
 void	txt_move_down		(struct Text *text, short sel);
 void	txt_move_left		(struct Text *text, short sel);
@@ -86,9 +89,9 @@
 void	txt_split_curline	(struct Text *text);
 void	txt_backspace_char	(struct Text *text);
 void	txt_backspace_word	(struct Text *text);
-int		txt_add_char		(struct Text *text, char add);
-int		txt_add_raw_char	(struct Text *text, char add);
-int		txt_replace_char	(struct Text *text, char add);
+int		txt_add_char		(struct Text *text, unsigned int add);
+int		txt_add_raw_char	(struct Text *text, unsigned int add);
+int		txt_replace_char	(struct Text *text, unsigned int add);
 void	txt_export_to_object(struct Text *text);
 void	txt_export_to_objects(struct Text *text);
 void	txt_unindent		(struct Text *text);
@@ -127,34 +130,48 @@
 #define UNDO_SLEFT		005
 #define UNDO_SRIGHT		006
 #define UNDO_SUP		007
-#define UNDO_SDOWN		021
+#define UNDO_SDOWN		010
 
 /* Complex movement (opcode is followed
  * by 4 character line ID + a 2 character
  * position ID and opcode (repeat)) */
-#define UNDO_CTO		022
-#define UNDO_STO		023
+#define UNDO_CTO		011
+#define UNDO_STO		012
 
-/* Complex editing (opcode is followed
- * by 1 character ID and opcode (repeat)) */
-#define UNDO_INSERT		024
-#define UNDO_BS			025
-#define UNDO_DEL		026
+/* Complex editing */
+/* 1 - opcode is followed by 1 byte for ascii character and opcode (repeat)) */
+/* 2 - opcode is followed by 2 bytes for utf-8 character and opcode (repeat)) */
+/* 3 - opcode is followed by 3 bytes for utf-8 character and opcode (repeat)) */
+/* 4 - opcode is followed by 4 bytes for unicode character and opcode (repeat)) */
+#define UNDO_INSERT_1   013
+#define UNDO_INSERT_2   014
+#define UNDO_INSERT_3   015
+#define UNDO_INSERT_4   016
 
+#define UNDO_BS_1       017
+#define UNDO_BS_2       020
+#define UNDO_BS_3       021
+#define UNDO_BS_4       022
+
+#define UNDO_DEL_1      023
+#define UNDO_DEL_2      024
+#define UNDO_DEL_3      025
+#define UNDO_DEL_4      026
+
 /* Text block (opcode is followed
  * by 4 character length ID + the text
  * block itself + the 4 character length
  * ID (repeat) and opcode (repeat)) */
-#define UNDO_DBLOCK		027 /* Delete block */
-#define UNDO_IBLOCK		030 /* Insert block */
+#define UNDO_DBLOCK	    027 /* Delete block */
+#define UNDO_IBLOCK	    030 /* Insert block */
 
 /* Misc */
-#define UNDO_SWAP		031	/* Swap cursors */
+#define UNDO_SWAP       031	/* Swap cursors */
 
-#define UNDO_INDENT		032
-#define UNDO_UNINDENT		033
-#define UNDO_COMMENT		034
-#define UNDO_UNCOMMENT		035
+#define UNDO_INDENT     032
+#define UNDO_UNINDENT   033
+#define UNDO_COMMENT    034
+#define UNDO_UNCOMMENT  035
 
 /* Marker flags */
 #define TMARK_TEMP		0x01	/* Remove on non-editing events, don't save */

Modified: trunk/blender/source/blender/blenkernel/intern/text.c
===================================================================
--- trunk/blender/source/blender/blenkernel/intern/text.c	2012-01-16 16:16:55 UTC (rev 43426)
+++ trunk/blender/source/blender/blenkernel/intern/text.c	2012-01-16 16:23:25 UTC (rev 43427)
@@ -33,6 +33,8 @@
 #include <string.h> /* strstr */
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <wchar.h>
+#include <wctype.h>
 
 #include "MEM_guardedalloc.h"
 
@@ -215,8 +217,48 @@
 	return ta;
 }
 
+/* this function replaces extended ascii characters */
+/* to a valid utf-8 sequences */
+int txt_extended_ascii_as_utf8(char **str)
+{
+	int bad_char, added= 0, i= 0;
+	int length = strlen(*str);
+
+	while ((*str)[i]) {
+		if((bad_char= BLI_utf8_invalid_byte(*str+i, length)) == -1)
+		    break;
+
+		added++;
+		i+= bad_char + 1;
+	}
+	
+	if (added != 0) {
+		char *newstr = MEM_mallocN(length+added+1, "text_line");
+		int mi = 0;
+		i= 0;
+		
+		while ((*str)[i]) {
+			if((bad_char= BLI_utf8_invalid_byte((*str)+i, length)) == -1) {
+				memcpy(newstr+mi, (*str)+i, length - i + 1);
+				break;
+			}
+			
+			memcpy(newstr+mi, (*str)+i, bad_char);
+
+			BLI_str_utf8_from_unicode((*str)[i+bad_char], newstr+mi+bad_char);
+			i+= bad_char+1;
+			mi+= bad_char+2;
+		}
+		newstr[length+added] = '\0';
+		MEM_freeN(*str);
+		*str = newstr;
+	}
+	
+	return added;
+}
+
 // this function removes any control characters from
-// a textline
+// a textline and fixes invalid utf-8 sequences
 
 static void cleanup_textline(TextLine * tl)
 {
@@ -229,6 +271,7 @@
 			i--;
 		}
 	}
+	tl->len+= txt_extended_ascii_as_utf8(&tl->line);
 }
 
 int reopen_text(Text *text)
@@ -689,16 +732,10 @@
 }
 
 /* 0:whitespace, 1:punct, 2:alphanumeric */
-static short txt_char_type (char ch)
+static short txt_char_type(unsigned int ch)
 {
-	if (ch <= ' ') return 0; /* 32 */
-	if (ch <= '/') return 1; /* 47 */
-	if (ch <= '9') return 2; /* 57 */
-	if (ch <= '@') return 1; /* 64 */
-	if (ch <= 'Z') return 2; /* 90 */
-	if (ch == '_') return 2; /* 95, dont delimit '_' */
-	if (ch <= '`') return 1; /* 96 */
-	if (ch <= 'z') return 2; /* 122 */
+	if (iswspace(ch)) return 0;
+	if (iswalpha(ch) || iswdigit(ch)) return 2;
 	return 1;
 }
 
@@ -731,10 +768,43 @@
 	}
 }
 
-/****************************/
+/*****************************/
 /* Cursor movement functions */
-/****************************/
+/*****************************/
 
+int txt_utf8_offset_to_index(char *str, int offset)
+{
+	int index= 0, pos= 0;
+	while (pos != offset) {
+		pos += BLI_str_utf8_size(str + pos);
+		index++;
+	}
+	return index;
+}
+
+int txt_utf8_index_to_offset(char *str, int index)
+{
+	int offset= 0, pos= 0;
+	while (pos != index) {
+		offset += BLI_str_utf8_size(str + offset);
+		pos++;
+	}
+	return offset;
+}
+
+/* returns the real number of characters in string */
+/* not the same as BLI_strlen_utf8, which returns length for wide characters */
+static int txt_utf8_len(const char *src)
+{
+	int len;
+
+	for (len=0; *src; len++) {
+		src += BLI_str_utf8_size(src);
+	}
+
+	return len;
+}
+
 void txt_move_up(Text *text, short sel)
 {
 	TextLine **linep;
@@ -747,13 +817,13 @@
 	old= *charp;
 
 	if((*linep)->prev) {
+		int index = txt_utf8_offset_to_index((*linep)->line, *charp);
 		*linep= (*linep)->prev;
-		if (*charp > (*linep)->len) {
-			*charp= (*linep)->len;
-			if(!undoing) txt_undo_add_toop(text, sel?UNDO_STO:UNDO_CTO, txt_get_span(text->lines.first, (*linep)->next), old, txt_get_span(text->lines.first, *linep), (unsigned short) *charp);
-		} else {
-			if(!undoing) txt_undo_add_op(text, sel?UNDO_SUP:UNDO_CUP);
-		}
+		if (index > txt_utf8_len((*linep)->line)) *charp= (*linep)->len;
+		else *charp= txt_utf8_index_to_offset((*linep)->line, index);
+		
+		if(!undoing)
+			txt_undo_add_op(text, sel?UNDO_SUP:UNDO_CUP);
 	} else {
 		txt_move_bol(text, sel);
 	}
@@ -773,12 +843,13 @@
 	old= *charp;
 
 	if((*linep)->next) {
+		int index = txt_utf8_offset_to_index((*linep)->line, *charp);
 		*linep= (*linep)->next;
-		if (*charp > (*linep)->len) {
-			*charp= (*linep)->len;
-			if(!undoing) txt_undo_add_toop(text, sel?UNDO_STO:UNDO_CTO, txt_get_span(text->lines.first, (*linep)->prev), old, txt_get_span(text->lines.first, *linep), (unsigned short)*charp);
-		} else
-			if(!undoing) txt_undo_add_op(text, sel?UNDO_SDOWN:UNDO_CDOWN);	
+		if (index > txt_utf8_len((*linep)->line)) *charp= (*linep)->len;
+		else *charp= txt_utf8_index_to_offset((*linep)->line, index);
+		
+		if(!undoing)
+			txt_undo_add_op(text, sel?UNDO_SDOWN:UNDO_CDOWN);
 	} else {
 		txt_move_eol(text, sel);
 	}
@@ -790,7 +861,7 @@
 {
 	TextLine **linep;
 	int *charp, oundoing= undoing;
-	int tabsize = 1, i=0;
+	int tabsize= 0, i= 0;
 	
 	if (!text) return;
 	if(sel) txt_curs_sel(text, &linep, &charp);
@@ -799,32 +870,36 @@
 
 	undoing= 1;
 
-	// do nice left only if there are only spaces
-	// TXT_TABSIZE hardcoded in DNA_text_types.h
-	if (text->flags & TXT_TABSTOSPACES) {
-		tabsize = TXT_TABSIZE;
-
-		if (*charp < tabsize)
-			tabsize = *charp;
-		else {
-			for (i=0;i<(*charp);i++)
+	if (*charp== 0) {
+		if ((*linep)->prev) {
+			txt_move_up(text, sel);
+			*charp= (*linep)->len;
+		}
+	}
+	else {
+		// do nice left only if there are only spaces
+		// TXT_TABSIZE hardcoded in DNA_text_types.h
+		if (text->flags & TXT_TABSTOSPACES) {
+			tabsize= (*charp < TXT_TABSIZE) ? *charp : TXT_TABSIZE;
+			
+			for (i=0; i<(*charp); i++)
 				if ((*linep)->line[i] != ' ') {
-					tabsize = 1;
+					tabsize= 0;
 					break;
 				}
+			
 			// if in the middle of the space-tab
-			if ((*charp) % tabsize != 0)
-					tabsize = ((*charp) % tabsize);
+			if (tabsize && (*charp) % TXT_TABSIZE != 0)
+				tabsize= ((*charp) % TXT_TABSIZE);
 		}
-	}
-
-	if (*charp== 0) {
-		if ((*linep)->prev) {
-			txt_move_up(text, sel);
-			*charp= (*linep)->len;
+		
+		if (tabsize)
+			(*charp)-= tabsize;
+		else {
+			const char *prev= BLI_str_prev_char_utf8((*linep)->line + *charp);
+			*charp= prev - (*linep)->line;
 		}
 	}
-	else (*charp)-= tabsize;
 
 	undoing= oundoing;
 	if(!undoing) txt_undo_add_op(text, sel?UNDO_SLEFT:UNDO_CLEFT);
@@ -835,8 +910,7 @@
 void txt_move_right(Text *text, short sel) 
 {
 	TextLine **linep;
-	int *charp, oundoing= undoing;
-	int tabsize=1, i=0;
+	int *charp, oundoing= undoing, do_tab= 0, i;
 	
 	if (!text) return;
 	if(sel) txt_curs_sel(text, &linep, &charp);
@@ -845,32 +919,33 @@
 
 	undoing= 1;
 
-	// do nice right only if there are only spaces
-	// spaces hardcoded in DNA_text_types.h
-	if (text->flags & TXT_TABSTOSPACES) {
-		tabsize = TXT_TABSIZE;
-
-		if ((*charp) + tabsize > (*linep)->len)
-			tabsize = 1;
-		else {
-			for (i=0;i<(*charp) + tabsize - ((*charp) % tabsize);i++)
-				if ((*linep)->line[i] != ' ') {
-					tabsize = 1;
-					break;
-				}

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list