[Bf-translations-svn] SVN commit: /data/svn/bf-translations [392] branches/ar/ar_to_utf.py: -refactoring the script, now it shows the python tooltips

Wed Feb 15 01:46:27 CET 2012

Revision: 392
          http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-translations&revision=392
Author:   bat3a
Date:     2012-02-15 00:46:26 +0000 (Wed, 15 Feb 2012)
Log Message:
-----------
-refactoring the script, now it shows the python tooltips

Modified Paths:
--------------
    branches/ar/ar_to_utf.py

Modified: branches/ar/ar_to_utf.py
===================================================================

--- branches/ar/ar_to_utf.py	2012-02-14 04:19:10 UTC (rev 391)
+++ branches/ar/ar_to_utf.py	2012-02-15 00:46:26 UTC (rev 392)
@@ -17,8 +17,8 @@
 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 #
 #    Authers:
-#      -Dalai Felinto
 #      -Yousef Harfoush
+#      -Dalai Felinto
 #      -Bastien Montagne
 #
 #    based on The Arabic Writer script by Omar Muhammad
@@ -28,12 +28,8 @@
 
 import sys
 import os
+import re
 
-
-def stripeol(s):
-    return s.rstrip("\n\r")
-
-
 # Isolated, Beginning, Middle, End
 # http://en.wikipedia.org/wiki/Arabic_alphabet
 # http://en.wikipedia.org/wiki/Arabic_characters_in_Unicode
@@ -77,180 +73,176 @@
 laaxr = ["ﻹ","ﻹ","ﻺ","ﻺ"]
 laaaa = ["ﻻ","ﻻ","ﻼ","ﻼ"]
 
+# these are not used for now but maybe later---------
 # defining numbers
 numbers ="0123456789٠١٢٣٤٥٦٧٨٩"
-#defining arabic unicodec chars
+
+# defining arabic unicodec chars
 unicodec ="ﺁﺁﺂﺂﺃﺃﺄﺄﺅﺅﺆﺆﺇﺇﺈﺈﺉﺋﺌﺊﺍﺍﺎﺎﺏﺑﺒﺐﺓﺓﺔﺔﺕﺗﺘﺖﺙﺛﺜﺚﺝﺟﺠﺞﺡﺣﺤﺢﺥﺧﺨﺦﺩﺩﺪﺪﺫﺫﺬﺬﺭﺭﺮﺮﺯﺯﺰﺰﺱﺳﺴﺲﺵﺷﺸﺶﺹﺻﺼﺺﺽﺿﻀﺾﻁﻃﻄﻂﻅﻇﻈﻆﻉﻋﻌﻊﻍﻏﻐﻎﻑﻓﻔﻒﻕﻗﻘﻖﻙﻛﻜﻚﻝﻟﻠﻞﻡﻣﻤﻢﻥﻧﻨﻦﻩﻫﻬﻪﻭﻭﻮﻮﻯﻯﻰﻰﻱﻳﻴﻲﻵﻵﻶﻶﻷﻷﻸﻸﻹﻹﻺﻺﻻﻻﻼﻼ"
 
-# letters that have only Isolated and End forms
-# (and work as word breakers) + laaam and deriveds
-wordbreak ="آأؤإاةدذرزوﻵﻷﻹﻻ"
-
-# defining all arabic letters + harakat
-arabic ="ًٌٍَُِّْْئءؤرلاىةوزظشسيبلاتنمكطضصثقفغعهخحجدذْلآآلأأـلإإ،؟"
 # defining the harakat
 harakat ="ًٌٍَُِّْْ"
+
 # defining other symbols
 sym ="ًٌٍَُِّـ.،؟ @#$%^&*-+|\/=~(){}ْ,:"
+#---------------------------------------------------
 
-def ProcessInput(input):
-    """main function, the code is not self-explanatory.
-    It requires understanding of arabic alphabet.
-    """
+# letters that have only Isolated and End forms
+# (and work as word breakers) + the lam-alef ligatures derived from lam
+wordbreak ="آأؤإاةدذرزوﻵﻷﻹﻻ"
 
-    words = ""
-    x=list(input)
-    ln=len(x)
+# defining all arabic letters + harakat
+arabic ="ًٌٍَُِّْْئءؤرلاىةوزظشسيبلاتنمكطضصثقفغعهخحجدذْلآآلأأـلإإ،؟"
 
-    #process each letter, submit it to tests and then add it to the output string
-    # we can't do a for loop because we need to change 'g' inside the loop
-    g = 0
-    while g < ln: 
+def ProcessInput(charPre, charOp, charPost):
+    # process one letter: run it through the tests below and return its shaped form
 
-        b=a=1 #ignoring/discarding the harakat
-        # see how many chars I need to skip to get the next
-        # non-harakat char in the left (a) or the right (b)
+    chr = ""
+    
+    # get the position
+    if charPre == "Null" and charPost != "Null": # WILL BE ONLY POS 0 OR 1
+        if charOp in wordbreak:#  WILL BE ONLY POS 0
+            pos = 0 # isolated
+        else: #  WILL BE ONLY POS 1
+            pos = 1 # start
+    elif charPost == "Null" and charPre != "Null": # WILL BE ONLY POS 0 OR 3
+        if charOp in wordbreak:#  WILL BE ONLY POS 0
+            pos = 0 # isolated
+        else: #  WILL BE ONLY POS 3
+            pos = 3 # end
+    elif charPost == "Null" and charPre == "Null": # WILL BE ONLY POS 0
+        pos = 0 # isolated
+    else: # WILL BE ONLY POS 1 OR 2
+        if charPre in wordbreak:#  WILL BE ONLY POS 1
+            pos = 1 # start
+        else: #  WILL BE ONLY POS 2
+            pos = 2 # middle
 
-        while g-b >= 0 and x[g-b] in harakat: b+=1
-        while g+a < ln and x[g+a] in harakat: a+=1
-        
-        # get the position
-        if x[g] not in wordbreak and g+a < ln and x[g+a] in arabic and x[g+a] != "ء":
-            if g-b >= 0 and x[g-b] not in wordbreak and x[g-b] in arabic and x[g-b] != "ء":
-                pos = 2 # middle
-            else:
-                pos = 1 # beggining
-        else:
-            if g-b >= 0 and x[g-b] not in wordbreak and x[g-b] in arabic and x[g-b] != "ء":
-                pos = 3 # end
-            else:
-                pos = 0 # isolated
+    # find what char to aggregate to the phrase based on the input and its
+    # position in the word.    
+    if charOp=="ء": chr="ﺀ"
+    elif charOp=="آ":#skip this when it was already combined with the preceding laam
+        if not charPre=="ل":
+            chr=alfmd[pos]
+    elif charOp=="أ": 
+        if not charPre=="ل":
+            chr=alfhz[pos]
+    elif charOp=="إ": 
+        if not charPre=="ل":
+            chr=alfxr[pos]
+    elif charOp=="ا": 
+        if not charPre=="ل":
+            chr=alfff[pos]
+    elif charOp=="ؤ": chr=wowhz[pos]
+    elif charOp=="ئ": chr=hamzk[pos]
+    elif charOp=="ب": chr=baaaa[pos]
+    elif charOp=="ة": chr=tamrb[pos]
+    elif charOp=="ت": chr=taaaa[pos]
+    elif charOp=="ث": chr=thaaa[pos]
+    elif charOp=="ج": chr=geeem[pos]
+    elif charOp=="ح": chr=haaaa[pos]
+    elif charOp=="خ": chr=khaaa[pos]
+    elif charOp=="د": chr=daaal[pos]
+    elif charOp=="ذ": chr=thaal[pos]
+    elif charOp=="ر": chr=raaaa[pos]
+    elif charOp=="ز": chr=zaaai[pos]
+    elif charOp=="س": chr=seeen[pos]
+    elif charOp=="ش": chr=sheen[pos]
+    elif charOp=="ص": chr=saaad[pos]
+    elif charOp=="ض": chr=daaad[pos]
+    elif charOp=="ط": chr=taaah[pos]
+    elif charOp=="ظ": chr=daaah[pos]
+    elif charOp=="ع": chr=aayen[pos]
+    elif charOp=="غ": chr=gayen[pos]
+    elif charOp=="ف": chr=faaaa[pos]
+    elif charOp=="ق": chr=qaaaf[pos]
+    elif charOp=="ك": chr=kaaaf[pos]
+    elif charOp=="ل":
+    # dealing with (la combination)
+    # in this case the char has two chars in one
+        if charPost == " ": chr=laaam[pos]
+        elif charPost=="ا": chr=laaaa[pos]
+        elif charPost=="أ": chr=laahz[pos]
+        elif charPost=="إ": chr=laaxr[pos]
+        elif charPost=="آ": chr=laamd[pos]
+        else: chr=laaam[pos]
+    elif charOp=="م": chr=meeem[pos]
+    elif charOp=="ن": chr=nooon[pos]
+    elif charOp=="ه": chr=hhhhh[pos]
+    elif charOp=="و": chr=wowww[pos]
+    elif charOp=="ى": chr=yaamd[pos]
+    elif charOp=="ي": chr=yaaaa[pos]
+    elif charOp=="لآ": chr=laamd[pos]
+    elif charOp=="لأ": chr=laahz[pos]
+    elif charOp=="لإ": chr=laaxr[pos]
+    elif charOp=="لا": chr=laaaa[pos]
+    else:
+        chr = charOp
+    return chr
 
-        # find what char to aggregate to the phrase based on the input and its
-        # position in the word.
-        chr = ""
+# check if the line needs editing
+def needsEditing(line, x):
+    if line.startswith('msgstr "'):
+        if not line.startswith('msgstr ""'):
+            return 'true'
+    elif line.startswith('"'):
+        words=line[1:-1].split(' ')
+        for word in words:
+            chars = list(word)
+            if chars and chars[0] in arabic:
+                return 'true'
 
-        if x[g]=="\n": {} #if this char is a new line, go to add new line def
-        elif x[g]=="\r": {} #if this char is carriage return, skip it.
-        elif x[g]=="{": chr="}" #dealing with parenthesis
-        elif x[g]=="}": chr="{"
-        elif x[g]=="(": chr=")"
-        elif x[g]==")": chr="("
-        elif x[g]=="ء": chr="ﺀ"
-
-        # dealing with letters, output each
-        # letter with its right position
-        elif x[g]=="آ": chr=alfmd[pos]
-        elif x[g]=="أ": chr=alfhz[pos]
-        elif x[g]=="ؤ": chr=wowhz[pos]
-        elif x[g]=="إ": chr=alfxr[pos]
-        elif x[g]=="ئ": chr=hamzk[pos]
-        elif x[g]=="ا": chr=alfff[pos]
-        elif x[g]=="ب": chr=baaaa[pos]
-        elif x[g]=="ة": chr=tamrb[pos]
-        elif x[g]=="ت": chr=taaaa[pos]
-        elif x[g]=="ث": chr=thaaa[pos]
-        elif x[g]=="ج": chr=geeem[pos]
-        elif x[g]=="ح": chr=haaaa[pos]
-        elif x[g]=="خ": chr=khaaa[pos]
-        elif x[g]=="د": chr=daaal[pos]
-        elif x[g]=="ذ": chr=thaal[pos]
-        elif x[g]=="ر": chr=raaaa[pos]
-        elif x[g]=="ز": chr=zaaai[pos]
-        elif x[g]=="س": chr=seeen[pos]
-        elif x[g]=="ش": chr=sheen[pos]
-        elif x[g]=="ص": chr=saaad[pos]
-        elif x[g]=="ض": chr=daaad[pos]
-        elif x[g]=="ط": chr=taaah[pos]
-        elif x[g]=="ظ": chr=daaah[pos]
-        elif x[g]=="ع": chr=aayen[pos]
-        elif x[g]=="غ": chr=gayen[pos]
-        elif x[g]=="ف": chr=faaaa[pos]
-        elif x[g]=="ق": chr=qaaaf[pos]
-        elif x[g]=="ك": chr=kaaaf[pos]
-        elif x[g]=="ل":
-        # dealing with (la combination)
-        # in this case the char has two chars in one
-        # so we should increment the counter (g)
-            g = g+1
-            if g == ln:
-                g = g-1
-                chr=laaam[pos]
-            elif x[g]=="ا": chr=laaaa[pos]
-            elif x[g]=="أ": chr=laahz[pos]
-            elif x[g]=="إ": chr=laaxr[pos]
-            elif x[g]=="آ": chr=laamd[pos]
-            else:
-                g = g-1
-                chr=laaam[pos]
-        elif x[g]=="م": chr=meeem[pos]
-        elif x[g]=="ن": chr=nooon[pos]
-        elif x[g]=="ه": chr=hhhhh[pos]
-        elif x[g]=="و": chr=wowww[pos]
-        elif x[g]=="ى": chr=yaamd[pos]
-        elif x[g]=="ي": chr=yaaaa[pos]
-        elif x[g]=="لآ": chr=laamd[pos]
-        elif x[g]=="لأ": chr=laahz[pos]
-        elif x[g]=="لإ": chr=laaxr[pos]
-        elif x[g]=="لا": chr=laaaa[pos]
-        #if the char is a symbol, add it
-        elif x[g] in sym:
-            # Special cases for escaped \" and \t
-            if x[g] == "\\" and g+1 < ln:
-                if x[g+1] == "\"":
-                    chr="\\\""
-                    g += 1
-                elif x[g+1] == "t":
-                    chr="\\t"
-                    g += 1
-                else:
-                    chr = x[g]
-            else:
-                chr = x[g]
-        #if the char is an arabic reversed letter, reverse it back!
-        elif x[g] in unicodec: chr=x[g]
-
-        # advance to the next char
-        g += 1
-        # add the char before the previous one
-        words = chr+words
-    return words
-
 def Start(fileR, fileW):
-    """Open the .po file and do a special reverse in the msgstr lines"""
+    #Open the .po file and do a special reverse in the msgstr lines
     fileR = open(fileR, "r",-1, "utf-8")
     fileW = open(fileW, "w",-1, "utf-8")
-
-    inside_msgstr = False
-    inside_header = True
-
-    for line in fileR:
-        if inside_header:
-            fileW.write(line)
-            if line == "\n": inside_header = False
-
-        else:
-            if line.startswith("msgstr"):
-                    strng = stripeol(line)[8:-1]
-                    rslt = ProcessInput(strng)
-                    fileW.write('msgstr "'+rslt+'"\n')
-                    inside_msgstr = True

@@ Diff output truncated at 10240 characters. @@