[Bf-translations-svn] SVN commit: /data/svn/bf-translations [195] branches/fa: Cloned ar_to_utf.py to see if the rest of the characters will be converted.

bf-translations at blender.org bf-translations at blender.org
Fri Nov 18 06:33:59 CET 2011


Revision: 195
          http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-translations&revision=195
Author:   leomoon
Date:     2011-11-18 05:33:58 +0000 (Fri, 18 Nov 2011)
Log Message:
-----------
Cloned ar_to_utf.py to see if the rest of the characters will be converted.

Modified Paths:
--------------
    branches/fa/fa_to_utf.py

Removed Paths:
-------------
    branches/fa/persian_to_utf.py

Modified: branches/fa/fa_to_utf.py
===================================================================
--- branches/fa/fa_to_utf.py	2011-11-17 20:44:52 UTC (rev 194)
+++ branches/fa/fa_to_utf.py	2011-11-18 05:33:58 UTC (rev 195)
@@ -1,5 +1,31 @@
 #coding: utf-8
 
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+#    Authors:
+#      -Dalai Felinto
+#      -Yousef Harfoush
+#      -Bastien Montagne
+#
+#    based on The Arabic Writer script by Omar Muhammad
+#    thanks to Rabeh Torchi for ideas
+#
+# ***** END GPL LICENSE BLOCK *****
+
 import sys
 import os
 
@@ -9,67 +35,67 @@
 
 
 # Isolated, Beginning, Middle, End
-# http://en.wikipedia.org/wiki/Persian_alphabet
-alefa = ["ﺁ","ﺁ","ﺂ","ﺂ"]
-alefd = ["ﺍ","ﺍ","ﺎ","ﺎ"]
-beeee = ["ﺏ","ﺑ","ﺒ","ﺐ"]
-peeee = ["ﭖ","ﭘ","ﭙ","ﭗ"]
-teeee = ["ﺕ","ﺗ","ﺘ","ﺖ"]
-seeee = ["ﺙ","ﺛ","ﺜ","ﺚ"]
-jimmm = ["ﺝ","ﺟ","ﺠ","ﺞ"]
-cheee = ["ﭺ","ﭼ","ﭽ","ﭻ"]
-heeee = ["ﺡ","ﺣ","ﺤ","ﺢ"]
-kheee = ["ﺥ","ﺧ","ﺨ","ﺦ"]
-daall = ["ﺩ","ﺩ","ﺪ","ﺪ"]
-zaall = ["ﺫ","ﺫ","ﺬ","ﺬ"]
-reeee = ["ﺭ","ﺭ","ﺮ","ﺮ"]
-zeeee = ["ﺯ","ﺯ","ﺰ","ﺰ"]
-zheee = ["ﮊ","ﮊ","ﮋ","ﮋ"]
-sinnn = ["ﺱ","ﺳ","ﺴ","ﺲ"]
-shinn = ["ﺵ","ﺷ","ﺸ","ﺶ"]
-saddd = ["ﺹ","ﺻ","ﺼ","ﺺ"]
-zaddd = ["ﺽ","ﺿ","ﻀ","ﺾ"]
-taaaa = ["ﻁ","ﻃ","ﻄ","ﻂ"]
-zaaaa = ["ﻅ","ﻇ","ﻈ","ﻆ"]
-eynnn = ["ﻉ","ﻋ","ﻌ","ﻊ"]
-gheyn = ["ﻍ","ﻏ","ﻐ","ﻎ"]
-feeee = ["ﻑ","ﻓ","ﻔ","ﻒ"]
-ghaaf = ["ﻕ","ﻗ","ﻘ","ﻖ"]
-kaaff = ["ک","ﻛ","ﻜ","ﮏ"]
-gaaff = ["ﮒ","ﮔ","ﮕ","ﮓ"]
-laamm = ["ﻝ","ﻟ","ﻠ","ﻞ"]
-mimmm = ["ﻡ","ﻣ","ﻤ","ﻢ"]
-noonn = ["ﻥ","ﻧ","ﻨ","ﻦ"]
-vaavv = ["ﻭ","ﻭ","ﻮ","ﻮ"]
-hehhh = ["ﻩ","ﻫ","ﻬ","ﻪ"]
-yeeee = ["ﯼ","ﯾ","ﯿ","ﯽ"]
+# http://en.wikipedia.org/wiki/Arabic_alphabet
+# http://en.wikipedia.org/wiki/Arabic_characters_in_Unicode
+alfmd = ["ﺁ","ﺁ","ﺂ","ﺂ"]
+alfhz = ["ﺃ","ﺃ","ﺄ","ﺄ"]
+wowhz = ["ﺅ","ﺅ","ﺆ","ﺆ"]
+alfxr = ["ﺇ","ﺇ","ﺈ","ﺈ"]
+hamzk = ["ﺉ","ﺋ","ﺌ","ﺊ"]
+alfff = ["ﺍ","ﺍ","ﺎ","ﺎ"]
+baaaa = ["ﺏ","ﺑ","ﺒ","ﺐ"]
+tamrb = ["ﺓ","ﺓ","ﺔ","ﺔ"]
+taaaa = ["ﺕ","ﺗ","ﺘ","ﺖ"]
+thaaa = ["ﺙ","ﺛ","ﺜ","ﺚ"]
+geeem = ["ﺝ","ﺟ","ﺠ","ﺞ"]
+haaaa = ["ﺡ","ﺣ","ﺤ","ﺢ"]
+khaaa = ["ﺥ","ﺧ","ﺨ","ﺦ"]
+daaal = ["ﺩ","ﺩ","ﺪ","ﺪ"]
+thaal = ["ﺫ","ﺫ","ﺬ","ﺬ"]
+raaaa = ["ﺭ","ﺭ","ﺮ","ﺮ"]
+zaaai = ["ﺯ","ﺯ","ﺰ","ﺰ"]
+seeen = ["ﺱ","ﺳ","ﺴ","ﺲ"]
+sheen = ["ﺵ","ﺷ","ﺸ","ﺶ"]
+saaad = ["ﺹ","ﺻ","ﺼ","ﺺ"]
+daaad = ["ﺽ","ﺿ","ﻀ","ﺾ"]
+taaah = ["ﻁ","ﻃ","ﻄ","ﻂ"]
+daaah = ["ﻅ","ﻇ","ﻈ","ﻆ"]
+aayen = ["ﻉ","ﻋ","ﻌ","ﻊ"]
+gayen = ["ﻍ","ﻏ","ﻐ","ﻎ"]
+faaaa = ["ﻑ","ﻓ","ﻔ","ﻒ"]
+qaaaf = ["ﻕ","ﻗ","ﻘ","ﻖ"]
+kaaaf = ["ﻙ","ﻛ","ﻜ","ﻚ"]
+laaam = ["ﻝ","ﻟ","ﻠ","ﻞ"]
+meeem = ["ﻡ","ﻣ","ﻤ","ﻢ"]
+nooon = ["ﻥ","ﻧ","ﻨ","ﻦ"]
+hhhhh = ["ﻩ","ﻫ","ﻬ","ﻪ"]
+wowww = ["ﻭ","ﻭ","ﻮ","ﻮ"]
+yaamd = ["ﻯ","ﻯ","ﻰ","ﻰ"]
+yaaaa = ["ﻱ","ﻳ","ﻴ","ﻲ"]
+laamd = ["ﻵ","ﻵ","ﻶ","ﻶ"]
+laahz = ["ﻷ","ﻷ","ﻸ","ﻸ"]
+laaxr = ["ﻹ","ﻹ","ﻺ","ﻺ"]
+laaaa = ["ﻻ","ﻻ","ﻼ","ﻼ"]
 
-yehmz = ["ﺉ","ﺋ","ﺌ","ﺊ"]
-laaad = ["ﻻ","ﻻ","ﻼ","ﻼ"]
-
 # defining numbers
-numbers ="0123456789۰۱۲۳۴۵۶۷۸۹"
-#defining Persian unicodec chars
-unicodec ="ﺁﺁﺂﺂﺍﺍﺎﺎﺏﺑﺒﺐﭖﭘﭙﭗﺕﺗﺘﺖﺙﺛﺜﺚﺝﺟﺠﺞﭺﭼﭽﭻﺡﺣﺤﺢﺥﺧﺨﺦﺩﺩﺪﺪﺫﺫﺬﺬﺭﺭﺮﺮﺯﺯﺰﺰﮊﮊﮋﮋﺱﺳﺴﺲﺵﺷﺸﺶﺹﺻﺼﺺﺽﺿﻀﺾﻁﻃﻄﻂﻅﻇﻈﻆﻉﻋﻌﻊﻍﻏﻐﻎﻑﻓﻔﻒﻕﻗﻘﻖکﻛﻜﮏﮒﮔﮕﮓﻝﻟﻠﻞﻡﻣﻤﻢﻥﻧﻨﻦﻭﻭﻮﻮﻩﻫﻬﻪیﻳﻴﻰﺉﺋﺌﺊﻻﻻﻼﻼ"
+numbers ="0123456789٠١٢٣٤٥٦٧٨٩"
+#defining arabic unicodec chars
+unicodec ="ﺁﺁﺂﺂﺃﺃﺄﺄﺅﺅﺆﺆﺇﺇﺈﺈﺉﺋﺌﺊﺍﺍﺎﺎﺏﺑﺒﺐﺓﺓﺔﺔﺕﺗﺘﺖﺙﺛﺜﺚﺝﺟﺠﺞﺡﺣﺤﺢﺥﺧﺨﺦﺩﺩﺪﺪﺫﺫﺬﺬﺭﺭﺮﺮﺯﺯﺰﺰﺱﺳﺴﺲﺵﺷﺸﺶﺹﺻﺼﺺﺽﺿﻀﺾﻁﻃﻄﻂﻅﻇﻈﻆﻉﻋﻌﻊﻍﻏﻐﻎﻑﻓﻔﻒﻕﻗﻘﻖﻙﻛﻜﻚﻝﻟﻠﻞﻡﻣﻤﻢﻥﻧﻨﻦﻩﻫﻬﻪﻭﻭﻮﻮﻯﻯﻰﻰﻱﻳﻴﻲﻵﻵﻶﻶﻷﻷﻸﻸﻹﻹﻺﻺﻻﻻﻼﻼ"
 
 # letters that have only Isolated and End forms
 # (and work as word breakers) + laaam and deriveds
-wordbreak ="آادذرزﮊوﻻ"
+wordbreak ="آأؤإاةدذرزوﻵﻷﻹﻻ"
 
-# defining all Persian letters + araab
-#Persian ="ًٌٍَُِّْْئءؤرلاىةوزظشسيبلاتنمكطضصثقفغعهخحجدذْلآآلأأـلإإ،؟"
-Persian = "ﺁﺍﺉ"
-# defining the araab
-#araab ="ًٌٍَُِّْْ"
-araab = ""
-
+# defining all arabic letters + harakat
+arabic ="ًٌٍَُِّْْئءؤرلاىةوزظشسيبلاتنمكطضصثقفغعهخحجدذْلآآلأأـلإإ،؟"
+# defining the harakat
+harakat ="ًٌٍَُِّْْ"
 # defining other symbols
-#sym ="ًٌٍَُِّـ.،؟ @#$%^&*-+|\/=~(){}ْ,:"
-sym = "{}()~=/\|+-*&^%$#@ ٬؟.,:"
+sym ="ًٌٍَُِّـ.،؟ @#$%^&*-+|\/=~(){}ْ,:"
 
 def ProcessInput(input):
     """main function, the code is not self-explanatory.
-    It requires understanding of Persian alphabet.
+    It requires understanding of arabic alphabet.
     """
 
     words = ""
@@ -81,21 +107,21 @@
     g = 0
     while g < ln: 
 
-        b=a=1 #ignoring/discarding the araab
+        b=a=1 #ignoring/discarding the harakat
         # see how many chars I need to skip to get the next
-        # non-araab char in the left (a) or the right (b)
+        # non-harakat char in the left (a) or the right (b)
 
-        while g-b >= 0 and x[g-b] in araab: b+=1
-        while g+a < ln and x[g+a] in araab: a+=1
+        while g-b >= 0 and x[g-b] in harakat: b+=1
+        while g+a < ln and x[g+a] in harakat: a+=1
         
         # get the position
-        if x[g] not in wordbreak and g+a < ln and x[g+a] in Persian and x[g+a] != "ء":
-            if g-b >= 0 and x[g-b] not in wordbreak and x[g-b] in Persian and x[g-b] != "ء":
+        if x[g] not in wordbreak and g+a < ln and x[g+a] in arabic and x[g+a] != "ء":
+            if g-b >= 0 and x[g-b] not in wordbreak and x[g-b] in arabic and x[g-b] != "ء":
                 pos = 2 # middle
             else:
                 pos = 1 # beggining
         else:
-            if g-b >= 0 and x[g-b] not in wordbreak and x[g-b] in Persian and x[g-b] != "ء":
+            if g-b >= 0 and x[g-b] not in wordbreak and x[g-b] in arabic and x[g-b] != "ء":
                 pos = 3 # end
             else:
                 pos = 0 # isolated
@@ -114,33 +140,34 @@
 
         # dealing with letters, output each
         # letter with its right position
-        elif x[g]=="آ": chr=alefa[pos]
-        elif x[g]=="ا": chr=alefd[pos]
-        elif x[g]=="ب": chr=beeee[pos]
-        elif x[g]=="پ": chr=peeee[pos]
-        elif x[g]=="ت": chr=teeee[pos]
-        elif x[g]=="ث": chr=seeee[pos]
-        elif x[g]=="ج": chr=jimmm[pos]
-        elif x[g]=="چ": chr=cheee[pos]
-        elif x[g]=="ح": chr=heeee[pos]
-        elif x[g]=="خ": chr=kheee[pos]
-        elif x[g]=="د": chr=daall[pos]
-        elif x[g]=="ذ": chr=zaall[pos]
-        elif x[g]=="ر": chr=reeee[pos]
-        elif x[g]=="ز": chr=zeeee[pos]
-        elif x[g]=="ژ": chr=zheee[pos]
-        elif x[g]=="س": chr=sinnn[pos]
-        elif x[g]=="ش": chr=shinn[pos]
-        elif x[g]=="ص": chr=saddd[pos]
-        elif x[g]=="ض": chr=zaddd[pos]
-        elif x[g]=="ط": chr=taaaa[pos]
-        elif x[g]=="ظ": chr=zaaaa[pos]
-        elif x[g]=="ع": chr=eynnn[pos]
-        elif x[g]=="غ": chr=gheyn[pos]
-        elif x[g]=="ف": chr=feeee[pos]
-        elif x[g]=="ق": chr=ghaaf[pos]
-        elif x[g]=="ک": chr=kaaff[pos]
-        elif x[g]=="گ": chr=gaaff[pos]
+        elif x[g]=="آ": chr=alfmd[pos]
+        elif x[g]=="أ": chr=alfhz[pos]
+        elif x[g]=="ؤ": chr=wowhz[pos]
+        elif x[g]=="إ": chr=alfxr[pos]
+        elif x[g]=="ئ": chr=hamzk[pos]
+        elif x[g]=="ا": chr=alfff[pos]
+        elif x[g]=="ب": chr=baaaa[pos]
+        elif x[g]=="ة": chr=tamrb[pos]
+        elif x[g]=="ت": chr=taaaa[pos]
+        elif x[g]=="ث": chr=thaaa[pos]
+        elif x[g]=="ج": chr=geeem[pos]
+        elif x[g]=="ح": chr=haaaa[pos]
+        elif x[g]=="خ": chr=khaaa[pos]
+        elif x[g]=="د": chr=daaal[pos]
+        elif x[g]=="ذ": chr=thaal[pos]
+        elif x[g]=="ر": chr=raaaa[pos]
+        elif x[g]=="ز": chr=zaaai[pos]
+        elif x[g]=="س": chr=seeen[pos]
+        elif x[g]=="ش": chr=sheen[pos]
+        elif x[g]=="ص": chr=saaad[pos]
+        elif x[g]=="ض": chr=daaad[pos]
+        elif x[g]=="ط": chr=taaah[pos]
+        elif x[g]=="ظ": chr=daaah[pos]
+        elif x[g]=="ع": chr=aayen[pos]
+        elif x[g]=="غ": chr=gayen[pos]
+        elif x[g]=="ف": chr=faaaa[pos]
+        elif x[g]=="ق": chr=qaaaf[pos]
+        elif x[g]=="ك": chr=kaaaf[pos]
         elif x[g]=="ل":
         # dealing with (la combination)
         # in this case the char has two chars in one
@@ -148,20 +175,23 @@
             g = g+1
             if g == ln:
                 g = g-1
-                chr=laamm[pos]
-            elif x[g]=="ا": chr=laaad[pos]
-            #elif x[g]=="أ": chr=laahz[pos]
-            #elif x[g]=="إ": chr=laaxr[pos]
-            #elif x[g]=="آ": chr=laamd[pos]
+                chr=laaam[pos]
+            elif x[g]=="ا": chr=laaaa[pos]
+            elif x[g]=="أ": chr=laahz[pos]
+            elif x[g]=="إ": chr=laaxr[pos]
+            elif x[g]=="آ": chr=laamd[pos]
             else:
                 g = g-1
-                chr=laamm[pos]
-        elif x[g]=="م": chr=mimmm[pos]
-        elif x[g]=="ن": chr=noonn[pos]
-        elif x[g]=="و": chr=vaavv[pos]
-        elif x[g]=="ه": chr=hehhh[pos]
-        elif x[g]=="ی": chr=yeeee[pos]
-        elif x[g]=="ئ": chr=yehmz[pos]
+                chr=laaam[pos]
+        elif x[g]=="م": chr=meeem[pos]
+        elif x[g]=="ن": chr=nooon[pos]
+        elif x[g]=="ه": chr=hhhhh[pos]
+        elif x[g]=="و": chr=wowww[pos]
+        elif x[g]=="ى": chr=yaamd[pos]
+        elif x[g]=="ي": chr=yaaaa[pos]
+        elif x[g]=="لآ": chr=laamd[pos]
+        elif x[g]=="لأ": chr=laahz[pos]
+        elif x[g]=="لإ": chr=laaxr[pos]
         elif x[g]=="لا": chr=laaaa[pos]
         #if the char is a symbol, add it
         elif x[g] in sym:
@@ -177,7 +207,7 @@
                     chr = x[g]
             else:
                 chr = x[g]
-        #if the char is an Persian reversed letter, reverse it back!
+        #if the char is an arabic reversed letter, reverse it back!
         elif x[g] in unicodec: chr=x[g]
 
         # advance to the next char
@@ -222,6 +252,7 @@
 
 
 if __name__ == "__main__":
+    #argument parsing
     import argparse
     parser = argparse.ArgumentParser(description="Open the .po file and do " \
                                                  "a special reverse in the " \
@@ -232,7 +263,7 @@
 

@@ Diff output truncated at 10240 characters. @@


More information about the Bf-translations-svn mailing list