Documentation
¶
Overview ¶
Package arabicgo provides Arabic text shaping and processing for Go applications. It handles Arabic character joining, ligatures, tashkeel (diacritical marks), and right-to-left text rendering.
Index ¶
Constants ¶
// Code points for Arabic tashkeel (diacritical marks), the precomposed
// Shadda+vowel ligatures, and the Eastern Arabic-Indic digits.
const (
	// Short vowels and basic marks.
	FATHA  rune = '\u064E' // َ (short a)
	DAMMA  rune = '\u064F' // ُ (short u)
	KASRA  rune = '\u0650' // ِ (short i)
	SUKUN  rune = '\u0652' // ْ (no vowel)
	SHADDA rune = '\u0651' // ّ (gemination/doubling)

	// Tanween (nunation).
	TANWEEN_FATH rune = '\u064B' // ً (an)
	TANWEEN_DAMM rune = '\u064C' // ٌ (un)
	TANWEEN_KASR rune = '\u064D' // ٍ (in)

	// Quranic / extended marks.
	SUPERSCRIPT_ALEF rune = '\u0670' // ٰ (dagger alef)
	MADDAH_ABOVE     rune = '\u0653' // ٓ (maddah)
	HAMZA_ABOVE      rune = '\u0654' // ٔ (hamza above)
	HAMZA_BELOW      rune = '\u0655' // ٕ (hamza below)
	SUBSCRIPT_ALEF   rune = '\u0656' // ٖ (subscript alef)
	INVERTED_DAMMA   rune = '\u0657' // ٗ (inverted damma)
	MARK_NOON_GHUNNA rune = '\u0658' // ٘ (noon ghunna)

	// Shadda + vowel ligatures, isolated forms. These code points live in
	// the Arabic Presentation Forms-A block (U+FB50-U+FDFF), not Forms-B.
	SHADDA_FATHA            rune = '\uFC60' // ﱠ
	SHADDA_DAMMA            rune = '\uFC61' // ﱡ
	SHADDA_KASRA            rune = '\uFC62' // ﱢ
	SHADDA_DAMMATAN         rune = '\uFC5E' // ﱞ (Shadda + Tanween Damm)
	SHADDA_KASRATAN         rune = '\uFC5F' // ﱟ (Shadda + Tanween Kasr)
	SHADDA_SUPERSCRIPT_ALEF rune = '\uFC63' // ﱣ

	// Eastern Arabic-Indic numerals (٠-٩), Unicode range U+0660 to U+0669.
	// The values are contiguous, so digit d maps to ARABIC_INDIC_ZERO + d.
	ARABIC_INDIC_ZERO  rune = '\u0660' // ٠
	ARABIC_INDIC_ONE   rune = '\u0661' // ١
	ARABIC_INDIC_TWO   rune = '\u0662' // ٢
	ARABIC_INDIC_THREE rune = '\u0663' // ٣
	ARABIC_INDIC_FOUR  rune = '\u0664' // ٤
	ARABIC_INDIC_FIVE  rune = '\u0665' // ٥
	ARABIC_INDIC_SIX   rune = '\u0666' // ٦
	ARABIC_INDIC_SEVEN rune = '\u0667' // ٧
	ARABIC_INDIC_EIGHT rune = '\u0668' // ٨
	ARABIC_INDIC_NINE  rune = '\u0669' // ٩
)
// ALLAH_LIGATURE holds code point U+FDF2 (ﷲ), the single-glyph Allah ligature.
const ALLAH_LIGATURE rune = '\uFDF2'
ALLAH_LIGATURE is the Unicode character for the Allah ligature (U+FDF2 ﷲ).
Variables ¶
var ( ALEF_HAMZA_ABOVE = Harf{ Unicode: '\u0623', Isolated: '\ufe83', Beginning: '\u0623', Middle: '\ufe84', Final: '\ufe84'} ALEF = Harf{ Unicode: '\u0627', Isolated: '\ufe8d', Beginning: '\u0627', Middle: '\ufe8e', Final: '\ufe8e'} ALEF_MADDA_ABOVE = Harf{ Unicode: '\u0622', Isolated: '\ufe81', Beginning: '\u0622', Middle: '\ufe82', Final: '\ufe82'} HAMZA = Harf{ Unicode: '\u0621', Isolated: '\ufe80', Beginning: '\u0621', Middle: '\u0621', Final: '\u0621'} WAW_HAMZA_ABOVE = Harf{ Unicode: '\u0624', Isolated: '\ufe85', Beginning: '\u0624', Middle: '\ufe86', Final: '\ufe86'} ALEF_HAMZA_BELOW = Harf{ Unicode: '\u0625', Isolated: '\ufe87', Beginning: '\u0625', Middle: '\ufe88', Final: '\ufe88'} YEH_HAMZA_ABOVE = Harf{ Unicode: '\u0626', Isolated: '\ufe89', Beginning: '\ufe8b', Middle: '\ufe8c', Final: '\ufe8a'} BEH = Harf{ Unicode: '\u0628', Isolated: '\ufe8f', Beginning: '\ufe91', Middle: '\ufe92', Final: '\ufe90'} PEH = Harf{ Unicode: '\u067e', Isolated: '\ufb56', Beginning: '\ufb58', Middle: '\ufb59', Final: '\ufb57'} TEH = Harf{ Unicode: '\u062A', Isolated: '\ufe95', Beginning: '\ufe97', Middle: '\ufe98', Final: '\ufe96'} TEH_MARBUTA = Harf{ Unicode: '\u0629', Isolated: '\ufe93', Beginning: '\u0629', Middle: '\u0629', Final: '\ufe94'} THEH = Harf{ Unicode: '\u062b', Isolated: '\ufe99', Beginning: '\ufe9b', Middle: '\ufe9c', Final: '\ufe9a'} JEEM = Harf{ Unicode: '\u062c', Isolated: '\ufe9d', Beginning: '\ufe9f', Middle: '\ufea0', Final: '\ufe9e'} // ـج TCHEH = Harf{ Unicode: '\u0686', Isolated: '\ufb7a', Beginning: '\ufb7c', Middle: '\ufb7d', Final: '\ufb7b'} HAH = Harf{ Unicode: '\u062d', Isolated: '\ufea1', Beginning: '\ufea3', Middle: '\ufea4', Final: '\ufea2'} KHAH = Harf{ Unicode: '\u062e', Isolated: '\ufea5', Beginning: '\ufea7', Middle: '\ufea8', Final: '\ufea6'} DAL = Harf{ Unicode: '\u062f', Isolated: '\ufea9', Beginning: '\u062f', Middle: '\ufeaa', Final: '\ufeaa'} THAL = Harf{ Unicode: '\u0630', Isolated: '\ufeab', Beginning: '\u0630', Middle: 
'\ufeac', Final: '\ufeac'} REH = Harf{ Unicode: '\u0631', Isolated: '\ufead', Beginning: '\u0631', Middle: '\ufeae', Final: '\ufeae'} JEH = Harf{ Unicode: '\u0698', Isolated: '\ufb8a', Beginning: '\u0698', Middle: '\ufb8b', Final: '\ufb8b', } ZAIN = Harf{ Unicode: '\u0632', Isolated: '\ufeaf', Beginning: '\u0632', Middle: '\ufeb0', Final: '\ufeb0'} SEEN = Harf{ Unicode: '\u0633', Isolated: '\ufeb1', Beginning: '\ufeb3', Middle: '\ufeb4', Final: '\ufeb2'} SHEEN = Harf{ Unicode: '\u0634', Isolated: '\ufeb5', Beginning: '\ufeb7', Middle: '\ufeb8', Final: '\ufeb6'} SAD = Harf{ Unicode: '\u0635', Isolated: '\ufeb9', Beginning: '\ufebb', Middle: '\ufebc', Final: '\ufeba'} DAD = Harf{ Unicode: '\u0636', Isolated: '\ufebd', Beginning: '\ufebf', Middle: '\ufec0', Final: '\ufebe'} TAH = Harf{ Unicode: '\u0637', Isolated: '\ufec1', Beginning: '\ufec3', Middle: '\ufec4', Final: '\ufec2'} ZAH = Harf{ Unicode: '\u0638', Isolated: '\ufec5', Beginning: '\ufec7', Middle: '\ufec8', Final: '\ufec6'} AIN = Harf{ Unicode: '\u0639', Isolated: '\ufec9', Beginning: '\ufecb', Middle: '\ufecc', Final: '\ufeca'} GHAIN = Harf{ Unicode: '\u063a', Isolated: '\ufecd', Beginning: '\ufecf', Middle: '\ufed0', Final: '\ufece'} FEH = Harf{ Unicode: '\u0641', Isolated: '\ufed1', Beginning: '\ufed3', Middle: '\ufed4', Final: '\ufed2'} QAF = Harf{ Unicode: '\u0642', Isolated: '\ufed5', Beginning: '\ufed7', Middle: '\ufed8', Final: '\ufed6'} KAF = Harf{ Unicode: '\u0643', Isolated: '\ufed9', Beginning: '\ufedb', Middle: '\ufedc', Final: '\ufeda'} KEHEH = Harf{ Unicode: '\u06a9', Isolated: '\ufb8e', Beginning: '\ufb90', Middle: '\ufb91', Final: '\ufb8f', } GAF = Harf{ Unicode: '\u06af', Isolated: '\ufb92', Beginning: '\ufb94', Middle: '\ufb95', Final: '\ufb93'} LAM = Harf{ Unicode: '\u0644', Isolated: '\ufedd', Beginning: '\ufedf', Middle: '\ufee0', Final: '\ufede'} MEEM = Harf{ Unicode: '\u0645', Isolated: '\ufee1', Beginning: '\ufee3', Middle: '\ufee4', Final: '\ufee2'} NOON = Harf{ Unicode: '\u0646', 
Isolated: '\ufee5', Beginning: '\ufee7', Middle: '\ufee8', Final: '\ufee6'} HEH = Harf{ Unicode: '\u0647', Isolated: '\ufee9', Beginning: '\ufeeb', Middle: '\ufeec', Final: '\ufeea'} WAW = Harf{ Unicode: '\u0648', Isolated: '\ufeed', Beginning: '\u0648', Middle: '\ufeee', Final: '\ufeee'} YEH = Harf{ Unicode: '\u06cc', Isolated: '\ufbfc', Beginning: '\ufbfe', Middle: '\ufbff', Final: '\ufbfd'} ARABICYEH = Harf{ Unicode: '\u064a', Isolated: '\ufef1', Beginning: '\ufef3', Middle: '\ufef4', Final: '\ufef2'} ALEF_MAKSURA = Harf{ Unicode: '\u0649', Isolated: '\ufeef', Beginning: '\u0649', Middle: '\ufef0', Final: '\ufef0'} TATWEEL = Harf{ Unicode: '\u0640', Isolated: '\u0640', Beginning: '\u0640', Middle: '\u0640', Final: '\u0640'} LAM_ALEF = Harf{ Unicode: '\ufefb', Isolated: '\ufefb', Beginning: '\ufefb', Middle: '\ufefc', Final: '\ufefc'} LAM_ALEF_HAMZA_ABOVE = Harf{ Unicode: '\ufef7', Isolated: '\ufef7', Beginning: '\ufef7', Middle: '\ufef8', Final: '\ufef8'} )
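The Arabic alphabet, expressed as Harf values. Each Harf holds a letter's base code point together with its isolated, beginning, middle, and final forms.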
Functions ¶
func GetShaddaLigature ¶
GetShaddaLigature returns the combined Shadda+Vowel ligature for a given vowel. Returns 0 if no ligature exists for the vowel.
func IsTashkeel ¶
IsTashkeel reports whether the rune is an Arabic diacritical mark (tashkeel).
func IsWesternDigit ¶
IsWesternDigit returns true if the rune is a Western Arabic digit (0-9).
func ToArabic ¶
ToArabic processes Arabic text for proper display. It handles character joining, ligatures (Lam-Alef, Allah), tashkeel, converts Western digits (0-9) to Eastern Arabic-Indic (٠-٩), and reverses the text for RTL rendering.
func ToEasternDigit ¶
ToEasternDigit converts a Western Arabic digit (0-9) to Eastern Arabic-Indic (٠-٩). Returns the original rune if not a Western digit.

