-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdeva2han.cs
More file actions
165 lines (138 loc) · 6.01 KB
/
deva2han.cs
File metadata and controls
165 lines (138 loc) · 6.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
namespace CST.Conversion
{
/// <summary>
/// Command-line converter that transliterates the Devanagari text nodes of an
/// XML file into Chinese characters, leaving the markup itself untouched.
/// </summary>
class Deva2Han
{
    /// <summary>
    /// Matches either a complete tag (group 1) or a run of text between tags (group 2).
    /// </summary>
    private static readonly Regex TagOrText =
        new Regex("(<[^>]+>)|([^<]+)", RegexOptions.Compiled);

    /// <summary>
    /// Matches a consonant, a virama, and the same consonant again (a doubled consonant).
    /// </summary>
    private static readonly Regex DoubledConsonant =
        new Regex("([\u0915-\u0939])\u094D\\1", RegexOptions.Compiled);

    /// <summary>
    /// Devanagari code point to Chinese replacement string. Built once and only read afterwards.
    /// </summary>
    private static readonly Dictionary<char, string> HanByDeva = BuildTable();

    /// <summary>
    /// Entry point. Expects an input file path and an output directory; writes the
    /// converted file into that directory under the input file's name.
    /// </summary>
    /// <param name="args">args[0] = input file, args[1] = output directory.</param>
    static void Main(string[] args)
    {
        try
        {
            if (args.Length < 2)
            {
                PrintUsage();
                return;
            }

            FileInfo inputFile = new FileInfo(args[0]);
            if (!inputFile.Exists)
            {
                Console.WriteLine("Input file does not exist.");
                // Let calling scripts detect the failure.
                Environment.ExitCode = 1;
                return;
            }

            DirectoryInfo outputDir = new DirectoryInfo(args[1]);
            if (!outputDir.Exists)
            {
                outputDir.Create();
            }

            Deva2Han converter = new Deva2Han();
            converter.InputFilePath = args[0];
            converter.OutputFilePath = Path.Combine(outputDir.FullName, inputFile.Name);
            converter.ConvertFile();

            Console.WriteLine("Transliteration to Hanzi (Sino-Pali) Complete: " + converter.OutputFilePath);
        }
        catch (Exception ex)
        {
            Console.WriteLine("Error: " + ex.Message);
            // Report the failure through the process exit code as well as the message.
            Environment.ExitCode = 1;
        }
    }

    /// <summary>
    /// Prints the command-line syntax to standard output.
    /// </summary>
    static void PrintUsage()
    {
        Console.WriteLine("Transliterates Unicode Devanagari to Unicode Chinese (Mahayana Standard)");
        Console.WriteLine("Syntax: deva2han <inputfile> <outputdirectory>");
    }

    /// <summary>
    /// Creates a converter. The transliteration table is shared by all instances.
    /// </summary>
    public Deva2Han()
    {
    }

    /// <summary>Path of the file to read.</summary>
    public string InputFilePath { get; set; }

    /// <summary>Path of the file to write.</summary>
    public string OutputFilePath { get; set; }

    /// <summary>
    /// Reads <see cref="InputFilePath"/>, points the stylesheet reference at
    /// "tipitaka-han.xsl", transliterates every run of text that lies outside a
    /// tag, and writes the result to <see cref="OutputFilePath"/> as UTF-8.
    /// </summary>
    public void ConvertFile()
    {
        string source = File.ReadAllText(InputFilePath, Encoding.UTF8);

        // Change the stylesheet link.
        source = source.Replace("tipitaka-deva.xsl", "tipitaka-han.xsl");

        // XML tag protection: tags are copied verbatim, only the text between them is converted.
        string converted = TagOrText.Replace(source, delegate(Match m)
        {
            return m.Groups[1].Success ? m.Groups[1].Value : ConvertText(m.Groups[2].Value);
        });

        File.WriteAllText(OutputFilePath, converted, Encoding.UTF8);
    }

    /// <summary>
    /// Transliterates one run of text. Characters without a table entry
    /// (Latin letters, digits, entity references, whitespace, ...) pass through unchanged.
    /// </summary>
    /// <param name="text">Text taken from between two tags; may be null or empty.</param>
    /// <returns>The transliterated text, or <paramref name="text"/> itself when it is null or empty.</returns>
    private string ConvertText(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        // Collapse doubled consonants (e.g. ss -> s): a doubled consonant is
        // represented by a single Chinese character for brevity.
        text = DoubledConsonant.Replace(text, "$1");

        // No inherent-'a' marker is inserted: every consonant entry already stands
        // for the whole syllable. The earlier marker scheme erased every literal
        // Latin 'a' as a side effect, corrupting text such as "&amp;".
        StringBuilder output = new StringBuilder(text.Length);
        foreach (char c in text)
        {
            string han;
            if (HanByDeva.TryGetValue(c, out han))
            {
                output.Append(han);
            }
            else
            {
                output.Append(c);
            }
        }

        return output.ToString();
    }

    /// <summary>
    /// Builds the Devanagari-to-Chinese lookup table.
    /// </summary>
    private static Dictionary<char, string> BuildTable()
    {
        Dictionary<char, string> map = new Dictionary<char, string>();

        // === VOWELS (independent & dependent) ===
        map['\u0905'] = "阿"; // a
        map['\u0906'] = "阿"; // aa
        map['\u0907'] = "伊"; // i
        map['\u0908'] = "伊"; // ii
        map['\u0909'] = "優"; // u
        map['\u090A'] = "優"; // uu
        map['\u090F'] = "翳"; // e
        map['\u0913'] = "摩"; // o (Namo -> 那摩)
        map['\u093E'] = "";   // aa sign
        map['\u093F'] = "伊"; // i sign
        map['\u0940'] = "伊"; // ii sign
        map['\u0941'] = "優"; // u sign
        map['\u0942'] = "優"; // uu sign
        map['\u0947'] = "翳"; // e sign
        // NOTE(review): the o sign appends 摩 after the consonant's own character,
        // so "namo" comes out as 那摩摩 - confirm this is intended.
        map['\u094B'] = "摩"; // o sign

        // === CONSONANTS ===
        // NOTE(review): U+0933 (lla) has no entry and therefore passes through
        // untransliterated - confirm whether a mapping is wanted.
        map['\u0915'] = "迦"; // ka
        map['\u0916'] = "佉"; // kha
        map['\u0917'] = "伽"; // ga
        map['\u0918'] = "伽"; // gha
        map['\u0919'] = "哦"; // nga
        map['\u091A'] = "遮"; // ca
        map['\u091B'] = "車"; // cha
        map['\u091C'] = "闍"; // ja
        map['\u091D'] = "闍"; // jha
        map['\u091E'] = "若"; // nya
        map['\u091F'] = "吒"; // Ta
        map['\u0920'] = "他"; // Tha
        map['\u0921'] = "茶"; // Da
        map['\u0922'] = "荼"; // Dha
        map['\u0923'] = "拏"; // Na
        map['\u0924'] = "多"; // ta
        map['\u0925'] = "他"; // tha
        map['\u0926'] = "陀"; // da
        map['\u0927'] = "馱"; // dha
        map['\u0928'] = "那"; // na
        map['\u092A'] = "波"; // pa
        map['\u092B'] = "頗"; // pha
        map['\u092C'] = "婆"; // ba
        map['\u092D'] = "梵"; // bha
        map['\u092E'] = "摩"; // ma
        map['\u092F'] = "也"; // ya
        map['\u0930'] = "羅"; // ra
        map['\u0932'] = "羅"; // la
        map['\u0935'] = "縛"; // va
        map['\u0938'] = "薩"; // sa
        map['\u0939'] = "訶"; // ha

        // === SPECIAL & PUNCTUATION ===
        map['\u0902'] = "南"; // Anusvara (南 stands for the closed-lips sound at the end of a syllable)
        map['\u094D'] = "";   // Virama
        map['\u0964'] = "。"; // Danda
        map['\u0965'] = "॥"; // Double Danda (kept as is)
        map['\u0970'] = ".";  // Abbreviation sign

        return map;
    }
}
}