.
1 /*
2 Copyright (C) 2013 2015 Johan Mattsson
3
4 This library is free software; you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 3 of the
7 License, or (at your option) any later version.
8
9 This library is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13 */
14
15 using Gee;
16 using Sqlite;
17
18 namespace BirdFont {
19
/**
 * Generates a SQLite database with descriptions of Unicode characters.
 *
 * The parser reads a text file in which every entry starts with a line
 * of the form //hexadecimal code point, tab, text// and continues on the
 * following lines that begin with a tab. The complete entry is stored in
 * the table Description and every word of it is stored in the table
 * Words, which makes it possible to search for characters by word.
 */
public class CharDatabaseParser : GLib.Object {
	/** Number of entries that are stored before the running transaction is committed. */
	private const uint64 ENTRIES_PER_TRANSACTION = 1000;

	// db is a borrowed pointer to the connection owned by database,
	// both fields are assigned together in open_database.
	static unowned Database db;
	static Database? database = null;

	// Every character that has been handled by add_entry is recorded here.
	GlyphRange utf8 = new GlyphRange ();

	public CharDatabaseParser () {
	}

	/**
	 * Locates the database file.
	 *
	 * @return the file named by the value of BirdFont.get_argument ("--parse-ucd")
	 * if that value is set and not empty, otherwise "ucd.sqlite" relative to
	 * the current directory
	 */
	public File get_database_file () {
		string? fn = BirdFont.get_argument ("--parse-ucd");

		if (fn != null && ((!) fn) != "") {
			return File.new_for_path ((!) fn);
		}

		return File.new_for_path ("ucd.sqlite");
	}

	/**
	 * Deletes the existing database file, if there is one, and builds a
	 * new database from the Unicode character database.
	 *
	 * Errors are logged, nothing is thrown to the caller.
	 */
	public void regenerate_database () {
		File f = get_database_file ();

		stdout.printf ("Generating sqlite database in: %s\n", (!) f.get_path ());

		try {
			if (f.query_exists ()) {
				f.delete ();
			}

			open_database ();

			// open_database has already reported the problem and there
			// is nothing to write the tables to.
			if (database == null) {
				return;
			}

			create_tables ();
			parse_all_entries ();
		} catch (GLib.Error e) {
			// e.message is data, it must not be interpreted as a format string
			warning ("%s", e.message);
		}
	}

	/**
	 * Opens the file returned by get_database_file as SQLite database.
	 *
	 * A failure is reported on stderr. The connection is released in that
	 * case and all subsequent statements are skipped with a warning
	 * instead of being executed against an unusable handle.
	 */
	public void open_database () {
		File f = get_database_file ();
		int rc = Database.open ((!) f.get_path (), out database);

		if (rc != Sqlite.OK) {
			if (database != null) {
				stderr.printf ("Can't open database: %d, %s\n", rc, ((!) database).errmsg ());
			} else {
				stderr.printf ("Can't open database: %d\n", rc);
			}

			database = null;
		}

		db = (!) database;
	}

	/**
	 * Creates the tables Description and Words and an index on the
	 * column word of the table Words.
	 *
	 * A warning is logged for every statement that fails.
	 */
	public void create_tables () {
		string description_table = """
			CREATE TABLE Description (
				unicode INTEGER PRIMARY KEY NOT NULL,
				description TEXT NOT NULL
			);
		""";

		string index_table = """
			CREATE TABLE Words (
				unicode INTEGER NOT NULL,
				word TEXT NOT NULL
			);
		""";

		string create_index = "CREATE INDEX word_index ON Words (word);";

		exec_sql (description_table);
		exec_sql (index_table);
		exec_sql (create_index);
	}

	/**
	 * Adds one row to the table Words.
	 *
	 * @param character code point the word belongs to
	 * @param word search word, single quotes are escaped before the word
	 * is put in the SQL statement
	 */
	public void insert_lookup (int64 character, string word) {
		string query = """
		INSERT INTO Words (unicode, word)
		VALUES (""" + character.to_string () + """, '""" + word.replace ("'", "''") + "');";

		exec_sql (query, true);
	}

	/**
	 * Adds one row to the table Description.
	 *
	 * @param character code point of the described character
	 * @param description text for the character, single quotes are
	 * escaped before the text is put in the SQL statement
	 */
	public void insert_entry (int64 character, string description) {
		string query = """
		INSERT INTO Description (unicode, description)
		VALUES (""" + character.to_string () + """, '""" + description.replace ("'", "''") + "');";

		if (!exec_sql (query, true)) {
			warning ("%s", @"Can't insert description to: $(character)");
		}
	}

	/**
	 * Executes a SQL statement and logs a warning if it fails.
	 *
	 * @param sql the statement
	 * @param echo_on_error print the statement on stderr before the
	 * warning if the statement fails
	 * @return true if the statement was executed without errors
	 */
	private bool exec_sql (string sql, bool echo_on_error = false) {
		string? errmsg;
		string message;
		int ec;

		if (database == null) {
			warning ("Can't execute SQL statement, the database is not open.");
			return false;
		}

		ec = db.exec (sql, null, out errmsg);

		if (ec == Sqlite.OK) {
			return true;
		}

		if (echo_on_error) {
			// the statement contains text from the input file and is
			// therefore passed as argument and not as format string
			stderr.printf ("%s", sql);
		}

		message = db.errmsg ();

		if (errmsg != null) {
			message = (!) errmsg;
		}

		warning ("Error: %s\n", message);
		return false;
	}

	/**
	 * Stores one entry of the character database.
	 *
	 * The unmodified entry is written to the table Description. A lower
	 * case copy without punctuation and filler words is split on tabs,
	 * line breaks and spaces and each resulting word is written to the
	 * table Words. Entries beginning with "@" or ";" are ignored.
	 *
	 * @param data the entry including its continuation lines
	 */
	private void add_entry (string data) {
		string[] e;
		string[] r;
		string[] d;
		string index_values;
		unichar ch;

		if (data.has_prefix ("@")) { // ignore comments
			return;
		}

		if (data.has_prefix (";")) {
			return;
		}

		index_values = data.down ();
		index_values = index_values.replace ("\n\tx", "");
		index_values = index_values.replace ("\n\t*", "");
		index_values = index_values.replace ("\n\t=", "");
		index_values = index_values.replace ("\n\t#", "");
		index_values = index_values.replace (" - ", " ");
		index_values = index_values.replace ("(", "");
		index_values = index_values.replace (")", "");
		index_values = index_values.replace ("<font>", "");
		index_values = index_values.replace (" a ", " ");
		index_values = index_values.replace (" is ", " ");
		index_values = index_values.replace (" the ", " ");

		e = index_values.split ("\t");

		return_if_fail (e.length > 0);

		// The first field is the code point as hexadecimal number,
		// it is already in lower case at this point.
		ch = Font.to_unichar ("U+" + e[0]);

		// The character is passed as argument since U+0025 (the percent
		// sign) would be interpreted as a conversion specification
		// if it was a part of the format string.
		stdout.printf ("Adding %s\n", (!) ch.to_string ());

		insert_entry ((int64) ch, data);
		utf8.add_single (ch);

		foreach (string s in e) {
			r = s.split ("\n");
			foreach (string t in r) {
				d = t.split (" ");
				foreach (string token in d) {
					if (token != "") {
						insert_lookup ((int64) ch, token);
					}
				}
			}
		}
	}

	/**
	 * Stores a complete entry and commits the running transaction when
	 * ENTRIES_PER_TRANSACTION entries have been stored in it.
	 *
	 * Empty entries and entries containing the text "<not a character>"
	 * are skipped.
	 *
	 * @param description the entry including its continuation lines
	 * @param pending number of entries stored in the running transaction
	 */
	private void store_description (string description, ref uint64 pending) {
		if (description == "") {
			return;
		}

		if (description.index_of ("<not a character>") != -1) {
			return;
		}

		add_entry (description);
		pending++;

		if (pending >= ENTRIES_PER_TRANSACTION) {
			print ("Write to database\n");

			exec_sql ("END TRANSACTION");
			exec_sql ("BEGIN TRANSACTION");

			pending = 0;
		}
	}

	/**
	 * Reads the file returned by get_unicode_database and stores all
	 * entries in the database.
	 *
	 * A line that begins with a tab is a continuation of the entry that
	 * was started on a preceding line, all other lines start a new entry.
	 * The rows are written in transactions of ENTRIES_PER_TRANSACTION
	 * entries.
	 */
	private void parse_all_entries () {
		FileInputStream fin;
		DataInputStream din;
		string? line;
		string data;
		string description = "";
		bool has_entry = false;
		uint64 pending = 0;
		File file = get_unicode_database ();

		exec_sql ("BEGIN TRANSACTION");

		try {
			fin = file.read ();
			din = new DataInputStream (fin);

			while ((line = din.read_line (null)) != null) {
				data = (!) line;

				if (has_entry && data.has_prefix ("\t")) {
					description += "\n";
					description += data;
					continue;
				}

				// A new entry begins, the previous one is complete.
				if (has_entry) {
					store_description (description, ref pending);
				}

				description = data;
				has_entry = true;
			}

			// The last entry in the file is not followed by a new entry,
			// it has to be stored when the end of the file is reached.
			if (has_entry) {
				store_description (description, ref pending);
			}

			if (description == "") {
				warning ("no description found");
			}

			fin.close ();
			din.close ();
		} catch (GLib.Error e) {
			warning ("%s", e.message);
			warning ("In %s", (!) file.get_path ());
		}

		exec_sql ("END TRANSACTION");

		stdout.printf ("Done");
	}

	/**
	 * @return the file returned by SearchPaths.get_char_database
	 */
	File get_unicode_database () {
		return SearchPaths.get_char_database ();
	}
}
268
269 }
270