The Birdfont Source Code


All Repositories / birdfont.git / blob – RSS feed

CharDatabaseParser.vala in libbirdfont

This file is a part of the Birdfont project.

Contributing

Send patches or pull requests to johan.mattsson.m@gmail.com.
Clone this repository: git clone https://github.com/johanmattssonm/birdfont.git

Revisions

View the latest version of libbirdfont/CharDatabaseParser.vala.
Updated UCD (Unicode Character Database)
/*
	Copyright (C) 2013 2015 2018 Johan Mattsson

	This library is free software; you can redistribute it and/or modify
	it under the terms of the GNU Lesser General Public License as
	published by the Free Software Foundation; either version 3 of the
	License, or (at your option) any later version.

	This library is distributed in the hope that it will be useful, but
	WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	Lesser General Public License for more details.
*/

using Gee;
using Sqlite;

namespace BirdFont {

/**
 * Builds an sqlite database from the character database text file.
 *
 * The generated database has a Description table (one row per code point)
 * and a Words table (one row per lower-cased word in a description) that
 * is indexed on the word column.
 */
public class CharDatabaseParser : GLib.Object {
	/** Number of parsed entries written before the transaction is committed. */
	private const uint64 ENTRIES_PER_TRANSACTION = 1000;

	static unowned Database db;
	static Database? database = null;

	/** Every code point that has been passed to add_entry and stored. */
	GlyphRange utf8 = new GlyphRange ();

	public CharDatabaseParser () {
	}

	/**
	 * Returns the sqlite file to generate.
	 *
	 * @return the path given with the --parse-ucd argument, or the
	 *         relative path "ucd.sqlite" when the argument is missing or empty
	 */
	public File get_database_file () {
		string? fn = BirdFont.get_argument ("--parse-ucd");

		if (fn != null && ((!) fn) != "") {
			return File.new_for_path ((!) fn);
		}

		return File.new_for_path ("ucd.sqlite");
	}

	/**
	 * Deletes any existing database file, creates a new one and fills it
	 * with all entries of the character database.
	 */
	public void regenerate_database () {
		File f = get_database_file ();

		stdout.printf ("Generating sqlite database in: %s\n", (!) f.get_path ());

		try {
			if (f.query_exists ()) {
				f.delete ();
			}

			bool open = open_database (OPEN_READWRITE | OPEN_CREATE);

			if (open) {
				create_tables ();
				parse_all_entries ();
			}
		} catch (GLib.Error e) {
			// warning () is printf-style, never pass dynamic text as the format.
			warning ("%s", e.message);
		}
	}

	/**
	 * Opens the file returned by get_database_file.
	 *
	 * @param access_mode sqlite open flags
	 * @return true if the database was opened
	 */
	public bool open_database (int access_mode) {
		File f = get_database_file ();
		string path = (!) f.get_path ();
		int rc = Database.open_v2 (path, out database, access_mode);

		// Without a handle there is nothing to ask for an error message.
		if (database == null) {
			stderr.printf ("File: %s\n", path);
			stderr.printf ("Can't open database: %d\n", rc);
			return false;
		}

		db = (!) database;

		if (rc != Sqlite.OK) {
			stderr.printf ("File: %s\n", path);
			stderr.printf ("Can't open database: %d, %s\n", rc, db.errmsg ());
			return false;
		}

		return true;
	}

	/** Runs one SQL statement and logs a warning if sqlite reports an error. */
	private void exec_or_warn (string sql) {
		string? errmsg;
		int ec = db.exec (sql, null, out errmsg);

		if (ec != Sqlite.OK) {
			warning ("Error: %s\n", (!) errmsg);
		}
	}

	/** Creates the Description and Words tables and the index on Words.word. */
	public void create_tables () {
		string description_table = """
			CREATE TABLE Description (
				unicode         INTEGER     PRIMARY KEY    NOT NULL,
				description     TEXT                       NOT NULL
			);
		""";

		exec_or_warn (description_table);

		string index_table = """
			CREATE TABLE Words (
				unicode        INTEGER     NOT NULL,
				word           TEXT        NOT NULL
			);
		""";

		exec_or_warn (index_table);

		string create_index = "CREATE INDEX word_index ON Words (word);";

		exec_or_warn (create_index);
	}

	/**
	 * Adds one row to the Words table.
	 *
	 * @param character code point the word belongs to
	 * @param word search word, stored in lower case
	 */
	public void insert_lookup (int64 character, string word) {
		string? errmsg;
		string w = word.down ();
		// Single quotes are doubled so that the word is a valid SQL string literal.
		string query = """
			INSERT INTO Words (unicode, word)
			VALUES (""" + @"$((int64) character)" + """, '""" + w.replace ("'", "''") + "');";
		int ec = db.exec (query, null, out errmsg);

		if (ec != Sqlite.OK) {
			// The query holds file data, print it as an argument and not as a format.
			stderr.printf ("%s", query);
			warning ("Error: %s\n", (!) errmsg);
		}
	}

	/**
	 * Returns the contextual form named at the end of the first line
	 * of a description.
	 *
	 * @param description a database entry
	 * @return "INITIAL", "MEDIAL", "FINAL", "ISOLATED" or "NONE"
	 */
	public string get_context_substitution (string description) {
		string[] lines = description.split ("\n");
		return_val_if_fail (lines.length > 0, "NONE");

		string first_line = lines[0];
		string type = "NONE";

		if (first_line.has_suffix ("INITIAL FORM")) {
			type = "INITIAL";
		} else if (first_line.has_suffix ("MEDIAL FORM")) {
			type = "MEDIAL";
		} else if (first_line.has_suffix ("FINAL FORM")) {
			type = "FINAL";
		} else if (first_line.has_suffix ("ISOLATED FORM")) {
			type = "ISOLATED";
		}

		return type;
	}

	/**
	 * Returns the text after the first tab on the first line of a
	 * description, with surrounding white space removed.
	 *
	 * @param description a database entry
	 * @return the name, or the whole first line if it has no tab
	 */
	public string get_name (string description) {
		string[] lines = description.split ("\n");
		return_val_if_fail (lines.length > 0, "NONE");

		string first_line = lines[0];
		// index_of returns -1 when there is no tab, the substring then starts at 0.
		int separator = first_line.index_of ("\t");
		string name = first_line.substring (separator + "\t".length);
		return name.strip ();
	}

	/**
	 * Adds one row to the Description table.
	 *
	 * @param character code point of the entry
	 * @param description complete text of the entry
	 */
	public void insert_entry (int64 character, string description) {
		string? errmsg;

		string query = """
			INSERT INTO Description (unicode, description)
			VALUES (""" + @"$((int64) character)" + ", "
			+ "'" + description.replace ("'", "''") + "');";

		int ec = db.exec (query, null, out errmsg);

		if (ec != Sqlite.OK) {
			stderr.printf ("%s", query);
			warning ("Error: %s\n", (!) errmsg);
			warning ("%s", @"Can't insert description to: $(character)");
		}
	}

	/**
	 * Stores one entry and all search words found in it.
	 *
	 * @param data a header line followed by its tab-indented lines,
	 *             joined with "\n"
	 */
	private void add_entry (string data) {
		string[] e;
		string[] r;
		string[] d;
		string index_values;
		unichar ch;
		string unicode_hex;

		if (data.has_prefix ("@")) { // ignore comments
			return;
		}

		if (data.has_prefix (";")) {
			return;
		}

		// Lower case copy without line markers, punctuation and filler words,
		// only used for building the word index.
		index_values = data.down ();
		index_values = index_values.replace ("\n\tx", "");
		index_values = index_values.replace ("\n\t*", "");
		index_values = index_values.replace ("\n\t=", "");
		index_values = index_values.replace ("\n\t#", "");
		index_values = index_values.replace (",", " ");
		index_values = index_values.replace (" - ", " ");
		index_values = index_values.replace ("(", "");
		index_values = index_values.replace (")", "");
		index_values = index_values.replace ("<font>", "");
		index_values = index_values.replace (" a ", " ");
		index_values = index_values.replace (" is ", " ");
		index_values = index_values.replace (" the ", " ");

		e = index_values.split ("\t");

		return_if_fail (e.length > 0);

		// The first tab separated field is the code point in hexadecimal.
		unicode_hex = e[0].up ();

		ch = Font.to_unichar ("U+" + unicode_hex.down ());
		insert_entry ((int64) ch, data);
		utf8.add_single (ch);

		foreach (string s in e) {
			r = s.split ("\n");

			foreach (string t in r) {
				// NOTE(review): the text was split on "\t" above, so no part can
				// start with "\t~" and this test is always true. It looks like
				// lines starting with "~" were meant to be left out of the index.
				// Confirm before changing, it alters the generated database.
				if (!t.has_prefix ("\t~")) {
					d = t.split (" ");
					foreach (string token in d) {
						if (token != "") {
							insert_lookup ((int64) ch, token);
						}
					}
				}
			}
		}
	}

	/**
	 * Reads the character database line by line and stores every entry.
	 *
	 * A line that does not begin with a tab starts a new entry and the
	 * tab-indented lines after it belong to that entry. Entries containing
	 * "<not a character>" are skipped.
	 */
	private void parse_all_entries () {
		string description = "";
		uint64 transaction_number = 0;
		File file = get_unicode_database ();

		exec_or_warn ("BEGIN TRANSACTION");

		try {
			FileInputStream fin = file.read ();
			DataInputStream din = new DataInputStream (fin);
			string? line = din.read_line (null);

			// An empty file gives null on the first read and skips the loop.
			while (line != null) {
				description = (!) line;

				// Collect the indented lines of this entry. The loop ends with
				// the next header in line, or with null at the end of the file.
				while ((line = din.read_line (null)) != null) {
					string continuation = (!) line;

					if (!continuation.has_prefix ("\t")) {
						break;
					}

					description += "\n";
					description += continuation;
				}

				// Runs for the last entry of the file as well, which has no
				// following header line.
				if (description.index_of ("<not a character>") == -1) {
					add_entry (description);
					transaction_number++;

					// Commit in batches.
					if (transaction_number >= ENTRIES_PER_TRANSACTION) {
						exec_or_warn ("END TRANSACTION");
						exec_or_warn ("BEGIN TRANSACTION");
						transaction_number = 0;
					}
				}
			}

			if (description == "") {
				warning ("no description found");
			}

			fin.close ();
			din.close ();
		} catch (GLib.Error e) {
			warning ("%s", e.message);
			warning ("In %s", (!) file.get_path ());
		}

		exec_or_warn ("END TRANSACTION");

		stdout.printf ("Done");
	}

	/** Returns the character database text file located by SearchPaths. */
	File get_unicode_database () {
		return SearchPaths.get_char_database ();
	}
}

}