| 1 | /* ***** BEGIN LICENSE BLOCK ***** |
|---|
| 2 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
|---|
| 3 | * |
|---|
| 4 | * The contents of this file are subject to the Mozilla Public License Version |
|---|
| 5 | * 1.1 (the "License"); you may not use this file except in compliance with |
|---|
| 6 | * the License. You may obtain a copy of the License at |
|---|
| 7 | * http://www.mozilla.org/MPL/ |
|---|
| 8 | * |
|---|
| 9 | * Software distributed under the License is distributed on an "AS IS" basis, |
|---|
| 10 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
|---|
| 11 | * for the specific language governing rights and limitations under the |
|---|
| 12 | * License. |
|---|
| 13 | * |
|---|
| 14 | * The Original Code is Mozilla. |
|---|
| 15 | * |
|---|
| 16 | * The Initial Developer of the Original Code is Darin Fisher. |
|---|
| 17 | * Portions created by the Initial Developer are Copyright (C) 2003 |
|---|
| 18 | * the Initial Developer. All Rights Reserved. |
|---|
| 19 | * |
|---|
| 20 | * Contributor(s): |
|---|
| 21 | * Darin Fisher <darin@meer.net> |
|---|
| 22 | * |
|---|
| 23 | * Alternatively, the contents of this file may be used under the terms of |
|---|
| 24 | * either the GNU General Public License Version 2 or later (the "GPL"), or |
|---|
| 25 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
|---|
| 26 | * in which case the provisions of the GPL or the LGPL are applicable instead |
|---|
| 27 | * of those above. If you wish to allow use of your version of this file only |
|---|
| 28 | * under the terms of either the GPL or the LGPL, and not to allow others to |
|---|
| 29 | * use your version of this file under the terms of the MPL, indicate your |
|---|
| 30 | * decision by deleting the provisions above and replace them with the notice |
|---|
| 31 | * and other provisions required by the GPL or the LGPL. If you do not delete |
|---|
| 32 | * the provisions above, a recipient may use your version of this file under |
|---|
| 33 | * the terms of any one of the MPL, the GPL or the LGPL. |
|---|
| 34 | * |
|---|
| 35 | * ***** END LICENSE BLOCK ***** */ |
|---|
| 36 | |
|---|
| 37 | #include <stdlib.h> |
|---|
| 38 | #include <string.h> |
|---|
| 39 | #include <ctype.h> |
|---|
| 40 | #include "prefread.h" |
|---|
| 41 | #include "nsString.h" |
|---|
| 42 | #include "nsUTF8Utils.h" |
|---|
| 43 | |
|---|
| 44 | #ifdef TEST_PREFREAD |
|---|
| 45 | #include <stdio.h> |
|---|
| 46 | #define NS_WARNING(_s) printf(">>> " _s "!\n") |
|---|
| 47 | #define NS_NOTREACHED(_s) NS_WARNING(_s) |
|---|
| 48 | #else |
|---|
| 49 | #include "nsDebug.h" // for NS_WARNING |
|---|
| 50 | #endif |
|---|
| 51 | |
|---|
/*
 * States of the pref-file parser.  The current state is carried in the
 * parse state object between calls, so a statement may be split across
 * input buffers.
 */
enum {
    PREF_PARSE_INIT = 0,                /* between statements; skips chars until '/', '#', 'u' or 'p' */
    PREF_PARSE_MATCH_STRING,            /* matching the rest of a keyword (smatch) */
    PREF_PARSE_UNTIL_NAME,              /* after '(': waiting for the name's opening quote */
    PREF_PARSE_QUOTED_STRING,           /* copying a quoted string into the line buffer */
    PREF_PARSE_UNTIL_COMMA,             /* after the name: waiting for ',' */
    PREF_PARSE_UNTIL_VALUE,             /* after ',': waiting for the first char of the value */
    PREF_PARSE_INT_VALUE,               /* collecting the digits of an integer value */
    PREF_PARSE_COMMENT_MAYBE_START,     /* saw '/': expecting '*' or '/' */
    PREF_PARSE_COMMENT_BLOCK,           /* inside a block comment */
    PREF_PARSE_COMMENT_BLOCK_MAYBE_END, /* saw '*' inside a block comment */
    PREF_PARSE_ESC_SEQUENCE,            /* saw '\\' inside a quoted string */
    PREF_PARSE_HEX_ESCAPE,              /* reading the hex digits of \xHH or \uHHHH */
    PREF_PARSE_UTF16_LOW_SURROGATE,     /* have a high surrogate; expecting "\u" of the low one */
    PREF_PARSE_UNTIL_OPEN_PAREN,        /* after the keyword: waiting for '(' */
    PREF_PARSE_UNTIL_CLOSE_PAREN,       /* after the value: waiting for ')' */
    PREF_PARSE_UNTIL_SEMICOLON,         /* after ')': waiting for ';' */
    PREF_PARSE_UNTIL_EOL                /* skipping the remainder of a comment line */
};

/* number of hex digits in a \uHHHH and a \xHH escape, and bits per digit */
#define UTF16_ESC_NUM_DIGITS    4
#define HEX_ESC_NUM_DIGITS      2
#define BITS_PER_HEX_DIGIT      4

/* keywords recognised by the parser; kPref and kTrue are also compared
 * by address, so these must remain the unique instances. */
static const char kUserPref[] = "user_pref";
static const char kPref[]     = "pref";
static const char kTrue[]     = "true";
static const char kFalse[]    = "false";
|---|
| 81 | |
|---|
| 82 | /** |
|---|
| 83 | * pref_GrowBuf |
|---|
| 84 | * |
|---|
| 85 | * this function will increase the size of the buffer owned |
|---|
| 86 | * by the given pref parse state. We currently use a simple |
|---|
| 87 | * doubling algorithm, but the only hard requirement is that |
|---|
| 88 | * it increase the buffer by at least the size of the ps->esctmp |
|---|
| 89 | * buffer used for escape processing (currently 6 bytes). |
|---|
| 90 | * |
|---|
| 91 | * this buffer is used to store partial pref lines. it is |
|---|
| 92 | * freed when the parse state is destroyed. |
|---|
| 93 | * |
|---|
| 94 | * @param ps |
|---|
| 95 | * parse state instance |
|---|
| 96 | * |
|---|
| 97 | * this function updates all pointers that reference an |
|---|
| 98 | * address within lb since realloc may relocate the buffer. |
|---|
| 99 | * |
|---|
| 100 | * @return PR_FALSE if insufficient memory. |
|---|
| 101 | */ |
|---|
| 102 | static PRBool |
|---|
| 103 | pref_GrowBuf(PrefParseState *ps) |
|---|
| 104 | { |
|---|
| 105 | int bufLen, curPos, valPos; |
|---|
| 106 | |
|---|
| 107 | bufLen = ps->lbend - ps->lb; |
|---|
| 108 | curPos = ps->lbcur - ps->lb; |
|---|
| 109 | valPos = ps->vb - ps->lb; |
|---|
| 110 | |
|---|
| 111 | if (bufLen == 0) |
|---|
| 112 | bufLen = 128; /* default buffer size */ |
|---|
| 113 | else |
|---|
| 114 | bufLen <<= 1; /* double buffer size */ |
|---|
| 115 | |
|---|
| 116 | #ifdef TEST_PREFREAD |
|---|
| 117 | fprintf(stderr, ">>> realloc(%d)\n", bufLen); |
|---|
| 118 | #endif |
|---|
| 119 | |
|---|
| 120 | ps->lb = (char*) realloc(ps->lb, bufLen); |
|---|
| 121 | if (!ps->lb) |
|---|
| 122 | return PR_FALSE; |
|---|
| 123 | |
|---|
| 124 | ps->lbcur = ps->lb + curPos; |
|---|
| 125 | ps->lbend = ps->lb + bufLen; |
|---|
| 126 | ps->vb = ps->lb + valPos; |
|---|
| 127 | |
|---|
| 128 | return PR_TRUE; |
|---|
| 129 | } |
|---|
| 130 | |
|---|
| 131 | /** |
|---|
| 132 | * pref_DoCallback |
|---|
| 133 | * |
|---|
| 134 | * this function is called when a complete pref name-value pair has |
|---|
| 135 | * been extracted from the input data. |
|---|
| 136 | * |
|---|
| 137 | * @param ps |
|---|
| 138 | * parse state instance |
|---|
| 139 | * |
|---|
| 140 | * @return PR_FALSE to indicate a fatal error. |
|---|
| 141 | */ |
|---|
| 142 | static PRBool |
|---|
| 143 | pref_DoCallback(PrefParseState *ps) |
|---|
| 144 | { |
|---|
| 145 | PrefValue value; |
|---|
| 146 | |
|---|
| 147 | switch (ps->vtype) { |
|---|
| 148 | case PREF_STRING: |
|---|
| 149 | value.stringVal = ps->vb; |
|---|
| 150 | break; |
|---|
| 151 | case PREF_INT: |
|---|
| 152 | if ((ps->vb[0] == '-' || ps->vb[0] == '+') && ps->vb[1] == '\0') { |
|---|
| 153 | NS_WARNING("malformed integer value"); |
|---|
| 154 | return PR_FALSE; |
|---|
| 155 | } |
|---|
| 156 | value.intVal = atoi(ps->vb); |
|---|
| 157 | break; |
|---|
| 158 | case PREF_BOOL: |
|---|
| 159 | value.boolVal = (ps->vb == kTrue); |
|---|
| 160 | break; |
|---|
| 161 | default: |
|---|
| 162 | break; |
|---|
| 163 | } |
|---|
| 164 | (*ps->reader)(ps->closure, ps->lb, value, ps->vtype, ps->fdefault); |
|---|
| 165 | return PR_TRUE; |
|---|
| 166 | } |
|---|
| 167 | |
|---|
| 168 | void |
|---|
| 169 | PREF_InitParseState(PrefParseState *ps, PrefReader reader, void *closure) |
|---|
| 170 | { |
|---|
| 171 | memset(ps, 0, sizeof(*ps)); |
|---|
| 172 | ps->reader = reader; |
|---|
| 173 | ps->closure = closure; |
|---|
| 174 | } |
|---|
| 175 | |
|---|
| 176 | void |
|---|
| 177 | PREF_FinalizeParseState(PrefParseState *ps) |
|---|
| 178 | { |
|---|
| 179 | if (ps->lb) |
|---|
| 180 | free(ps->lb); |
|---|
| 181 | } |
|---|
| 182 | |
|---|
| 183 | /** |
|---|
| 184 | * Pseudo-BNF |
|---|
| 185 | * ---------- |
|---|
| 186 | * function = LJUNK function-name JUNK function-args |
|---|
| 187 | * function-name = "user_pref" | "pref" |
|---|
| 188 | * function-args = "(" JUNK pref-name JUNK "," JUNK pref-value JUNK ")" JUNK ";" |
|---|
| 189 | * pref-name = quoted-string |
|---|
| 190 | * pref-value = quoted-string | "true" | "false" | integer-value |
|---|
| 191 | * JUNK = *(WS | comment-block | comment-line) |
|---|
| 192 | * LJUNK = *(WS | comment-block | comment-line | bcomment-line) |
|---|
| 193 | * WS = SP | HT | LF | VT | FF | CR |
|---|
| 194 | * SP = <US-ASCII SP, space (32)> |
|---|
| 195 | * HT = <US-ASCII HT, horizontal-tab (9)> |
|---|
| 196 | * LF = <US-ASCII LF, linefeed (10)> |
|---|
| 197 | * VT = <US-ASCII HT, vertical-tab (11)> |
|---|
| 198 | * FF = <US-ASCII FF, form-feed (12)> |
|---|
| 199 | * CR = <US-ASCII CR, carriage return (13)> |
|---|
| 200 | * comment-block = <C/C++ style comment block> |
|---|
| 201 | * comment-line = <C++ style comment line> |
|---|
| 202 | * bcomment-line = <bourne-shell style comment line> |
|---|
| 203 | */ |
|---|
| 204 | PRBool |
|---|
| 205 | PREF_ParseBuf(PrefParseState *ps, const char *buf, int bufLen) |
|---|
| 206 | { |
|---|
| 207 | const char *end; |
|---|
| 208 | char c; |
|---|
| 209 | char udigit; |
|---|
| 210 | int state; |
|---|
| 211 | |
|---|
| 212 | state = ps->state; |
|---|
| 213 | for (end = buf + bufLen; buf != end; ++buf) { |
|---|
| 214 | c = *buf; |
|---|
| 215 | switch (state) { |
|---|
| 216 | /* initial state */ |
|---|
| 217 | case PREF_PARSE_INIT: |
|---|
| 218 | if (ps->lbcur != ps->lb) { /* reset state */ |
|---|
| 219 | ps->lbcur = ps->lb; |
|---|
| 220 | ps->vb = NULL; |
|---|
| 221 | ps->vtype = PREF_INVALID; |
|---|
| 222 | ps->fdefault = PR_FALSE; |
|---|
| 223 | } |
|---|
| 224 | switch (c) { |
|---|
| 225 | case '/': /* begin comment block or line? */ |
|---|
| 226 | state = PREF_PARSE_COMMENT_MAYBE_START; |
|---|
| 227 | break; |
|---|
| 228 | case '#': /* accept shell style comments */ |
|---|
| 229 | state = PREF_PARSE_UNTIL_EOL; |
|---|
| 230 | break; |
|---|
| 231 | case 'u': /* indicating user_pref */ |
|---|
| 232 | case 'p': /* indicating pref */ |
|---|
| 233 | ps->smatch = (c == 'u' ? kUserPref : kPref); |
|---|
| 234 | ps->sindex = 1; |
|---|
| 235 | ps->nextstate = PREF_PARSE_UNTIL_OPEN_PAREN; |
|---|
| 236 | state = PREF_PARSE_MATCH_STRING; |
|---|
| 237 | break; |
|---|
| 238 | /* else skip char */ |
|---|
| 239 | } |
|---|
| 240 | break; |
|---|
| 241 | |
|---|
| 242 | /* string matching */ |
|---|
| 243 | case PREF_PARSE_MATCH_STRING: |
|---|
| 244 | if (c == ps->smatch[ps->sindex++]) { |
|---|
| 245 | /* if we've matched all characters, then move to next state. */ |
|---|
| 246 | if (ps->smatch[ps->sindex] == '\0') { |
|---|
| 247 | state = ps->nextstate; |
|---|
| 248 | ps->nextstate = PREF_PARSE_INIT; /* reset next state */ |
|---|
| 249 | } |
|---|
| 250 | /* else wait for next char */ |
|---|
| 251 | } |
|---|
| 252 | else { |
|---|
| 253 | NS_WARNING("malformed pref file"); |
|---|
| 254 | return PR_FALSE; |
|---|
| 255 | } |
|---|
| 256 | break; |
|---|
| 257 | |
|---|
| 258 | /* quoted string parsing */ |
|---|
| 259 | case PREF_PARSE_QUOTED_STRING: |
|---|
| 260 | /* we assume that the initial quote has already been consumed */ |
|---|
| 261 | if (ps->lbcur == ps->lbend && !pref_GrowBuf(ps)) |
|---|
| 262 | return PR_FALSE; /* out of memory */ |
|---|
| 263 | if (c == '\\') |
|---|
| 264 | state = PREF_PARSE_ESC_SEQUENCE; |
|---|
| 265 | else if (c == ps->quotechar) { |
|---|
| 266 | *ps->lbcur++ = '\0'; |
|---|
| 267 | state = ps->nextstate; |
|---|
| 268 | ps->nextstate = PREF_PARSE_INIT; /* reset next state */ |
|---|
| 269 | } |
|---|
| 270 | else |
|---|
| 271 | *ps->lbcur++ = c; |
|---|
| 272 | break; |
|---|
| 273 | |
|---|
| 274 | /* name parsing */ |
|---|
| 275 | case PREF_PARSE_UNTIL_NAME: |
|---|
| 276 | if (c == '\"' || c == '\'') { |
|---|
| 277 | ps->fdefault = (ps->smatch == kPref); |
|---|
| 278 | ps->quotechar = c; |
|---|
| 279 | ps->nextstate = PREF_PARSE_UNTIL_COMMA; /* return here when done */ |
|---|
| 280 | state = PREF_PARSE_QUOTED_STRING; |
|---|
| 281 | } |
|---|
| 282 | else if (c == '/') { /* allow embedded comment */ |
|---|
| 283 | ps->nextstate = state; /* return here when done with comment */ |
|---|
| 284 | state = PREF_PARSE_COMMENT_MAYBE_START; |
|---|
| 285 | } |
|---|
| 286 | else if (!isspace(c)) { |
|---|
| 287 | NS_WARNING("malformed pref file"); |
|---|
| 288 | return PR_FALSE; |
|---|
| 289 | } |
|---|
| 290 | break; |
|---|
| 291 | |
|---|
| 292 | /* parse until we find a comma separating name and value */ |
|---|
| 293 | case PREF_PARSE_UNTIL_COMMA: |
|---|
| 294 | if (c == ',') { |
|---|
| 295 | ps->vb = ps->lbcur; |
|---|
| 296 | state = PREF_PARSE_UNTIL_VALUE; |
|---|
| 297 | } |
|---|
| 298 | else if (c == '/') { /* allow embedded comment */ |
|---|
| 299 | ps->nextstate = state; /* return here when done with comment */ |
|---|
| 300 | state = PREF_PARSE_COMMENT_MAYBE_START; |
|---|
| 301 | } |
|---|
| 302 | else if (!isspace(c)) { |
|---|
| 303 | NS_WARNING("malformed pref file"); |
|---|
| 304 | return PR_FALSE; |
|---|
| 305 | } |
|---|
| 306 | break; |
|---|
| 307 | |
|---|
| 308 | /* value parsing */ |
|---|
| 309 | case PREF_PARSE_UNTIL_VALUE: |
|---|
| 310 | /* the pref value type is unknown. so, we scan for the first |
|---|
| 311 | * character of the value, and determine the type from that. */ |
|---|
| 312 | if (c == '\"' || c == '\'') { |
|---|
| 313 | ps->vtype = PREF_STRING; |
|---|
| 314 | ps->quotechar = c; |
|---|
| 315 | ps->nextstate = PREF_PARSE_UNTIL_CLOSE_PAREN; |
|---|
| 316 | state = PREF_PARSE_QUOTED_STRING; |
|---|
| 317 | } |
|---|
| 318 | else if (c == 't' || c == 'f') { |
|---|
| 319 | ps->vb = (char *) (c == 't' ? kTrue : kFalse); |
|---|
| 320 | ps->vtype = PREF_BOOL; |
|---|
| 321 | ps->smatch = ps->vb; |
|---|
| 322 | ps->sindex = 1; |
|---|
| 323 | ps->nextstate = PREF_PARSE_UNTIL_CLOSE_PAREN; |
|---|
| 324 | state = PREF_PARSE_MATCH_STRING; |
|---|
| 325 | } |
|---|
| 326 | else if (isdigit(c) || (c == '-') || (c == '+')) { |
|---|
| 327 | ps->vtype = PREF_INT; |
|---|
| 328 | /* write c to line buffer... */ |
|---|
| 329 | if (ps->lbcur == ps->lbend && !pref_GrowBuf(ps)) |
|---|
| 330 | return PR_FALSE; /* out of memory */ |
|---|
| 331 | *ps->lbcur++ = c; |
|---|
| 332 | state = PREF_PARSE_INT_VALUE; |
|---|
| 333 | } |
|---|
| 334 | else if (c == '/') { /* allow embedded comment */ |
|---|
| 335 | ps->nextstate = state; /* return here when done with comment */ |
|---|
| 336 | state = PREF_PARSE_COMMENT_MAYBE_START; |
|---|
| 337 | } |
|---|
| 338 | else if (!isspace(c)) { |
|---|
| 339 | NS_WARNING("malformed pref file"); |
|---|
| 340 | return PR_FALSE; |
|---|
| 341 | } |
|---|
| 342 | break; |
|---|
| 343 | case PREF_PARSE_INT_VALUE: |
|---|
| 344 | /* grow line buffer if necessary... */ |
|---|
| 345 | if (ps->lbcur == ps->lbend && !pref_GrowBuf(ps)) |
|---|
| 346 | return PR_FALSE; /* out of memory */ |
|---|
| 347 | if (isdigit(c)) |
|---|
| 348 | *ps->lbcur++ = c; |
|---|
| 349 | else { |
|---|
| 350 | *ps->lbcur++ = '\0'; /* stomp null terminator; we are done. */ |
|---|
| 351 | if (c == ')') |
|---|
| 352 | state = PREF_PARSE_UNTIL_SEMICOLON; |
|---|
| 353 | else if (c == '/') { /* allow embedded comment */ |
|---|
| 354 | ps->nextstate = PREF_PARSE_UNTIL_CLOSE_PAREN; |
|---|
| 355 | state = PREF_PARSE_COMMENT_MAYBE_START; |
|---|
| 356 | } |
|---|
| 357 | else if (isspace(c)) |
|---|
| 358 | state = PREF_PARSE_UNTIL_CLOSE_PAREN; |
|---|
| 359 | else { |
|---|
| 360 | NS_WARNING("malformed pref file"); |
|---|
| 361 | return PR_FALSE; |
|---|
| 362 | } |
|---|
| 363 | } |
|---|
| 364 | break; |
|---|
| 365 | |
|---|
| 366 | /* comment parsing */ |
|---|
| 367 | case PREF_PARSE_COMMENT_MAYBE_START: |
|---|
| 368 | switch (c) { |
|---|
| 369 | case '*': /* comment block */ |
|---|
| 370 | state = PREF_PARSE_COMMENT_BLOCK; |
|---|
| 371 | break; |
|---|
| 372 | case '/': /* comment line */ |
|---|
| 373 | state = PREF_PARSE_UNTIL_EOL; |
|---|
| 374 | break; |
|---|
| 375 | default: |
|---|
| 376 | /* pref file is malformed */ |
|---|
| 377 | NS_WARNING("malformed pref file"); |
|---|
| 378 | return PR_FALSE; |
|---|
| 379 | } |
|---|
| 380 | break; |
|---|
| 381 | case PREF_PARSE_COMMENT_BLOCK: |
|---|
| 382 | if (c == '*') |
|---|
| 383 | state = PREF_PARSE_COMMENT_BLOCK_MAYBE_END; |
|---|
| 384 | break; |
|---|
| 385 | case PREF_PARSE_COMMENT_BLOCK_MAYBE_END: |
|---|
| 386 | switch (c) { |
|---|
| 387 | case '/': |
|---|
| 388 | state = ps->nextstate; |
|---|
| 389 | ps->nextstate = PREF_PARSE_INIT; |
|---|
| 390 | break; |
|---|
| 391 | case '*': /* stay in this state */ |
|---|
| 392 | break; |
|---|
| 393 | default: |
|---|
| 394 | state = PREF_PARSE_COMMENT_BLOCK; |
|---|
| 395 | } |
|---|
| 396 | break; |
|---|
| 397 | |
|---|
| 398 | /* string escape sequence parsing */ |
|---|
| 399 | case PREF_PARSE_ESC_SEQUENCE: |
|---|
| 400 | /* not necessary to resize buffer here since we should be writing |
|---|
| 401 | * only one character and the resize check would have been done |
|---|
| 402 | * for us in the previous state */ |
|---|
| 403 | switch (c) { |
|---|
| 404 | case '\"': |
|---|
| 405 | case '\'': |
|---|
| 406 | case '\\': |
|---|
| 407 | break; |
|---|
| 408 | case 'r': |
|---|
| 409 | c = '\r'; |
|---|
| 410 | break; |
|---|
| 411 | case 'n': |
|---|
| 412 | c = '\n'; |
|---|
| 413 | break; |
|---|
| 414 | case 'x': /* hex escape -- always interpreted as Latin-1 */ |
|---|
| 415 | case 'u': /* UTF16 escape */ |
|---|
| 416 | ps->esctmp[0] = c; |
|---|
| 417 | ps->esclen = 1; |
|---|
| 418 | ps->utf16[0] = ps->utf16[1] = 0; |
|---|
| 419 | ps->sindex = (c == 'x' ) ? |
|---|
| 420 | HEX_ESC_NUM_DIGITS : |
|---|
| 421 | UTF16_ESC_NUM_DIGITS; |
|---|
| 422 | state = PREF_PARSE_HEX_ESCAPE; |
|---|
| 423 | continue; |
|---|
| 424 | default: |
|---|
| 425 | NS_WARNING("preserving unexpected JS escape sequence"); |
|---|
| 426 | /* Invalid escape sequence so we do have to write more than |
|---|
| 427 | * one character. Grow line buffer if necessary... */ |
|---|
| 428 | if ((ps->lbcur+1) == ps->lbend && !pref_GrowBuf(ps)) |
|---|
| 429 | return PR_FALSE; /* out of memory */ |
|---|
| 430 | *ps->lbcur++ = '\\'; /* preserve the escape sequence */ |
|---|
| 431 | break; |
|---|
| 432 | } |
|---|
| 433 | *ps->lbcur++ = c; |
|---|
| 434 | state = PREF_PARSE_QUOTED_STRING; |
|---|
| 435 | break; |
|---|
| 436 | |
|---|
| 437 | /* parsing a hex (\xHH) or utf16 escape (\uHHHH) */ |
|---|
| 438 | case PREF_PARSE_HEX_ESCAPE: |
|---|
| 439 | if ( c >= '0' && c <= '9' ) |
|---|
| 440 | udigit = (c - '0'); |
|---|
| 441 | else if ( c >= 'A' && c <= 'F' ) |
|---|
| 442 | udigit = (c - 'A') + 10; |
|---|
| 443 | else if ( c >= 'a' && c <= 'f' ) |
|---|
| 444 | udigit = (c - 'a') + 10; |
|---|
| 445 | else { |
|---|
| 446 | /* bad escape sequence found, write out broken escape as-is */ |
|---|
| 447 | NS_WARNING("preserving invalid or incomplete hex escape"); |
|---|
| 448 | *ps->lbcur++ = '\\'; /* original escape slash */ |
|---|
| 449 | if ((ps->lbcur + ps->esclen) >= ps->lbend && !pref_GrowBuf(ps)) |
|---|
| 450 | return PR_FALSE; |
|---|
| 451 | for (int i = 0; i < ps->esclen; ++i) |
|---|
| 452 | *ps->lbcur++ = ps->esctmp[i]; |
|---|
| 453 | |
|---|
| 454 | /* push the non-hex character back for re-parsing. */ |
|---|
| 455 | /* (++buf at the top of the loop keeps this safe) */ |
|---|
| 456 | --buf; |
|---|
| 457 | state = PREF_PARSE_QUOTED_STRING; |
|---|
| 458 | continue; |
|---|
| 459 | } |
|---|
| 460 | |
|---|
| 461 | /* have a digit */ |
|---|
| 462 | ps->esctmp[ps->esclen++] = c; /* preserve it */ |
|---|
| 463 | ps->utf16[1] <<= BITS_PER_HEX_DIGIT; |
|---|
| 464 | ps->utf16[1] |= udigit; |
|---|
| 465 | ps->sindex--; |
|---|
| 466 | if (ps->sindex == 0) { |
|---|
| 467 | /* have the full escape. Convert to UTF8 */ |
|---|
| 468 | int utf16len = 0; |
|---|
| 469 | if (ps->utf16[0]) { |
|---|
| 470 | /* already have a high surrogate, this is a two char seq */ |
|---|
| 471 | utf16len = 2; |
|---|
| 472 | } |
|---|
| 473 | else if (0xD800 == (0xFC00 & ps->utf16[1])) { |
|---|
| 474 | /* a high surrogate, can't convert until we have the low */ |
|---|
| 475 | ps->utf16[0] = ps->utf16[1]; |
|---|
| 476 | ps->utf16[1] = 0; |
|---|
| 477 | state = PREF_PARSE_UTF16_LOW_SURROGATE; |
|---|
| 478 | break; |
|---|
| 479 | } |
|---|
| 480 | else { |
|---|
| 481 | /* a single utf16 character */ |
|---|
| 482 | ps->utf16[0] = ps->utf16[1]; |
|---|
| 483 | utf16len = 1; |
|---|
| 484 | } |
|---|
| 485 | |
|---|
| 486 | /* actual conversion */ |
|---|
| 487 | /* make sure there's room, 6 bytes is max utf8 len (in */ |
|---|
| 488 | /* theory; 4 bytes covers the actual utf16 range) */ |
|---|
| 489 | if (ps->lbcur+6 >= ps->lbend && !pref_GrowBuf(ps)) |
|---|
| 490 | return PR_FALSE; |
|---|
| 491 | |
|---|
| 492 | ConvertUTF16toUTF8 converter(ps->lbcur); |
|---|
| 493 | converter.write(ps->utf16, utf16len); |
|---|
| 494 | ps->lbcur += converter.Size(); |
|---|
| 495 | state = PREF_PARSE_QUOTED_STRING; |
|---|
| 496 | } |
|---|
| 497 | break; |
|---|
| 498 | |
|---|
| 499 | /* looking for beginning of utf16 low surrogate */ |
|---|
| 500 | case PREF_PARSE_UTF16_LOW_SURROGATE: |
|---|
| 501 | if (ps->sindex == 0 && c == '\\') { |
|---|
| 502 | ++ps->sindex; |
|---|
| 503 | } |
|---|
| 504 | else if (ps->sindex == 1 && c == 'u') { |
|---|
| 505 | /* escape sequence is correct, now parse hex */ |
|---|
| 506 | ps->sindex = UTF16_ESC_NUM_DIGITS; |
|---|
| 507 | ps->esctmp[0] = 'u'; |
|---|
| 508 | ps->esclen = 1; |
|---|
| 509 | state = PREF_PARSE_HEX_ESCAPE; |
|---|
| 510 | } |
|---|
| 511 | else { |
|---|
| 512 | /* didn't find expected low surrogate. Ignore high surrogate |
|---|
| 513 | * (it would just get converted to nothing anyway) and start |
|---|
| 514 | * over with this character */ |
|---|
| 515 | --buf; |
|---|
| 516 | if (ps->sindex == 1) |
|---|
| 517 | state = PREF_PARSE_ESC_SEQUENCE; |
|---|
| 518 | else |
|---|
| 519 | state = PREF_PARSE_QUOTED_STRING; |
|---|
| 520 | continue; |
|---|
| 521 | } |
|---|
| 522 | break; |
|---|
| 523 | |
|---|
| 524 | /* function open and close parsing */ |
|---|
| 525 | case PREF_PARSE_UNTIL_OPEN_PAREN: |
|---|
| 526 | /* tolerate only whitespace and embedded comments */ |
|---|
| 527 | if (c == '(') |
|---|
| 528 | state = PREF_PARSE_UNTIL_NAME; |
|---|
| 529 | else if (c == '/') { |
|---|
| 530 | ps->nextstate = state; /* return here when done with comment */ |
|---|
| 531 | state = PREF_PARSE_COMMENT_MAYBE_START; |
|---|
| 532 | } |
|---|
| 533 | else if (!isspace(c)) { |
|---|
| 534 | NS_WARNING("malformed pref file"); |
|---|
| 535 | return PR_FALSE; |
|---|
| 536 | } |
|---|
| 537 | break; |
|---|
| 538 | case PREF_PARSE_UNTIL_CLOSE_PAREN: |
|---|
| 539 | /* tolerate only whitespace and embedded comments */ |
|---|
| 540 | if (c == ')') |
|---|
| 541 | state = PREF_PARSE_UNTIL_SEMICOLON; |
|---|
| 542 | else if (c == '/') { |
|---|
| 543 | ps->nextstate = state; /* return here when done with comment */ |
|---|
| 544 | state = PREF_PARSE_COMMENT_MAYBE_START; |
|---|
| 545 | } |
|---|
| 546 | else if (!isspace(c)) { |
|---|
| 547 | NS_WARNING("malformed pref file"); |
|---|
| 548 | return PR_FALSE; |
|---|
| 549 | } |
|---|
| 550 | break; |
|---|
| 551 | |
|---|
| 552 | /* function terminator ';' parsing */ |
|---|
| 553 | case PREF_PARSE_UNTIL_SEMICOLON: |
|---|
| 554 | /* tolerate only whitespace and embedded comments */ |
|---|
| 555 | if (c == ';') { |
|---|
| 556 | if (!pref_DoCallback(ps)) |
|---|
| 557 | return PR_FALSE; |
|---|
| 558 | state = PREF_PARSE_INIT; |
|---|
| 559 | } |
|---|
| 560 | else if (c == '/') { |
|---|
| 561 | ps->nextstate = state; /* return here when done with comment */ |
|---|
| 562 | state = PREF_PARSE_COMMENT_MAYBE_START; |
|---|
| 563 | } |
|---|
| 564 | else if (!isspace(c)) { |
|---|
| 565 | NS_WARNING("malformed pref file"); |
|---|
| 566 | return PR_FALSE; |
|---|
| 567 | } |
|---|
| 568 | break; |
|---|
| 569 | |
|---|
| 570 | /* eol parsing */ |
|---|
| 571 | case PREF_PARSE_UNTIL_EOL: |
|---|
| 572 | /* need to handle mac, unix, or dos line endings. |
|---|
| 573 | * PREF_PARSE_INIT will eat the next \n in case |
|---|
| 574 | * we have \r\n. */ |
|---|
| 575 | if (c == '\r' || c == '\n' || c == 0x1A) { |
|---|
| 576 | state = ps->nextstate; |
|---|
| 577 | ps->nextstate = PREF_PARSE_INIT; /* reset next state */ |
|---|
| 578 | } |
|---|
| 579 | break; |
|---|
| 580 | } |
|---|
| 581 | } |
|---|
| 582 | ps->state = state; |
|---|
| 583 | return PR_TRUE; |
|---|
| 584 | } |
|---|
| 585 | |
|---|
| 586 | #ifdef TEST_PREFREAD |
|---|
| 587 | |
|---|
| 588 | static void |
|---|
| 589 | pref_reader(void *closure, |
|---|
| 590 | const char *pref, |
|---|
| 591 | PrefValue val, |
|---|
| 592 | PrefType type, |
|---|
| 593 | PRBool defPref) |
|---|
| 594 | { |
|---|
| 595 | printf("%spref(\"%s\", ", defPref ? "" : "user_", pref); |
|---|
| 596 | switch (type) { |
|---|
| 597 | case PREF_STRING: |
|---|
| 598 | printf("\"%s\");\n", val.stringVal); |
|---|
| 599 | break; |
|---|
| 600 | case PREF_INT: |
|---|
| 601 | printf("%i);\n", val.intVal); |
|---|
| 602 | break; |
|---|
| 603 | case PREF_BOOL: |
|---|
| 604 | printf("%s);\n", val.boolVal == PR_FALSE ? "false" : "true"); |
|---|
| 605 | break; |
|---|
| 606 | } |
|---|
| 607 | } |
|---|
| 608 | |
|---|
| 609 | int |
|---|
| 610 | main(int argc, char **argv) |
|---|
| 611 | { |
|---|
| 612 | PrefParseState ps; |
|---|
| 613 | char buf[4096]; /* i/o buffer */ |
|---|
| 614 | FILE *fp; |
|---|
| 615 | int n; |
|---|
| 616 | |
|---|
| 617 | if (argc == 1) { |
|---|
| 618 | printf("usage: prefread file.js\n"); |
|---|
| 619 | return -1; |
|---|
| 620 | } |
|---|
| 621 | |
|---|
| 622 | fp = fopen(argv[1], "r"); |
|---|
| 623 | if (!fp) { |
|---|
| 624 | printf("failed to open file\n"); |
|---|
| 625 | return -1; |
|---|
| 626 | } |
|---|
| 627 | |
|---|
| 628 | PREF_InitParseState(&ps, pref_reader, NULL); |
|---|
| 629 | |
|---|
| 630 | while ((n = fread(buf, 1, sizeof(buf), fp)) > 0) |
|---|
| 631 | PREF_ParseBuf(&ps, buf, n); |
|---|
| 632 | |
|---|
| 633 | PREF_FinalizeParseState(&ps); |
|---|
| 634 | |
|---|
| 635 | fclose(fp); |
|---|
| 636 | return 0; |
|---|
| 637 | } |
|---|
| 638 | |
|---|
| 639 | #endif /* TEST_PREFREAD */ |
|---|