1 Index: jflex/src/main/java/jflex/Emitter.java
3 Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
5 ===================================================================
6 --- jflex/src/main/java/jflex/Emitter.java (revision de2312d6891de7fdf2752ad8e0a45983202ad5e0)
7 +++ jflex/src/main/java/jflex/Emitter.java (revision 5b6b151f542d88ffb2ea4363c6ee8f23c7a49b09)
13 +import java.util.Arrays;
14 +import java.util.LinkedHashMap;
15 +import java.util.Map;
16 import java.util.regex.Matcher;
17 import java.util.regex.Pattern;
21 private void emitNextInput() {
22 println(" if (zzCurrentPosL < zzEndReadL) {");
23 - println(" zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);");
24 + println(" zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL/*, zzEndReadL*/);");
25 println(" zzCurrentPosL += Character.charCount(zzInput);");
27 println(" else if (zzAtEOF) {");
29 println(" break zzForAction;");
32 - println(" zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);");
33 + println(" zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL/*, zzEndReadL*/);");
34 println(" zzCurrentPosL += Character.charCount(zzInput);");
39 private void emitHeader() {
40 - println("/* The following code was generated by JFlex " + Main.version + " */");
41 + println("/* The following code was generated by JFlex " + Main.version + " tweaked for IntelliJ platform */");
49 - private void emitCharMapInitFunction(int packedCharMapPairs) {
50 + private void emitCharMapInitFunction() {
52 CharClasses cl = parser.getCharClasses();
55 println(" * @return the unpacked character translation table");
57 println(" private static char [] zzUnpackCMap(String packed) {");
58 - println(" char [] map = new char[0x" + Integer.toHexString(cl.getMaxCharCode() + 1) + "];");
59 + println(" int size = 0;");
60 + println(" for (int i = 0, length = packed.length(); i < length; i += 2) {");
61 + println(" size += packed.charAt(i);");
63 + println(" char[] map = new char[size];");
64 println(" int i = 0; /* index in packed string */");
65 println(" int j = 0; /* index in unpacked array */");
66 - println(" while (i < " + 2 * packedCharMapPairs + ") {");
67 + println(" while (i < packed.length()) {");
68 println(" int count = packed.charAt(i++);");
69 println(" char value = packed.charAt(i++);");
70 println(" do map[j++] = value; while (--count > 0);");
72 * the number of char map array entries per value is
73 * ceil(count / 0xFFFF)
75 - private int emitCharMapArray() {
76 + private void emitCharMapArray() {
77 CharClasses cl = parser.getCharClasses();
79 if ( cl.getMaxCharCode() < 256 ) {
80 emitCharMapArrayUnPacked();
81 - return 0; // the char map array will not be packed
84 // ignores cl.getMaxCharCode(), emits all intervals instead
86 intervals = cl.getIntervals();
91 - println(" * Translates characters to character classes");
93 - println(" private static final String ZZ_CMAP_PACKED = ");
94 + char[] exploded = new char[cl.getMaxCharCode() + 1];
95 + for (int i = 0, k = 0; i < intervals.length && k < exploded.length; i ++) {
96 + int count = intervals[i].end - intervals[i].start + 1;
97 + int value = colMap[intervals[i].charClass];
100 - int n = 0; // numbers of entries in current line
103 - int i = 0, numPairs = 0;
105 - while ( i < intervals.length ) {
106 - count = intervals[i].end-intervals[i].start+1;
107 - value = colMap[intervals[i].charClass];
109 - // count could be >= 0x10000
110 - while (count > 0xFFFF) {
116 + while (count-- > 0) {
117 + exploded[k++] = (char) value;
123 - if (i < intervals.length-1) {
134 + int[] sizes = EmitterCM.findBestSizes(exploded, 3);
135 + Object[] o = EmitterCM.generateForSizes(exploded, sizes);
136 + int totalSize = EmitterCM.getTotalBytes((char[][]) o[0], sizes, (int[]) o[1]);
144 println(" * Translates characters to character classes");
145 + println(" * Chosen bits are " + Arrays.toString(sizes));
146 + println(" * Total runtime size is " + totalSize + " bytes");
148 - println(" private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);");
150 + println(" public static int ZZ_CMAP(int ch) {\n" +
151 + " return " + EmitterCM.genAccess("ZZ_CMAP_" + "A", "ch", 16, sizes, (int[]) o[3], (int[]) o[4], (boolean[]) o[2]) + ";\n" +
159 - * Print number as octal/unicode escaped string character.
161 - * @param c the value to print
162 - * @prec 0 <= c <= 0xFFFF
164 - private void printUC(int c) {
167 - if (c < 0x1000) out.print("0");
168 - out.print(Integer.toHexString(c));
169 + String tables = EmitterCM.genTables((char[][]) o[0], sizes, (int[]) o[1], (boolean[]) o[2]);
175 - out.print(Integer.toOctalString(c));
180 private void emitRowMapArray() {
182 println(" for (zzCurrentPosL = zzStartRead ;");
183 println(" zzCurrentPosL < zzMarkedPosL ;");
184 println(" zzCurrentPosL += zzCharCount ) {");
185 - println(" zzCh = Character.codePointAt(zzBufferL, zzCurrentPosL, zzMarkedPosL);");
186 + println(" zzCh = Character.codePointAt(zzBufferL, zzCurrentPosL/*, zzMarkedPosL*/);");
187 println(" zzCharCount = Character.charCount(zzCh);");
188 println(" switch (zzCh) {");
189 println(" case '\\u000B':");
191 println(" // peek one character ahead if it is \\n (if we have counted one line too much)");
192 println(" boolean zzPeek;");
193 println(" if (zzMarkedPosL < zzEndReadL)");
194 - println(" zzPeek = zzBufferL[zzMarkedPosL] == '\\n';");
195 + println(" zzPeek = zzBufferL.charAt(zzMarkedPosL) == '\\n';");
196 println(" else if (zzAtEOF)");
197 println(" zzPeek = false;");
200 println(" if (eof) ");
201 println(" zzPeek = false;");
203 - println(" zzPeek = zzBufferL[zzMarkedPosL] == '\\n';");
204 + println(" zzPeek = zzBufferL.charAt(zzMarkedPosL) == '\\n';");
206 println(" if (zzPeek) yyline--;");
209 // if match was empty, last value of zzAtBOL can be used
210 // zzStartRead is always >= 0
211 println(" if (zzMarkedPosL > zzStartRead) {");
212 - println(" switch (zzBufferL[zzMarkedPosL-1]) {");
213 + println(" switch (zzBufferL.charAt(zzMarkedPosL-1)) {");
214 println(" case '\\n':");
215 println(" case '\\u000B':");
216 println(" case '\\u000C':");
219 println(" case '\\r': ");
220 println(" if (zzMarkedPosL < zzEndReadL)");
221 - println(" zzAtBOL = zzBufferL[zzMarkedPosL] != '\\n';");
222 + println(" zzAtBOL = zzBufferL.charAt(zzMarkedPosL) != '\\n';");
223 println(" else if (zzAtEOF)");
224 println(" zzAtBOL = false;");
226 @@ -1038,7 +1005,7 @@
227 println(" if (eof) ");
228 println(" zzAtBOL = false;");
230 - println(" zzAtBOL = zzBufferL[zzMarkedPosL] != '\\n';");
231 + println(" zzAtBOL = zzBufferL.charAt(zzMarkedPosL) != '\\n';");
234 println(" default:");
235 @@ -1073,7 +1040,11 @@
238 private void emitGetRowMapNext() {
239 - println(" int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];");
240 + CharClasses cl = parser.getCharClasses();
242 + boolean unpacked = cl.getMaxCharCode() < 256;
244 + println(" int zzNext = zzTransL[ zzRowMapL[zzState] + "+ (unpacked? "zzCMapL[zzInput]" : "ZZ_CMAP(zzInput)") +" ];");
245 println(" if (zzNext == "+DFA.NO_TARGET+") break zzForAction;");
246 println(" zzState = zzNext;");
248 @@ -1157,6 +1128,9 @@
251 private void emitActions() {
252 + CharClasses cl = parser.getCharClasses();
253 + boolean unpacked = cl.getMaxCharCode() < 256;
255 println(" switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {");
257 int i = actionTable.size()+1;
258 @@ -1170,27 +1144,27 @@
259 if (action.lookAhead() == Action.FIXED_BASE) {
260 println(" // lookahead expression with fixed base length");
261 println(" zzMarkedPos = Character.offsetByCodePoints");
262 - println(" (zzBufferL, zzStartRead, zzEndRead - zzStartRead, zzStartRead, " + action.getLookLength() + ");");
263 + println(" (zzBufferL/*, zzStartRead, zzEndRead - zzStartRead*/, zzStartRead, " + action.getLookLength() + ");");
266 if (action.lookAhead() == Action.FIXED_LOOK ||
267 action.lookAhead() == Action.FINITE_CHOICE) {
268 println(" // lookahead expression with fixed lookahead length");
269 println(" zzMarkedPos = Character.offsetByCodePoints");
270 - println(" (zzBufferL, zzStartRead, zzEndRead - zzStartRead, zzMarkedPos, -" + action.getLookLength() + ");");
271 + println(" (zzBufferL/*, zzStartRead, zzEndRead - zzStartRead*/, zzMarkedPos, -" + action.getLookLength() + ");");
274 if (action.lookAhead() == Action.GENERAL_LOOK) {
275 println(" // general lookahead, find correct zzMarkedPos");
276 println(" { int zzFState = "+dfa.entryState[action.getEntryState()]+";");
277 println(" int zzFPos = zzStartRead;");
278 - println(" if (zzFin.length <= zzBufferL.length) { zzFin = new boolean[zzBufferL.length+1]; }");
279 + println(" if (zzFin.length <= zzBufferL.length()) { zzFin = new boolean[zzBufferL.length()+1]; }");
280 println(" boolean zzFinL[] = zzFin;");
281 println(" while (zzFState != -1 && zzFPos < zzMarkedPos) {");
282 println(" zzFinL[zzFPos] = ((zzAttrL[zzFState] & 1) == 1);");
283 - println(" zzInput = Character.codePointAt(zzBufferL, zzFPos, zzMarkedPos);");
284 + println(" zzInput = Character.codePointAt(zzBufferL, zzFPos/*, zzMarkedPos*/);");
285 println(" zzFPos += Character.charCount(zzInput);");
286 - println(" zzFState = zzTransL[ zzRowMapL[zzFState] + zzCMapL[zzInput] ];");
287 + println(" zzFState = zzTransL[ zzRowMapL[zzFState] + " + (unpacked ? "zzCMapL[zzInput]" : "ZZ_CMAP(zzInput)") + " ];");
289 println(" if (zzFState != -1) { zzFinL[zzFPos++] = ((zzAttrL[zzFState] & 1) == 1); } ");
290 println(" while (zzFPos <= zzMarkedPos) {");
291 @@ -1200,9 +1174,9 @@
292 println(" zzFState = "+dfa.entryState[action.getEntryState()+1]+";");
293 println(" zzFPos = zzMarkedPos;");
294 println(" while (!zzFinL[zzFPos] || (zzAttrL[zzFState] & 1) != 1) {");
295 - println(" zzInput = Character.codePointBefore(zzBufferL, zzFPos, zzStartRead);");
296 + println(" zzInput = Character.codePointBefore(zzBufferL, zzFPos/*, zzStartRead*/);");
297 println(" zzFPos -= Character.charCount(zzInput);");
298 - println(" zzFState = zzTransL[ zzRowMapL[zzFState] + zzCMapL[zzInput] ];");
299 + println(" zzFState = zzTransL[ zzRowMapL[zzFState] + " + (unpacked ? "zzCMapL[zzInput]" : "ZZ_CMAP(zzInput)") + " ];");
301 println(" zzMarkedPos = zzFPos;");
303 @@ -1230,10 +1204,10 @@
304 EOFActions eofActions = parser.getEOFActions();
306 if ( scanner.eofCode != null )
307 - println(" zzDoEOF();");
308 + println(" zzDoEOF();");
310 if ( eofActions.numActions() > 0 ) {
311 - println(" switch (zzLexicalState) {");
312 + println(" switch (zzLexicalState) {");
314 // pick a start value for break case labels.
315 // must be larger than any value of a lex state:
316 @@ -1420,7 +1394,7 @@
320 - int packedCharMapPairs = emitCharMapArray();
321 + emitCharMapArray();
325 @@ -1444,7 +1418,7 @@
327 emitConstructorDecl();
329 - emitCharMapInitFunction(packedCharMapPairs);
330 + emitCharMapInitFunction();
332 if (scanner.debugOption) {
334 Index: jflex/src/main/java/jflex/EmitterCM.java
335 IDEA additional info:
336 Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
338 ===================================================================
339 --- jflex/src/main/java/jflex/EmitterCM.java (revision 5b6b151f542d88ffb2ea4363c6ee8f23c7a49b09)
340 +++ jflex/src/main/java/jflex/EmitterCM.java (revision 5b6b151f542d88ffb2ea4363c6ee8f23c7a49b09)
344 +import java.io.StringWriter;
347 + * Based on generatecharacter/GenerateCharacter.java
348 + * from http://hg.openjdk.java.net tailored to our needs
354 + static int[] findBestSizes(char[] map, int partitions) {
355 + int[] curMin = {Integer.MAX_VALUE};
356 + int[][] minSizes = {null};
357 + while (partitions > 0) {
358 + permuteSizesAndFindBest(map, new int[partitions--], curMin, minSizes);
360 + if (minSizes[0] == null) {
361 + throw new IllegalArgumentException("best sizes not found");
363 + return minSizes[0];
366 + private static void permuteSizesAndFindBest(char[] map, int[] sizes, int[] curMin, int[][] minSizes) {
367 + int bitsTotal = 0; // 21 for 0x10ffff
368 + for (int i = map.length; i > 0; i >>= 1) {
372 + int last = sizes.length - 1;
374 + for (int j = last, sum = 0; true ; j = last, sum = 0) {
375 + for (int i = 0; i <= last; i++) {
376 + if (sizes[i] == 0 && i != last) { sum = 0; break; }
379 + if (sum == bitsTotal) {
380 + int newMin = tryGenerateTables(map, sizes);
381 + //System.out.println(Arrays.toString(sizes) + " -> " + newMin);
382 + if (curMin[0] > newMin) {
383 + curMin[0] = newMin;
384 + minSizes[0] = sizes.clone();
388 + while (j >= 0 && sizes[j] == bitsTotal) j --;
391 + for (int j0 = j; j0 < last; j0++) {
398 + private static int tryGenerateTables(char[] map, int[] sizes) {
400 + Object[] o = generateForSizes(map, sizes);
401 + char[][] tables = (char[][])o[0];
402 + int[] bytes = (int[])o[1];
403 + return getTotalBytes(tables, sizes, bytes);
405 + catch (IllegalArgumentException e) {
406 + return Integer.MAX_VALUE;
410 + static int getTotalBytes(char[][] tables, int[] sizes, int[] bytes) {
411 + int totalBytes = 0;
412 + for (int k = 0; k < sizes.length - 1; k++) {
413 + totalBytes += tables[k].length * bytes[k] << 1;
415 + totalBytes += ((((tables[sizes.length - 1].length * 32) + 31) >> 5) << 1);
420 + private static char[][] buildTable(char[] map, int size) {
421 + int n = map.length;
422 + if (((n >> size) << size) != n) {
423 + throw new IllegalArgumentException("Length " + n + " is not a multiple of " + (1 << size));
426 + // We know the final length of the new map up front.
427 + char[] newmap = new char[n >> size];
428 + // The buffer is used temporarily to hold data for the compressed table
429 + // because we don't know its final length yet.
430 + char[] buffer = new char[n];
433 + for (int i = 0; i < n; i += m) {
434 + // For every block of size m in the original map...
436 + for (int j = 0; j < ptr; j += m) {
437 + // Find out whether there is already a block just like it in the buffer.
438 + for (int k = 0; k < m; k++) {
439 + if (buffer[j + k] != map[i + k])
442 + // There is a block just like it at position j, so just
443 + // put its index into the new map (thereby sharing it).
444 + newmap[i >> size] = (char)(j >> size);
447 + // There is no block just like it already, so add it to
448 + // the buffer and put its index into the new map.
449 + System.arraycopy(map, i, buffer, ptr, m);
450 + newmap[i >> size] = (char)(ptr >> size);
453 + // Now we know how char the compressed table should be,
454 + // so create a new array and copy data from the temporary buffer.
455 + char[] newdata = new char[ptr];
456 + System.arraycopy(buffer, 0, newdata, 0, ptr);
457 + // Return the new map and the new data table.
458 + return new char[][]{newmap, newdata};
462 + static Object[] generateForSizes(char[] map, int[] sizes) {
464 + int[] shifts = new int[sizes.length];
465 + for (int k = sizes.length - 1; k >= 0; k--) {
469 + if ((1 << sum) < map.length || (1 << (sum - 1)) >= map.length) {
470 + throw new IllegalArgumentException("Bit field widths total to " + sum +
471 + ": wrong total for map of size " + map.length);
473 + // need a table for each set of lookup bits in char
474 + char[][] tables = new char[sizes.length][];
475 + // the last table is the map
476 + tables[sizes.length - 1] = map;
477 + for (int j = sizes.length - 1; j > 0; j--) {
478 + //if (verbose && bins == 0)
479 + // System.err.println("Building map " + (j + 1) + " of bit width " + sizes[j]);
480 + char[][] temp = buildTable(tables[j], sizes[j]);
481 + tables[j - 1] = temp[0];
482 + tables[j] = temp[1];
484 + boolean[] preshifted = new boolean[sizes.length];
485 + int[] zeroextend = new int[sizes.length];
486 + int[] bytes = new int[sizes.length];
487 + for (int j = 0; j < sizes.length - 1; j++) {
488 + int len = tables[j + 1].length;
489 + int size = sizes[j + 1];
490 + if (len > 0x100 && (len >> size) <= 0x100) {
492 + preshifted[j] = false;
494 + else if (len > 0x10000 && (len >> size) <= 0x10000) {
496 + preshifted[j] = false;
499 + preshifted[j] = true;
501 + if (len > 0x7F && len <= 0xFF) {
503 + else if (len > 0x7FFF && len <= 0xFFFF) {
504 + zeroextend[j] = 0xFFFF;
509 + if (len <= 0x100) bytes[j] = 1;
510 + else if (len <= 0x10000) bytes[j] = 2;
513 + preshifted[sizes.length - 1] = true;
514 + zeroextend[sizes.length - 1] = 0;
515 + bytes[sizes.length - 1] = 0;
517 + return new Object[] { tables, bytes, preshifted, shifts, zeroextend};
520 + static String genAccess(String tbl, String var, int bits, int[] sizes, int[] shifts, int[] zeroextend, boolean[] preshifted) {
521 + String access = null;
522 + int bitoffset = bits == 1 ? 5 : bits == 2 ? 4 : bits == 4 ? 3 : 0;
523 + for (int k = 0; k < sizes.length; k++) {
524 + String tableName = "ZZ_CMAP_" + String.valueOf((char) ('Z' - k));
525 + int offset = ((k < sizes.length - 1) ? 0 : bitoffset);
526 + int shift = shifts[k] + offset;
527 + String shifted = (shift == 0) ? var : "(" + var + ">>" + shift + ")";
528 + int mask = (1 << (sizes[k] - offset)) - 1;
529 + String masked = (k == 0) ? shifted :
530 + "(" + shifted + "&0x" + Integer.toHexString(mask) + ")";
531 + String index = (k == 0) ? masked :
532 + (mask == 0) ? access : "(" + access + "|" + masked + ")";
533 + String indexNoParens = (index.charAt(0) != '(') ? index :
534 + index.substring(1, index.length() - 1);
535 + String tblname = (k == sizes.length - 1) ? tbl : tableName;
536 + String fetched = tblname + "[" + indexNoParens + "]";
537 + String zeroextended = (zeroextend[k] == 0) ? fetched :
538 + "(" + fetched + "&0x" + Integer.toHexString(zeroextend[k]) + ")";
539 + int adjustment = preshifted[k] ? 0 :
540 + sizes[k + 1] - ((k == sizes.length - 2) ? bitoffset : 0);
541 + String adjusted = (preshifted[k] || adjustment == 0) ? zeroextended :
542 + "(" + zeroextended + "<<" + adjustment + ")";
543 + String bitshift = (bits == 1) ? "(" + var + "&0x1F)" :
544 + (bits == 2) ? "((" + var + "&0xF)<<1)" :
545 + (bits == 4) ? "((" + var + "&7)<<2)" : null;
546 + String extracted = ((k < sizes.length - 1) || (bits >= 8)) ? adjusted :
547 + "((" + adjusted + ">>" + bitshift + ")&" +
548 + (bits == 4 ? "0xF" : String.valueOf((1 << bits) - 1)) + ")";
549 + access = extracted;
555 + static String genTables(char[][] tables, int[] sizes, int[] bytes, boolean[] preshifted) {
556 + StringBuffer result = new StringBuffer();
558 + for (int k = 0; k < sizes.length - 1; k++) {
559 + String tableName = "ZZ_CMAP_" + String.valueOf((char)('Z' - k));
560 + genTable(result, tableName, tables[k], 0, bytes[k] << 3, preshifted[k], sizes[k + 1]);
561 + result.append("\n");
563 + genTable(result, "ZZ_CMAP_" + "A", tables[sizes.length - 1], 0, 16, false, 0);
565 + return result.toString();
568 + private static void genTable(StringBuffer result, String name,
569 + char[] table, int extract, int bits,
570 + boolean preshifted, int shift) {
572 + String atype = "char";
573 + int entriesPerChar = 1;
574 + boolean noConversion = atype.equals("char");
575 + boolean shiftEntries = preshifted && shift != 0;
577 + result.append(" /*");
578 + result.append(" The ").append(name).append(" table has ").append(table.length).append(" entries */\n");
579 + result.append(" static final ");
580 + result.append(atype);
581 + result.append(" ").append(name).append("[");
582 + result.append("] = zzUnpackCMap(\n");
583 + StringWriter theString = new MyStringWriter();
584 + int entriesInCharSoFar = 0;
585 + char ch = '\u0000';
586 + for (int j = 0; j < table.length; ++j) {
587 + int entry = table[j] >> extract;
588 + if (shiftEntries) entry <<= shift;
589 + if (entry >= (1L << bits)) {
590 + throw new IllegalArgumentException("Entry too big");
592 + // Pack multiple entries into a character
593 + ch = (char) (((int) ch >> bits) | (entry << (entriesPerChar - 1) * bits));
594 + ++entriesInCharSoFar;
595 + if (entriesInCharSoFar == entriesPerChar) {
596 + // Character is full
597 + theString.append(ch);
598 + entriesInCharSoFar = 0;
602 + if (entriesInCharSoFar > 0) {
603 + while (entriesInCharSoFar < entriesPerChar) {
604 + ch = (char) ((int) ch >> bits);
605 + ++entriesInCharSoFar;
607 + theString.append(ch);
610 + result.append(theString.getBuffer());
611 + if (noConversion) {
612 + result.append(")");
614 + result.append(";\n");
617 + private static class MyStringWriter extends StringWriter {
622 + public StringWriter append(char c) {
623 + if (cur == c && count <= 0xffff) {
628 + appendImpl((char) count);
638 + public void flush() {
640 + appendImpl((char) count);
652 + void appendImpl(char c) {
653 + if (getBuffer().length() >= limit) {
654 + if (limit > 0) super.append("\"+\n");
655 + limit = getBuffer().length() + 78;
656 + super.append(" \"");
659 + super.append("\\u");
660 + if (c < 0x1000) super.append("0");
661 + super.append(Integer.toHexString(c));
664 + super.append("\\");
665 + super.append(Integer.toOctalString(c));