Index: jflex/src/main/java/jflex/Emitter.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- jflex/src/main/java/jflex/Emitter.java (revision de2312d6891de7fdf2752ad8e0a45983202ad5e0) +++ jflex/src/main/java/jflex/Emitter.java (revision 5b6b151f542d88ffb2ea4363c6ee8f23c7a49b09) @@ -10,7 +10,9 @@ package jflex; import java.io.*; -import java.util.*; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -373,7 +375,7 @@ private void emitNextInput() { println(" if (zzCurrentPosL < zzEndReadL) {"); - println(" zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);"); + println(" zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL/*, zzEndReadL*/);"); println(" zzCurrentPosL += Character.charCount(zzInput);"); println(" }"); println(" else if (zzAtEOF) {"); @@ -395,14 +397,14 @@ println(" break zzForAction;"); println(" }"); println(" else {"); - println(" zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);"); + println(" zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL/*, zzEndReadL*/);"); println(" zzCurrentPosL += Character.charCount(zzInput);"); println(" }"); println(" }"); } private void emitHeader() { - println("/* The following code was generated by JFlex " + Main.version + " */"); + println("/* The following code was generated by JFlex " + Main.version + " tweaked for IntelliJ platform */"); println(""); } @@ -544,7 +546,7 @@ } - private void emitCharMapInitFunction(int packedCharMapPairs) { + private void emitCharMapInitFunction() { CharClasses cl = parser.getCharClasses(); @@ -558,10 +560,14 @@ println(" * @return the unpacked character translation table"); println(" */"); println(" private static char [] zzUnpackCMap(String packed) {"); - println(" char [] map = new char[0x" + Integer.toHexString(cl.getMaxCharCode() + 1) + "];"); + println(" int size = 0;"); + println(" for (int i = 0, length = packed.length(); i < length; i += 2) {"); + println(" size += packed.charAt(i);"); + println(" }"); + println(" char[] map = new char[size];"); println(" int i = 0; /* index in packed string */"); println(" int j = 0; /* index in unpacked array */"); - println(" while (i < " + 2 * packedCharMapPairs + ") {"); + println(" while (i < packed.length()) {"); println(" int count = packed.charAt(i++);"); println(" char value = packed.charAt(i++);"); println(" do map[j++] = value; while (--count > 0);"); @@ -613,85 +619,46 @@ * the number of char map array entries per value is * ceil(count / 0xFFFF) */ - private int emitCharMapArray() { + private void emitCharMapArray() { CharClasses cl = parser.getCharClasses(); if ( cl.getMaxCharCode() < 256 ) { emitCharMapArrayUnPacked(); - return 0; // the char map array will not be packed } // ignores cl.getMaxCharCode(), emits all intervals instead intervals = cl.getIntervals(); - + - println(""); - println(" /** "); - println(" * Translates characters to character classes"); - println(" */"); - println(" private static final String ZZ_CMAP_PACKED = "); + char[] exploded = new char[cl.getMaxCharCode() + 1]; + for (int i = 0, k = 0; i < intervals.length && k < exploded.length; i ++) { + int count = intervals[i].end - intervals[i].start + 1; + int value = colMap[intervals[i].charClass]; - + - int n = 0; // numbers of entries in current line - print(" \""); - - int i = 0, numPairs = 0; - int count, value; - while ( i < intervals.length ) { - count = intervals[i].end-intervals[i].start+1; - value = colMap[intervals[i].charClass]; - - // count could be >= 0x10000 - while (count > 0xFFFF) { - printUC(0xFFFF); - printUC(value); - count -= 0xFFFF; - numPairs++; - n++; + while (count-- > 0) { + exploded[k++] = (char) value; } - numPairs++; - printUC(count); - printUC(value); - - if (i < intervals.length-1) { - if ( ++n >= 10 ) { - println("\"+"); - print(" \""); - n = 0; - } + } - } - i++; - } + int[] sizes = EmitterCM.findBestSizes(exploded, 3); + Object[] o = EmitterCM.generateForSizes(exploded, sizes); + int totalSize = EmitterCM.getTotalBytes((char[][]) o[0], sizes, (int[]) o[1]); - + - println("\";"); - println(); - + println(""); println(" /** "); println(" * Translates characters to character classes"); + println(" * Chosen bits are " + Arrays.toString(sizes)); + println(" * Total runtime size is " + totalSize + " bytes"); println(" */"); - println(" private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);"); + + println(" public static int ZZ_CMAP(int ch) {\n" + + " return " + EmitterCM.genAccess("ZZ_CMAP_" + "A", "ch", 16, sizes, (int[]) o[3], (int[]) o[4], (boolean[]) o[2]) + ";\n" + + " }"); println(); - return numPairs; - } - - /** - * Print number as octal/unicode escaped string character. - * - * @param c the value to print - * @prec 0 <= c <= 0xFFFF - */ - private void printUC(int c) { - if (c > 255) { - out.print("\\u"); - if (c < 0x1000) out.print("0"); - out.print(Integer.toHexString(c)); + String tables = EmitterCM.genTables((char[][]) o[0], sizes, (int[]) o[1], (boolean[]) o[2]); + println(tables); - } + } - else { - out.print("\\"); - out.print(Integer.toOctalString(c)); - } - } private void emitRowMapArray() { @@ -949,7 +916,7 @@ println(" for (zzCurrentPosL = zzStartRead ;"); println(" zzCurrentPosL < zzMarkedPosL ;"); println(" zzCurrentPosL += zzCharCount ) {"); - println(" zzCh = Character.codePointAt(zzBufferL, zzCurrentPosL, zzMarkedPosL);"); + println(" zzCh = Character.codePointAt(zzBufferL, zzCurrentPosL/*, zzMarkedPosL*/);"); println(" zzCharCount = Character.charCount(zzCh);"); println(" switch (zzCh) {"); println(" case '\\u000B':"); @@ -993,7 +960,7 @@ println(" // peek one character ahead if it is \\n (if we have counted one line too much)"); println(" boolean zzPeek;"); println(" if (zzMarkedPosL < zzEndReadL)"); - println(" zzPeek = zzBufferL[zzMarkedPosL] == '\\n';"); + println(" zzPeek = zzBufferL.charAt(zzMarkedPosL) == '\\n';"); println(" else if (zzAtEOF)"); println(" zzPeek = false;"); println(" else {"); @@ -1004,7 +971,7 @@ println(" if (eof) "); println(" zzPeek = false;"); println(" else "); - println(" zzPeek = zzBufferL[zzMarkedPosL] == '\\n';"); + println(" zzPeek = zzBufferL.charAt(zzMarkedPosL) == '\\n';"); println(" }"); println(" if (zzPeek) yyline--;"); println(" }"); @@ -1016,7 +983,7 @@ // if match was empty, last value of zzAtBOL can be used // zzStartRead is always >= 0 println(" if (zzMarkedPosL > zzStartRead) {"); - println(" switch (zzBufferL[zzMarkedPosL-1]) {"); + println(" switch (zzBufferL.charAt(zzMarkedPosL-1)) {"); println(" case '\\n':"); println(" case '\\u000B':"); println(" case '\\u000C':"); @@ -1027,7 +994,7 @@ println(" break;"); println(" case '\\r': "); println(" if (zzMarkedPosL < zzEndReadL)"); - println(" zzAtBOL = zzBufferL[zzMarkedPosL] != '\\n';"); + println(" zzAtBOL = zzBufferL.charAt(zzMarkedPosL) != '\\n';"); println(" else if (zzAtEOF)"); println(" zzAtBOL = false;"); println(" else {"); @@ -1038,7 +1005,7 @@ println(" if (eof) "); println(" zzAtBOL = false;"); println(" else "); - println(" zzAtBOL = zzBufferL[zzMarkedPosL] != '\\n';"); + println(" zzAtBOL = zzBufferL.charAt(zzMarkedPosL) != '\\n';"); println(" }"); println(" break;"); println(" default:"); @@ -1073,7 +1040,11 @@ private void emitGetRowMapNext() { - println(" int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];"); + CharClasses cl = parser.getCharClasses(); + + boolean unpacked = cl.getMaxCharCode() < 256; + + println(" int zzNext = zzTransL[ zzRowMapL[zzState] + "+ (unpacked? "zzCMapL[zzInput]" : "ZZ_CMAP(zzInput)") +" ];"); println(" if (zzNext == "+DFA.NO_TARGET+") break zzForAction;"); println(" zzState = zzNext;"); println(); @@ -1157,6 +1128,9 @@ } private void emitActions() { + CharClasses cl = parser.getCharClasses(); + boolean unpacked = cl.getMaxCharCode() < 256; + println(" switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {"); int i = actionTable.size()+1; @@ -1170,27 +1144,27 @@ if (action.lookAhead() == Action.FIXED_BASE) { println(" // lookahead expression with fixed base length"); println(" zzMarkedPos = Character.offsetByCodePoints"); - println(" (zzBufferL, zzStartRead, zzEndRead - zzStartRead, zzStartRead, " + action.getLookLength() + ");"); + println(" (zzBufferL/*, zzStartRead, zzEndRead - zzStartRead*/, zzStartRead, " + action.getLookLength() + ");"); } if (action.lookAhead() == Action.FIXED_LOOK || action.lookAhead() == Action.FINITE_CHOICE) { println(" // lookahead expression with fixed lookahead length"); println(" zzMarkedPos = Character.offsetByCodePoints"); - println(" (zzBufferL, zzStartRead, zzEndRead - zzStartRead, zzMarkedPos, -" + action.getLookLength() + ");"); + println(" (zzBufferL/*, zzStartRead, zzEndRead - zzStartRead*/, zzMarkedPos, -" + action.getLookLength() + ");"); } if (action.lookAhead() == Action.GENERAL_LOOK) { println(" // general lookahead, find correct zzMarkedPos"); println(" { int zzFState = "+dfa.entryState[action.getEntryState()]+";"); println(" int zzFPos = zzStartRead;"); - println(" if (zzFin.length <= zzBufferL.length) { zzFin = new boolean[zzBufferL.length+1]; }"); + println(" if (zzFin.length <= zzBufferL.length()) { zzFin = new boolean[zzBufferL.length()+1]; }"); println(" boolean zzFinL[] = zzFin;"); println(" while (zzFState != -1 && zzFPos < zzMarkedPos) {"); println(" zzFinL[zzFPos] = ((zzAttrL[zzFState] & 1) == 1);"); - println(" zzInput = Character.codePointAt(zzBufferL, zzFPos, zzMarkedPos);"); + println(" zzInput = Character.codePointAt(zzBufferL, zzFPos/*, zzMarkedPos*/);"); println(" zzFPos += Character.charCount(zzInput);"); - println(" zzFState = zzTransL[ zzRowMapL[zzFState] + zzCMapL[zzInput] ];"); + println(" zzFState = zzTransL[ zzRowMapL[zzFState] + " + (unpacked ? "zzCMapL[zzInput]" : "ZZ_CMAP(zzInput)") + " ];"); println(" }"); println(" if (zzFState != -1) { zzFinL[zzFPos++] = ((zzAttrL[zzFState] & 1) == 1); } "); println(" while (zzFPos <= zzMarkedPos) {"); @@ -1200,9 +1174,9 @@ println(" zzFState = "+dfa.entryState[action.getEntryState()+1]+";"); println(" zzFPos = zzMarkedPos;"); println(" while (!zzFinL[zzFPos] || (zzAttrL[zzFState] & 1) != 1) {"); - println(" zzInput = Character.codePointBefore(zzBufferL, zzFPos, zzStartRead);"); + println(" zzInput = Character.codePointBefore(zzBufferL, zzFPos/*, zzStartRead*/);"); println(" zzFPos -= Character.charCount(zzInput);"); - println(" zzFState = zzTransL[ zzRowMapL[zzFState] + zzCMapL[zzInput] ];"); + println(" zzFState = zzTransL[ zzRowMapL[zzFState] + " + (unpacked ? "zzCMapL[zzInput]" : "ZZ_CMAP(zzInput)") + " ];"); println(" };"); println(" zzMarkedPos = zzFPos;"); println(" }"); @@ -1230,10 +1204,10 @@ EOFActions eofActions = parser.getEOFActions(); if ( scanner.eofCode != null ) - println(" zzDoEOF();"); + println(" zzDoEOF();"); if ( eofActions.numActions() > 0 ) { - println(" switch (zzLexicalState) {"); + println(" switch (zzLexicalState) {"); // pick a start value for break case labels. // must be larger than any value of a lex state: @@ -1420,7 +1394,7 @@ emitLexicalStates(); - int packedCharMapPairs = emitCharMapArray(); + emitCharMapArray(); emitActionTable(); @@ -1444,7 +1418,7 @@ emitConstructorDecl(); - emitCharMapInitFunction(packedCharMapPairs); + emitCharMapInitFunction(); if (scanner.debugOption) { println(""); Index: jflex/src/main/java/jflex/EmitterCM.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- jflex/src/main/java/jflex/EmitterCM.java (revision 5b6b151f542d88ffb2ea4363c6ee8f23c7a49b09) +++ jflex/src/main/java/jflex/EmitterCM.java (revision 5b6b151f542d88ffb2ea4363c6ee8f23c7a49b09) @@ -0,0 +1,328 @@ +package jflex; + +import java.io.StringWriter; + +/** + * Based on generatecharacter/GenerateCharacter.java + * from http://hg.openjdk.java.net tailored to our needs + * + * @author gregsh + */ +class EmitterCM { + + static int[] findBestSizes(char[] map, int partitions) { + int[] curMin = {Integer.MAX_VALUE}; + int[][] minSizes = {null}; + while (partitions > 0) { + permuteSizesAndFindBest(map, new int[partitions--], curMin, minSizes); + } + if (minSizes[0] == null) { + throw new IllegalArgumentException("best sizes not found"); + } + return minSizes[0]; + } + + private static void permuteSizesAndFindBest(char[] map, int[] sizes, int[] curMin, int[][] minSizes) { + int bitsTotal = 0; // 21 for 0x10ffff + for (int i = map.length; i > 0; i >>= 1) { + bitsTotal++; + } + + int last = sizes.length - 1; + + for (int j = last, sum = 0; true ; j = last, sum = 0) { + for (int i = 0; i <= last; i++) { + if (sizes[i] == 0 && i != last) { sum = 0; break; } + sum += sizes[i]; + } + if (sum == bitsTotal) { + int newMin = tryGenerateTables(map, sizes); + //System.out.println(Arrays.toString(sizes) + " -> " + newMin); + if (curMin[0] > newMin) { + curMin[0] = newMin; + minSizes[0] = sizes.clone(); + } + } + + while (j >= 0 && sizes[j] == bitsTotal) j --; + if (j < 0) break; + + for (int j0 = j; j0 < last; j0++) { + sizes[j0 + 1] = 0; + } + sizes[j]++; + } + } + + private static int tryGenerateTables(char[] map, int[] sizes) { + try { + Object[] o = generateForSizes(map, sizes); + char[][] tables = (char[][])o[0]; + int[] bytes = (int[])o[1]; + return getTotalBytes(tables, sizes, bytes); + } + catch (IllegalArgumentException e) { + return Integer.MAX_VALUE; + } + } + + static int getTotalBytes(char[][] tables, int[] sizes, int[] bytes) { + int totalBytes = 0; + for (int k = 0; k < sizes.length - 1; k++) { + totalBytes += tables[k].length * bytes[k] << 1; + } + totalBytes += ((((tables[sizes.length - 1].length * 32) + 31) >> 5) << 1); + return totalBytes; + } + + + private static char[][] buildTable(char[] map, int size) { + int n = map.length; + if (((n >> size) << size) != n) { + throw new IllegalArgumentException("Length " + n + " is not a multiple of " + (1 << size)); + } + int m = 1 << size; + // We know the final length of the new map up front. + char[] newmap = new char[n >> size]; + // The buffer is used temporarily to hold data for the compressed table + // because we don't know its final length yet. + char[] buffer = new char[n]; + int ptr = 0; + OUTER: + for (int i = 0; i < n; i += m) { + // For every block of size m in the original map... + MIDDLE: + for (int j = 0; j < ptr; j += m) { + // Find out whether there is already a block just like it in the buffer. + for (int k = 0; k < m; k++) { + if (buffer[j + k] != map[i + k]) + continue MIDDLE; + } + // There is a block just like it at position j, so just + // put its index into the new map (thereby sharing it). + newmap[i >> size] = (char)(j >> size); + continue OUTER; + } // end MIDDLE + // There is no block just like it already, so add it to + // the buffer and put its index into the new map. + System.arraycopy(map, i, buffer, ptr, m); + newmap[i >> size] = (char)(ptr >> size); + ptr += m; + } // end OUTER + // Now we know how char the compressed table should be, + // so create a new array and copy data from the temporary buffer. + char[] newdata = new char[ptr]; + System.arraycopy(buffer, 0, newdata, 0, ptr); + // Return the new map and the new data table. + return new char[][]{newmap, newdata}; + } + + + static Object[] generateForSizes(char[] map, int[] sizes) { + int sum = 0; + int[] shifts = new int[sizes.length]; + for (int k = sizes.length - 1; k >= 0; k--) { + shifts[k] = sum; + sum += sizes[k]; + } + if ((1 << sum) < map.length || (1 << (sum - 1)) >= map.length) { + throw new IllegalArgumentException("Bit field widths total to " + sum + + ": wrong total for map of size " + map.length); + } + // need a table for each set of lookup bits in char + char[][] tables = new char[sizes.length][]; + // the last table is the map + tables[sizes.length - 1] = map; + for (int j = sizes.length - 1; j > 0; j--) { + //if (verbose && bins == 0) + // System.err.println("Building map " + (j + 1) + " of bit width " + sizes[j]); + char[][] temp = buildTable(tables[j], sizes[j]); + tables[j - 1] = temp[0]; + tables[j] = temp[1]; + } + boolean[] preshifted = new boolean[sizes.length]; + int[] zeroextend = new int[sizes.length]; + int[] bytes = new int[sizes.length]; + for (int j = 0; j < sizes.length - 1; j++) { + int len = tables[j + 1].length; + int size = sizes[j + 1]; + if (len > 0x100 && (len >> size) <= 0x100) { + len >>= size; + preshifted[j] = false; + } + else if (len > 0x10000 && (len >> size) <= 0x10000) { + len >>= size; + preshifted[j] = false; + } + else { + preshifted[j] = true; + } + if (len > 0x7F && len <= 0xFF) { + } + else if (len > 0x7FFF && len <= 0xFFFF) { + zeroextend[j] = 0xFFFF; + } + else { + zeroextend[j] = 0; + } + if (len <= 0x100) bytes[j] = 1; + else if (len <= 0x10000) bytes[j] = 2; + else bytes[j] = 4; + } + preshifted[sizes.length - 1] = true; + zeroextend[sizes.length - 1] = 0; + bytes[sizes.length - 1] = 0; + + return new Object[] { tables, bytes, preshifted, shifts, zeroextend}; + } + + static String genAccess(String tbl, String var, int bits, int[] sizes, int[] shifts, int[] zeroextend, boolean[] preshifted) { + String access = null; + int bitoffset = bits == 1 ? 5 : bits == 2 ? 4 : bits == 4 ? 3 : 0; + for (int k = 0; k < sizes.length; k++) { + String tableName = "ZZ_CMAP_" + String.valueOf((char) ('Z' - k)); + int offset = ((k < sizes.length - 1) ? 0 : bitoffset); + int shift = shifts[k] + offset; + String shifted = (shift == 0) ? var : "(" + var + ">>" + shift + ")"; + int mask = (1 << (sizes[k] - offset)) - 1; + String masked = (k == 0) ? shifted : + "(" + shifted + "&0x" + Integer.toHexString(mask) + ")"; + String index = (k == 0) ? masked : + (mask == 0) ? access : "(" + access + "|" + masked + ")"; + String indexNoParens = (index.charAt(0) != '(') ? index : + index.substring(1, index.length() - 1); + String tblname = (k == sizes.length - 1) ? tbl : tableName; + String fetched = tblname + "[" + indexNoParens + "]"; + String zeroextended = (zeroextend[k] == 0) ? fetched : + "(" + fetched + "&0x" + Integer.toHexString(zeroextend[k]) + ")"; + int adjustment = preshifted[k] ? 0 : + sizes[k + 1] - ((k == sizes.length - 2) ? bitoffset : 0); + String adjusted = (preshifted[k] || adjustment == 0) ? zeroextended : + "(" + zeroextended + "<<" + adjustment + ")"; + String bitshift = (bits == 1) ? "(" + var + "&0x1F)" : + (bits == 2) ? "((" + var + "&0xF)<<1)" : + (bits == 4) ? "((" + var + "&7)<<2)" : null; + String extracted = ((k < sizes.length - 1) || (bits >= 8)) ? adjusted : + "((" + adjusted + ">>" + bitshift + ")&" + + (bits == 4 ? "0xF" : String.valueOf((1 << bits) - 1)) + ")"; + access = extracted; + } + return access; + } + + + static String genTables(char[][] tables, int[] sizes, int[] bytes, boolean[] preshifted) { + StringBuffer result = new StringBuffer(); + + for (int k = 0; k < sizes.length - 1; k++) { + String tableName = "ZZ_CMAP_" + String.valueOf((char)('Z' - k)); + genTable(result, tableName, tables[k], 0, bytes[k] << 3, preshifted[k], sizes[k + 1]); + result.append("\n"); + } + genTable(result, "ZZ_CMAP_" + "A", tables[sizes.length - 1], 0, 16, false, 0); + + return result.toString(); + } + + private static void genTable(StringBuffer result, String name, + char[] table, int extract, int bits, + boolean preshifted, int shift) { + + String atype = "char"; + int entriesPerChar = 1; + boolean noConversion = atype.equals("char"); + boolean shiftEntries = preshifted && shift != 0; + + result.append(" /*"); + result.append(" The ").append(name).append(" table has ").append(table.length).append(" entries */\n"); + result.append(" static final "); + result.append(atype); + result.append(" ").append(name).append("["); + result.append("] = zzUnpackCMap(\n"); + StringWriter theString = new MyStringWriter(); + int entriesInCharSoFar = 0; + char ch = '\u0000'; + for (int j = 0; j < table.length; ++j) { + int entry = table[j] >> extract; + if (shiftEntries) entry <<= shift; + if (entry >= (1L << bits)) { + throw new IllegalArgumentException("Entry too big"); + } + // Pack multiple entries into a character + ch = (char) (((int) ch >> bits) | (entry << (entriesPerChar - 1) * bits)); + ++entriesInCharSoFar; + if (entriesInCharSoFar == entriesPerChar) { + // Character is full + theString.append(ch); + entriesInCharSoFar = 0; + ch = '\u0000'; + } + } + if (entriesInCharSoFar > 0) { + while (entriesInCharSoFar < entriesPerChar) { + ch = (char) ((int) ch >> bits); + ++entriesInCharSoFar; + } + theString.append(ch); + } + theString.flush(); + result.append(theString.getBuffer()); + if (noConversion) { + result.append(")"); + } + result.append(";\n"); + } + + private static class MyStringWriter extends StringWriter { + char cur; + int count; + + @Override + public StringWriter append(char c) { + if (cur == c && count <= 0xffff) { + count++; + } + else { + if (count > 0) { + appendImpl((char) count); + appendImpl(cur); + } + cur = c; + count = 1; + } + return this; + } + + @Override + public void flush() { + if (count > 0) { + appendImpl((char) count); + appendImpl(cur); + count = 0; + } + if (limit > 0) { + super.append('"'); + limit = 0; + } + } + + int limit; + + void appendImpl(char c) { + if (getBuffer().length() >= limit) { + if (limit > 0) super.append("\"+\n"); + limit = getBuffer().length() + 78; + super.append(" \""); + } + if (c > 255) { + super.append("\\u"); + if (c < 0x1000) super.append("0"); + super.append(Integer.toHexString(c)); + } + else { + super.append("\\"); + super.append(Integer.toOctalString(c)); + } + } + } +}