私は .jjt ファイルで記述したパーサーを持っています。これは this と非常によく似ており、私の場合の唯一の変更点は、独自の式評価メソッドを組み込んでいることです。現在、1つの式を解析するのに約1ミリ秒かかっており、このパーサーのパフォーマンスを改善する必要があります。
VisualVM を使ってプロファイリングしたところ、ファイルの各行を ArrayList<String> に読み込み、93個の式(パラメータ値は ArrayList<String> に読み込んだファイルの行から取得)を評価するコードの実行に 44.5 秒かかりました。そのうち約 43 秒は、私のパーサーの parseStream メソッドで費やされていました。
パーサーを改善するために this link を参照し、ERROR_REPORTING オプションを FALSE に設定してみましたが、効果はありませんでした。
Javaパーサーのパフォーマンスを向上させる
EDIT 1:
以下が parser.jjt ファイルです。1000行のファイルを対象にアプリケーションをプロファイリングしたところ、最も時間がかかっていたメソッドは Start() でした。これがボトルネックになっているはずです。
// Generator options for this JavaCC/JJTree grammar.
// NOTE(review): option meanings below follow the JavaCC/JJTree documentation;
// confirm against the generator version actually in use.
options {
JAVA_UNICODE_ESCAPE = true;
// MULTI / VISITOR: the grammar names its node types (#Start, #FunNode, #VarNode,
// #Constant) and is meant to be walked by a visitor whose methods may throw
// the exception type named in VISITOR_EXCEPTION.
MULTI = true;
VISITOR = true;
VISITOR_EXCEPTION = "ParseException";
// Productions in this file only carry a node when they have an explicit
// #Name descriptor; all others are declared without one.
NODE_DEFAULT_VOID = true;
// NODE_PACKAGE = "org.nfunk.jep.parser";
// BUILD_NODE_FILES=false;
// Non-static parser: the Parser class below keeps per-instance fields
// (jep, symTab, opSet).
STATIC = false;
// The three debug/tracing options are left disabled (commented out).
// DEBUG_TOKEN_MANAGER = true;
// DEBUG_PARSER = true;
// DEBUG_LOOKAHEAD = true;
}
/***************************************************************
PARSER BEGIN
***************************************************************/
PARSER_BEGIN(Parser)
package org.nfunk.jep;
import java.util.Vector;
import org.nfunk.jep.function.*;
import org.nfunk.jep.type.*;
public class Parser {
    /** JEP instance supplying the function table, settings and error list. */
    private JEP jep;
    /** Symbol table of {@link #jep}, cached by {@link #restart}. */
    private SymbolTable symTab;
    /** Operator set of {@link #jep}, cached by {@link #restart}. */
    private OperatorSet opSet;
    /** Lexical state the token manager is switched to on every restart. */
    private int initialTokenManagerState = DEFAULT;

    /**
     * Parses a single expression from the given stream.
     *
     * @param stream source of the expression text
     * @param jep_in JEP instance whose symbol table, operator set and
     *               function table are used while parsing
     * @return the first child of the Start node, i.e. the expression's root
     * @throws ParseException if the input is syntactically invalid or no
     *                        expression was entered
     */
    public Node parseStream(java.io.Reader stream, JEP jep_in)
            throws ParseException {
        restart(stream,jep_in);
        // Parse the expression, and return the first child of the Start node.
        // NOTE(review): enable_tracing() is generated by JavaCC; it is
        // presumably a no-op while DEBUG_PARSER is off - confirm in the
        // generated Parser.java.
        enable_tracing();
        Node node = Start();
        if (node == null) throw new ParseException("No expression entered");
        return node.jjtGetChild(0);
    }

    /**
     * Restart the parse with the given stream.
     * Re-initialises the parser on the stream, switches the token manager
     * to the configured initial lexical state, and caches the symbol table
     * and operator set of the given JEP instance.
     *
     * @param stream source of the text to parse
     * @param jep_in JEP instance to parse against
     * @since 2.3.0 beta 1
     */
    public void restart(java.io.Reader stream, JEP jep_in)
    {
        ReInit(stream);
        this.token_source.SwitchTo(initialTokenManagerState);
        jep = jep_in;
        symTab = jep.getSymbolTable();
        opSet = jep.getOperatorSet();
    }

    /**
     * Continue parsing without re-initialising the stream.
     * Allows re-entrancy of the parser so that strings like
     * "x=1; y=2; z=3;" can be parsed.
     * When a semicolon is encountered parsing finishes, leaving the rest of
     * the string unparsed.
     * Parsing can be resumed from the current position by using this method.
     * For example
     * <pre>
     * XJep j = new XJep();
     * Parser parse = j.getParse();
     * StringReader sr = new StringReader("x=1; y=2; z=3;");
     * parse.restart(sr,j);
     * Node node;
     * try {
     * while((node = j.continueParse())!=null) {
     * j.println(node);
     * } }catch(ParseException e) {}
     * </pre>
     *
     * @return the next expression's root node, or null when Start() yields
     *         no expression (only EOF or a semicolon was found)
     * @throws ParseException if the remaining input is syntactically invalid
     */
    public Node continueParse() throws ParseException
    {
        ASTStart node = Start();
        if (node==null) return null;
        return node.jjtGetChild(0);
    }

    /** Appends an error message to the error list of the current JEP instance. */
    private void addToErrorList(String errorStr) {
        jep.errorList.addElement(errorStr);
    }

    /**
     * Sets the initial state that the token manager is in.
     * Can be used to change how x.x is interpreted, either as a single
     * identifier (DEFAULT) or as x <DOT> x (NO_DOT_IN_IDENTIFIERS)
     * @param state the state to be in. Currently the only legal values are DEFAULT and NO_DOT_IN_IDENTIFIERS
     */
    public void setInitialTokenManagerState(int state)
    {
        initialTokenManagerState = state;
    }

    /**
     * Translate all escape sequences to characters. Inspired by Rob Millar's
     * unescape() method in rcm.util.Str from the Web Sphinx project.
     * <p>
     * Recognised sequences are \t \n \r \b \f \\ \" and \'. An unknown
     * sequence is copied through unchanged, and so is a lone backslash at
     * the very end of the input.
     *
     * @param inputStr String containing escape characters.
     * @return String with all escape sequences replaced.
     */
    private String replaceEscape(String inputStr) {
        // Fast path: nothing to translate, so no buffer is needed.
        if (inputStr.indexOf('\\') == -1) return inputStr;

        int len = inputStr.length();
        int p = 0;
        int i;
        String metachars = "tnrbf\\\"'";
        String chars = "\t\n\r\b\f\\\"'";
        // Local, single-threaded buffer: StringBuilder avoids the
        // synchronisation overhead of StringBuffer.
        StringBuilder output = new StringBuilder(len);
        while ((i = inputStr.indexOf('\\', p)) != -1) {
            // copy the literal text before the backslash without an
            // intermediate substring
            output.append(inputStr, p, i);
            if (i+1 == len) {
                // Lone trailing backslash: advance p to it so the tail copy
                // below emits just the backslash. Without this the text
                // already copied above would be appended a second time.
                p = i;
                break;
            }
            // find metacharacter
            char metac = inputStr.charAt(i+1);
            // find the index of the metac
            int k = metachars.indexOf(metac);
            if (k == -1) {
                // didn't find the metachar, leave sequence as found.
                // This code should be unreachable if the parser
                // is functioning properly because strings containing
                // unknown escape characters should not be accepted.
                output.append('\\');
                output.append(metac);
            } else {
                // its corresponding true char
                output.append(chars.charAt(k));
            }
            // skip over both escape character & metacharacter
            p = i + 2;
        }
        // add the end of the input string to the output
        if (p < len)
            output.append(inputStr, p, len);
        return output.toString();
    }
}
PARSER_END(Parser)
/***************************************************************
SKIP
***************************************************************/
// Input that is skipped in every lexical state (<*>): blanks, tabs, line
// terminators, "//" comments and "/* ... */" comments.
// NOTE(review): the "//" pattern requires a terminating "\n", "\r" or "\r\n",
// so a "//" comment at the very end of the input with no line terminator
// would not match this rule - confirm whether that matters for callers.
<*> SKIP :
{
" "
| "\t"
| "\n"
| "\r"
| <"//" (~["\n","\r"])* ("\n"|"\r"|"\r\n")>
| <"/*" (~["*"])* "*" (~["/"] (~["*"])* "*")* "/">
}
/***************************************************************
TOKENS
***************************************************************/
// Literal tokens, recognised in every lexical state (<*>).
// Names prefixed with '#' (DECIMAL_LITERAL, EXPONENT) are private helper
// patterns used only inside other token definitions.
<*> TOKEN : /* LITERALS */
{
// One or more decimal digits.
< INTEGER_LITERAL:
<DECIMAL_LITERAL>
>
|
< #DECIMAL_LITERAL: ["0"-"9"] (["0"-"9"])* >
|
// Three accepted shapes: digits "." [digits] [exponent],
// "." digits [exponent], and digits exponent.
< FLOATING_POINT_LITERAL:
(["0"-"9"])+ "." (["0"-"9"])* (<EXPONENT>)?
| "." (["0"-"9"])+ (<EXPONENT>)?
| (["0"-"9"])+ <EXPONENT>
>
|
< #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
|
// Double-quoted string on a single line. A backslash is only allowed when
// followed by one of: n t b r f \ ' "
< STRING_LITERAL:
"\""
((~["\"","\\","\n","\r"])
| ("\\" ["n","t","b","r","f","\\","'","\""])
)*
"\""
>
}
/* IDENTIFIERS
Letters before version 2.22 were
< #LETTER: ["_","a"-"z","A"-"Z"] >
Since version 2.3.0.1 the presence of '.' in an identifier is switchable:
in the DEFAULT lexical state identifiers can contain a '.',
in the NO_DOT_IN_IDENTIFIERS state identifiers cannot contain a '.'.
The state can be set by using
Parser.setInitialTokenManagerState
*/
// Identifier token for the DEFAULT state: a letter followed by any mix of
// letters, digits and '.'.
// The token name is spelled INDENTIFIER1 (sic); other parts of this grammar
// refer to it by that exact name.
<DEFAULT> TOKEN:
{
<INDENTIFIER1: <LETTER1>(<LETTER1>|<DIGIT1>|".")*>
|
< #LETTER1:
[
"\u0024", // $
"\u0041"-"\u005a", // A - Z
"\u005f", // _
"\u0061"-"\u007a", // a - z
"\u00c0"-"\u00d6", // Upper case symbols of Latin-1 Supplement
"\u00d8"-"\u00f6", // Lower case symbols of Latin-1 Supplement
"\u00f8"-"\u00ff", // More lower case symbols of Latin-1 Supplement
"\u0100"-"\u1fff", // Many languages (including Greek)
"\u3040"-"\u318f", // Hiragana, Katakana, Bopomofo, Hangul Compatibility Jamo
"\u3300"-"\u337f", // CJK Compatibility
"\u3400"-"\u3d2d", // CJK Unified Ideographs Extension A
"\u4e00"-"\u9fff", // CJK Unified Ideographs
"\uf900"-"\ufaff" // CJK Compatibility Ideographs
]
>
|
< #DIGIT1: ["0"-"9"] >
}
// Identifier token for the NO_DOT_IN_IDENTIFIERS state: a letter followed by
// letters and digits only, so '.' is tokenised separately.
// The letter set is the same list of ranges as LETTER1 in the DEFAULT state.
// The token name is spelled INDENTIFIER2 (sic); other parts of this grammar
// refer to it by that exact name.
<NO_DOT_IN_IDENTIFIERS> TOKEN:
{
<INDENTIFIER2: <LETTER2>(<LETTER2>|<DIGIT2>)*>
|
< #LETTER2:
[
"\u0024", // $
"\u0041"-"\u005a", // A - Z
"\u005f", // _
"\u0061"-"\u007a", // a - z
"\u00c0"-"\u00d6", // Upper case symbols of Latin-1 Supplement
"\u00d8"-"\u00f6", // Lower case symbols of Latin-1 Supplement
"\u00f8"-"\u00ff", // More lower case symbols of Latin-1 Supplement
"\u0100"-"\u1fff", // Many languages (including Greek)
"\u3040"-"\u318f", // Hiragana, Katakana, Bopomofo, Hangul Compatibility Jamo
"\u3300"-"\u337f", // CJK Compatibility
"\u3400"-"\u3d2d", // CJK Unified Ideographs Extension A
"\u4e00"-"\u9fff", // CJK Unified Ideographs
"\uf900"-"\ufaff" // CJK Compatibility Ideographs
]
>
|
< #DIGIT2: ["0"-"9"] >
}
/* OPERATORS */
// Operator and punctuation tokens, recognised in every lexical state (<*>).
// Entries tagged "rjm" keep the original author's marker.
// COLON is defined here but is only referenced from commented-out
// productions in this file.
<*> TOKEN:
{
< ASSIGN:"=" > // rjm
| < SEMI: ";" > // rjm
| < COMMA: "," > // rjm
| < GT: ">" >
| < LT: "<" >
| < EQ: "==" >
| < LE: "<=" >
| < GE: ">=" >
| < NE: "!=" >
| < AND: "&&" >
| < OR: "||" >
| < PLUS: "+" >
| < MINUS:"-" >
| < MUL: "*" >
| < DOT: "." > // rjm
| < DIV: "/" >
| < MOD: "%" >
| < NOT: "!" >
| < POWER:"^" >
| < CROSS:"^^" > // rjm
| < LSQ: "[" > // rjm
| < RSQ: "]" > // rjm
| < LRND: "(" > // rjm
| < RRND: ")" > // rjm
| < COLON: ":" > // rjm
}
/***************************************************************
GRAMMAR START
***************************************************************/
// Entry production. Parses one expression terminated by end of input or a
// semicolon and returns the Start node; returns null when the terminator is
// found with no expression in front of it.
ASTStart Start() #Start :
{
}
{
Expression() (<EOF> | <SEMI>) { return jjtThis; }
| (<EOF> | <SEMI>)
{
// njf - The next line has been commented out since 2.3.0 because
// two "No expression entered" errors were being reported
// in EvaluatorVisitor and Console (one from here,
// the other from parseStream()).
// Decided to just return null, and handle the error
// in parseStream().
// addToErrorList("No expression entered");
return null;
}
}
// Expressions can be like
// x=3
// x=y=3 parsed as x=(y=3)
// An assignment is chosen only when the upcoming input matches
// LValue() followed by '='; everything else is parsed as RightExpression().
void Expression() : {}
{
LOOKAHEAD(LValue() <ASSIGN>) // needed to prevent a javacc warning about left recursion
AssignExpression() // rjm changes from OrExpression
|
RightExpression()
}
// Assignment: LValue '=' Expression, built as a two-child FunNode carrying
// the assign operator. The right-hand side is a full Expression(), so it may
// itself be an assignment.
// Throws ParseException when the JEP instance does not allow assignment.
void AssignExpression() : {} // rjm addition
{
(LValue() <ASSIGN> Expression()
{
if (!jep.getAllowAssignment()) throw new ParseException(
"Syntax Error (assignment not enabled)");
jjtThis.setOperator(opSet.getAssign());
}
)
#FunNode(2)
}
// Any expression that is not an assignment; delegates to OrExpression(),
// the top of the operator chain in this grammar.
void RightExpression() :
{
}
{
OrExpression()
}
// AndExpression ( "||" AndExpression )*
// Each "||" operand pair is wrapped in a two-child FunNode carrying the
// Or operator.
void OrExpression() :
{
}
{
AndExpression()
(
(<OR> AndExpression()
{
jjtThis.setOperator(opSet.getOr());
}
) #FunNode(2)
)*
}
// EqualExpression ( "&&" EqualExpression )*
// Each "&&" operand pair is wrapped in a two-child FunNode carrying the
// And operator.
void AndExpression() :
{
}
{
EqualExpression()
(
(<AND> EqualExpression()
{
jjtThis.setOperator(opSet.getAnd());
}
) #FunNode(2)
)*
}
// RelationalExpression ( ("!=" | "==") RelationalExpression )*
// Each comparison is wrapped in a two-child FunNode carrying the NE or EQ
// operator respectively.
void EqualExpression() :
{
}
{
RelationalExpression()
(
(<NE> RelationalExpression()
{
jjtThis.setOperator(opSet.getNE());
}
) #FunNode(2)
|
(<EQ> RelationalExpression()
{
jjtThis.setOperator(opSet.getEQ());
}
) #FunNode(2)
)*
}
// AdditiveExpression ( ("<" | ">" | "<=" | ">=") AdditiveExpression )*
// Each comparison is wrapped in a two-child FunNode carrying the LT, GT,
// LE or GE operator respectively.
void RelationalExpression() :
{
}
{
AdditiveExpression()
(
(<LT> AdditiveExpression()
{
jjtThis.setOperator(opSet.getLT());
}
) #FunNode(2)
|
(<GT> AdditiveExpression()
{
jjtThis.setOperator(opSet.getGT());
}
) #FunNode(2)
|
(<LE> AdditiveExpression()
{
jjtThis.setOperator(opSet.getLE());
}
) #FunNode(2)
|
(<GE> AdditiveExpression()
{
jjtThis.setOperator(opSet.getGE());
}
) #FunNode(2)
)*
}
// MultiplicativeExpression ( ("+" | "-") MultiplicativeExpression )*
// Each operand pair is wrapped in a two-child FunNode carrying the Add or
// Subtract operator respectively.
void AdditiveExpression() :
{
}
{
MultiplicativeExpression()
(
(<PLUS> MultiplicativeExpression()
{
jjtThis.setOperator(opSet.getAdd());
}
) #FunNode(2)
|
(<MINUS> MultiplicativeExpression()
{
jjtThis.setOperator(opSet.getSubtract());
}
) #FunNode(2)
)*
}
// UnaryExpression followed by any number of multiplicative tails.
// Tails: implicit multiplication (a PowerExpression with no operator token),
// "*", ".", "^^", "/" and "%". Each tail builds a two-child FunNode carrying
// the matching operator.
void MultiplicativeExpression() :
{
}
{
UnaryExpression()
(
(
// Implicit multiplication: two operands written side by side.
// Rejected with a ParseException unless jep.implicitMul is set.
PowerExpression()
{
if (!jep.implicitMul) throw new ParseException(
"Syntax Error (implicit multiplication not enabled)");
jjtThis.setOperator(opSet.getMultiply());
}
) #FunNode(2)
|
(<MUL> UnaryExpression()
{
jjtThis.setOperator(opSet.getMultiply());
}
) #FunNode(2)
|
(<DOT> UnaryExpression()
{
jjtThis.setOperator(opSet.getDot());
}
) #FunNode(2)
|
(<CROSS> UnaryExpression()
{
jjtThis.setOperator(opSet.getCross());
}
) #FunNode(2)
|
(<DIV> UnaryExpression()
{
jjtThis.setOperator(opSet.getDivide());
}
) #FunNode(2)
|
(<MOD> UnaryExpression()
{
jjtThis.setOperator(opSet.getMod());
}
) #FunNode(2)
)*
}
// Prefix operators.
// "+" operand : parsed with no node of its own (no node descriptor here)
// "-" operand : one-child FunNode with the UMinus operator
// "!" operand : one-child FunNode with the Not operator
// otherwise : PowerExpression()
void UnaryExpression() :
{
}
{
(<PLUS> UnaryExpression())
|
(<MINUS> UnaryExpression()
{
jjtThis.setOperator(opSet.getUMinus());
}
) #FunNode(1)
|
(<NOT> UnaryExpression()
{
jjtThis.setOperator(opSet.getNot());
}
) #FunNode(1)
|
PowerExpression()
}
// UnaryExpressionNotPlusMinus [ "^" UnaryExpression ]
// The optional exponent is a UnaryExpression, so it may carry its own sign
// and may itself contain a further "^". The pair is wrapped in a two-child
// FunNode carrying the Power operator.
void PowerExpression() :
{
}
{
UnaryExpressionNotPlusMinus()
[
(<POWER> UnaryExpression()
{
jjtThis.setOperator(opSet.getPower());
}
) #FunNode(2)
]
}
// Primary expressions, tried in this order:
// 1. a constant (string or number)
// 2. an array access, chosen when the input matches ArrayAccess()
// 3. a function call, chosen when the next token is an identifier whose
// image is a key of jep.funTab
// 4. a variable
// 5. a parenthesised expression
// 6. a list expression "[ ... ]"
// The previously declared locals (identString, type) were never used and
// have been removed.
// NOTE(review): the lookahead in (2) speculatively matches a whole
// ArrayAccess() before committing; if profiling attributes parse time to
// lookahead, a bounded lookahead may be worth evaluating.
void UnaryExpressionNotPlusMinus() :
{
}
{
AnyConstant()
|
LOOKAHEAD(ArrayAccess())
ArrayAccess()
|
LOOKAHEAD({ (getToken(1).kind == INDENTIFIER1 || getToken(1).kind == INDENTIFIER2) &&
jep.funTab.containsKey(getToken(1).image) })
Function()
|
Variable()
|
<LRND> Expression() <RRND>
|
// LOOKAHEAD(<LSQ> Expression() <COLON>)
// RangeExpression()
// |
ListExpression()
}
// "[" Expression ( "," Expression )* "]"
// Builds a FunNode carrying the List operator; the operator is set in the
// declaration block, before the elements are parsed. At least one element
// is required.
void ListExpression() #FunNode:
{
jjtThis.setOperator(opSet.getList());
}
{
<LSQ> Expression() (<COMMA> Expression())* <RSQ>
}
/*
void RangeExpression() #FunNode:
{
jjtThis.setOperator(opSet.getRange());
}
{
<LSQ> Expression() (<COLON> Expression())+ <RSQ>
}
*/
// Left-hand side of an assignment: an array access when the input matches
// ArrayAccess(), otherwise a plain variable.
void LValue() :
{
}
{
LOOKAHEAD(ArrayAccess())
ArrayAccess()
| Variable()
}
// Variable immediately followed by a bracketed list, e.g. x[...].
// Combined into a two-child FunNode carrying the Element operator.
// NOTE(review): the #FunNode(2) descriptor is written after the action
// block rather than after a parenthesised group as in the other
// productions - confirm its scope against the JJTree documentation before
// restructuring.
void ArrayAccess() :
{
}
{
Variable() ListExpression()
{
jjtThis.setOperator(opSet.getElement());
} #FunNode(2)
}
// An identifier used as a variable; builds a VarNode.
// - known symbol: the node is bound to the existing variable
// - unknown symbol, jep.allowUndeclared set: the variable is created via
// makeVarIfNeeded and bound
// - unknown symbol otherwise: an "Unrecognized symbol" error is recorded and
// setVar is not called on the node
void Variable() :
{
String identString = "";
}
{
(identString = Identifier()
{
if (symTab.containsKey(identString)) {
jjtThis.setVar(symTab.getVar(identString));
} else {
if (jep.allowUndeclared) {
jjtThis.setVar(symTab.makeVarIfNeeded(identString));
} else {
addToErrorList("Unrecognized symbol \"" + identString +"\"");
}
}
}
) #VarNode
}
// Function call: Identifier "(" ArgumentList ")", built as a FunNode.
// When the identifier is a key of jep.funTab, the node is bound to that
// function and its declared parameter count is passed on to ArgumentList();
// otherwise an "Unrecognized function" error is recorded and reqArguments
// stays 0.
void Function() :
{
int reqArguments = 0;
String identString = "";
}
{
(identString = Identifier()
{
if (jep.funTab.containsKey(identString)) {
//Set number of required arguments
reqArguments =
((PostfixMathCommandI)jep.funTab.get(identString)).getNumberOfParameters();
jjtThis.setFunction(identString,
(PostfixMathCommandI)jep.funTab.get(identString));
} else {
addToErrorList("!!! Unrecognized function \"" + identString +"\"");
}
}
<LRND> ArgumentList(reqArguments, identString) <RRND>
) #FunNode
}
// Optional comma-separated list of argument expressions. Counts them and
// validates the count against the function's requirement.
// reqArguments - declared parameter count; -1 means the count is checked
// by the function's own checkNumberOfParameters(count)
// functionName - name used for the jep.funTab lookup and in error messages
// A mismatch is recorded in the error list; no exception is thrown here.
void ArgumentList(int reqArguments, String functionName) :
{
int count = 0;
String errorStr = "";
}
{
[
Expression() { count++; }
(
<COMMA>
Expression() { count++; }
)*
]
{
if(reqArguments == -1) {
// variable-arity function: let the function decide whether count is legal
if(!((PostfixMathCommandI)jep.funTab.get(functionName)).checkNumberOfParameters(count))
{
errorStr = "Function \"" + functionName +"\" illegal number of arguments " + count;
addToErrorList(errorStr);
}
}
else if (reqArguments != count) {
errorStr = "Function \"" + functionName +"\" requires "
+ reqArguments + " parameter";
// pluralise "parameter" for every count other than 1
if (reqArguments!=1) errorStr += "s";
addToErrorList(errorStr);
}
}
}
// Matches an identifier token of either lexical state (INDENTIFIER1 or
// INDENTIFIER2) and returns its text.
String Identifier() :
{
Token t;
}
{
(t = <INDENTIFIER1> | t = <INDENTIFIER2>) { return t.image; }
}
// A literal constant, built as a Constant node.
// String literals have their surrounding quotes removed and their escape
// sequences translated by replaceEscape(); numeric literals take the value
// returned by RealConstant().
void AnyConstant() #Constant:
{
Token t;
Object value;
}
{
t=<STRING_LITERAL> {
// strip away double quotes at end of string
String temp = (t.image).substring(1,t.image.length()-1);
// replace escape characters
temp = replaceEscape(temp);
jjtThis.setValue(temp);
}
|
value = RealConstant() {
jjtThis.setValue(value);
// (disabled alternative for array constants follows)
// }
// |
// value = Array() {
// jjtThis.setValue(value);
}
}
/*
Vector Array() :
{
Object value;
Vector result = new Vector();
}
{
<LSQ>
value = RealConstant()
{
result.addElement(value);
}
(
<COMMA>
value = RealConstant()
{
result.addElement(value);
}
)*
<RSQ>
{
return result;
}
}
*/
// Matches an integer or floating-point literal and converts its text with
// the JEP number factory.
// Returns the created number, or null when the factory throws; in that case
// a "Can't parse" error is recorded instead of propagating the exception.
Object RealConstant() :
{
Token t;
Object value;
}
{
(t=<INTEGER_LITERAL> | t=<FLOATING_POINT_LITERAL>)
{
try {
value = jep.getNumberFactory().createNumber(t.image);
} catch (Exception e) {
value = null;
addToErrorList("Can't parse \"" + t.image + "\"");
}
return value;
}
}
あなたが書いたコードを投稿してください。 – Jokab
まだ見せてもらっていない文法に、何か見えない形で誤りがあるのでない限り、ここでパーサーのパフォーマンスが本当の問題になっているとは考えられません。本当に毎秒何百万もの式を解析する必要があるのですか? – EJP
ファイル全体をメモリに保持しても、パフォーマンスの向上には確実に役立ちません。行を読み込みながら、その場で解析してはどうですか? – VGR