Javaパーサーのパフォーマンスを向上させる

-1

私は.jjtファイルで記述されたパーサーを持っています。これはthisと非常によく似ており、私の場合の唯一の変更点は、独自の式評価メソッドを組み込んだことです。現在、1つの式を解析するのに約1ミリ秒かかっており、このパーサーのパフォーマンスを改善する必要があります。VisualVMでプロファイリングしたところ、ファイルの行をArrayList<String>に読み込み、93個の式（パラメータ値はArrayList<String>に読み込んだファイルの各行から取得）を評価するコードの実行に44.5秒かかり、そのうち約43秒がパーサーのparseStreamメソッドで費やされていました。パーサーの改善のためにthis linkを参照し、ERROR_REPORTINGオプションをFALSEに設定してみましたが、効果はありませんでした。Javaパーサーのパフォーマンスを向上させる

EDIT 1：

以下がparser.jjtファイルです。1000行のファイルを対象にアプリケーションをプロファイリングしたところ、最も時間がかかっていたメソッドはStart()でした。プロファイリング結果から見て、これがボトルネックになっているはずです。

// JavaCC / JJTree generator options for this grammar.
options { 
    JAVA_UNICODE_ESCAPE = true;   // process Java \uXXXX escapes in the input before tokenising
    MULTI = true;                 // JJTree: generate a separate AST class per node type
    VISITOR = true;               // JJTree: generate a visitor interface and jjtAccept methods
    VISITOR_EXCEPTION = "ParseException"; // exception type declared on the visitor methods
    NODE_DEFAULT_VOID = true;     // productions build no node unless annotated with #Name
// NODE_PACKAGE = "org.nfunk.jep.parser"; 
// BUILD_NODE_FILES=false; 
    STATIC = false;               // generate an instance (non-static) parser
    // The DEBUG_* switches below are left disabled.
// DEBUG_TOKEN_MANAGER = true; 
// DEBUG_PARSER = true; 
// DEBUG_LOOKAHEAD = true; 
} 


/*************************************************************** 
PARSER BEGIN 
***************************************************************/ 

PARSER_BEGIN(Parser) 
package org.nfunk.jep; 

import java.util.Vector; 
import org.nfunk.jep.function.*; 
import org.nfunk.jep.type.*; 

/**
 * Hand-written members of the parser class. JavaCC merges these with the
 * code it generates from the grammar productions of this file (for example
 * {@code Start()}, {@code ReInit(...)} and {@code token_source}, which are
 * used below but not declared here).
 */
public class Parser {
    /** Letters that may follow a backslash inside a string literal. */
    private static final String ESCAPE_LETTERS = "tnrbf\\\"'";
    /** Characters those letters stand for; index-aligned with {@link #ESCAPE_LETTERS}. */
    private static final String ESCAPE_CHARS = "\t\n\r\b\f\\\"'";

    private JEP jep;
    private SymbolTable symTab;
    private OperatorSet opSet;
    private int initialTokenManagerState = DEFAULT;

    /**
     * Parses one expression from the given stream.
     *
     * @param stream source of the expression text
     * @param jep_in the JEP instance whose symbol table and operator set are used
     * @return the first child of the node produced by {@code Start()}
     * @throws ParseException if parsing fails or if no expression was entered
     */
    public Node parseStream(java.io.Reader stream, JEP jep_in)
            throws ParseException {
        restart(stream, jep_in);
        // enable_tracing() is not declared in this file; presumably it is part
        // of the JavaCC-generated parser code.
        enable_tracing();
        // Parse the expression and return its root node (the child of Start).
        Node node = Start();
        if (node == null) throw new ParseException("No expression entered");
        return node.jjtGetChild(0);
    }

    /**
     * Restart the parse with the given stream.
     * Re-initialises the parser on the stream, switches the token manager to
     * the configured initial lexical state and picks up the symbol table and
     * operator set of {@code jep_in}.
     *
     * @param stream source of the expression text
     * @param jep_in the JEP instance to parse for
     * @since 2.3.0 beta 1
     */
    public void restart(java.io.Reader stream, JEP jep_in)
    {
        ReInit(stream);
        this.token_source.SwitchTo(initialTokenManagerState);
        jep = jep_in;
        symTab = jep.getSymbolTable();
        opSet = jep.getOperatorSet();
    }

    /**
     * Continue parsing without re-initialising the stream.
     * Allows reentrancy of the parser so that strings like
     * "x=1; y=2; z=3;" can be parsed.
     * When a semicolon is encountered parsing finishes, leaving the rest of the string unparsed.
     * Parsing can be resumed from the current position by using this method.
     * For example
     * <pre>
     * XJep j = new XJep();
     * Parser parse = j.getParse();
     * StringReader sr = new StringReader("x=1; y=2; z=3;");
     * parse.restart(sr,j);
     * Node node;
     * try {
     * while((node = j.continueParse())!=null) {
     * j.println(node);
     * } }catch(ParseException e) {}
     * </pre>
     *
     * @return the next expression's root node, or null when {@code Start()}
     *         returns null (terminator reached with no expression before it)
     * @throws ParseException if parsing fails
     */
    public Node continueParse() throws ParseException
    {
        ASTStart node = Start();
        if (node==null) return null;
        return node.jjtGetChild(0);
    }

    /** Appends a message to the error list of the current JEP instance. */
    private void addToErrorList(String errorStr) {
        jep.errorList.addElement(errorStr);
    }

    /**
     * Sets the initial state that the token manager is in.
     * Can be used to change how x.x is interpreted, either as a single
     * identifier (DEFAULT) or as x <DOT> x (NO_DOT_IN_IDENTIFIERS).
     * The state takes effect on the next call to {@link #restart}.
     *
     * @param state the state to be in. Currently the only legal values are DEFAULT and NO_DOT_IN_IDENTIFIER
     */
    public void setInitialTokenManagerState(int state)
    {
        initialTokenManagerState = state;
    }

    /**
     * Translate all escape sequences to characters. Inspired by Rob Millar's
     * unescape() method in rcm.util.Str from the Web Sphinx project.
     * <p>
     * Unknown escape sequences and a lone trailing backslash are copied to the
     * output unchanged. Neither should occur for strings accepted by the
     * STRING_LITERAL token, which only allows the escapes handled here.
     *
     * @param inputStr String containing escape characters.
     * @return String with all escape sequences replaced.
     */
    private String replaceEscape(String inputStr) {
        int i = inputStr.indexOf('\\');
        if (i == -1) {
            // Fast path: nothing to translate, so no buffer is needed.
            return inputStr;
        }

        int len = inputStr.length();
        int p = 0; // start of the text not yet copied to the output
        StringBuilder output = new StringBuilder(len);

        while (i != -1) {
            // copy the plain text in front of the backslash
            output.append(inputStr, p, i);

            if (i + 1 == len) {
                // Lone backslash at the very end: let the tail copy below emit
                // it once. (Leaving p untouched here would copy the text in
                // front of it a second time.)
                p = i;
                break;
            }

            char metac = inputStr.charAt(i + 1);
            int k = ESCAPE_LETTERS.indexOf(metac);
            if (k == -1) {
                // unknown escape: leave the sequence as found
                output.append('\\').append(metac);
            } else {
                // known escape: emit the character it stands for
                output.append(ESCAPE_CHARS.charAt(k));
            }

            // skip over both the backslash and the character after it
            p = i + 2;
            i = inputStr.indexOf('\\', p);
        }

        // add the end of the input string to the output
        if (p < len) {
            output.append(inputStr, p, len);
        }

        return output.toString();
    }
}

PARSER_END(Parser) 

/*************************************************************** 
SKIP 
***************************************************************/ 

// Input discarded in every lexical state (<*>): whitespace and comments.
<*> SKIP : 
{ 
    " " 
    | "\t" 
    | "\n" 
    | "\r" 

    // Line comment: "//" up to and including a line terminator. The terminator
    // is required, so a "//" comment with no newline after it is not matched here.
    | <"//" (~["\n","\r"])* ("\n"|"\r"|"\r\n")> 
    // Block comment: "/*" ... "*/".
    | <"/*" (~["*"])* "*" (~["/"] (~["*"])* "*")* "/"> 
} 


/*************************************************************** 
TOKENS 
***************************************************************/ 

// Literal tokens, recognised in every lexical state. Names prefixed with '#'
// are private helper expressions and never become tokens themselves.
<*> TOKEN : /* LITERALS */ 
{ 
    // One or more decimal digits.
    < INTEGER_LITERAL: 
     <DECIMAL_LITERAL> 
    > 
| 
    < #DECIMAL_LITERAL: ["0"-"9"] (["0"-"9"])* > 
| 
    // Three accepted shapes: "1." / "1.5" / "1.5e3", ".5" / ".5e3", and "1e3".
    < FLOATING_POINT_LITERAL: 
     (["0"-"9"])+ "." (["0"-"9"])* (<EXPONENT>)? 
     | "." (["0"-"9"])+ (<EXPONENT>)? 
     | (["0"-"9"])+ <EXPONENT> 
    > 
| 
    < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ > 
| 
    // Double-quoted string on a single line. A backslash must be followed by
    // one of n t b r f \ ' " -- the same set replaceEscape() translates.
    < STRING_LITERAL: 
     "\"" 
     ((~["\"","\\","\n","\r"]) 
     | ("\\" ["n","t","b","r","f","\\","'","\""]) 
     )* 
     "\"" 
    > 
} 

/* IDENTIFIERS 

    Letters before version 2.22 
    < #LETTER: ["_","a"-"z","A"-"Z"] > 

    In Ver 2.3.0.1 presence of . in an identifier is switchable. 
    In the DEFAULT lexical state identifiers can contain a . 
    In the NO_DOT_IN_IDENTIFIERS state identifiers cannot contain a . 
    the state can be set by using 
    Parser.setInitialTokenManagerState 
*/ 

// Identifiers in the DEFAULT lexical state: a letter followed by letters,
// digits or dots (so "x.y" is a single identifier in this state).
// The token name is spelled INDENTIFIER1 (sic); other productions refer to it
// by that exact name.
<DEFAULT> TOKEN: 
{ 
    <INDENTIFIER1: <LETTER1>(<LETTER1>|<DIGIT1>|".")*> 
    | 
    < #LETTER1: 
    [ 
     "\u0024",   // $ 
     "\u0041"-"\u005a", // A - Z 
     "\u005f",   // _ 
     "\u0061"-"\u007a", // a - z 
     "\u00c0"-"\u00d6", // Upper case symbols of Latin-1 Supplement 
     "\u00d8"-"\u00f6", // Lower case symbols of Latin-1 Supplement 
     "\u00f8"-"\u00ff", // More lower case symbols of Latin-1 Supplement 
     "\u0100"-"\u1fff", // Many languages (including Greek) 
     "\u3040"-"\u318f", // Hiragana, Katakana, Bopomofo, Hangul Compatibility Jamo 
     "\u3300"-"\u337f", // CJK Compatibility 
     "\u3400"-"\u3d2d", // CJK Unified Ideographs Extension A 
     "\u4e00"-"\u9fff", // CJK Unified Ideographs 
     "\uf900"-"\ufaff" // CJK Compatibility Ideographs 
    ] 
    > 
| 
    < #DIGIT1: ["0"-"9"] > 
} 

// Identifiers in the NO_DOT_IN_IDENTIFIERS lexical state: same letter set as
// LETTER1, but a dot is not part of the identifier, so "x.y" is tokenised as
// x <DOT> y. The token name is spelled INDENTIFIER2 (sic).
<NO_DOT_IN_IDENTIFIERS> TOKEN: 
{ 
    <INDENTIFIER2: <LETTER2>(<LETTER2>|<DIGIT2>)*> 
    | 
    < #LETTER2: 
    [ 
     "\u0024",   // $ 
     "\u0041"-"\u005a", // A - Z 
     "\u005f",   // _ 
     "\u0061"-"\u007a", // a - z 
     "\u00c0"-"\u00d6", // Upper case symbols of Latin-1 Supplement 
     "\u00d8"-"\u00f6", // Lower case symbols of Latin-1 Supplement 
     "\u00f8"-"\u00ff", // More lower case symbols of Latin-1 Supplement 
     "\u0100"-"\u1fff", // Many languages (including Greek) 
     "\u3040"-"\u318f", // Hiragana, Katakana, Bopomofo, Hangul Compatibility Jamo 
     "\u3300"-"\u337f", // CJK Compatibility 
     "\u3400"-"\u3d2d", // CJK Unified Ideographs Extension A 
     "\u4e00"-"\u9fff", // CJK Unified Ideographs 
     "\uf900"-"\ufaff" // CJK Compatibility Ideographs 
    ] 
    > 
    | 
    < #DIGIT2: ["0"-"9"] > 
} 

/* OPERATORS and punctuation, recognised in every lexical state.
   Several tokens share a prefix ("<" / "<=", "!" / "!=", "^" / "^^", ...);
   the token manager prefers the longest match. */
<*> TOKEN: 
{ 
    < ASSIGN:"=" > // rjm 
| < SEMI: ";" > // rjm 
| < COMMA: "," > // rjm 
| < GT: ">" > 
| < LT: "<" > 
| < EQ: "==" > 
| < LE: "<=" > 
| < GE: ">=" > 
| < NE: "!=" > 
| < AND: "&&" > 
| < OR: "||" > 
| < PLUS: "+" > 
| < MINUS:"-" > 
| < MUL: "*" > 
| < DOT: "." > // rjm 
| < DIV: "/" > 
| < MOD: "%" > 
| < NOT: "!" > 
| < POWER:"^" > 
| < CROSS:"^^" > // rjm 
| < LSQ: "[" > // rjm 
| < RSQ: "]" > // rjm 
| < LRND: "(" > // rjm 
| < RRND: ")" > // rjm 
| < COLON: ":" > // rjm 
} 


/*************************************************************** 
GRAMMAR START 
***************************************************************/ 

// Entry production. Parses one expression terminated by end of input or ';'
// and returns the ASTStart node built for it. Returns null when the
// terminator is reached with no expression in front of it; the callers
// (parseStream / continueParse) decide how to report that.
ASTStart Start() #Start : 
{ 
} 
{ 
    Expression() (<EOF> | <SEMI>) { return jjtThis; } 
    | (<EOF> | <SEMI>) 
    { 
     // njf - The next line is commented out in 2.3.0 since 
     //  two "No expression entered" errors are reported 
     //  in EvaluatorVisitor and Console (one from here 
     //  the other from ParseStream()) 
     //  Decided to just return null, and handle the error 
     //  in ParseStream. 
     // addToErrorList("No expression entered"); 
     return null; 
    } 
} 

// Expressions can be like
// x=3
// x=y=3 parsed as x=(y=3)
//
// An assignment is chosen only when the upcoming input matches
// LValue() <ASSIGN>; everything else is parsed as a RightExpression.
// Note that this syntactic lookahead is evaluated each time Expression()
// is entered, including for nested expressions.

void Expression() : {} 
{ 
    LOOKAHEAD(LValue() <ASSIGN>)  // need to prevent javacc warning with left recursion 
    AssignExpression() // rjm changes from OrExpresion 
    | 
    RightExpression() 
} 

// Assignment: LValue '=' Expression, built as a FunNode with two children
// whose operator is opSet.getAssign(). The right-hand side is a full
// Expression, so chained assignments nest to the right.
// Throws ParseException when the JEP instance does not allow assignment.
void AssignExpression() : {} // rjm addition 
{ 

    (LValue() <ASSIGN> Expression() 
     { 
      if (!jep.getAllowAssignment()) throw new ParseException(
      "Syntax Error (assignment not enabled)"); 

      jjtThis.setOperator(opSet.getAssign()); 
     } 
    ) 
     #FunNode(2) 
} 

// A non-assignment expression; simply delegates to the lowest-precedence
// binary level, OrExpression.
void RightExpression() : 
{ 
} 
{ 
    OrExpression() 
} 

// Logical OR: AndExpression ( "||" AndExpression )*.
// Each "||" wraps the two most recently built nodes in a FunNode carrying
// opSet.getOr(), so a chain groups to the left.
void OrExpression() : 
{ 
} 
{ 
    AndExpression() 
    (
     (<OR> AndExpression() 
     { 
      jjtThis.setOperator(opSet.getOr()); 
     } 
    ) #FunNode(2) 
    )* 
} 


// Logical AND: EqualExpression ( "&&" EqualExpression )*.
// Each "&&" builds a two-child FunNode carrying opSet.getAnd().
void AndExpression() : 
{ 
} 
{ 
    EqualExpression() 
    (
     (<AND> EqualExpression() 
     { 
      jjtThis.setOperator(opSet.getAnd()); 
     } 
    ) #FunNode(2) 
    )* 
} 



// Equality level: RelationalExpression ( ("!=" | "==") RelationalExpression )*.
// Each operator builds a two-child FunNode carrying opSet.getNE() or
// opSet.getEQ() respectively.
void EqualExpression() : 
{ 
} 
{ 
    RelationalExpression() 
    (
     (<NE> RelationalExpression() 
     { 
     jjtThis.setOperator(opSet.getNE()); 
     } 
    ) #FunNode(2) 
    | 
     (<EQ> RelationalExpression() 
     { 
      jjtThis.setOperator(opSet.getEQ()); 
     } 
    ) #FunNode(2) 
    )* 
} 



// Relational level: AdditiveExpression ( ("<" | ">" | "<=" | ">=") AdditiveExpression )*.
// Each operator builds a two-child FunNode carrying the matching operator
// from opSet (getLT / getGT / getLE / getGE).
void RelationalExpression() : 
{ 
} 
{ 
    AdditiveExpression() 
    (
    (<LT> AdditiveExpression() 
     { 
     jjtThis.setOperator(opSet.getLT()); 
     } 
    ) #FunNode(2) 
    | 
    (<GT> AdditiveExpression() 
     { 
     jjtThis.setOperator(opSet.getGT()); 
     } 
    ) #FunNode(2) 
    | 
    (<LE> AdditiveExpression() 
     { 
     jjtThis.setOperator(opSet.getLE()); 
     } 
    ) #FunNode(2) 
    | 
    (<GE> AdditiveExpression() 
     { 
     jjtThis.setOperator(opSet.getGE()); 
     } 
    ) #FunNode(2) 
)* 
} 


// Additive level: MultiplicativeExpression ( ("+" | "-") MultiplicativeExpression )*.
// Each operator builds a two-child FunNode carrying opSet.getAdd() or
// opSet.getSubtract().
void AdditiveExpression() : 
{ 
} 
{ 
    MultiplicativeExpression() 
    (
    (<PLUS> MultiplicativeExpression() 
     { 
     jjtThis.setOperator(opSet.getAdd()); 
     } 
    ) #FunNode(2) 
    | 
    (<MINUS> MultiplicativeExpression() 
     { 
     jjtThis.setOperator(opSet.getSubtract()); 
     } 
    ) #FunNode(2) 
)* 
} 


// Multiplicative level: UnaryExpression followed by any number of
//   - a PowerExpression with no operator in between (implicit multiplication);
//     this throws ParseException unless jep.implicitMul is set,
//   - "*", ".", "^^", "/" or "%" followed by a UnaryExpression.
// Every alternative builds a two-child FunNode with the matching operator
// from opSet (getMultiply / getDot / getCross / getDivide / getMod).
void MultiplicativeExpression() : 
{ 
} 
{ 
    UnaryExpression() 
    (
    (  
     PowerExpression() 
     { 
     if (!jep.implicitMul) throw new ParseException(
      "Syntax Error (implicit multiplication not enabled)"); 

     jjtThis.setOperator(opSet.getMultiply()); 
     } 
    ) #FunNode(2) 
    | 
    (<MUL> UnaryExpression() 
     { 
     jjtThis.setOperator(opSet.getMultiply()); 
     } 
    ) #FunNode(2) 
    | 
    (<DOT> UnaryExpression() 
     { 
     jjtThis.setOperator(opSet.getDot()); 
     } 
    ) #FunNode(2) 
    | 
    (<CROSS> UnaryExpression() 
     { 
     jjtThis.setOperator(opSet.getCross()); 
     } 
    ) #FunNode(2) 
    | 
    (<DIV> UnaryExpression() 
     { 
     jjtThis.setOperator(opSet.getDivide()); 
     } 
    ) #FunNode(2) 
    | 
    (<MOD> UnaryExpression() 
     { 
     jjtThis.setOperator(opSet.getMod()); 
     } 
    ) #FunNode(2) 
)* 
} 


// Prefix operators. Unary "+" is accepted but builds no node of its own;
// unary "-" and "!" each build a one-child FunNode carrying
// opSet.getUMinus() / opSet.getNot(). Anything else is a PowerExpression.
void UnaryExpression() : 
{ 
} 
{ 
    (<PLUS> UnaryExpression()) 
| 
    (<MINUS> UnaryExpression() 
    { 
     jjtThis.setOperator(opSet.getUMinus()); 
    } 
) #FunNode(1) 
| 
    (<NOT> UnaryExpression() 
    { 
     jjtThis.setOperator(opSet.getNot()); 
    } 
) #FunNode(1) 
| 
    PowerExpression() 
} 


// Exponentiation: an operand optionally followed by "^" and a UnaryExpression.
// The exponent is a UnaryExpression, so it may carry a sign and may itself
// contain a further "^". Builds a two-child FunNode carrying opSet.getPower().
void PowerExpression() : 
{ 
} 
{ 
    UnaryExpressionNotPlusMinus() 
    [ 
    (<POWER> UnaryExpression() 
    { 
     jjtThis.setOperator(opSet.getPower()); 
    } 
) #FunNode(2) 
    ] 
} 


// Operand level: a constant, an array access, a function call, a variable,
// a parenthesised expression or a list. The alternatives are tried in the
// order written.
void UnaryExpressionNotPlusMinus() : 
{ 
} 
{ 
    AnyConstant() 
    | 
    // Syntactic lookahead: the whole ArrayAccess (variable plus bracketed
    // list) is scanned ahead before this alternative is committed to.
    LOOKAHEAD(ArrayAccess()) 
    ArrayAccess() 
    | 
    // Semantic lookahead: an identifier is treated as a function call only
    // when its name is registered in jep.funTab.
    LOOKAHEAD({ (getToken(1).kind == INDENTIFIER1 || getToken(1).kind == INDENTIFIER2) && 
        jep.funTab.containsKey(getToken(1).image) }) 
    Function() 
    | 
    Variable() 
    | 
    <LRND> Expression() <RRND> 
    | 
// LOOKAHEAD(<LSQ> Expression() <COLON>) 
// RangeExpression() 
// | 
    ListExpression() 
} 

// Bracketed list: "[" Expression ( "," Expression )* "]".
// The whole production is one FunNode; its operator (opSet.getList()) is set
// in the declaration block, i.e. before any element is parsed.
void ListExpression() #FunNode: 
{ 
    jjtThis.setOperator(opSet.getList()); 
} 
{ 
    <LSQ> Expression() (<COMMA> Expression())* <RSQ> 
} 

/* 
void RangeExpression() #FunNode: 
{ 
    jjtThis.setOperator(opSet.getRange()); 
} 
{ 
    <LSQ> Expression() (<COLON> Expression())+ <RSQ> 
} 
*/ 

// Left-hand side of an assignment: an array element (tried first, guarded by
// a syntactic lookahead over the whole ArrayAccess) or a plain variable.
void LValue() : 
{ 
} 
{ 
    LOOKAHEAD(ArrayAccess()) 
    ArrayAccess() 
    | Variable() 
} 

// Element access: a variable followed by a bracketed list, e.g. x[1].
// The variable node and the list node become the two children of a FunNode
// carrying opSet.getElement().
void ArrayAccess() : 
{ 
} 
{ 
    Variable() ListExpression() 
    { 
    jjtThis.setOperator(opSet.getElement()); 
    } #FunNode(2) 

} 
// Variable reference, built as a VarNode.
// A name already in the symbol table is bound to its existing variable.
// An unknown name is created on the fly when jep.allowUndeclared is set;
// otherwise an "Unrecognized symbol" message is added to the error list and
// setVar is not called on the node.
void Variable() : 
{ 
    String identString = ""; 
} 
{ 
    (identString = Identifier() 
    { 
     if (symTab.containsKey(identString)) { 
      jjtThis.setVar(symTab.getVar(identString)); 
     } else { 
      if (jep.allowUndeclared) { 
       jjtThis.setVar(symTab.makeVarIfNeeded(identString)); 
      } else { 
       addToErrorList("Unrecognized symbol \"" + identString +"\""); 
      } 
     } 
    } 
    ) #VarNode 
} 



// Function call: Identifier "(" ArgumentList ")", built as a FunNode.
// The function is looked up once in jep.funTab. When it is found, the node is
// bound to it and its declared parameter count is handed to ArgumentList for
// checking; when it is not found, an error is recorded and parsing continues
// with a required argument count of 0.
void Function() : 
{ 
    int reqArguments = 0; 
    String identString = ""; 
    PostfixMathCommandI command; 
} 
{ 
    (identString = Identifier() 
     { 
      // Single table lookup instead of containsKey() followed by two get() calls.
      command = (PostfixMathCommandI)jep.funTab.get(identString); 
      if (command != null) { 
       //Set number of required arguments 
       reqArguments = command.getNumberOfParameters(); 
       jjtThis.setFunction(identString, command); 
      } else { 
       addToErrorList("!!! Unrecognized function \"" + identString +"\""); 
      } 
     } 

     <LRND> ArgumentList(reqArguments, identString) <RRND> 

    ) #FunNode 
} 

// Optional comma-separated argument list of a function call.
// Counts the parsed arguments and checks the count:
//   - reqArguments == -1: the function's own checkNumberOfParameters(count)
//     decides whether the count is acceptable,
//   - otherwise the count must equal reqArguments.
// A mismatch is recorded in the error list; no exception is thrown here.
void ArgumentList(int reqArguments, String functionName) : 
{ 
    int count = 0; 
    String errorStr = ""; 
} 
{ 
    [ 
    Expression() { count++; } 
    (
     <COMMA> 
     Expression() { count++; } 
    )* 
    ] 
    { 
     if(reqArguments == -1) { 
      if(!((PostfixMathCommandI)jep.funTab.get(functionName)).checkNumberOfParameters(count)) 
      { 
       errorStr = "Function \"" + functionName +"\" illegal number of arguments " + count; 
       addToErrorList(errorStr); 
      } 
     } 
     else if (reqArguments != count) { 
      errorStr = "Function \"" + functionName +"\" requires " 
         + reqArguments + " parameter"; 
      // pluralise "parameter" for any count other than 1
      if (reqArguments!=1) errorStr += "s"; 
      addToErrorList(errorStr); 
     } 
    } 
} 



// Matches an identifier token of either lexical state (INDENTIFIER1 with
// dots allowed, INDENTIFIER2 without) and returns its text.
String Identifier() : 
{ 
    Token t; 
} 
{ 
    (t = <INDENTIFIER1> | t = <INDENTIFIER2>) { return t.image; } 
} 


// Constant node: either a string literal (quotes stripped, escape sequences
// translated by replaceEscape) or a numeric constant from RealConstant().
void AnyConstant() #Constant: 
{ 
    Token t; 
    Object value; 
} 
{ 
    t=<STRING_LITERAL> { 
     // strip away double quotes at end of string 
     String temp = (t.image).substring(1,t.image.length()-1); 

     // replace escape characters 
     temp = replaceEscape(temp); 

     jjtThis.setValue(temp); 
    } 
    | 
    value = RealConstant() { 
     jjtThis.setValue(value); 
     // The commented-out lines below are a disabled Array() alternative;
     // the closing brace after them ends this RealConstant() action.
// } 
// | 
// value = Array() { 
//  jjtThis.setValue(value); 
    } 
} 

/* 
Vector Array() : 
{ 
    Object value; 
    Vector result = new Vector(); 
} 
{ 
    <LSQ> 
    value = RealConstant() 
    { 
     result.addElement(value); 
    } 
    (
     <COMMA> 
     value = RealConstant() 
     { 
      result.addElement(value); 
     } 
    )* 
    <RSQ> 
    { 
     return result; 
    } 
} 
*/ 




// Numeric literal (integer or floating point). The token text is converted
// by the JEP instance's number factory. If the conversion throws, the error
// is added to the error list and null is returned instead of propagating.
Object RealConstant() : 
{ 
    Token t; 
    Object value; 
} 
{ 
    (t=<INTEGER_LITERAL> | t=<FLOATING_POINT_LITERAL>) 
    { 
     try { 
      value = jep.getNumberFactory().createNumber(t.image); 
     } catch (Exception e) { 
      // deliberate best-effort: report through the error list, not an exception
      value = null; 
      addToErrorList("Can't parse \"" + t.image + "\""); 
     } 

     return value; 
    } 
}

出典

2016-07-18 shalakha

あなたが書いたコードを投稿してください。 – Jokab

あなたが目に見えない文法を目に見えない形で間違えていない限り、ここではパーサーのパフォーマンスが本物の問題であることは間違いありません。毎秒何百万という表現を実際に解析する必要がありますか？ – EJP

ファイル全体をメモリに保持しても、パフォーマンスの向上にはまず役立たないでしょう。行を読み込みながら、その都度解析してみてはどうですか？ – VGR

readStream（おそらくparseStreamのこと）が最も時間を消費しているとのことですが、その中のどの関数（呼び出し先）が時間を使っているのでしょうか？プロファイラーによっては、こうした重大なホットスポットをグラフィカルに表示してくれます。私の注意を引いたのはreplaceEscapeです。

private static final String METACHARS = "tnrbf\\\"'"; 
private static final String CHARS = "\t\n\r\b\f\\\"'"; 

private String replaceEscape(String inputStr) { 
    int i = inputStr.indexOf('\\'); 
    if (i == -1) { // 1. Heuristic strings without backslash 
     return inputStr; 
    } 
    int len = inputStr.length(); 
    int p = 0; 
    StringBuilder output = new StringBuilder(); // 2. Faster StringBuilder 

    while (i != -1) { 
     if (i + 1 == len) break; 

     if (p < i) output.append(inputStr.substring(p, i)); 
     p = i + 1; 
     char metac = inputStr.charAt(i+1); 

     // find the index of the metac 
     int k = METACHARS.indexOf(metac); 
     if (k != -1) { 
      // its corresponding true char 
      metac = CHARS.charAt(k)); 
      ++p; // Start copying after metachar 
     } 
     output.append(metac); 
    } 

    // add the end of the input string to the output 
    if (p < len) 
     output.append(inputStr.substring(p)); 
    return output.toString(); 
}

出典

2016-07-19 07:20:58

どのように私はそれを改善することができますか？ – shalakha

解析をトレースしてみてください。私が見た限りでは見当たりませんでしたが、文法のどこかが非効率なのかもしれません。たとえば、一部のASTサブツリーを1つのノードに縮約できる可能性があります。シンボルテーブルなどのデータ構造が最適でない可能性もあります。文法のさまざまな使い方（変数をまったく使わない場合は速くなるか、など）をベンチマークしてください。また、'Files.readAllLines'やBufferedReaderを使ってください。 –

Javaパーサーのパフォーマンスを向上させる

答えて

関連する問題