2017-04-25 9 views
2

antlr文法repoのPython3.g4文法ファイルを使用してantlr4を使用してPythonターゲットを生成しました。生成されたPython3Lexer.pyファイルには、Pythonに変換する必要があるJavaコードが含まれていました。antlr4 python3ターゲットでJavaをPythonに変換する

# A queue where extra tokens are pushed on (see the NEWLINE lexer rule). 
tokens = deque() 

# The stack that keeps track of the indentation level. 
# https://docs.python.org/3/tutorial/datastructures.html#using-lists-as-stacks 
indents = [] 

# The amount of opened braces, brackets and parenthesis. 
opened = 0 

# The most recently produced token. 
lastToken = None 

def emit(self, t): 
    self._token = t 
    self.tokens.append(t) 

def nextToken(self): 

    # Check if the end-of-file is ahead and there are still some DEDENTS expected. 
    if self._input.LA(1) == Token.EOF and self.indents.size() != 0: 

    # Remove any trailing EOF tokens from our buffer. 
    for i in range(tokens.size() - 1, 0, -1): 
     if self.tokens[i].getType() == Token.EOF: 
     self.tokens.remove(i) 

    # First emit an extra line break that serves as the end of the statement. 
    self.emit(commonToken(Python3Parser.NEWLINE, "\n")) 

    # Now emit as much DEDENT tokens as needed. 
    while self.indents.size() != 0: 
     self.emit(createDedent()) 
     self.indents.pop() 

    # Put the EOF back on the token stream. 
    self.emit(commonToken(Python3Parser.EOF, "<EOF>")) 

    next = self.nextToken() 

    if next.getChannel() == Token.DEFAULT_CHANNEL: 
    # Keep track of the last token on the default channel. 
    self.lastToken = next 

    return next if self.tokens.size() == 0 else self.tokens.popleft() 

def createDedent(): 
    dedent = commonToken(Python3Parser.DEDENT, "") 
    dedent.setLine(self.lastToken.getLine()) 
    return dedent 

def commonToken(self, type, text): 
    stop = self.getCharIndex() - 1 
    start = stop if text.size() == 0 else stop - text.size() + 1 
    return CommonToken(self._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop) 

def getIndentationCount(spaces): 

    count = 0 

    for ch in spaces: 
    if ch == '\t': 
     count += 8 - (count % 8) 
     break 
    else: 
     # A normal space char. 
     count = count + 1 

    return count 

def atStartOfInput(self): 
    return self.getCharPositionInLine() == 0 and self.getLine() == 1 

:ここではそれが出力される2つのJavaのセグメントがあり、あなたがそれらを見つけることができ、両方のpython3文法ファイル内 here also

// A queue where extra tokens are pushed on (see the NEWLINE lexer rule). 
private java.util.LinkedList<Token> tokens = new java.util.LinkedList<>(); 

// The stack that keeps track of the indentation level. 
private java.util.Stack<Integer> indents = new java.util.Stack<>(); 

// The amount of opened braces, brackets and parenthesis. 
private int opened = 0; 

// The most recently produced token. 
private Token lastToken = null; 

@Override 
public void emit(Token t) { 
    super.setToken(t); 
    tokens.offer(t); 
} 

@Override 
public Token nextToken() { 

    // Check if the end-of-file is ahead and there are still some DEDENTS expected. 
    if (_input.LA(1) == EOF && !this.indents.isEmpty()) { 

    // Remove any trailing EOF tokens from our buffer. 
    for (int i = tokens.size() - 1; i >= 0; i--) { 
     if (tokens.get(i).getType() == EOF) { 
      tokens.remove(i); 
     } 
    } 

    // First emit an extra line break that serves as the end of the statement. 
    this.emit(commonToken(Python3Parser.NEWLINE, "\n")); 

    // Now emit as much DEDENT tokens as needed. 
    while (!indents.isEmpty()) { 
     this.emit(createDedent()); 
     indents.pop(); 
    } 

    // Put the EOF back on the token stream. 
    this.emit(commonToken(Python3Parser.EOF, "<EOF>")); 
    } 

    Token next = super.nextToken(); 

    if (next.getChannel() == Token.DEFAULT_CHANNEL) { 
     // Keep track of the last token on the default channel. 
     this.lastToken = next; 
    } 

    return tokens.isEmpty() ? next : tokens.poll(); 
} 

private Token createDedent() { 
    CommonToken dedent = commonToken(Python3Parser.DEDENT, ""); 
    dedent.setLine(this.lastToken.getLine()); 
    return dedent; 
} 

private CommonToken commonToken(int type, String text) { 
    int stop = this.getCharIndex() - 1; 
    int start = text.isEmpty() ? stop : stop - text.length() + 1; 
    return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop); 
} 

static int getIndentationCount(String spaces) { 

    int count = 0; 

    for (char ch : spaces.toCharArray()) { 
     switch (ch) { 
     case '\t': 
      count += 8 - (count % 8); 
      break; 
     default: 
      // A normal space char. 
      count++; 
     } 
    } 

    return count; 
} 

boolean atStartOfInput() { 
    return super.getCharPositionInLine() == 0 && super.getLine() == 1; 
} 

String newLine = getText().replaceAll("[^\r\n\f]+", ""); 
String spaces = getText().replaceAll("[\r\n\f]+", ""); 
int next = _input.LA(1); 

if (opened > 0 || next == '\r' || next == '\n' || next == '\f' || next == '#') { 
    // If we're inside a list or on a blank line, ignore all indents, 
    // dedents and line breaks. 
    skip(); 
} 
else { 
    emit(commonToken(NEWLINE, newLine)); 

    int indent = getIndentationCount(spaces); 
    int previous = indents.isEmpty() ? 0 : indents.peek(); 

    if (indent == previous) { 
     // skip indents of the same size as the present indent-size 
     skip(); 
    } 
    else if (indent > previous) { 
     indents.push(indent); 
     emit(commonToken(Python3Parser.INDENT, spaces)); 
    } 
    else { 
     // Possibly emit more than 1 DEDENT token. 
     while(!indents.isEmpty() && indents.peek() > indent) { 
      this.emit(createDedent()); 
      indents.pop(); 
     } 
    } 
} 

私はこれらを自分で翻訳しました

newLine = getText().replaceAll("[^\r\n\f]+", "") 
spaces = getText().replaceAll("[\r\n\f]+", "") 
next = self._input.LA(1) 

if opened > 0 or next == '\r' or next == '\n' or next == '\f' or next == '#': 
    # If we're inside a list or on a blank line, ignore all indents, 
    # dedents and line breaks. 
    skip() 
else: 
    emit(commonToken(NEWLINE, newLine)) 

indent = getIndentationCount(spaces) 
previous = 0 if indents.isEmpty() else indents.peek() 

if indent == previous: 
    # skip indents of the same size as the present indent-size 
    skip() 
elif indent > previous: 
    indents.push(indent) 
    emit(commonToken(Python3Parser.INDENT, spaces)) 
else: 
    # Possibly emit more than 1 DEDENT token. 
    while not indents.isEmpty() and indents.peek() > indent: 
    self.emit(createDedent()) 
    indents.pop() 

これは私のpythonスクリプトで、Java断片の代わりにpython内部でantlrの出力を実行します。 python main.py test.py

import sys 
from antlr4 import * 
from Python3Lexer import Python3Lexer 
from Python3Parser import Python3Parser 
from Python3Listener import Python3Listener 

class FuncPrinter(Python3Listener): 
    def enterFuncdef(self, ctx): 
    print("Oh, a func") 

def main(argv): 
    input = FileStream(argv[1]) 
    lexer = Python3Lexer(input) 
    stream = CommonTokenStream(lexer) 
    parser = Python3Parser(stream) 
    tree = parser.funcdef() 

    printer = KeyPrinter() 
    walker = ParseTreeWalker() 
    walker.walk(printer, tree) 

if __name__ == '__main__': 
    main(sys.argv) 

このエラーコマンドを実行し、入力ファイルは次のようになり、次のトレース

Traceback (most recent call last): 
    File "main.py", line 24, in <module> 
    main(sys.argv) 
    File "main.py", line 17, in main 
    tree = parser.parameters() 
    File "...\antler-test\Python3Parser.py", line 1297, in parameters 
    self.enterRule(localctx, 14, self.RULE_parameters) 
    File "...\antler-test\antlr4\Parser.py", line 358, in enterRule 
    self._ctx.start = self._input.LT(1) 
    File "...\antler-test\antlr4\CommonTokenStream.py", line 61, in LT 
    self.lazyInit() 
    File "...\antler-test\antlr4\BufferedTokenStream.py", line 186, in lazyInit 
    self.setup() 
    File "...\antler-test\antlr4\BufferedTokenStream.py", line 189, in setup 
    self.sync(0) 
    File "...\antler-test\antlr4\BufferedTokenStream.py", line 111, in sync 
    fetched = self.fetch(n) 
    File "...\antler-test\antlr4\BufferedTokenStream.py", line 123, in fetch 
    t = self.tokenSource.nextToken() 
    File "...\antler-test\Python3Lexer.py", line 698, in nextToken 
    next = self.nextToken() 
    File "...\antler-test\Python3Lexer.py", line 698, in nextToken 
    next = self.nextToken() 
    File "...\antler-test\Python3Lexer.py", line 698, in nextToken 
    next = self.nextToken() 
    [Previous line repeated 985 more times] 
    File "...\antler-test\Python3Lexer.py", line 680, in nextToken 
    if self._input.LA(1) == Token.EOF and self.indents.size() != 0: 
    File "...\antler-test\antlr4\InputStream.py", line 49, in LA 
    if offset==0: 
RecursionError: maximum recursion depth exceeded in comparison 

を印刷:

def fun1(): 
    return None 

def fun2(): 
    return None 

私は私が間違ってのpythonを翻訳している場合はわかりませんまたは再帰アルゴリズムは単純にあまりにも多くのPythonのですが、私はまた、nextTokenメソッドのアルゴリズムを変更する方法を理解することはできません反復的であるe。多分誰かがそれを理解することができたでしょうか?それとも、私がやっていることに何か他の問題がありますか?

答えて

2

superあなたはおそらくのようなものを意味self.と同じではありません

Token next = super.nextToken(); 

に留意されたいです。私は数日から同じ話題に正確に取り組んでいます。

これは簡単ではありません。 Pythonランタイムは、Java APIとまったく同じAPIではありません。 Pythonランタイムはあまり使用されておらず、非常に不完全です。

いくつかの回避策を使用する必要がありましたが、動作するようです。ここに私のコードは次のとおりです。

tokens { INDENT, DEDENT } 

@lexer::members { 

    # A queue where extra tokens are pushed on (see the NEWLINE lexer rule). 
    self.tokens = [] 

    # The stack that keeps track of the indentation level. 
    self.indents = [] 

    # The amount of opened braces, brackets and parenthesis. 
    self.opened = 0 

    # The most recently produced token. 
    self.last_token = None 

def emitToken(self, t): 
    super().emitToken(t) 
    self.tokens.append(t) 

def nextToken(self): 
    if self._input.LA(1) == Token.EOF and len(self.indents) > 0: 
     # Remove any trailing EOF tokens from our buffer. 
     while len(self.tokens) > 0 and self.tokens[-1].type == Token.EOF: 
      del self.tokens[-1] 

     # First emit an extra line break that serves as the end of the statement. 
     self.emitToken(self.common_token(Python3Lexer.NEWLINE, "\n")); 

     # Now emit as much DEDENT tokens as needed. 
     while len(self.indents) != 0: 
      self.emitToken(self.create_dedent()) 
      del self.indents[-1] 

     # Put the EOF back on the token stream. 
     self.emitToken(self.common_token(Token.EOF, "<EOF>")); 

    next = super().nextToken(); 

    if next.channel == Token.DEFAULT_CHANNEL: 
     # Keep track of the last token on the default channel. 
     self.last_token = next 

    if len(self.tokens) == 0: 
     return next 
    else: 
     t = self.tokens[0] 
     del self.tokens[0] 
     return t 

def create_dedent(self): 
    from Python3Parser import Python3Parser 
    dedent = self.common_token(Python3Parser.DEDENT, "") 
    dedent.line = self.last_token.line 
    return dedent 

def common_token(self, _type, text): 
    from antlr4.Token import CommonToken 
    stop = self.getCharIndex() - 1 
    if len(self.text) == 0: 
     start = stop 
    else: 
     start = stop - len(self.text) + 1 
    return CommonToken(self._tokenFactorySourcePair, _type, Lexer.DEFAULT_TOKEN_CHANNEL, start, stop) 

## Calculates the indentation of the provided spaces, taking the 
## following rules into account: 
## 
## "Tabs are replaced (from left to right) by one to eight spaces 
## such that the total number of characters up to and including 
## the replacement is a multiple of eight [...]" 
## 
## -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation 
def getIndentationCount(self, spaces): 
    count = 0 
    for ch in spaces: 
     if ch == '\t': 
      count += 8 - (count % 8) 
     else: 
      count += 1 
    return count 

def atStartOfInput(self): 
    return self._interp.column == 0 and self._interp.line == 1 

} 

と改行レクサーの一部について:

NEWLINE 
: ({self.atStartOfInput()}? SPACES 
    | ('\r'? '\n' | '\r' | '\f') SPACES? 
    ) 

    { 
    import re 
    from Python3Parser import Python3Parser 
    new_line = re.sub(r"[^\r\n\f]+", "", self._interp.getText(self._input)) #.replaceAll("[^\r\n\f]+", "") 
    spaces = re.sub(r"[\r\n\f]+", "", self._interp.getText(self._input)) #.replaceAll("[\r\n\f]+", "") 
    next = self._input.LA(1) 

    if self.opened > 0 or next == '\r' or next == '\n' or next == '\f' or next == '#': 
     self.skip() 
    else: 
     self.emitToken(self.common_token(self.NEWLINE, new_line)) 

     indent = self.getIndentationCount(spaces) 
     if len(self.indents) == 0: 
      previous = 0 
     else: 
      previous = self.indents[-1] 

     if indent == previous: 
      self.skip() 
     elif indent > previous: 
      self.indents.append(indent) 
      self.emitToken(self.common_token(Python3Parser.INDENT, spaces)) 
     else: 
      while len(self.indents) > 0 and self.indents[-1] > indent: 
       self.emitToken(self.create_dedent()) 
       del self.indents[-1] 

    }; 

あなたはまた、(例えば)「文字列」でファイル全体でレクサー・ID「STR」を交換する必要があるためstrはpythonのキーワードです。

2

あなたのPythonコードは

next = self.nextToken() 

を言うしかし、あなたのJavaコードは言う:

next = super().nextToken() 
関連する問題