2017-12-11 13 views
0

RASA NLUの誤ったエンティティ注釈エラーを解決する方法

RASAにLUISスキーマモデルをインポートして、spacy + scikitパイプラインを使用して訓練しようとしています。私はRASA NLUのv0.10.4を使用しています。しかし、LUISモデルスキーマをロードしようとすると、ner_crfコンポーネントがエンティティ注釈の位置ずれに関する次の警告を出します。

Misaligned Entity Annotation

LUISモデルスキーマではエンティティを正しくタグ付けしているのですが。

以下が私の設定ファイルです:

{ 
    "project": "SynonymsExample", 
    "path": "C:\\Users\\xyz\\Desktop\\RASA\\models", 
    "response_log": "C:\\Users\\xyz\\Desktop\\RASA\\logs", 
    "pipeline": "spacy_sklearn", 
    "data": "C:\\Users\\xyz\\Desktop\\RASA\\data\\examples\\RasaFormat.json", 
    "cors_origins": ["*"], 
    "aws_endpoint_url": null, 
    "token": null, 
    "num_threads": 2, 
    "port": 5000 
} 

以下が私のLUISモデルです:

{ 
    "luis_schema_version": "2.1.0", 
    "versionId": "0.1", 
    "name": "phraseListDemo", 
    "desc": "", 
    "culture": "en-us", 
    "intents": [ 
    { 
     "name": "None" 
    }, 
    { 
     "name": "PersonalInfo" 
    } 
    ], 
    "entities": [ 
    { 
     "name": "city" 
    }, 
    { 
     "name": "Contact" 
    }, 
    { 
     "name": "Email" 
    }, 
    { 
     "name": "FirstName" 
    }, 
    { 
     "name": "LastName" 
    } 
    ], 
    "composites": [], 
    "closedLists": [], 
    "bing_entities": [ 
    "datetimeV2" 
    ], 
    "actions": [], 
    "model_features": [ 
    { 
     "name": "city", 
     "mode": true, 
     "words": "jaipur,bangalore,florida,japan,delhi,pune,bombay,mumbai,chennai,hyderabad,kolkata,chandigarh,ahmedabad,china,lucknow,germany,noida,indore,nagpur,coimbatore,bhopal,banglore,india,patna,maharashtra,surat,kanpur,guwahati,ludhiana,gwalior,aurangabad,amritsar,rajkot,gujarat,madurai,pradesh,dehradun,raipur,ranchi,varanasi,jabalpur,jodhpur,srinagar,mangalore,udaipur,jamshedpur,vadodara", 
     "activated": true 
    }, 
    { 
     "name": "contact", 
     "mode": true, 
     "words": "8947847422,8967564556,8967907890,1235712345,8989898989,1231231231", 
     "activated": true 
    }, 
    { 
     "name": "Email", 
     "mode": true, 
     "words": "[email protected], [email protected]", 
     "activated": true 
    }, 
    { 
     "name": "emailid", 
     "mode": true, 
     "words": "[email protected], [email protected]", 
     "activated": true 
    }, 
    { 
     "name": "FirstName", 
     "mode": true, 
     "words": "amit,ankur,ankit,ram,shyam,kunal,saikat,sundar,krishna,vikram,mohan,vijay,karthik,sunil,vivek,gopal,John,Chris,satish,surya,ajay,raju,suresh,sanjay,rajesh,ravi,ramesh,arun,rakesh,manoj,anil,kiran,sachin,dinesh,pradeep,raj,ashok,priya,prakash,david,mukesh,praveen,mahesh,naresh,anand,kumar,nikhil,michael,paul,naveen,nitin,srinivas,prasad,vinod,kishore,james,vinay,thomas", 
     "activated": true 
    }, 
    { 
     "name": "LastName", 
     "mode": true, 
     "words": "Gupta,Sharma,Jain,kumar,singh,mishra,Mukherjee,goswami,verma,yadav,patel,ghosh,das", 
     "activated": true 
    }, 
    { 
     "name": "MID", 
     "mode": true, 
     "words": "M1039205,M1039222,M1036767,M1048967,M1056789,M1028967,M1088967", 
     "activated": true 
    } 
    ], 
    "regex_features": [], 
    "utterances": [ 
    { 
     "text": "my name is ankur", 
     "intent": "PersonalInfo", 
     "entities": [ 
     { 
      "entity": "FirstName", 
      "startPos": 11, 
      "endPos": 15 
     } 
     ] 
    }, 
    { 
     "text": "my contact number is 1231234123", 
     "intent": "PersonalInfo", 
     "entities": [ 
     { 
      "entity": "Contact", 
      "startPos": 21, 
      "endPos": 30 
     } 
     ] 
    }, 
    { 
     "text": "my firstname is amit and lastname is gupta", 
     "intent": "PersonalInfo", 
     "entities": [ 
     { 
      "entity": "FirstName", 
      "startPos": 16, 
      "endPos": 19 
     }, 
     { 
      "entity": "LastName", 
      "startPos": 37, 
      "endPos": 41 
     } 
     ] 
    }, 
    { 
     "text": "my email is [email protected]", 
     "intent": "PersonalInfo", 
     "entities": [ 
     { 
      "entity": "Email", 
      "startPos": 12, 
      "endPos": 22 
     } 
     ] 
    }, 
    { 
     "text": "kunal is one person", 
     "intent": "PersonalInfo", 
     "entities": [ 
     { 
      "entity": "FirstName", 
      "startPos": 0, 
      "endPos": 4 
     } 
     ] 
    }, 
    { 
     "text": "myself singh and my dob comes on 24 may", 
     "intent": "PersonalInfo", 
     "entities": [ 
     { 
      "entity": "LastName", 
      "startPos": 7, 
      "endPos": 11 
     } 
     ] 
    }, 
    { 
     "text": "my name is gupta and my dob is in month april", 
     "intent": "PersonalInfo", 
     "entities": [ 
     { 
      "entity": "LastName", 
      "startPos": 11, 
      "endPos": 15 
     } 
     ] 
    }, 
    { 
     "text": "my name is amit and my date of birth is in month of march", 
     "intent": "PersonalInfo", 
     "entities": [ 
     { 
      "entity": "FirstName", 
      "startPos": 11, 
      "endPos": 14 
     } 
     ] 
    } 
    ] 
} 

どこが間違っているのか、どなたか指摘していただけませんか?警告メッセージによると、`start`と`end`の設定がおそらく間違っており、その結果トークンにいくつかの空白が含まれてしまっているようです。

更新:以下が私のRASA形式のトレーニングデータです:

{ 
    "rasa_nlu_data": { 
    "entity_synonyms": [ 
     { 
     "value": "city", 
     "synonyms": [ 
      "jaipur", 
      "bangalore", 
      "florida", 
      "japan", 
      "delhi", 
      "pune", 
      "bombay", 
      "mumbai", 
      "chennai", 
      "hyderabad", 
      "kolkata", 
      "chandigarh", 
      "ahmedabad", 
      "china", 
      "lucknow", 
      "germany", 
      "noida", 
      "indore", 
      "nagpur", 
      "coimbatore", 
      "bhopal", 
      "banglore", 
      "india", 
      "patna", 
      "maharashtra", 
      "surat", 
      "kanpur", 
      "guwahati", 
      "ludhiana", 
      "gwalior", 
      "aurangabad", 
      "amritsar", 
      "rajkot", 
      "gujarat", 
      "madurai", 
      "pradesh", 
      "dehradun", 
      "raipur", 
      "ranchi", 
      "varanasi", 
      "jabalpur", 
      "jodhpur", 
      "srinagar", 
      "mangalore", 
      "udaipur", 
      "jamshedpur", 
      "vadodara" 
     ] 
     }, 
     { 
     "value": "contact", 
     "synonyms": [ 
      "8947847422", 
      "8967564556", 
      "8967907890", 
      "1235712345", 
      "8989898989", 
      "1231231231" 
     ] 
     }, 
     { 
     "value": "Email", 
     "synonyms": [ 
      "[email protected]", 
      " [email protected]" 
     ] 
     }, 
     { 
     "value": "emailid", 
     "synonyms": [ 
      "[email protected]", 
      " [email protected]" 
     ] 
     }, 
     { 
     "value": "FirstName", 
     "synonyms": [ 
      "amit", 
      "ankur", 
      "ankit", 
      "ram", 
      "shyam", 
      "kunal", 
      "saikat", 
      "sundar", 
      "krishna", 
      "vikram", 
      "mohan", 
      "vijay", 
      "karthik", 
      "sunil", 
      "vivek", 
      "gopal", 
      "John", 
      "Chris", 
      "satish", 
      "surya", 
      "ajay", 
      "raju", 
      "suresh", 
      "sanjay", 
      "rajesh", 
      "ravi", 
      "ramesh", 
      "arun", 
      "rakesh", 
      "manoj", 
      "anil", 
      "kiran", 
      "sachin", 
      "dinesh", 
      "pradeep", 
      "raj", 
      "ashok", 
      "priya", 
      "prakash", 
      "david", 
      "mukesh", 
      "praveen", 
      "mahesh", 
      "naresh", 
      "anand", 
      "kumar", 
      "nikhil", 
      "michael", 
      "paul", 
      "naveen", 
      "nitin", 
      "srinivas", 
      "prasad", 
      "vinod", 
      "kishore", 
      "james", 
      "vinay", 
      "thomas" 
     ] 
     }, 
     { 
     "value": "LastName", 
     "synonyms": [ 
      "Gupta", 
      "Sharma", 
      "Jain", 
      "kumar", 
      "singh", 
      "mishra", 
      "Mukherjee", 
      "goswami", 
      "verma", 
      "yadav", 
      "patel", 
      "ghosh", 
      "das" 
     ] 
     }, 
     { 
     "value": "MID", 
     "synonyms": [ 
      "M1039205", 
      "M1039222", 
      "M1036767", 
      "M1048967", 
      "M1056789", 
      "M1028967", 
      "M1088967" 
     ] 
     } 
    ], 
    "regex_features": [], 
    "common_examples": [ 
     { 
     "text": "my name is ankur", 
     "intent": "PersonalInfo", 
     "entities": [ 
      { 
      "entity": "FirstName", 
      "value": "ankur", 
      "start": 11, 
      "end": 15 
      } 
     ] 
     }, 
     { 
     "text": "my contact number is 1231234123", 
     "intent": "PersonalInfo", 
     "entities": [ 
      { 
      "entity": "Contact", 
      "value": "1231234123", 
      "start": 21, 
      "end": 30 
      } 
     ] 
     }, 
     { 
     "text": "my firstname is amit and lastname is gupta", 
     "intent": "PersonalInfo", 
     "entities": [ 
      { 
      "entity": "FirstName", 
      "value": "amit", 
      "start": 16, 
      "end": 19 
      }, 
      { 
      "entity": "LastName", 
      "value": "gupta", 
      "start": 37, 
      "end": 41 
      } 
     ] 
     }, 
     { 
     "text": "my email is [email protected]", 
     "intent": "PersonalInfo", 
     "entities": [ 
      { 
      "entity": "Email", 
      "value": "[email protected]", 
      "start": 12, 
      "end": 22 
      } 
     ] 
     }, 
     { 
     "text": "kunal is one person", 
     "intent": "PersonalInfo", 
     "entities": [ 
      { 
      "entity": "FirstName", 
      "value": "kunal", 
      "start": 0, 
      "end": 4 
      } 
     ] 
     }, 
     { 
     "text": "myself singh and my dob comes on 24 may", 
     "intent": "PersonalInfo", 
     "entities": [ 
      { 
      "entity": "LastName", 
      "value": "singh", 
      "start": 7, 
      "end": 11 
      } 
     ] 
     }, 
     { 
     "text": "my name is gupta and my dob is in month april", 
     "intent": "PersonalInfo", 
     "entities": [ 
      { 
      "entity": "LastName", 
      "value": "gupta", 
      "start": 11, 
      "end": 15 
      } 
     ] 
     }, 
     { 
     "text": "my name is amit and my date of birth is in month of march", 
     "intent": "PersonalInfo", 
     "entities": [ 
      { 
      "entity": "FirstName", 
      "value": "amit", 
      "start": 11, 
      "end": 14 
      } 
     ] 
     } 
    ] 
    } 
} 

答えて

2

あなたのRASA形式のトレーニングデータでは、エンティティの境界(開始または終了のいずれか)がずれている可能性があります。例えば、(あなたのLUISモデルにある)次のような文 { "text": "kunal is one person", "intent": "PersonalInfo", "entities": [ { "entity": "FirstName", "startPos": 0, "endPos": 4 } ] },

が、トレーニングデータでは(誤って)`start`が1、`end`が5になっているのかもしれません。

Rasa NLU Trainerを使用してトレーニングデータを視覚化してみてください。

これは私にも起こりました。`start`と`end`の番号を修正しました。

+0

なぜ「開始」と「終了」位置は1ではなく0で始まるのですか?単にインデックスをインクリメントすれば、Array Out Of Bounds例外がスローされませんか? –

+0

例えば **show me chinese restaurants** という発話では、エンティティ 'chinese' のインデックスは開始位置8、終了位置15です。こちらを確認してください(http://nlu.rasa.ai/dataformat.html) –

+1

@KunalMukherjee 同様に、私が言っているのは、`start`を`1`から始めるのは間違いだということで、_それがエラーの原因になっているのかもしれません_。私はあなたのトレーニングデータを見ていないので、エラーが具体的に何であるかについてはコメントできません。しかし、エラーメッセージから判断すると、一部のトレーニング例で`start`と`end`のインデックス位置がずれており、その結果トークンの先頭または末尾にいくつかの空白が含まれてしまっている可能性があります。Rasa NLU Trainerのビジュアライザは試してみましたか? –

関連する問題