RASA NLUの誤ったエンティティ注釈エラーを解決する方法
LUISスキーマモデルをRASAにインポートし、spacy + scikitパイプラインを使用して訓練しようとしています。RASA NLUのバージョンはv0.10.4を使用しています。
しかし、LUISモデルスキーマを読み込もうとすると、ner_crfコンポーネントが「エンティティ注釈の位置ずれ(misaligned entity annotation)」の警告を出します。
LUISモデルスキーマではエンティティを正しくタグ付けしているはずなのですが。
以下が私の設定ファイルです:
{
"project": "SynonymsExample",
"path": "C:\\Users\\xyz\\Desktop\\RASA\\models",
"response_log": "C:\\Users\\xyz\\Desktop\\RASA\\logs",
"pipeline": "spacy_sklearn",
"data": "C:\\Users\\xyz\\Desktop\\RASA\\data\\examples\\RasaFormat.json",
"cors_origins": ["*"],
"aws_endpoint_url": null,
"token": null,
"num_threads": 2,
"port": 5000
}
以下が私のLUISモデルです:
{
"luis_schema_version": "2.1.0",
"versionId": "0.1",
"name": "phraseListDemo",
"desc": "",
"culture": "en-us",
"intents": [
{
"name": "None"
},
{
"name": "PersonalInfo"
}
],
"entities": [
{
"name": "city"
},
{
"name": "Contact"
},
{
"name": "Email"
},
{
"name": "FirstName"
},
{
"name": "LastName"
}
],
"composites": [],
"closedLists": [],
"bing_entities": [
"datetimeV2"
],
"actions": [],
"model_features": [
{
"name": "city",
"mode": true,
"words": "jaipur,bangalore,florida,japan,delhi,pune,bombay,mumbai,chennai,hyderabad,kolkata,chandigarh,ahmedabad,china,lucknow,germany,noida,indore,nagpur,coimbatore,bhopal,banglore,india,patna,maharashtra,surat,kanpur,guwahati,ludhiana,gwalior,aurangabad,amritsar,rajkot,gujarat,madurai,pradesh,dehradun,raipur,ranchi,varanasi,jabalpur,jodhpur,srinagar,mangalore,udaipur,jamshedpur,vadodara",
"activated": true
},
{
"name": "contact",
"mode": true,
"words": "8947847422,8967564556,8967907890,1235712345,8989898989,1231231231",
"activated": true
},
{
"name": "Email",
"mode": true,
"words": "[email protected], [email protected]",
"activated": true
},
{
"name": "emailid",
"mode": true,
"words": "[email protected], [email protected]",
"activated": true
},
{
"name": "FirstName",
"mode": true,
"words": "amit,ankur,ankit,ram,shyam,kunal,saikat,sundar,krishna,vikram,mohan,vijay,karthik,sunil,vivek,gopal,John,Chris,satish,surya,ajay,raju,suresh,sanjay,rajesh,ravi,ramesh,arun,rakesh,manoj,anil,kiran,sachin,dinesh,pradeep,raj,ashok,priya,prakash,david,mukesh,praveen,mahesh,naresh,anand,kumar,nikhil,michael,paul,naveen,nitin,srinivas,prasad,vinod,kishore,james,vinay,thomas",
"activated": true
},
{
"name": "LastName",
"mode": true,
"words": "Gupta,Sharma,Jain,kumar,singh,mishra,Mukherjee,goswami,verma,yadav,patel,ghosh,das",
"activated": true
},
{
"name": "MID",
"mode": true,
"words": "M1039205,M1039222,M1036767,M1048967,M1056789,M1028967,M1088967",
"activated": true
}
],
"regex_features": [],
"utterances": [
{
"text": "my name is ankur",
"intent": "PersonalInfo",
"entities": [
{
"entity": "FirstName",
"startPos": 11,
"endPos": 15
}
]
},
{
"text": "my contact number is 1231234123",
"intent": "PersonalInfo",
"entities": [
{
"entity": "Contact",
"startPos": 21,
"endPos": 30
}
]
},
{
"text": "my firstname is amit and lastname is gupta",
"intent": "PersonalInfo",
"entities": [
{
"entity": "FirstName",
"startPos": 16,
"endPos": 19
},
{
"entity": "LastName",
"startPos": 37,
"endPos": 41
}
]
},
{
"text": "my email is [email protected]",
"intent": "PersonalInfo",
"entities": [
{
"entity": "Email",
"startPos": 12,
"endPos": 22
}
]
},
{
"text": "kunal is one person",
"intent": "PersonalInfo",
"entities": [
{
"entity": "FirstName",
"startPos": 0,
"endPos": 4
}
]
},
{
"text": "myself singh and my dob comes on 24 may",
"intent": "PersonalInfo",
"entities": [
{
"entity": "LastName",
"startPos": 7,
"endPos": 11
}
]
},
{
"text": "my name is gupta and my dob is in month april",
"intent": "PersonalInfo",
"entities": [
{
"entity": "LastName",
"startPos": 11,
"endPos": 15
}
]
},
{
"text": "my name is amit and my date of birth is in month of march",
"intent": "PersonalInfo",
"entities": [
{
"entity": "FirstName",
"startPos": 11,
"endPos": 14
}
]
}
]
}
どこが間違っているのか、どなたか指摘していただけませんか?
警告メッセージによると、`start` と `end` の設定がおそらく誤っており、その結果トークンに余分な空白が含まれてしまっているようです。
更新:以下はRASA形式(rasa_nlu_data)の訓練データです:
{
"rasa_nlu_data": {
"entity_synonyms": [
{
"value": "city",
"synonyms": [
"jaipur",
"bangalore",
"florida",
"japan",
"delhi",
"pune",
"bombay",
"mumbai",
"chennai",
"hyderabad",
"kolkata",
"chandigarh",
"ahmedabad",
"china",
"lucknow",
"germany",
"noida",
"indore",
"nagpur",
"coimbatore",
"bhopal",
"banglore",
"india",
"patna",
"maharashtra",
"surat",
"kanpur",
"guwahati",
"ludhiana",
"gwalior",
"aurangabad",
"amritsar",
"rajkot",
"gujarat",
"madurai",
"pradesh",
"dehradun",
"raipur",
"ranchi",
"varanasi",
"jabalpur",
"jodhpur",
"srinagar",
"mangalore",
"udaipur",
"jamshedpur",
"vadodara"
]
},
{
"value": "contact",
"synonyms": [
"8947847422",
"8967564556",
"8967907890",
"1235712345",
"8989898989",
"1231231231"
]
},
{
"value": "Email",
"synonyms": [
"[email protected]",
" [email protected]"
]
},
{
"value": "emailid",
"synonyms": [
"[email protected]",
" [email protected]"
]
},
{
"value": "FirstName",
"synonyms": [
"amit",
"ankur",
"ankit",
"ram",
"shyam",
"kunal",
"saikat",
"sundar",
"krishna",
"vikram",
"mohan",
"vijay",
"karthik",
"sunil",
"vivek",
"gopal",
"John",
"Chris",
"satish",
"surya",
"ajay",
"raju",
"suresh",
"sanjay",
"rajesh",
"ravi",
"ramesh",
"arun",
"rakesh",
"manoj",
"anil",
"kiran",
"sachin",
"dinesh",
"pradeep",
"raj",
"ashok",
"priya",
"prakash",
"david",
"mukesh",
"praveen",
"mahesh",
"naresh",
"anand",
"kumar",
"nikhil",
"michael",
"paul",
"naveen",
"nitin",
"srinivas",
"prasad",
"vinod",
"kishore",
"james",
"vinay",
"thomas"
]
},
{
"value": "LastName",
"synonyms": [
"Gupta",
"Sharma",
"Jain",
"kumar",
"singh",
"mishra",
"Mukherjee",
"goswami",
"verma",
"yadav",
"patel",
"ghosh",
"das"
]
},
{
"value": "MID",
"synonyms": [
"M1039205",
"M1039222",
"M1036767",
"M1048967",
"M1056789",
"M1028967",
"M1088967"
]
}
],
"regex_features": [],
"common_examples": [
{
"text": "my name is ankur",
"intent": "PersonalInfo",
"entities": [
{
"entity": "FirstName",
"value": "ankur",
"start": 11,
"end": 15
}
]
},
{
"text": "my contact number is 1231234123",
"intent": "PersonalInfo",
"entities": [
{
"entity": "Contact",
"value": "1231234123",
"start": 21,
"end": 30
}
]
},
{
"text": "my firstname is amit and lastname is gupta",
"intent": "PersonalInfo",
"entities": [
{
"entity": "FirstName",
"value": "amit",
"start": 16,
"end": 19
},
{
"entity": "LastName",
"value": "gupta",
"start": 37,
"end": 41
}
]
},
{
"text": "my email is [email protected]",
"intent": "PersonalInfo",
"entities": [
{
"entity": "Email",
"value": "[email protected]",
"start": 12,
"end": 22
}
]
},
{
"text": "kunal is one person",
"intent": "PersonalInfo",
"entities": [
{
"entity": "FirstName",
"value": "kunal",
"start": 0,
"end": 4
}
]
},
{
"text": "myself singh and my dob comes on 24 may",
"intent": "PersonalInfo",
"entities": [
{
"entity": "LastName",
"value": "singh",
"start": 7,
"end": 11
}
]
},
{
"text": "my name is gupta and my dob is in month april",
"intent": "PersonalInfo",
"entities": [
{
"entity": "LastName",
"value": "gupta",
"start": 11,
"end": 15
}
]
},
{
"text": "my name is amit and my date of birth is in month of march",
"intent": "PersonalInfo",
"entities": [
{
"entity": "FirstName",
"value": "amit",
"start": 11,
"end": 14
}
]
}
]
}
}
なぜ `start` と `end` の位置は1ではなく0から始まるのですか?単にインデックスをインクリメントすると、Array Out Of Bounds例外がスローされませんか? –
この発話 **show me chinese restaurants** では、エンティティ 'chinese' のインデックスは開始位置8、終了位置15です。こちらを確認してください(http://nlu.rasa.ai/dataformat.html) –
@KunalMukherjee 同様に、私が言いたいのは、`start` を `1` から始めるのは誤りであり、それがこのエラーを引き起こしているかもしれない、ということです。あなたの訓練データを見ていないので、エラーの内容について具体的にコメントすることはできません。しかしエラーメッセージから判断すると、一部の訓練例で `start` と `end` のインデックス位置がずれており、トークンの先頭または末尾に空白が含まれてしまっているようです。Rasa NLU Trainerのビジュアライザは試してみましたか? –