more stable implementation of speech recognition

Pavel Lutskov
2019-02-02 13:38:35 +01:00
parent 3217b7f841
commit 655c5418fd
7 changed files with 77 additions and 279 deletions

CMakeLists.txt (View File)

@@ -47,6 +47,4 @@ catkin_install_python(PROGRAMS
)
add_executable(aruco_detector src/aruco_detector.cpp)
add_executable(speech src/speech.cpp)
target_link_libraries(aruco_detector ${catkin_LIBRARIES} ${aruco_LIB})
target_link_libraries(speech ${catkin_LIBRARIES} ${aruco_LIB})
target_link_libraries(aruco_detector ${catkin_LIBRARIES})

View File

@@ -8,13 +8,12 @@
        output="screen"/>
  <node name="speech_server" pkg="teleoperation" type="speech_server.py"
        output="screen"/>
  <node name="speech_client" pkg="teleoperation" type="speech_client.py"
        output="screen"/>
  <node name="controller" pkg="teleoperation" type="controller.py"
        output="screen"/>
  <node name="speech_client" pkg="teleoperation" type="speech_client.py"
        output="screen"/>
  <node name="imitator" pkg="teleoperation" type="imitator.py"
        output="screen"/>
  <node name="walker" pkg="teleoperation" type="walker.py"/>
  <node name="fall_detector" pkg="teleoperation" type="fall_detector.py"/>
</launch>

controller.py (View File)

@@ -51,7 +51,7 @@ def handle_request(r):
    elif module == 'speech':
        if message == 'recognize':
            if STATE in ('idle', 'imitate'):
            if STATE in ('idle', 'imitate', 'dead'):
                permission = True
        elif message == 'imitate':
            if STATE == 'idle':
@@ -61,6 +61,13 @@ def handle_request(r):
            if STATE == 'imitate':
                STATE = 'idle'
                permission = True
        elif message == 'kill':
            STATE = 'dead'
            permission = True
        elif message == 'revive':
            if STATE == 'dead':
                STATE = 'idle'
                permission = True
    rospy.logdebug(
        'GOT REQUEST FROM %s TO %s.\nPERMISSION: %s.\nSTATE IS NOW: %s.' % (
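
Taken together, the two hunks above turn the controller's speech branch into a small state machine: 'recognize' is permitted from every state, including 'dead'; 'kill' is accepted unconditionally; 'revive' only works from 'dead'; and the branch at the top of the second hunk is presumably the 'stop' handler that returns from 'imitate' to 'idle'. A condensed sketch of just these transitions; the function name speech_permission, the ROS service plumbing and the other modules' branches are not from the diff:

    # Sketch of the speech-module permission logic implied by the hunks above.
    # Only the transitions visible in the diff are modelled.
    STATE = 'idle'

    def speech_permission(message):
        """Return True if `message` is allowed, updating the global STATE."""
        global STATE
        if message == 'recognize':
            return STATE in ('idle', 'imitate', 'dead')
        if message == 'imitate' and STATE == 'idle':
            STATE = 'imitate'
            return True
        if message == 'stop' and STATE == 'imitate':
            STATE = 'idle'
            return True
        if message == 'kill':
            STATE = 'dead'
            return True
        if message == 'revive' and STATE == 'dead':
            STATE = 'idle'
            return True
        return False

Because 'kill' has no precondition, a recognized kill word always drops the controller into 'dead', where imitation requests are refused until a 'revive' is heard.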

speech_client.py (View File)

@@ -13,9 +13,15 @@ from controller import inform_controller_factory
in_progress = False
state = 'idle'
IMITATE = 'repeat'
KILL = 'kill'
REVIVE = 'go'
STOP = 'stop'
voc_state = {
    'idle': 'start',
    'imitate': 'stop'
    'idle': [IMITATE, KILL],
    'imitate': [STOP, KILL],
    'killed': [REVIVE]
}
_inform_controller = inform_controller_factory('speech')
@@ -23,10 +29,17 @@ _inform_controller = inform_controller_factory('speech')
def done_cb(_, result):
    global in_progress, state
    rospy.loginfo(result)
    if result.word == 'start' and _inform_controller('imitate'):
    rospy.loginfo('SPEECH CLIENT: {}'.format(result))
    if result is None:
        in_progress = False
        return
    if result.word == IMITATE and _inform_controller('imitate'):
        state = 'imitate'
    elif result.word == 'stop' and _inform_controller('stop'):
    elif result.word == STOP and _inform_controller('stop'):
        state = 'idle'
    elif result.word == KILL and _inform_controller('kill'):
        state = 'killed'
    elif result.word == REVIVE and _inform_controller('revive'):
        state = 'idle'
    in_progress = False
@@ -46,11 +59,10 @@ if __name__ == '__main__':
            client.cancel_goal()
            in_progress = False
            state = 'idle'
            continue
        else:
            if not in_progress:
                in_progress = True
                client.send_goal(RequestSpeechGoal([voc_state[state]]),
                client.send_goal(RequestSpeechGoal(voc_state[state]),
                                 done_cb)
        rospy.Rate(2).sleep()
        rospy.Rate(4).sleep()
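
The client now offers the recognizer only the words that make sense in its current state ('repeat'/'kill' when idle, 'stop'/'kill' while imitating, 'go' once killed) and guards done_cb against a None result from a cancelled goal. A minimal sketch of the resulting loop, assuming RequestSpeechGoal takes the vocabulary as a plain word list (as the hunk suggests); should_cancel() is a hypothetical placeholder for the preempt condition, and the _inform_controller permission calls that gate each transition in the real done_cb are left out:

    # Sketch of the speech client loop suggested by the hunks above.
    import rospy
    import actionlib
    from teleoperation.msg import RequestSpeechAction, RequestSpeechGoal

    IMITATE, STOP, KILL, REVIVE = 'repeat', 'stop', 'kill', 'go'
    voc_state = {'idle': [IMITATE, KILL],
                 'imitate': [STOP, KILL],
                 'killed': [REVIVE]}
    state, in_progress = 'idle', False

    def done_cb(_, result):
        global state, in_progress
        if result is not None:
            # map the recognized word to the next client state
            state = {IMITATE: 'imitate', STOP: 'idle',
                     KILL: 'killed', REVIVE: 'idle'}.get(result.word, state)
        in_progress = False

    def should_cancel():
        return False  # placeholder, not part of the original node

    if __name__ == '__main__':
        rospy.init_node('speech_client')
        client = actionlib.SimpleActionClient('speech_server', RequestSpeechAction)
        client.wait_for_server()
        while not rospy.is_shutdown():
            if should_cancel():
                client.cancel_goal()
                state, in_progress = 'idle', False
            elif not in_progress:
                in_progress = True
                # only the words valid in the current state are offered to ASR
                client.send_goal(RequestSpeechGoal(voc_state[state]), done_cb)
            rospy.Rate(4).sleep()

Raising the loop rate from 2 Hz to 4 Hz only shortens the reaction time between goals; the actual waiting for a word happens inside the action server.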

speech_server.py (View File)

@@ -10,7 +10,6 @@ from teleoperation.msg import RequestSpeechAction, RequestSpeechResult
speech_broker = None
almem = None
r = False
def request_speech(goal):
@@ -18,7 +17,9 @@ def request_speech(goal):
        sas.set_succeeded(RequestSpeechResult(word=''))
        return
    while not sas.is_preempt_requested() and not speech_detector.have_word():
    while (not sas.is_preempt_requested() and
           not speech_detector.have_word() and
           not rospy.is_shutdown()):
        rospy.Rate(10).sleep()
    if speech_detector.have_word():
@@ -40,16 +41,24 @@ class SpeechDetectorModule(ALModule):
        self.asr = ALProxy('ALSpeechRecognition')
        self.tts = ALProxy('ALTextToSpeech')
        self.asr.setLanguage('English')
        self.running = False
        almem.subscribeToEvent("WordRecognized",
                               "speech_detector",
                               "on_word_recognized")
        self.asr.pause(True)
        self._busy = False
    def start_speech(self, voc):
        if self.running:
    def get_status(self):
        print(almem.getData('ALSpeechRecognition/Status'))
    def start_speech(self, voc, resume=False):
        if self._busy != resume:
            return False
        if not resume:
            self.voc = voc
            self.asr.setVocabulary(voc, False)
            self.asr.subscribe(self.subid)
        self.asr.pause(False)
        self.running = True
        self._busy = True
        return True
    def have_word(self):
@@ -60,23 +69,26 @@ class SpeechDetectorModule(ALModule):
        self.recognized = None
        return result
    def stop_speech(self):
        if not self.running:
    def stop_speech(self, pause=False):
        if not self._busy:
            return
        self.asr.unsubscribe(self.subid)
        self.asr.pause(True)
        self.running = False
        if not pause:
            self.asr.unsubscribe(self.subid)
            self._busy = False
    def on_word_recognized(self, *_args):
        word, conf = almem.getData('WordRecognized')
        print(word, conf)
        if conf > 0.4:
            self.stop_speech()
            self.stop_speech(pause=True)
            self.tts.say(word)
            self.recognized = word
        else:
            self.stop_speech()
        else:
            self.stop_speech(pause=True)
            self.tts.say('I didn\'t understand. Please repeat')
            self.start_speech(self.voc)
            self.start_speech(self.voc, resume=True)
@@ -84,21 +96,16 @@ if __name__ == '__main__':
    rospy.init_node('speech_server')
    speech_broker = ALBroker('speech_broker', '0.0.0.0', 0,
                             os.environ['NAO_IP'], 9559)
    speech_detector = SpeechDetectorModule('speech_detector')
    almem = ALProxy('ALMemory')
    almem.subscribeToEvent("WordRecognized",
                           "speech_detector",
                           "on_word_recognized")
    speech_detector.asr.pause(True)
    speech_detector = SpeechDetectorModule('speech_detector')
    sas = actionlib.SimpleActionServer('speech_server', RequestSpeechAction,
                                       execute_cb=request_speech,
                                       auto_start=False)
    sas.start()
    while not rospy.is_shutdown():
        rospy.Rate(4).sleep()
    if speech_detector.running:
        speech_detector.stop_speech()
        rospy.Rate(1).sleep()
    while sas.is_active():
        pass
    speech_broker.shutdown()
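
The main change on the server side is that the NAOqi speech engine is no longer torn down after every word: subscription to WordRecognized now happens once in the constructor, a recognition merely calls stop_speech(pause=True), which pauses ALSpeechRecognition but keeps the subscription and vocabulary, and start_speech(self.voc, resume=True) picks it straight back up after the "please repeat" prompt; only a full stop unsubscribes. A small sketch of that pause/resume protocol with the proxy replaced by a stub, so the _busy bookkeeping can be run in isolation (FakeASR and Detector are illustrations, not the actual module):

    # Stub stands in for the ALSpeechRecognition proxy so the pause/resume
    # bookkeeping of SpeechDetectorModule can be exercised without a robot.
    class FakeASR(object):
        def setVocabulary(self, voc, spotting):
            print('vocabulary set to %s' % (voc,))
        def subscribe(self, name):
            print('%s subscribed' % name)
        def unsubscribe(self, name):
            print('%s unsubscribed' % name)
        def pause(self, paused):
            print('paused' if paused else 'listening')

    class Detector(object):
        def __init__(self):
            self.asr = FakeASR()
            self.asr.pause(True)      # engine kept around, just muted
            self._busy = False
            self.voc = None

        def start_speech(self, voc, resume=False):
            # fresh start only when idle, resume only when already busy
            if self._busy != resume:
                return False
            if not resume:            # a resume keeps vocabulary and subscription
                self.voc = voc
                self.asr.setVocabulary(voc, False)
                self.asr.subscribe('speech_detector')
            self.asr.pause(False)
            self._busy = True
            return True

        def stop_speech(self, pause=False):
            if not self._busy:
                return
            self.asr.pause(True)
            if not pause:             # a real stop also releases the engine
                self.asr.unsubscribe('speech_detector')
                self._busy = False

    d = Detector()
    d.start_speech(['repeat', 'kill'])   # goal arrives: listen for these words
    d.stop_speech(pause=True)            # low-confidence hit: mute, stay subscribed
    d.start_speech(d.voc, resume=True)   # listen again with the same vocabulary
    d.stop_speech()                      # confident word: fully release the ASR

The shutdown path also changed: request_speech now breaks out of its wait on rospy shutdown, the main thread waits for any active goal via sas.is_active(), and the ALBroker is shut down explicitly.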

walker.py (View File)

@@ -21,8 +21,8 @@ VMIN = 0.3
VMAX = 1.0
def thirdway(a, b):
    return a + (b - a) / 3
def n_way(a, b, n=3):
    return a + (b - a) / n
def global_init():
@@ -32,12 +32,12 @@ def global_init():
        x = json.load(f)
    cx, cy, cz = x['cr']
    FW = thirdway(cx, x['fw']), x['fw']
    BK = thirdway(cx, x['bk']), x['bk']
    LT = thirdway(cy, x['lt']), x['lt']
    RT = thirdway(cy, x['rt']), x['rt']
    LR = thirdway(cz, x['lr']), x['lr']
    RR = thirdway(cz, x['rr']), x['rr']
    FW = n_way(cx, x['fw']), x['fw']
    BK = n_way(cx, x['bk']), x['bk']
    LT = n_way(cy, x['lt']), x['lt']
    RT = n_way(cy, x['rt']), x['rt']
    LR = n_way(cz, x['lr'], 2), x['lr']
    RR = n_way(cz, x['rr'], 2), x['rr']
_inform_controller = inform_controller_factory('walker')
@@ -96,7 +96,8 @@ if __name__ == '__main__':
        if not any(movement):
            rospy.logdebug('WALKER: STOP')
            _inform_controller('stop')
            mp.move(0, 0, 0)
            # mp.move(0, 0, 0)
            mp.stopMove()
            continue
        permission = _inform_controller('move')
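
n_way generalizes the old thirdway helper: it returns the point 1/n of the way from a to b, so FW/BK/LT/RT keep the old one-third threshold while LR/RR now sit at the halfway point (n=2), farther from the calibrated centre. A quick worked example; the centre/extreme numbers are invented purely for illustration:

    def n_way(a, b, n=3):
        return a + (b - a) / n

    centre, extreme = 0.0, 6.0              # hypothetical calibration for one axis
    print(n_way(centre, extreme))           # 2.0 -> same as the old thirdway()
    print(n_way(centre, extreme, 2))        # 3.0 -> the new LR/RR threshold, halfway out

The stop branch also switches from commanding zero velocity with mp.move(0, 0, 0) to mp.stopMove(), which tells ALMotion to end the current move task.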

src/speech.cpp (View File)

@@ -1,226 +0,0 @@
#include <iostream>
#include <fstream>
#include <stdlib.h>
#include <ros/ros.h>
#include <string.h>
#include <std_srvs/Empty.h>
#include <naoqi_bridge_msgs/SpeechWithFeedbackActionGoal.h>
#include <actionlib_msgs/GoalStatusArray.h>
#include <naoqi_bridge_msgs/SetSpeechVocabularyActionGoal.h>
#include <naoqi_bridge_msgs/WordRecognized.h>
#include <std_msgs/Bool.h>
#include <std_msgs/String.h>
#include <teleoperation/InformController.h>
#include <teleoperation/utils.hpp>
using namespace std;
class Nao_control {
protected:
// ROS node handler
ros::NodeHandle nh_;
// Publisher for nao speech
ros::Publisher speech_pub;
// Publisher for nao vocabulary parameters
ros::Publisher voc_params_pub;
// Client for starting speech recognition
ros::ServiceClient recog_start_srv;
// Client for stopping speech recognition
ros::ServiceClient recog_stop_srv;
// Subscriber to speech recognition
ros::Subscriber recog_sub;
// Publisher for recognized commands
ros::ServiceClient ic;
bool imitating;
int speech_id_ctr;
public:
Nao_control() : imitating(false), speech_id_ctr(1) {
ROS_INFO("Constructor");
speech_pub = nh_.advertise<naoqi_bridge_msgs::SpeechWithFeedbackActionGoal>(
"/speech_action/goal", 1);
voc_params_pub= nh_.advertise<naoqi_bridge_msgs::SetSpeechVocabularyActionGoal>(
"/speech_vocabulary_action/goal", 1);
recog_start_srv=nh_.serviceClient<std_srvs::Empty>("/start_recognition");
recog_stop_srv=nh_.serviceClient<std_srvs::Empty>("/stop_recognition");
recog_sub=nh_.subscribe("/word_recognized", 1,
&Nao_control::speechRecognitionCallback, this);
ic = nh_.serviceClient<teleoperation::InformController>(
"/inform_controller", false);
}
~Nao_control() {
ROS_INFO("SPEECH: DESTRUCT");
std_srvs::Empty srv;
if (recog_stop_srv.call(srv)) {
ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION");
}
else {
ROS_ERROR("COULDN'T STOP RECOGNITION");
}
}
void speechRecognitionCallback(
const naoqi_bridge_msgs::WordRecognized::ConstPtr& msg) {
ROS_INFO("A WORD WAS RECOGNIZED");
std_srvs::Empty srv;
ROS_INFO("CONFIDENCE: %lf", msg->confidence_values[0]);
for (int i = 0; i < msg->words.size(); i++) {
std::cout << msg->words[i] << std::endl;
}
//set pause duration
double f_pause = 2;
if (recog_stop_srv.call(srv) && ((msg->words.size())> 0)) {
ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION");
// Use confidence level to decide whether the recognized word
// should be published
if (msg->confidence_values[0] > 0.35) {
ROS_INFO("SPEECH STARTING");
std::string say = "Ok I understood " + msg->words[0];
naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg;
s_msg.goal_id.id = stuff_to_str(this->speech_id_ctr);
this->speech_id_ctr += 1;
s_msg.goal.say = say;
this->speech_pub.publish(s_msg);
teleoperation::InformController ic_msg;
ic_msg.request.module = "speech_recognition";
if (msg->words[0] == "imitate") {
ic_msg.request.message = "imitate";
ROS_INFO("SPEECH: REQUESTING IMITATION");
if (this->ic.call(ic_msg) && ic_msg.response.permission) {
this->imitating = true;
}
else {
ROS_ERROR("SPEECH: CONTROLLER UNREACHABLE");
}
}
else if (msg->words[0] == "stop") {
ROS_INFO("SPEECH: REQUESTING STOP IMITATION");
ic_msg.request.message = "stop";
if (this->ic.call(ic_msg) && ic_msg.response.permission) {
this->imitating = false;
}
else {
ROS_ERROR("SPEECH: CONTROLLER UNREACHABLE");
}
}
}
else if (msg->confidence_values[0] > 0.05) {
ROS_INFO("SPEECH STARTING");
std::string say = "I did not understand. Could you repeat that please";
naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg;
s_msg.goal_id.id = stuff_to_str(this->speech_id_ctr);
this->speech_id_ctr += 1;
s_msg.goal.say = say;
this->speech_pub.publish(s_msg);
// increase pause duration
f_pause = 0.4;
}
}
else {
ROS_ERROR("COULDN'T STOP RECOGNITION");
}
// pause until NAO stops talking
ros::Rate loop_rate(f_pause);
loop_rate.sleep();
// re-start recognition
commandRecognition();
}
void initializeVocabulary()
{
std::vector<std::string> vocabulary;
if (!this->imitating) vocabulary.push_back("imitate");
else vocabulary.push_back("stop");
vocabulary.push_back("open");
vocabulary.push_back("close");
// vocabulary.push_back("kill");
naoqi_bridge_msgs::SetSpeechVocabularyActionGoal msg;
msg.goal.words = vocabulary;
msg.goal_id.id = stuff_to_str(speech_id_ctr);
std::cout << msg.goal << std::endl;
speech_id_ctr += 1;
voc_params_pub.publish(msg);
ROS_INFO("VOCABULARY INITIALIZED");
}
void commandRecognition()
{
//recognition has to be started and ended once a valid command was found
while (true) {
ros::Rate loop_rate(4);
loop_rate.sleep();
teleoperation::InformController ic_msg;
ic_msg.request.module = "speech_recognition";
ic_msg.request.message = "recognize";
if (this->ic.call(ic_msg) && ic_msg.response.permission) {
break;
}
}
this->initializeVocabulary();
ros::Rate loop_rate(1);
loop_rate.sleep();
std_srvs::Empty srv;
if (recog_start_srv.call(srv)) {
ROS_INFO("SUCCESSFULLY STARTED RECOGNITION");
}
else {
ROS_ERROR("COULDN'T START RECOGNITION");
}
}
};
int main(int argc, char** argv) {
ros::init(argc, argv, "speech");
Nao_control TermiNAOtor;
ros::Rate loop_rate(1);
loop_rate.sleep();
TermiNAOtor.commandRecognition();
ROS_INFO("SPIN");
ros::spin();
return 0;
}