more stable implementation of speech stuff
This commit is contained in:
@@ -47,6 +47,4 @@ catkin_install_python(PROGRAMS
|
||||
)
|
||||
|
||||
add_executable(aruco_detector src/aruco_detector.cpp)
|
||||
add_executable(speech src/speech.cpp)
|
||||
target_link_libraries(aruco_detector ${catkin_LIBRARIES} ${aruco_LIB})
|
||||
target_link_libraries(speech ${catkin_LIBRARIES} ${aruco_LIB})
|
||||
target_link_libraries(aruco_detector ${catkin_LIBRARIES})
|
||||
|
||||
@@ -8,13 +8,12 @@
|
||||
output="screen"/>
|
||||
<node name="speech_server" pkg="teleoperation" type="speech_server.py"
|
||||
output="screen"/>
|
||||
<node name="speech_client" pkg="teleoperation" type="speech_client.py"
|
||||
output="screen"/>
|
||||
<node name="controller" pkg="teleoperation" type="controller.py"
|
||||
output="screen"/>
|
||||
<node name="speech_client" pkg="teleoperation" type="speech_client.py"
|
||||
output="screen"/>
|
||||
<node name="imitator" pkg="teleoperation" type="imitator.py"
|
||||
output="screen"/>
|
||||
<node name="walker" pkg="teleoperation" type="walker.py"/>
|
||||
<node name="fall_detector" pkg="teleoperation" type="fall_detector.py"/>
|
||||
|
||||
</launch>
|
||||
|
||||
@@ -51,7 +51,7 @@ def handle_request(r):
|
||||
|
||||
elif module == 'speech':
|
||||
if message == 'recognize':
|
||||
if STATE in ('idle', 'imitate'):
|
||||
if STATE in ('idle', 'imitate', 'dead'):
|
||||
permission = True
|
||||
elif message == 'imitate':
|
||||
if STATE == 'idle':
|
||||
@@ -61,6 +61,13 @@ def handle_request(r):
|
||||
if STATE == 'imitate':
|
||||
STATE = 'idle'
|
||||
permission = True
|
||||
elif message == 'kill':
|
||||
STATE = 'dead'
|
||||
permission = True
|
||||
elif message == 'revive':
|
||||
if STATE == 'dead':
|
||||
STATE = 'idle'
|
||||
permission = True
|
||||
|
||||
rospy.logdebug(
|
||||
'GOT REQUEST FROM %s TO %s.\nPERMISSION: %s.\nSTATE IS NOW: %s.' % (
|
||||
|
||||
@@ -13,9 +13,15 @@ from controller import inform_controller_factory
|
||||
in_progress = False
|
||||
state = 'idle'
|
||||
|
||||
IMITATE = 'repeat'
|
||||
KILL = 'kill'
|
||||
REVIVE = 'go'
|
||||
STOP = 'stop'
|
||||
|
||||
voc_state = {
|
||||
'idle': 'start',
|
||||
'imitate': 'stop'
|
||||
'idle': [IMITATE, KILL],
|
||||
'imitate': [STOP, KILL],
|
||||
'killed': [REVIVE]
|
||||
}
|
||||
|
||||
_inform_controller = inform_controller_factory('speech')
|
||||
@@ -23,10 +29,17 @@ _inform_controller = inform_controller_factory('speech')
|
||||
|
||||
def done_cb(_, result):
|
||||
global in_progress, state
|
||||
rospy.loginfo(result)
|
||||
if result.word == 'start' and _inform_controller('imitate'):
|
||||
rospy.loginfo('SPEECH CLIENT: {}'.format(result))
|
||||
if result is None:
|
||||
in_progress = False
|
||||
return
|
||||
if result.word == IMITATE and _inform_controller('imitate'):
|
||||
state = 'imitate'
|
||||
elif result.word == 'stop' and _inform_controller('stop'):
|
||||
elif result.word == STOP and _inform_controller('stop'):
|
||||
state = 'idle'
|
||||
elif result.word == KILL and _inform_controller('kill'):
|
||||
state = 'killed'
|
||||
elif result.word == REVIVE and _inform_controller('revive'):
|
||||
state = 'idle'
|
||||
in_progress = False
|
||||
|
||||
@@ -46,11 +59,10 @@ if __name__ == '__main__':
|
||||
client.cancel_goal()
|
||||
in_progress = False
|
||||
state = 'idle'
|
||||
continue
|
||||
|
||||
else:
|
||||
if not in_progress:
|
||||
in_progress = True
|
||||
client.send_goal(RequestSpeechGoal([voc_state[state]]),
|
||||
client.send_goal(RequestSpeechGoal(voc_state[state]),
|
||||
done_cb)
|
||||
rospy.Rate(2).sleep()
|
||||
|
||||
rospy.Rate(4).sleep()
|
||||
|
||||
@@ -10,7 +10,6 @@ from teleoperation.msg import RequestSpeechAction, RequestSpeechResult
|
||||
|
||||
speech_broker = None
|
||||
almem = None
|
||||
r = False
|
||||
|
||||
|
||||
def request_speech(goal):
|
||||
@@ -18,7 +17,9 @@ def request_speech(goal):
|
||||
sas.set_succeeded(RequestSpeechResult(word=''))
|
||||
return
|
||||
|
||||
while not sas.is_preempt_requested() and not speech_detector.have_word():
|
||||
while (not sas.is_preempt_requested() and
|
||||
not speech_detector.have_word() and
|
||||
not rospy.is_shutdown()):
|
||||
rospy.Rate(10).sleep()
|
||||
|
||||
if speech_detector.have_word():
|
||||
@@ -40,16 +41,24 @@ class SpeechDetectorModule(ALModule):
|
||||
self.asr = ALProxy('ALSpeechRecognition')
|
||||
self.tts = ALProxy('ALTextToSpeech')
|
||||
self.asr.setLanguage('English')
|
||||
self.running = False
|
||||
almem.subscribeToEvent("WordRecognized",
|
||||
"speech_detector",
|
||||
"on_word_recognized")
|
||||
self.asr.pause(True)
|
||||
self._busy = False
|
||||
|
||||
def start_speech(self, voc):
|
||||
if self.running:
|
||||
def get_status(self):
|
||||
print(almem.getData('ALSpeechRecognition/Status'))
|
||||
|
||||
def start_speech(self, voc, resume=False):
|
||||
if self._busy != resume:
|
||||
return False
|
||||
if not resume:
|
||||
self.voc = voc
|
||||
self.asr.setVocabulary(voc, False)
|
||||
self.asr.subscribe(self.subid)
|
||||
self.asr.pause(False)
|
||||
self.running = True
|
||||
self._busy = True
|
||||
return True
|
||||
|
||||
def have_word(self):
|
||||
@@ -60,23 +69,26 @@ class SpeechDetectorModule(ALModule):
|
||||
self.recognized = None
|
||||
return result
|
||||
|
||||
def stop_speech(self):
|
||||
if not self.running:
|
||||
def stop_speech(self, pause=False):
|
||||
if not self._busy:
|
||||
return
|
||||
self.asr.unsubscribe(self.subid)
|
||||
self.asr.pause(True)
|
||||
self.running = False
|
||||
if not pause:
|
||||
self.asr.unsubscribe(self.subid)
|
||||
self._busy = False
|
||||
|
||||
def on_word_recognized(self, *_args):
|
||||
word, conf = almem.getData('WordRecognized')
|
||||
print(word, conf)
|
||||
if conf > 0.4:
|
||||
self.stop_speech()
|
||||
self.stop_speech(pause=True)
|
||||
self.tts.say(word)
|
||||
self.recognized = word
|
||||
else:
|
||||
self.stop_speech()
|
||||
else:
|
||||
self.stop_speech(pause=True)
|
||||
self.tts.say('I didn\'t understand. Please repeat')
|
||||
self.start_speech(self.voc)
|
||||
self.start_speech(self.voc, resume=True)
|
||||
|
||||
|
||||
|
||||
@@ -84,21 +96,16 @@ if __name__ == '__main__':
|
||||
rospy.init_node('speech_server')
|
||||
speech_broker = ALBroker('speech_broker', '0.0.0.0', 0,
|
||||
os.environ['NAO_IP'], 9559)
|
||||
speech_detector = SpeechDetectorModule('speech_detector')
|
||||
almem = ALProxy('ALMemory')
|
||||
almem.subscribeToEvent("WordRecognized",
|
||||
"speech_detector",
|
||||
"on_word_recognized")
|
||||
speech_detector.asr.pause(True)
|
||||
speech_detector = SpeechDetectorModule('speech_detector')
|
||||
sas = actionlib.SimpleActionServer('speech_server', RequestSpeechAction,
|
||||
execute_cb=request_speech,
|
||||
auto_start=False)
|
||||
sas.start()
|
||||
|
||||
while not rospy.is_shutdown():
|
||||
rospy.Rate(4).sleep()
|
||||
|
||||
if speech_detector.running:
|
||||
speech_detector.stop_speech()
|
||||
rospy.Rate(1).sleep()
|
||||
while sas.is_active():
|
||||
pass
|
||||
|
||||
speech_broker.shutdown()
|
||||
|
||||
@@ -21,8 +21,8 @@ VMIN = 0.3
|
||||
VMAX = 1.0
|
||||
|
||||
|
||||
def thirdway(a, b):
|
||||
return a + (b - a) / 3
|
||||
def n_way(a, b, n=3):
    """Return the point one n-th of the way from ``a`` towards ``b``.

    With the default ``n=3`` this is the point a third of the way from
    ``a`` to ``b``; ``n=2`` gives the midpoint.

    The division is forced to floating point so that integer inputs are
    not floor-divided when the module runs under Python 2.

    Raises ZeroDivisionError if ``n`` is 0.
    """
    return a + (b - a) / float(n)
|
||||
|
||||
|
||||
def global_init():
|
||||
@@ -32,12 +32,12 @@ def global_init():
|
||||
x = json.load(f)
|
||||
|
||||
cx, cy, cz = x['cr']
|
||||
FW = thirdway(cx, x['fw']), x['fw']
|
||||
BK = thirdway(cx, x['bk']), x['bk']
|
||||
LT = thirdway(cy, x['lt']), x['lt']
|
||||
RT = thirdway(cy, x['rt']), x['rt']
|
||||
LR = thirdway(cz, x['lr']), x['lr']
|
||||
RR = thirdway(cz, x['rr']), x['rr']
|
||||
FW = n_way(cx, x['fw']), x['fw']
|
||||
BK = n_way(cx, x['bk']), x['bk']
|
||||
LT = n_way(cy, x['lt']), x['lt']
|
||||
RT = n_way(cy, x['rt']), x['rt']
|
||||
LR = n_way(cz, x['lr'], 2), x['lr']
|
||||
RR = n_way(cz, x['rr'], 2), x['rr']
|
||||
|
||||
|
||||
_inform_controller = inform_controller_factory('walker')
|
||||
@@ -96,7 +96,8 @@ if __name__ == '__main__':
|
||||
if not any(movement):
|
||||
rospy.logdebug('WALKER: STOP')
|
||||
_inform_controller('stop')
|
||||
mp.move(0, 0, 0)
|
||||
# mp.move(0, 0, 0)
|
||||
mp.stopMove()
|
||||
continue
|
||||
|
||||
permission = _inform_controller('move')
|
||||
|
||||
226
src/speech.cpp
226
src/speech.cpp
@@ -1,226 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <stdlib.h>
|
||||
#include <ros/ros.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <std_srvs/Empty.h>
|
||||
#include <naoqi_bridge_msgs/SpeechWithFeedbackActionGoal.h>
|
||||
#include <actionlib_msgs/GoalStatusArray.h>
|
||||
#include <naoqi_bridge_msgs/SetSpeechVocabularyActionGoal.h>
|
||||
#include <naoqi_bridge_msgs/WordRecognized.h>
|
||||
#include <std_msgs/Bool.h>
|
||||
#include <std_msgs/String.h>
|
||||
|
||||
#include <teleoperation/InformController.h>
|
||||
#include <teleoperation/utils.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
class Nao_control {
|
||||
|
||||
protected:
|
||||
|
||||
// ROS node handler
|
||||
ros::NodeHandle nh_;
|
||||
|
||||
// Publisher for nao speech
|
||||
ros::Publisher speech_pub;
|
||||
|
||||
// Publisher for nao vocabulary parameters
|
||||
ros::Publisher voc_params_pub;
|
||||
|
||||
// Client for starting speech recognition
|
||||
ros::ServiceClient recog_start_srv;
|
||||
|
||||
// Client for stopping speech recognition
|
||||
ros::ServiceClient recog_stop_srv;
|
||||
|
||||
// Subscriber to speech recognition
|
||||
ros::Subscriber recog_sub;
|
||||
|
||||
// Publisher for recognized commands
|
||||
ros::ServiceClient ic;
|
||||
|
||||
bool imitating;
|
||||
|
||||
int speech_id_ctr;
|
||||
|
||||
public:
|
||||
|
||||
Nao_control() : imitating(false), speech_id_ctr(1) {
|
||||
|
||||
ROS_INFO("Constructor");
|
||||
|
||||
speech_pub = nh_.advertise<naoqi_bridge_msgs::SpeechWithFeedbackActionGoal>(
|
||||
"/speech_action/goal", 1);
|
||||
|
||||
voc_params_pub= nh_.advertise<naoqi_bridge_msgs::SetSpeechVocabularyActionGoal>(
|
||||
"/speech_vocabulary_action/goal", 1);
|
||||
|
||||
recog_start_srv=nh_.serviceClient<std_srvs::Empty>("/start_recognition");
|
||||
|
||||
recog_stop_srv=nh_.serviceClient<std_srvs::Empty>("/stop_recognition");
|
||||
|
||||
recog_sub=nh_.subscribe("/word_recognized", 1,
|
||||
&Nao_control::speechRecognitionCallback, this);
|
||||
|
||||
ic = nh_.serviceClient<teleoperation::InformController>(
|
||||
"/inform_controller", false);
|
||||
}
|
||||
|
||||
~Nao_control() {
|
||||
|
||||
ROS_INFO("SPEECH: DESTRUCT");
|
||||
|
||||
std_srvs::Empty srv;
|
||||
|
||||
if (recog_stop_srv.call(srv)) {
|
||||
ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION");
|
||||
}
|
||||
else {
|
||||
ROS_ERROR("COULDN'T STOP RECOGNITION");
|
||||
}
|
||||
}
|
||||
|
||||
void speechRecognitionCallback(
|
||||
const naoqi_bridge_msgs::WordRecognized::ConstPtr& msg) {
|
||||
|
||||
ROS_INFO("A WORD WAS RECOGNIZED");
|
||||
std_srvs::Empty srv;
|
||||
|
||||
ROS_INFO("CONFIDENCE: %lf", msg->confidence_values[0]);
|
||||
|
||||
for (int i = 0; i < msg->words.size(); i++) {
|
||||
std::cout << msg->words[i] << std::endl;
|
||||
}
|
||||
|
||||
//set pause duration
|
||||
double f_pause = 2;
|
||||
|
||||
if (recog_stop_srv.call(srv) && ((msg->words.size())> 0)) {
|
||||
|
||||
ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION");
|
||||
|
||||
// Use confidence level to decide wether the recognized word
|
||||
// should be published
|
||||
|
||||
if (msg->confidence_values[0] > 0.35) {
|
||||
|
||||
ROS_INFO("SPEECH STARTING");
|
||||
std::string say = "Ok I understood " + msg->words[0];
|
||||
|
||||
naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg;
|
||||
s_msg.goal_id.id = stuff_to_str(this->speech_id_ctr);
|
||||
this->speech_id_ctr += 1;
|
||||
s_msg.goal.say = say;
|
||||
this->speech_pub.publish(s_msg);
|
||||
teleoperation::InformController ic_msg;
|
||||
ic_msg.request.module = "speech_recognition";
|
||||
if (msg->words[0] == "imitate") {
|
||||
ic_msg.request.message = "imitate";
|
||||
ROS_INFO("SPEECH: REQUESTING IMITATION");
|
||||
if (this->ic.call(ic_msg) && ic_msg.response.permission) {
|
||||
this->imitating = true;
|
||||
}
|
||||
else {
|
||||
ROS_ERROR("SPEECH: CONTROLLER UNREACHABLE");
|
||||
}
|
||||
}
|
||||
else if (msg->words[0] == "stop") {
|
||||
ROS_INFO("SPEECH: REQUESTING STOP IMITATION");
|
||||
ic_msg.request.message = "stop";
|
||||
if (this->ic.call(ic_msg) && ic_msg.response.permission) {
|
||||
this->imitating = false;
|
||||
}
|
||||
else {
|
||||
ROS_ERROR("SPEECH: CONTROLLER UNREACHABLE");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else if (msg->confidence_values[0] > 0.05) {
|
||||
|
||||
ROS_INFO("SPEECH STARTING");
|
||||
std::string say = "I did not understand. Could you repeat that please";
|
||||
|
||||
naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg;
|
||||
s_msg.goal_id.id = stuff_to_str(this->speech_id_ctr);
|
||||
this->speech_id_ctr += 1;
|
||||
s_msg.goal.say = say;
|
||||
this->speech_pub.publish(s_msg);
|
||||
|
||||
// increase pause duration
|
||||
f_pause = 0.4;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
ROS_ERROR("COULDN'T STOP RECOGNITION");
|
||||
}
|
||||
// pause until NAO stops talking
|
||||
ros::Rate loop_rate(f_pause);
|
||||
loop_rate.sleep();
|
||||
|
||||
// re-start recogntion
|
||||
commandRecognition();
|
||||
}
|
||||
|
||||
void initializeVocabulary()
|
||||
{
|
||||
|
||||
std::vector<std::string> vocabulary;
|
||||
if (!this->imitating) vocabulary.push_back("imitate");
|
||||
else vocabulary.push_back("stop");
|
||||
vocabulary.push_back("open");
|
||||
vocabulary.push_back("close");
|
||||
// vocabulary.push_back("kill");
|
||||
|
||||
naoqi_bridge_msgs::SetSpeechVocabularyActionGoal msg;
|
||||
msg.goal.words = vocabulary;
|
||||
msg.goal_id.id = stuff_to_str(speech_id_ctr);
|
||||
std::cout << msg.goal << std::endl;
|
||||
speech_id_ctr += 1;
|
||||
voc_params_pub.publish(msg);
|
||||
ROS_INFO("VOCABULARY INITIALIZED");
|
||||
}
|
||||
|
||||
void commandRecognition()
|
||||
{
|
||||
//recognition has to be started and ended once a valid command was found
|
||||
while (true) {
|
||||
ros::Rate loop_rate(4);
|
||||
loop_rate.sleep();
|
||||
teleoperation::InformController ic_msg;
|
||||
ic_msg.request.module = "speech_recognition";
|
||||
ic_msg.request.message = "recognize";
|
||||
if (this->ic.call(ic_msg) && ic_msg.response.permission) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
this->initializeVocabulary();
|
||||
ros::Rate loop_rate(1);
|
||||
loop_rate.sleep();
|
||||
std_srvs::Empty srv;
|
||||
|
||||
if (recog_start_srv.call(srv)) {
|
||||
ROS_INFO("SUCCESSFULLY STARTED RECOGNITION");
|
||||
}
|
||||
else {
|
||||
ROS_ERROR("COULDN'T START RECOGNITION");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
ros::init(argc, argv, "speech");
|
||||
|
||||
Nao_control TermiNAOtor;
|
||||
ros::Rate loop_rate(1);
|
||||
loop_rate.sleep();
|
||||
TermiNAOtor.commandRecognition();
|
||||
ROS_INFO("SPIN");
|
||||
ros::spin();
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user