From b77c97afe0550cdf888e553d9332e9b7bb7e64fe Mon Sep 17 00:00:00 2001 From: HRS_D Date: Tue, 22 Jan 2019 11:04:37 +0100 Subject: [PATCH 1/3] did work on speech recognition --- .gitignore | 3 + CMakeLists.txt | 23 ++-- action/SetSpeechVocabulary.action | 9 ++ action/SpeechWithFeedback.action | 11 ++ src/speech.cpp | 219 ++++++++++++++++++++++++++++++ src/speech_2.cpp | 177 ++++++++++++++++++++++++ 6 files changed, 433 insertions(+), 9 deletions(-) create mode 100644 action/SetSpeechVocabulary.action create mode 100644 action/SpeechWithFeedback.action create mode 100644 src/speech.cpp create mode 100644 src/speech_2.cpp diff --git a/.gitignore b/.gitignore index 4d61f1b..9f83993 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ # Vim stuff .*.sw* + +# Gedit stuff +*~ diff --git a/CMakeLists.txt b/CMakeLists.txt index f7a67ff..24c18ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,7 @@ find_package(catkin REQUIRED COMPONENTS std_msgs tf aruco + naoqi_bridge_msgs ) ## System dependencies are found with CMake's conventions @@ -66,17 +67,17 @@ find_package(catkin REQUIRED COMPONENTS # ) ## Generate actions in the 'action' folder -# add_action_files( -# FILES -# Action1.action -# Action2.action -# ) +#add_action_files( +# FILES +# Blink.action +#) ## Generate added messages and services with any dependencies listed here -# generate_messages( -# DEPENDENCIES -# sensor_msgs# std_msgs -# ) +generate_messages( + DEPENDENCIES + sensor_msgs + actionlib_msgs +) ################################################ ## Declare ROS dynamic reconfigure parameters ## @@ -112,6 +113,8 @@ catkin_package( # LIBRARIES project_aruco # CATKIN_DEPENDS cv_bridge image_transport roscpp sensor_msgs std_msgs # DEPENDS system_lib + + #CATKIN_DEPENDS actionlib_msgs ) ########### @@ -157,8 +160,10 @@ include_directories( add_executable(aruco_detector src/aruco_detector.cpp) add_executable(aruco_effector src/aruco_effector.cpp) +add_executable(speech src/speech.cpp) target_link_libraries(aruco_detector ${catkin_LIBRARIES} ${aruco_LIB}) target_link_libraries(aruco_effector ${catkin_LIBRARIES} ${aruco_LIB}) +target_link_libraries(speech ${catkin_LIBRARIES} ${aruco_LIB}) # add_dependencies(aruco tutorial_4_generate_messages_cpp) ############# diff --git a/action/SetSpeechVocabulary.action b/action/SetSpeechVocabulary.action new file mode 100644 index 0000000..d188731 --- /dev/null +++ b/action/SetSpeechVocabulary.action @@ -0,0 +1,9 @@ +# Goal: The new vocabulary to be set in the speech recognition module +# Result: True if the vocabulary was set +# Feedback: None + +string[] words +--- +bool success +--- + diff --git a/action/SpeechWithFeedback.action b/action/SpeechWithFeedback.action new file mode 100644 index 0000000..e756dcc --- /dev/null +++ b/action/SpeechWithFeedback.action @@ -0,0 +1,11 @@ +# Purpose : To have feedback on when the speech was started and when +# NAO stopped talking +# Goal: The sentence for NAO to say +# Result: NAO has finished speaking +# Feedback: When NAO starts speaking + +string say +--- +# Empty result +--- +# Empty feedback diff --git a/src/speech.cpp b/src/speech.cpp new file mode 100644 index 0000000..b0d3490 --- /dev/null +++ b/src/speech.cpp @@ -0,0 +1,219 @@ +#include +#include +#include +#include +#include +#include +#include +#include "sensor_msgs/JointState.h" +#include "sensor_msgs/Imu.h" +#include "message_filters/subscriber.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "actionlib/client/simple_action_client.h" + + + + +using namespace std; + +class Nao_control { + +protected: + + // ROS node handler + ros::NodeHandle nh_; + + // Publisher for nao speech + ros::Publisher speech_pub; + + // Publisher for nao vocabulary parameters + ros::Publisher voc_params_pub; + + // Client for starting speech recognition + ros::ServiceClient recog_start_srv; + + // Client for stopping speech recognition + ros::ServiceClient recog_stop_srv; + + // Subscriber to speech recognition + ros::Subscriber recog_sub; + + // Publisher for recognized commands + ros::Publisher command_pub; + + int speech_id_ctr = 1; + std::vector recognized_words; + bool commandfound = false; + + bool speech_flag = false; + +public: + + Nao_control(){ + + ROS_INFO("Constructor"); + + speech_pub = nh_.advertise( + "/speech_action/goal", 1); + + voc_params_pub= nh_.advertise( + "/speech_vocabulary_action/goal", 1); + + recog_start_srv=nh_.serviceClient("/start_recognition"); + + recog_stop_srv=nh_.serviceClient("/stop_recognition"); + + recog_sub=nh_.subscribe("/word_recognized", 1, + &Nao_control::speechRecognitionCallback, this); + + command_pub = nh_.advertise("/recieved_commands",5); + + } + + + ~Nao_control() { + + ROS_INFO("Destructor"); + //stopping recognition + + std_srvs::Empty srv; + + if (recog_stop_srv.call(srv)) { + ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION"); + } + else { + ROS_ERROR("COULDN'T STOP RECOGNITION"); + } + } + + + void speechRecognitionCallback(const naoqi_bridge_msgs::WordRecognized::ConstPtr& msg) + { + + ROS_INFO("A WORD WAS RECOGNIZED"); + std_srvs::Empty srv; + + ROS_INFO("Confidence Value:"); + std::cout << std::to_string(msg->confidence_values[0]) << std::endl; + + for (int i = 0; i < msg->words.size(); i++) { + + std::cout << msg->words[i] << std::endl; + + } + + if (recog_stop_srv.call(srv) && ((msg->words.size())> 0)) { + + ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION"); + + // Use confidence level to decide wether the recognized word should be published + + if (msg->confidence_values[0] > 0.35) { + + ROS_INFO("SPEECH STARTING"); + std::string say = "Ok I understood"; + + naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg; + s_msg.goal_id.id = std::to_string(this->speech_id_ctr); + this->speech_id_ctr += 1; + s_msg.goal.say = say; + this->speech_pub.publish(s_msg); + this->recognized_words.clear(); + + // publish here + ROS_INFO("PUBLISH"); + + std_msgs::String c_msg; + c_msg.data = msg->words[0]; + this->command_pub.publish(c_msg); + + } + else if(msg->confidence_values[0] > 0.3) { + + ROS_INFO("SPEECH STARTING"); + std::string say = "I did not understand. Could you repeat that please"; + + naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg; + s_msg.goal_id.id = std::to_string(this->speech_id_ctr); + this->speech_id_ctr += 1; + s_msg.goal.say = say; + this->speech_pub.publish(s_msg); + this->recognized_words.clear(); + + } + + this->recognized_words.clear(); + ros::Rate loop_rate(10000); + loop_rate.sleep(); + //program stops here, better loop structure, where recognition is started again has to be added + commandRecognition(); + } + else { + ROS_ERROR("COULDN'T STOP RECOGNITION"); + } + } + + void initializeVocabulary() + { + + std::vector vocabulary; + vocabulary.push_back("Start");// Operation "); + vocabulary.push_back("Stop");// Operation "); + //vocabulary.push_back("hello "); + vocabulary.push_back("Open your hands "); + vocabulary.push_back("Close your hands "); + + naoqi_bridge_msgs::SetSpeechVocabularyActionGoal msg; + msg.goal.words = vocabulary; + msg.goal_id.id = std::to_string(speech_id_ctr); + std::cout << msg << std::endl; + speech_id_ctr += 1; + voc_params_pub.publish(msg); + ROS_INFO("VOCABULARY INITIALIZED"); + } + + void commandRecognition() + { + //recognition has to be started and ended once a valid command was found + + initializeVocabulary(); + std_srvs::Empty srv; + + if (recog_start_srv.call(srv)) { + ROS_INFO("SUCCESSFULLY STARTED RECOGNITION"); + } + else { + ROS_ERROR("COULDN'T START RECOGNITION"); + } + } + +}; + +int main(int argc, char** argv) { + ros::init(argc, argv, "speech"); + + Nao_control TermiNAOtor; + ros::Rate loop_rate(10); + loop_rate.sleep(); + TermiNAOtor.commandRecognition(); + ros::spin(); + return 0; +} diff --git a/src/speech_2.cpp b/src/speech_2.cpp new file mode 100644 index 0000000..2da85f5 --- /dev/null +++ b/src/speech_2.cpp @@ -0,0 +1,177 @@ +#include +#include +#include +#include +#include +#include +#include +#include "sensor_msgs/JointState.h" +#include "sensor_msgs/Imu.h" +#include "message_filters/subscriber.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "actionlib/client/simple_action_client.h" + + + + +using namespace std; + +class Nao_control { + +protected: + + // ROS node handler + ros::NodeHandle nh_; + + // Subscriber to head tactile states + ros::Subscriber tactile_sub; + + // Publisher for nao speech + ros::Publisher speech_pub; + + // Publisher for nao vocabulary parameters + ros::Publisher voc_params_pub; + + // Client for starting speech recognition + ros::ServiceClient recog_start_srv; + + // Client for stopping speech recognition + ros::ServiceClient recog_stop_srv; + + // Subscriber to speech recognition + ros::Subscriber recog_sub; + + int speech_id_ctr = 0; + std::vector recognized_words; + +public: + + Nao_control(){ + + // Subscribe to topic bumper and specify that + // all data will be processed by function bumperCallback + //bumper_sub=nh_.subscribe("/bumper",1, &Nao_control::bumperCallback, + // this); + + // Subscribe to topic tactile_touch and specify + // that all data will be processed by function tactileCallback + //tactile_sub=nh_.subscribe("/tactile_touch", 1, + // &Nao_control::tactileCallback, this); + + speech_pub = nh_.advertise( + "/speech_action/goal", 1); + + voc_params_pub= nh_.advertise( + "/speech_vocabulary_action/goal", 1); + + recog_start_srv=nh_.serviceClient("/start_recognition"); + + recog_stop_srv=nh_.serviceClient("/stop_recognition"); + + recog_sub=nh_.subscribe("/word_recognized", 1, + &Nao_control::speechRecognitionCallback, this); + + ROS_INFO("Constructor"); + tactileCallback(); + + } + + + ~Nao_control() { + + ROS_INFO("Destructor"); + } + + void speechRecognitionCallback(const naoqi_bridge_msgs::WordRecognized::ConstPtr& msg) + { + for (int i = 0; i < msg->words.size(); i++) { + ROS_INFO("RECOGNIZED %s WITH CONFIDENCE %f", + msg->words[i].c_str(), msg->confidence_values[i]); + if (msg->confidence_values[i] > 0.42) { + this->recognized_words.push_back(msg->words[i]); + } + } + } + + + //void tactileCallback(#const naoqi_bridge_msgs::HeadTouch::ConstPtr& tactileState) + void tactileCallback() + { + + if (1 ) { + ROS_INFO("FRONT BUTTON RELEASED"); + std::vector vocabulary; + vocabulary.push_back("hello"); + vocabulary.push_back("world"); + vocabulary.push_back("humans"); + vocabulary.push_back("robots"); + vocabulary.push_back("kill"); + + naoqi_bridge_msgs::SetSpeechVocabularyActionGoal msg; + msg.goal.words = vocabulary; + msg.goal_id.id = std::to_string(speech_id_ctr); + std::cout << msg << std::endl; + speech_id_ctr += 1; + voc_params_pub.publish(msg); + // somehow the message does not get published here + + std_srvs::Empty srv; + + if (recog_start_srv.call(srv)) { + ROS_INFO("SUCCESSFULLY STARTED RECOGNITION"); + } + else { + ROS_ERROR("COULDN'T START RECOGNITION"); + } + } + else if (this->recognized_words.size() > 1) { + ROS_INFO("MIDDLE BUTTON RELEASED"); + std_srvs::Empty srv; + if (recog_stop_srv.call(srv)) { + ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION"); + std::string say; + for (int i = 0; i < this->recognized_words.size(); i++) { + say += this->recognized_words[i] + " "; + } + naoqi_bridge_msgs::SpeechWithFeedbackActionGoal msg; + msg.goal_id.id = std::to_string(this->speech_id_ctr); + this->speech_id_ctr += 1; + msg.goal.say = say; + this->speech_pub.publish(msg); + this->recognized_words.clear(); + } + else { + ROS_ERROR("COULDN'T STOP RECOGNITION"); + } + } + else{ + ROS_INFO("NO BUTTON PRESSED"); + } + } + +}; + +int main(int argc, char** argv) { + ros::init(argc, argv, "speech"); + + Nao_control TermiNAOtor; + ros::spin(); + return 0; +} From a962f28c0921dd28ca68942779bcbf759e0d4161 Mon Sep 17 00:00:00 2001 From: HRS_D Date: Tue, 22 Jan 2019 18:43:24 +0100 Subject: [PATCH 2/3] Timing optimization in specch node, added launch file and script to start application --- launch/teleoperation.launch | 22 ++++++++++++++ scripts/speech_debug.sh | 20 +++++++++++++ scripts/teleoperation.sh | 27 +++++++++++++++++ src/speech.cpp | 58 ++++++++++++++----------------------- 4 files changed, 90 insertions(+), 37 deletions(-) create mode 100644 launch/teleoperation.launch create mode 100755 scripts/speech_debug.sh create mode 100755 scripts/teleoperation.sh diff --git a/launch/teleoperation.launch b/launch/teleoperation.launch new file mode 100644 index 0000000..d37902a --- /dev/null +++ b/launch/teleoperation.launch @@ -0,0 +1,22 @@ + + + + + + + + + + + + + ?> + + + + + diff --git a/scripts/speech_debug.sh b/scripts/speech_debug.sh new file mode 100755 index 0000000..bee05f2 --- /dev/null +++ b/scripts/speech_debug.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# Script to open debug terminal for speech node + +# Array of commands to run in different tabs +commands=( + 'cd ~/catkin_ws;source devel/setup.bash ;rostopic echo /speech_vocabulary_action/goal' + 'cd ~/catkin_ws;source devel/setup.bash ;rostopic echo /word_recognized' +) + +# Build final command with all the tabs to launch +set finalCommand="" +for (( i = 0; i < ${#commands[@]} ; i++ )); do + export finalCommand+="--tab -e 'bash -c \"${commands[$i]};exec bash\"' " +done + +# Run the final command +eval "gnome-terminal "$finalCommand + +exit 0 diff --git a/scripts/teleoperation.sh b/scripts/teleoperation.sh new file mode 100755 index 0000000..c5bf314 --- /dev/null +++ b/scripts/teleoperation.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Script to start the necessary ros functions for the Teleoperating NAO oproject + +# Array of commands to run in different tabs +commands=( + + # Start roscore + #'cd ~/catkin_ws;source devel/setup.bash ;roscore' + # Bringup Nao + 'cd ~/catkin_ws;source devel/setup.bash ;roslaunch nao_bringup nao_full.launch ' + # Start speech recognition server + 'cd ~/catkin_ws;source devel/setup.bash ;roslaunch nao_apps speech.launch' + # Launch nodes in teleoperation package + 'cd ~/catkin_ws;source devel/setup.bash ;roslaunch teleoperation teleoperation.launch' +) + +# Build final command with all the tabs to launch +set finalCommand="" +for (( i = 0; i < ${#commands[@]} ; i++ )); do + export finalCommand+="--tab -e 'bash -c \"${commands[$i]};exec bash\"' " +done + +# Run the final command +eval "gnome-terminal "$finalCommand + +exit 0 diff --git a/src/speech.cpp b/src/speech.cpp index b0d3490..ef19b5b 100644 --- a/src/speech.cpp +++ b/src/speech.cpp @@ -1,36 +1,16 @@ #include #include -#include -#include #include #include -#include -#include "sensor_msgs/JointState.h" -#include "sensor_msgs/Imu.h" -#include "message_filters/subscriber.h" #include -#include -#include -#include -#include -#include + #include -#include -#include #include #include -#include #include -#include #include -#include -#include #include #include -#include "actionlib/client/simple_action_client.h" - - - using namespace std; @@ -63,8 +43,6 @@ protected: std::vector recognized_words; bool commandfound = false; - bool speech_flag = false; - public: Nao_control(){ @@ -88,7 +66,6 @@ public: } - ~Nao_control() { ROS_INFO("Destructor"); @@ -104,7 +81,6 @@ public: } } - void speechRecognitionCallback(const naoqi_bridge_msgs::WordRecognized::ConstPtr& msg) { @@ -119,6 +95,9 @@ public: std::cout << msg->words[i] << std::endl; } + + //set pause duration + double f_pause = 1; if (recog_stop_srv.call(srv) && ((msg->words.size())> 0)) { @@ -146,7 +125,7 @@ public: this->command_pub.publish(c_msg); } - else if(msg->confidence_values[0] > 0.3) { + else if(msg->confidence_values[0] > 0.05) { ROS_INFO("SPEECH STARTING"); std::string say = "I did not understand. Could you repeat that please"; @@ -157,29 +136,34 @@ public: s_msg.goal.say = say; this->speech_pub.publish(s_msg); this->recognized_words.clear(); + + // increase pause duration + f_pause = 0.4; - } + } this->recognized_words.clear(); - ros::Rate loop_rate(10000); - loop_rate.sleep(); - //program stops here, better loop structure, where recognition is started again has to be added - commandRecognition(); } else { ROS_ERROR("COULDN'T STOP RECOGNITION"); - } + } + // pause until NAO stops talking + ros::Rate loop_rate(f_pause); + loop_rate.sleep(); + + // re-start recogntion + commandRecognition(); } void initializeVocabulary() { std::vector vocabulary; - vocabulary.push_back("Start");// Operation "); + vocabulary.push_back("Copy postion"); vocabulary.push_back("Stop");// Operation "); //vocabulary.push_back("hello "); - vocabulary.push_back("Open your hands "); - vocabulary.push_back("Close your hands "); + vocabulary.push_back("Open your hands"); + vocabulary.push_back("Close your hands"); naoqi_bridge_msgs::SetSpeechVocabularyActionGoal msg; msg.goal.words = vocabulary; @@ -204,16 +188,16 @@ public: ROS_ERROR("COULDN'T START RECOGNITION"); } } - }; int main(int argc, char** argv) { ros::init(argc, argv, "speech"); Nao_control TermiNAOtor; - ros::Rate loop_rate(10); + ros::Rate loop_rate(1); loop_rate.sleep(); TermiNAOtor.commandRecognition(); + ROS_INFO("SPIN"); ros::spin(); return 0; } From a52b56ded9ae5e7faf44ac8f8b1308e8416262e3 Mon Sep 17 00:00:00 2001 From: HRS_D Date: Thu, 24 Jan 2019 11:04:42 +0100 Subject: [PATCH 3/3] little changes in shell scripts --- launch/teleoperation.launch | 14 ++++++++++++-- scripts/speech_debug.sh | 4 ++-- scripts/teleoperation.sh | 12 ++++++------ src/speech.cpp | 2 +- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/launch/teleoperation.launch b/launch/teleoperation.launch index d37902a..8a879de 100644 --- a/launch/teleoperation.launch +++ b/launch/teleoperation.launch @@ -1,3 +1,5 @@ + + - + + + + + ?> + - + ?> @@ -19,4 +27,6 @@ Launch file for Teleoperating NAO project + + diff --git a/scripts/speech_debug.sh b/scripts/speech_debug.sh index bee05f2..440ea90 100755 --- a/scripts/speech_debug.sh +++ b/scripts/speech_debug.sh @@ -9,9 +9,9 @@ commands=( ) # Build final command with all the tabs to launch -set finalCommand="" +finalCommand="" for (( i = 0; i < ${#commands[@]} ; i++ )); do - export finalCommand+="--tab -e 'bash -c \"${commands[$i]};exec bash\"' " + finalCommand+="--tab -e 'bash -c \"${commands[$i]};exec bash\"' " done # Run the final command diff --git a/scripts/teleoperation.sh b/scripts/teleoperation.sh index c5bf314..01f6268 100755 --- a/scripts/teleoperation.sh +++ b/scripts/teleoperation.sh @@ -6,19 +6,19 @@ commands=( # Start roscore - #'cd ~/catkin_ws;source devel/setup.bash ;roscore' + 'cd ~/catkin_ws;source devel/setup.bash ;roscore' # Bringup Nao - 'cd ~/catkin_ws;source devel/setup.bash ;roslaunch nao_bringup nao_full.launch ' + 'sleep 1;cd ~/catkin_ws;source devel/setup.bash ;roslaunch nao_bringup nao_full.launch ' # Start speech recognition server - 'cd ~/catkin_ws;source devel/setup.bash ;roslaunch nao_apps speech.launch' + 'sleep 3;cd ~/catkin_ws;source devel/setup.bash ;roslaunch nao_apps speech.launch' # Launch nodes in teleoperation package - 'cd ~/catkin_ws;source devel/setup.bash ;roslaunch teleoperation teleoperation.launch' + 'sleep 5;cd ~/catkin_ws;source devel/setup.bash ;roslaunch teleoperation teleoperation.launch' ) # Build final command with all the tabs to launch -set finalCommand="" +finalCommand="" for (( i = 0; i < ${#commands[@]} ; i++ )); do - export finalCommand+="--tab -e 'bash -c \"${commands[$i]};exec bash\"' " + finalCommand+="--tab -e 'bash -c \"${commands[$i]};exec bash\"' " done # Run the final command diff --git a/src/speech.cpp b/src/speech.cpp index ef19b5b..9e71c2f 100644 --- a/src/speech.cpp +++ b/src/speech.cpp @@ -194,7 +194,7 @@ int main(int argc, char** argv) { ros::init(argc, argv, "speech"); Nao_control TermiNAOtor; - ros::Rate loop_rate(1); + ros::Rate loop_rate(0.5); loop_rate.sleep(); TermiNAOtor.commandRecognition(); ROS_INFO("SPIN");