diff --git a/.gitignore b/.gitignore
index 3dfb892..12557c5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,6 @@
 # Some files
 experiments.py
+
+# Gedit backup files
+*~
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ed6f87d..ff06705 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -37,5 +37,7 @@ catkin_install_python(PROGRAMS
 add_executable(aruco_detector src/aruco_detector.cpp)
 add_executable(aruco_effector src/aruco_effector.cpp)
+add_executable(speech src/speech.cpp)
 target_link_libraries(aruco_detector ${catkin_LIBRARIES} ${aruco_LIB})
 target_link_libraries(aruco_effector ${catkin_LIBRARIES} ${aruco_LIB})
+target_link_libraries(speech ${catkin_LIBRARIES})
diff --git a/action/SetSpeechVocabulary.action b/action/SetSpeechVocabulary.action
new file mode 100644
index 0000000..d188731
--- /dev/null
+++ b/action/SetSpeechVocabulary.action
@@ -0,0 +1,9 @@
+# Goal: The new vocabulary to be set in the speech recognition module
+# Result: True if the vocabulary was set
+# Feedback: None
+
+string[] words
+---
+bool success
+---
+
diff --git a/action/SpeechWithFeedback.action b/action/SpeechWithFeedback.action
new file mode 100644
index 0000000..e756dcc
--- /dev/null
+++ b/action/SpeechWithFeedback.action
@@ -0,0 +1,11 @@
+# Purpose: To have feedback on when the speech was started and when
+# NAO stopped talking
+# Goal: The sentence for NAO to say
+# Result: NAO has finished speaking
+# Feedback: When NAO starts speaking
+
+string say
+---
+# Empty result
+---
+# Empty feedback
diff --git a/launch/teleoperation.launch b/launch/teleoperation.launch
new file mode 100644
index 0000000..8a879de
--- /dev/null
+++ b/launch/teleoperation.launch
@@ -0,0 +1,32 @@
diff --git a/scripts/speech_debug.sh b/scripts/speech_debug.sh
new file mode 100755
index 0000000..440ea90
--- /dev/null
+++ b/scripts/speech_debug.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Script to open debug terminals for the speech node
+
+# Array of commands to run in different tabs
+commands=(
+    'cd ~/catkin_ws; source devel/setup.bash; rostopic echo /speech_vocabulary_action/goal'
+    'cd ~/catkin_ws; source devel/setup.bash; rostopic echo /word_recognized'
+)
+
+# Build the final command with all the tabs to launch
+finalCommand=""
+for (( i = 0; i < ${#commands[@]}; i++ )); do
+    finalCommand+="--tab -e 'bash -c \"${commands[$i]}; exec bash\"' "
+done
+
+# Run the final command
+eval "gnome-terminal $finalCommand"
+
+exit 0
diff --git a/scripts/teleoperation.sh b/scripts/teleoperation.sh
new file mode 100755
index 0000000..01f6268
--- /dev/null
+++ b/scripts/teleoperation.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+# Script to start the ROS nodes needed for the Teleoperating NAO project
+
+# Array of commands to run in different tabs
+commands=(
+
+    # Start roscore
+    'cd ~/catkin_ws; source devel/setup.bash; roscore'
+    # Bring up NAO
+    'sleep 1; cd ~/catkin_ws; source devel/setup.bash; roslaunch nao_bringup nao_full.launch'
+    # Start the speech recognition server
+    'sleep 3; cd ~/catkin_ws; source devel/setup.bash; roslaunch nao_apps speech.launch'
+    # Launch the nodes in the teleoperation package
+    'sleep 5; cd ~/catkin_ws; source devel/setup.bash; roslaunch teleoperation teleoperation.launch'
+)
+
+# Build the final command with all the tabs to launch
+finalCommand=""
+for (( i = 0; i < ${#commands[@]}; i++ )); do
+    finalCommand+="--tab -e 'bash -c \"${commands[$i]}; exec bash\"' "
+done
+
+# Run the final command
+eval "gnome-terminal $finalCommand"
+
+exit 0
diff --git a/src/speech.cpp b/src/speech.cpp
new file mode 100644
index 0000000..9e71c2f
--- /dev/null
+++ b/src/speech.cpp
@@ -0,0 +1,203 @@
+#include <string>
+#include <vector>
+#include <iostream>
+
+#include <ros/ros.h>
+#include <std_srvs/Empty.h>
+#include <std_msgs/String.h>
+#include <naoqi_bridge_msgs/WordRecognized.h>
+#include <naoqi_bridge_msgs/SpeechWithFeedbackActionGoal.h>
+#include <naoqi_bridge_msgs/SetSpeechVocabularyActionGoal.h>
+
+using namespace std;
+
+class Nao_control {
+
+protected:
+
+    // ROS node handle
+    ros::NodeHandle nh_;
+
+    // Publisher for NAO speech
+    ros::Publisher speech_pub;
+
+    // Publisher for NAO vocabulary parameters
+    ros::Publisher voc_params_pub;
+
+    // Client for starting speech recognition
+    ros::ServiceClient recog_start_srv;
+
+    // Client for stopping speech recognition
+    ros::ServiceClient recog_stop_srv;
+
+    // Subscriber to speech recognition results
+    ros::Subscriber recog_sub;
+
+    // Publisher for recognized commands
+    ros::Publisher command_pub;
+
+    int speech_id_ctr = 1;
+    std::vector<std::string> recognized_words;
+    bool commandfound = false;
+
+public:
+
+    Nao_control() {
+
+        ROS_INFO("Constructor");
+
+        speech_pub = nh_.advertise<naoqi_bridge_msgs::SpeechWithFeedbackActionGoal>(
+            "/speech_action/goal", 1);
+
+        voc_params_pub = nh_.advertise<naoqi_bridge_msgs::SetSpeechVocabularyActionGoal>(
+            "/speech_vocabulary_action/goal", 1);
+
+        recog_start_srv = nh_.serviceClient<std_srvs::Empty>("/start_recognition");
+
+        recog_stop_srv = nh_.serviceClient<std_srvs::Empty>("/stop_recognition");
+
+        recog_sub = nh_.subscribe("/word_recognized", 1,
+            &Nao_control::speechRecognitionCallback, this);
+
+        command_pub = nh_.advertise<std_msgs::String>("/recieved_commands", 5);
+    }
+
+    ~Nao_control() {
+
+        ROS_INFO("Destructor");
+
+        // Stop recognition on shutdown
+        std_srvs::Empty srv;
+
+        if (recog_stop_srv.call(srv)) {
+            ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION");
+        }
+        else {
+            ROS_ERROR("COULDN'T STOP RECOGNITION");
+        }
+    }
+
+    void speechRecognitionCallback(const naoqi_bridge_msgs::WordRecognized::ConstPtr& msg)
+    {
+        ROS_INFO("A WORD WAS RECOGNIZED");
+        std_srvs::Empty srv;
+
+        if (!msg->confidence_values.empty()) {
+            ROS_INFO("Confidence Value:");
+            std::cout << std::to_string(msg->confidence_values[0]) << std::endl;
+        }
+
+        for (size_t i = 0; i < msg->words.size(); i++) {
+            std::cout << msg->words[i] << std::endl;
+        }
+
+        // Pause frequency in Hz for ros::Rate (a lower value means a longer pause)
+        double f_pause = 1;
+
+        if (recog_stop_srv.call(srv) && (msg->words.size() > 0)) {
+
+            ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION");
+
+            // Use the confidence level to decide whether the recognized word should be published
+            if (msg->confidence_values[0] > 0.35) {
+
+                ROS_INFO("SPEECH STARTING");
+                std::string say = "Ok I understood";
+
+                naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg;
+                s_msg.goal_id.id = std::to_string(this->speech_id_ctr);
+                this->speech_id_ctr += 1;
+                s_msg.goal.say = say;
+                this->speech_pub.publish(s_msg);
+                this->recognized_words.clear();
+
+                // Publish the recognized command
+                ROS_INFO("PUBLISH");
+
+                std_msgs::String c_msg;
+                c_msg.data = msg->words[0];
+                this->command_pub.publish(c_msg);
+            }
+            else if (msg->confidence_values[0] > 0.05) {
+
+                ROS_INFO("SPEECH STARTING");
+                std::string say = "I did not understand. Could you repeat that please";
+
+                naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg;
+                s_msg.goal_id.id = std::to_string(this->speech_id_ctr);
+                this->speech_id_ctr += 1;
+                s_msg.goal.say = say;
+                this->speech_pub.publish(s_msg);
+                this->recognized_words.clear();
+
+                // Increase the pause duration (NAO has a longer sentence to say)
+                f_pause = 0.4;
+            }
+
+            this->recognized_words.clear();
+        }
+        else {
+            ROS_ERROR("COULDN'T STOP RECOGNITION");
+        }
+
+        // Pause until NAO stops talking
+        ros::Rate loop_rate(f_pause);
+        loop_rate.sleep();
+
+        // Restart recognition
+        commandRecognition();
+    }
+
+    void initializeVocabulary()
+    {
+        std::vector<std::string> vocabulary;
+        vocabulary.push_back("Copy postion");
+        vocabulary.push_back("Stop");
+        //vocabulary.push_back("hello ");
+        vocabulary.push_back("Open your hands");
+        vocabulary.push_back("Close your hands");
+
+        naoqi_bridge_msgs::SetSpeechVocabularyActionGoal msg;
+        msg.goal.words = vocabulary;
+        msg.goal_id.id = std::to_string(speech_id_ctr);
+        std::cout << msg << std::endl;
+        speech_id_ctr += 1;
+        voc_params_pub.publish(msg);
+        ROS_INFO("VOCABULARY INITIALIZED");
+    }
+
+    void commandRecognition()
+    {
+        // Recognition is started here and stopped in the callback once a valid command was found
+        initializeVocabulary();
+        std_srvs::Empty srv;
+
+        if (recog_start_srv.call(srv)) {
+            ROS_INFO("SUCCESSFULLY STARTED RECOGNITION");
+        }
+        else {
+            ROS_ERROR("COULDN'T START RECOGNITION");
+        }
+    }
+};
+
+int main(int argc, char** argv) {
+
+    ros::init(argc, argv, "speech");
+
+    Nao_control TermiNAOtor;
+    ros::Rate loop_rate(0.5);
+    loop_rate.sleep();
+    TermiNAOtor.commandRecognition();
+    ROS_INFO("SPIN");
+    ros::spin();
+    return 0;
+}
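
For reference, a minimal consumer of the /recieved_commands topic published by src/speech.cpp could look like the sketch below. This listener is hypothetical and not part of the patch (the node name "command_listener" and the callback are illustrative only); it simply shows how the std_msgs::String commands coming from the speech node would be received.

// Hypothetical example, not part of this commit: minimal listener for the
// commands that speech.cpp publishes on /recieved_commands.
#include <ros/ros.h>
#include <std_msgs/String.h>

void commandCallback(const std_msgs::String::ConstPtr& msg) {
    // Each message carries one recognized vocabulary phrase, e.g. "Stop".
    ROS_INFO("Received command: %s", msg->data.c_str());
}

int main(int argc, char** argv) {
    ros::init(argc, argv, "command_listener");  // node name is illustrative
    ros::NodeHandle nh;
    ros::Subscriber sub = nh.subscribe("/recieved_commands", 5, commandCallback);
    ros::spin();
    return 0;
}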