diff --git a/.gitignore b/.gitignore index 4d61f1b..9f83993 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ # Vim stuff .*.sw* + +# Gedit stuff +*~ diff --git a/CMakeLists.txt b/CMakeLists.txt index f7a67ff..24c18ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,7 @@ find_package(catkin REQUIRED COMPONENTS std_msgs tf aruco + naoqi_bridge_msgs ) ## System dependencies are found with CMake's conventions @@ -66,17 +67,17 @@ find_package(catkin REQUIRED COMPONENTS # ) ## Generate actions in the 'action' folder -# add_action_files( -# FILES -# Action1.action -# Action2.action -# ) +#add_action_files( +# FILES +# Blink.action +#) ## Generate added messages and services with any dependencies listed here -# generate_messages( -# DEPENDENCIES -# sensor_msgs# std_msgs -# ) +generate_messages( + DEPENDENCIES + sensor_msgs + actionlib_msgs +) ################################################ ## Declare ROS dynamic reconfigure parameters ## @@ -112,6 +113,8 @@ catkin_package( # LIBRARIES project_aruco # CATKIN_DEPENDS cv_bridge image_transport roscpp sensor_msgs std_msgs # DEPENDS system_lib + + #CATKIN_DEPENDS actionlib_msgs ) ########### @@ -157,8 +160,10 @@ include_directories( add_executable(aruco_detector src/aruco_detector.cpp) add_executable(aruco_effector src/aruco_effector.cpp) +add_executable(speech src/speech.cpp) target_link_libraries(aruco_detector ${catkin_LIBRARIES} ${aruco_LIB}) target_link_libraries(aruco_effector ${catkin_LIBRARIES} ${aruco_LIB}) +target_link_libraries(speech ${catkin_LIBRARIES} ${aruco_LIB}) # add_dependencies(aruco tutorial_4_generate_messages_cpp) ############# diff --git a/action/SetSpeechVocabulary.action b/action/SetSpeechVocabulary.action new file mode 100644 index 0000000..d188731 --- /dev/null +++ b/action/SetSpeechVocabulary.action @@ -0,0 +1,9 @@ +# Goal: The new vocabulary to be set in the speech recognition module +# Result: True if the vocabulary was set +# Feedback: None + +string[] words +--- +bool success +--- + diff --git a/action/SpeechWithFeedback.action b/action/SpeechWithFeedback.action new file mode 100644 index 0000000..e756dcc --- /dev/null +++ b/action/SpeechWithFeedback.action @@ -0,0 +1,11 @@ +# Purpose : To have feedback on when the speech was started and when +# NAO stopped talking +# Goal: The sentence for NAO to say +# Result: NAO has finished speaking +# Feedback: When NAO starts speaking + +string say +--- +# Empty result +--- +# Empty feedback diff --git a/src/speech.cpp b/src/speech.cpp new file mode 100644 index 0000000..b0d3490 --- /dev/null +++ b/src/speech.cpp @@ -0,0 +1,219 @@ +#include +#include +#include +#include +#include +#include +#include +#include "sensor_msgs/JointState.h" +#include "sensor_msgs/Imu.h" +#include "message_filters/subscriber.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "actionlib/client/simple_action_client.h" + + + + +using namespace std; + +class Nao_control { + +protected: + + // ROS node handler + ros::NodeHandle nh_; + + // Publisher for nao speech + ros::Publisher speech_pub; + + // Publisher for nao vocabulary parameters + ros::Publisher voc_params_pub; + + // Client for starting speech recognition + ros::ServiceClient recog_start_srv; + + // Client for stopping speech recognition + ros::ServiceClient recog_stop_srv; + + // Subscriber to speech recognition + ros::Subscriber recog_sub; + + // Publisher for recognized commands + ros::Publisher command_pub; + + int speech_id_ctr = 1; + std::vector recognized_words; + bool commandfound = false; + + bool speech_flag = false; + +public: + + Nao_control(){ + + ROS_INFO("Constructor"); + + speech_pub = nh_.advertise( + "/speech_action/goal", 1); + + voc_params_pub= nh_.advertise( + "/speech_vocabulary_action/goal", 1); + + recog_start_srv=nh_.serviceClient("/start_recognition"); + + recog_stop_srv=nh_.serviceClient("/stop_recognition"); + + recog_sub=nh_.subscribe("/word_recognized", 1, + &Nao_control::speechRecognitionCallback, this); + + command_pub = nh_.advertise("/recieved_commands",5); + + } + + + ~Nao_control() { + + ROS_INFO("Destructor"); + //stopping recognition + + std_srvs::Empty srv; + + if (recog_stop_srv.call(srv)) { + ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION"); + } + else { + ROS_ERROR("COULDN'T STOP RECOGNITION"); + } + } + + + void speechRecognitionCallback(const naoqi_bridge_msgs::WordRecognized::ConstPtr& msg) + { + + ROS_INFO("A WORD WAS RECOGNIZED"); + std_srvs::Empty srv; + + ROS_INFO("Confidence Value:"); + std::cout << std::to_string(msg->confidence_values[0]) << std::endl; + + for (int i = 0; i < msg->words.size(); i++) { + + std::cout << msg->words[i] << std::endl; + + } + + if (recog_stop_srv.call(srv) && ((msg->words.size())> 0)) { + + ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION"); + + // Use confidence level to decide wether the recognized word should be published + + if (msg->confidence_values[0] > 0.35) { + + ROS_INFO("SPEECH STARTING"); + std::string say = "Ok I understood"; + + naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg; + s_msg.goal_id.id = std::to_string(this->speech_id_ctr); + this->speech_id_ctr += 1; + s_msg.goal.say = say; + this->speech_pub.publish(s_msg); + this->recognized_words.clear(); + + // publish here + ROS_INFO("PUBLISH"); + + std_msgs::String c_msg; + c_msg.data = msg->words[0]; + this->command_pub.publish(c_msg); + + } + else if(msg->confidence_values[0] > 0.3) { + + ROS_INFO("SPEECH STARTING"); + std::string say = "I did not understand. Could you repeat that please"; + + naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg; + s_msg.goal_id.id = std::to_string(this->speech_id_ctr); + this->speech_id_ctr += 1; + s_msg.goal.say = say; + this->speech_pub.publish(s_msg); + this->recognized_words.clear(); + + } + + this->recognized_words.clear(); + ros::Rate loop_rate(10000); + loop_rate.sleep(); + //program stops here, better loop structure, where recognition is started again has to be added + commandRecognition(); + } + else { + ROS_ERROR("COULDN'T STOP RECOGNITION"); + } + } + + void initializeVocabulary() + { + + std::vector vocabulary; + vocabulary.push_back("Start");// Operation "); + vocabulary.push_back("Stop");// Operation "); + //vocabulary.push_back("hello "); + vocabulary.push_back("Open your hands "); + vocabulary.push_back("Close your hands "); + + naoqi_bridge_msgs::SetSpeechVocabularyActionGoal msg; + msg.goal.words = vocabulary; + msg.goal_id.id = std::to_string(speech_id_ctr); + std::cout << msg << std::endl; + speech_id_ctr += 1; + voc_params_pub.publish(msg); + ROS_INFO("VOCABULARY INITIALIZED"); + } + + void commandRecognition() + { + //recognition has to be started and ended once a valid command was found + + initializeVocabulary(); + std_srvs::Empty srv; + + if (recog_start_srv.call(srv)) { + ROS_INFO("SUCCESSFULLY STARTED RECOGNITION"); + } + else { + ROS_ERROR("COULDN'T START RECOGNITION"); + } + } + +}; + +int main(int argc, char** argv) { + ros::init(argc, argv, "speech"); + + Nao_control TermiNAOtor; + ros::Rate loop_rate(10); + loop_rate.sleep(); + TermiNAOtor.commandRecognition(); + ros::spin(); + return 0; +} diff --git a/src/speech_2.cpp b/src/speech_2.cpp new file mode 100644 index 0000000..2da85f5 --- /dev/null +++ b/src/speech_2.cpp @@ -0,0 +1,177 @@ +#include +#include +#include +#include +#include +#include +#include +#include "sensor_msgs/JointState.h" +#include "sensor_msgs/Imu.h" +#include "message_filters/subscriber.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "actionlib/client/simple_action_client.h" + + + + +using namespace std; + +class Nao_control { + +protected: + + // ROS node handler + ros::NodeHandle nh_; + + // Subscriber to head tactile states + ros::Subscriber tactile_sub; + + // Publisher for nao speech + ros::Publisher speech_pub; + + // Publisher for nao vocabulary parameters + ros::Publisher voc_params_pub; + + // Client for starting speech recognition + ros::ServiceClient recog_start_srv; + + // Client for stopping speech recognition + ros::ServiceClient recog_stop_srv; + + // Subscriber to speech recognition + ros::Subscriber recog_sub; + + int speech_id_ctr = 0; + std::vector recognized_words; + +public: + + Nao_control(){ + + // Subscribe to topic bumper and specify that + // all data will be processed by function bumperCallback + //bumper_sub=nh_.subscribe("/bumper",1, &Nao_control::bumperCallback, + // this); + + // Subscribe to topic tactile_touch and specify + // that all data will be processed by function tactileCallback + //tactile_sub=nh_.subscribe("/tactile_touch", 1, + // &Nao_control::tactileCallback, this); + + speech_pub = nh_.advertise( + "/speech_action/goal", 1); + + voc_params_pub= nh_.advertise( + "/speech_vocabulary_action/goal", 1); + + recog_start_srv=nh_.serviceClient("/start_recognition"); + + recog_stop_srv=nh_.serviceClient("/stop_recognition"); + + recog_sub=nh_.subscribe("/word_recognized", 1, + &Nao_control::speechRecognitionCallback, this); + + ROS_INFO("Constructor"); + tactileCallback(); + + } + + + ~Nao_control() { + + ROS_INFO("Destructor"); + } + + void speechRecognitionCallback(const naoqi_bridge_msgs::WordRecognized::ConstPtr& msg) + { + for (int i = 0; i < msg->words.size(); i++) { + ROS_INFO("RECOGNIZED %s WITH CONFIDENCE %f", + msg->words[i].c_str(), msg->confidence_values[i]); + if (msg->confidence_values[i] > 0.42) { + this->recognized_words.push_back(msg->words[i]); + } + } + } + + + //void tactileCallback(#const naoqi_bridge_msgs::HeadTouch::ConstPtr& tactileState) + void tactileCallback() + { + + if (1 ) { + ROS_INFO("FRONT BUTTON RELEASED"); + std::vector vocabulary; + vocabulary.push_back("hello"); + vocabulary.push_back("world"); + vocabulary.push_back("humans"); + vocabulary.push_back("robots"); + vocabulary.push_back("kill"); + + naoqi_bridge_msgs::SetSpeechVocabularyActionGoal msg; + msg.goal.words = vocabulary; + msg.goal_id.id = std::to_string(speech_id_ctr); + std::cout << msg << std::endl; + speech_id_ctr += 1; + voc_params_pub.publish(msg); + // somehow the message does not get published here + + std_srvs::Empty srv; + + if (recog_start_srv.call(srv)) { + ROS_INFO("SUCCESSFULLY STARTED RECOGNITION"); + } + else { + ROS_ERROR("COULDN'T START RECOGNITION"); + } + } + else if (this->recognized_words.size() > 1) { + ROS_INFO("MIDDLE BUTTON RELEASED"); + std_srvs::Empty srv; + if (recog_stop_srv.call(srv)) { + ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION"); + std::string say; + for (int i = 0; i < this->recognized_words.size(); i++) { + say += this->recognized_words[i] + " "; + } + naoqi_bridge_msgs::SpeechWithFeedbackActionGoal msg; + msg.goal_id.id = std::to_string(this->speech_id_ctr); + this->speech_id_ctr += 1; + msg.goal.say = say; + this->speech_pub.publish(msg); + this->recognized_words.clear(); + } + else { + ROS_ERROR("COULDN'T STOP RECOGNITION"); + } + } + else{ + ROS_INFO("NO BUTTON PRESSED"); + } + } + +}; + +int main(int argc, char** argv) { + ros::init(argc, argv, "speech"); + + Nao_control TermiNAOtor; + ros::spin(); + return 0; +}