did work on speech recognition
.gitignore (vendored, +3)
@@ -1,2 +1,5 @@
 # Vim stuff
 .*.sw*
+
+# Gedit stuff
+*~

CMakeLists.txt
@@ -16,6 +16,7 @@ find_package(catkin REQUIRED COMPONENTS
   std_msgs
   tf
   aruco
+  naoqi_bridge_msgs
 )
 
 ## System dependencies are found with CMake's conventions
@@ -68,15 +69,15 @@ find_package(catkin REQUIRED COMPONENTS
 ## Generate actions in the 'action' folder
 #add_action_files(
 #   FILES
-#   Action1.action
-#   Action2.action
+#   Blink.action
 #)
 
 ## Generate added messages and services with any dependencies listed here
-# generate_messages(
-#   DEPENDENCIES
-#   sensor_msgs#   std_msgs
-# )
+generate_messages(
+  DEPENDENCIES
+  sensor_msgs
+  actionlib_msgs
+)
 
 ################################################
 ## Declare ROS dynamic reconfigure parameters ##
@@ -112,6 +113,8 @@ catkin_package(
 #  LIBRARIES project_aruco
 #  CATKIN_DEPENDS cv_bridge image_transport roscpp sensor_msgs std_msgs
 #  DEPENDS system_lib
+
+#CATKIN_DEPENDS actionlib_msgs
 )
 
 ###########
@@ -157,8 +160,10 @@ include_directories(
 
 add_executable(aruco_detector src/aruco_detector.cpp)
 add_executable(aruco_effector src/aruco_effector.cpp)
+add_executable(speech src/speech.cpp)
 target_link_libraries(aruco_detector ${catkin_LIBRARIES} ${aruco_LIB})
 target_link_libraries(aruco_effector ${catkin_LIBRARIES} ${aruco_LIB})
+target_link_libraries(speech ${catkin_LIBRARIES} ${aruco_LIB})
 # add_dependencies(aruco tutorial_4_generate_messages_cpp)
 
 #############

action/SetSpeechVocabulary.action (new file, 9 lines)
@@ -0,0 +1,9 @@
+# Goal: The new vocabulary to be set in the speech recognition module
+# Result: True if the vocabulary was set
+# Feedback: None
+
+string[] words
+---
+bool success
+---
+

action/SpeechWithFeedback.action (new file, 11 lines)
@@ -0,0 +1,11 @@
+# Purpose : To have feedback on when the speech was started and when
+# NAO stopped talking
+# Goal: The sentence for NAO to say
+# Result: NAO has finished speaking
+# Feedback: When NAO starts speaking
+
+string say
+---
+# Empty result
+---
+# Empty feedback

src/speech.cpp (new file, 219 lines)
@@ -0,0 +1,219 @@
+#include <iostream>
+#include <fstream>
+#include <iomanip>
+#include <cmath>
+#include <stdlib.h>
+#include <ros/ros.h>
+#include <sensor_msgs/image_encodings.h>
+#include "sensor_msgs/JointState.h"
+#include "sensor_msgs/Imu.h"
+#include "message_filters/subscriber.h"
+#include <string.h>
+#include <naoqi_bridge_msgs/JointAnglesWithSpeed.h>
+#include <naoqi_bridge_msgs/Bumper.h>
+#include <naoqi_bridge_msgs/HeadTouch.h>
+#include <naoqi_bridge_msgs/HandTouch.h>
+#include <naoqi_bridge_msgs/JointAnglesWithSpeedAction.h>
+#include <std_srvs/Empty.h>
+#include <boost/algorithm/string.hpp>
+#include <boost/date_time.hpp>
+#include <naoqi_bridge_msgs/SpeechWithFeedbackActionGoal.h>
+#include <actionlib_msgs/GoalStatusArray.h>
+#include <naoqi_bridge_msgs/BlinkActionGoal.h>
+#include <naoqi_bridge_msgs/SetSpeechVocabularyActionGoal.h>
+#include <std_msgs/ColorRGBA.h>
+#include <naoqi_bridge_msgs/WordRecognized.h>
+#include <geometry_msgs/PoseStamped.h>
+#include <geometry_msgs/Pose2D.h>
+#include <std_msgs/Bool.h>
+#include <std_msgs/String.h>
+#include "actionlib/client/simple_action_client.h"
+
+
+
+
+using namespace std;
+
+class Nao_control {
+
+protected:
+
+  // ROS node handler
+  ros::NodeHandle nh_;
+
+  // Publisher for nao speech
+  ros::Publisher speech_pub;
+
+  // Publisher for nao vocabulary parameters
+  ros::Publisher voc_params_pub;
+
+  // Client for starting speech recognition
+  ros::ServiceClient recog_start_srv;
+
+  // Client for stopping speech recognition
+  ros::ServiceClient recog_stop_srv;
+
+  // Subscriber to speech recognition
+  ros::Subscriber recog_sub;
+
+  // Publisher for recognized commands
+  ros::Publisher command_pub;
+
+  int speech_id_ctr = 1;
+  std::vector<std::string> recognized_words;
+  bool commandfound = false;
+
+  bool speech_flag = false;
+
+public:
+
+  Nao_control(){
+
+    ROS_INFO("Constructor");
+
+    speech_pub = nh_.advertise<naoqi_bridge_msgs::SpeechWithFeedbackActionGoal>(
+        "/speech_action/goal", 1);
+
+    voc_params_pub= nh_.advertise<naoqi_bridge_msgs::SetSpeechVocabularyActionGoal>(
+        "/speech_vocabulary_action/goal", 1);
+
+    recog_start_srv=nh_.serviceClient<std_srvs::Empty>("/start_recognition");
+
+    recog_stop_srv=nh_.serviceClient<std_srvs::Empty>("/stop_recognition");
+
+    recog_sub=nh_.subscribe("/word_recognized", 1,
+        &Nao_control::speechRecognitionCallback, this);
+
+    command_pub = nh_.advertise<std_msgs::String>("/recieved_commands",5);
+
+  }
+
+
+  ~Nao_control() {
+
+    ROS_INFO("Destructor");
+    //stopping recognition
+
+    std_srvs::Empty srv;
+
+    if (recog_stop_srv.call(srv)) {
+      ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION");
+    }
+    else {
+      ROS_ERROR("COULDN'T STOP RECOGNITION");
+    }
+  }
+
+
+  void speechRecognitionCallback(const naoqi_bridge_msgs::WordRecognized::ConstPtr& msg)
+  {
+
+    ROS_INFO("A WORD WAS RECOGNIZED");
+    std_srvs::Empty srv;
+
+    ROS_INFO("Confidence Value:");
+    std::cout << std::to_string(msg->confidence_values[0]) << std::endl;
+
+    for (int i = 0; i < msg->words.size(); i++) {
+
+      std::cout << msg->words[i] << std::endl;
+
+    }
+
+    if (recog_stop_srv.call(srv) && ((msg->words.size())> 0)) {
+
+      ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION");
+
+      // Use confidence level to decide wether the recognized word should be published
+
+      if (msg->confidence_values[0] > 0.35) {
+
+        ROS_INFO("SPEECH STARTING");
+        std::string say = "Ok I understood";
+
+        naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg;
+        s_msg.goal_id.id = std::to_string(this->speech_id_ctr);
+        this->speech_id_ctr += 1;
+        s_msg.goal.say = say;
+        this->speech_pub.publish(s_msg);
+        this->recognized_words.clear();
+
+        // publish here
+        ROS_INFO("PUBLISH");
+
+        std_msgs::String c_msg;
+        c_msg.data = msg->words[0];
+        this->command_pub.publish(c_msg);
+
+      }
+      else if(msg->confidence_values[0] > 0.3) {
+
+        ROS_INFO("SPEECH STARTING");
+        std::string say = "I did not understand. Could you repeat that please";
+
+        naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg;
+        s_msg.goal_id.id = std::to_string(this->speech_id_ctr);
+        this->speech_id_ctr += 1;
+        s_msg.goal.say = say;
+        this->speech_pub.publish(s_msg);
+        this->recognized_words.clear();
+
+      }
+
+      this->recognized_words.clear();
+      ros::Rate loop_rate(10000);
+      loop_rate.sleep();
+      //program stops here, better loop structure, where recognition is started again has to be added
+      commandRecognition();
+    }
+    else {
+      ROS_ERROR("COULDN'T STOP RECOGNITION");
+    }
+  }
+
+  void initializeVocabulary()
+  {
+
+    std::vector<std::string> vocabulary;
+    vocabulary.push_back("Start");// Operation ");
+    vocabulary.push_back("Stop");// Operation ");
+    //vocabulary.push_back("hello ");
+    vocabulary.push_back("Open your hands ");
+    vocabulary.push_back("Close your hands ");
+
+    naoqi_bridge_msgs::SetSpeechVocabularyActionGoal msg;
+    msg.goal.words = vocabulary;
+    msg.goal_id.id = std::to_string(speech_id_ctr);
+    std::cout << msg << std::endl;
+    speech_id_ctr += 1;
+    voc_params_pub.publish(msg);
+    ROS_INFO("VOCABULARY INITIALIZED");
+  }
+
+  void commandRecognition()
+  {
+    //recognition has to be started and ended once a valid command was found
+
+    initializeVocabulary();
+    std_srvs::Empty srv;
+
+    if (recog_start_srv.call(srv)) {
+      ROS_INFO("SUCCESSFULLY STARTED RECOGNITION");
+    }
+    else {
+      ROS_ERROR("COULDN'T START RECOGNITION");
+    }
+  }
+
+};
+
+int main(int argc, char** argv) {
+  ros::init(argc, argv, "speech");
+
+  Nao_control TermiNAOtor;
+  ros::Rate loop_rate(10);
+  loop_rate.sleep();
+  TermiNAOtor.commandRecognition();
+  ros::spin();
+  return 0;
+}

src/speech_2.cpp (new file, 177 lines)
@@ -0,0 +1,177 @@
+#include <iostream>
+#include <fstream>
+#include <iomanip>
+#include <cmath>
+#include <stdlib.h>
+#include <ros/ros.h>
+#include <sensor_msgs/image_encodings.h>
+#include "sensor_msgs/JointState.h"
+#include "sensor_msgs/Imu.h"
+#include "message_filters/subscriber.h"
+#include <string.h>
+#include <naoqi_bridge_msgs/JointAnglesWithSpeed.h>
+#include <naoqi_bridge_msgs/Bumper.h>
+#include <naoqi_bridge_msgs/HeadTouch.h>
+#include <naoqi_bridge_msgs/HandTouch.h>
+#include <naoqi_bridge_msgs/JointAnglesWithSpeedAction.h>
+#include <std_srvs/Empty.h>
+#include <boost/algorithm/string.hpp>
+#include <boost/date_time.hpp>
+#include <naoqi_bridge_msgs/SpeechWithFeedbackActionGoal.h>
+#include <actionlib_msgs/GoalStatusArray.h>
+#include <naoqi_bridge_msgs/BlinkActionGoal.h>
+#include <naoqi_bridge_msgs/SetSpeechVocabularyActionGoal.h>
+#include <std_msgs/ColorRGBA.h>
+#include <naoqi_bridge_msgs/WordRecognized.h>
+#include <geometry_msgs/PoseStamped.h>
+#include <geometry_msgs/Pose2D.h>
+#include <std_msgs/Bool.h>
+#include "actionlib/client/simple_action_client.h"
+
+
+
+
+using namespace std;
+
+class Nao_control {
+
+protected:
+
+  // ROS node handler
+  ros::NodeHandle nh_;
+
+  // Subscriber to head tactile states
+  ros::Subscriber tactile_sub;
+
+  // Publisher for nao speech
+  ros::Publisher speech_pub;
+
+  // Publisher for nao vocabulary parameters
+  ros::Publisher voc_params_pub;
+
+  // Client for starting speech recognition
+  ros::ServiceClient recog_start_srv;
+
+  // Client for stopping speech recognition
+  ros::ServiceClient recog_stop_srv;
+
+  // Subscriber to speech recognition
+  ros::Subscriber recog_sub;
+
+  int speech_id_ctr = 0;
+  std::vector<std::string> recognized_words;
+
+public:
+
+  Nao_control(){
+
+    // Subscribe to topic bumper and specify that
+    // all data will be processed by function bumperCallback
+    //bumper_sub=nh_.subscribe("/bumper",1, &Nao_control::bumperCallback,
+    //    this);
+
+    // Subscribe to topic tactile_touch and specify
+    // that all data will be processed by function tactileCallback
+    //tactile_sub=nh_.subscribe("/tactile_touch", 1,
+    //    &Nao_control::tactileCallback, this);
+
+    speech_pub = nh_.advertise<naoqi_bridge_msgs::SpeechWithFeedbackActionGoal>(
+        "/speech_action/goal", 1);
+
+    voc_params_pub= nh_.advertise<naoqi_bridge_msgs::SetSpeechVocabularyActionGoal>(
+        "/speech_vocabulary_action/goal", 1);
+
+    recog_start_srv=nh_.serviceClient<std_srvs::Empty>("/start_recognition");
+
+    recog_stop_srv=nh_.serviceClient<std_srvs::Empty>("/stop_recognition");
+
+    recog_sub=nh_.subscribe("/word_recognized", 1,
+        &Nao_control::speechRecognitionCallback, this);
+
+    ROS_INFO("Constructor");
+    tactileCallback();
+
+  }
+
+
+  ~Nao_control() {
+
+    ROS_INFO("Destructor");
+  }
+
+  void speechRecognitionCallback(const naoqi_bridge_msgs::WordRecognized::ConstPtr& msg)
+  {
+    for (int i = 0; i < msg->words.size(); i++) {
+      ROS_INFO("RECOGNIZED %s WITH CONFIDENCE %f",
+          msg->words[i].c_str(), msg->confidence_values[i]);
+      if (msg->confidence_values[i] > 0.42) {
+        this->recognized_words.push_back(msg->words[i]);
+      }
+    }
+  }
+
+
+  //void tactileCallback(#const naoqi_bridge_msgs::HeadTouch::ConstPtr& tactileState)
+  void tactileCallback()
+  {
+
+    if (1 ) {
+      ROS_INFO("FRONT BUTTON RELEASED");
+      std::vector<std::string> vocabulary;
+      vocabulary.push_back("hello");
+      vocabulary.push_back("world");
+      vocabulary.push_back("humans");
+      vocabulary.push_back("robots");
+      vocabulary.push_back("kill");
+
+      naoqi_bridge_msgs::SetSpeechVocabularyActionGoal msg;
+      msg.goal.words = vocabulary;
+      msg.goal_id.id = std::to_string(speech_id_ctr);
+      std::cout << msg << std::endl;
+      speech_id_ctr += 1;
+      voc_params_pub.publish(msg);
+      // somehow the message does not get published here
+
+      std_srvs::Empty srv;
+
+      if (recog_start_srv.call(srv)) {
+        ROS_INFO("SUCCESSFULLY STARTED RECOGNITION");
+      }
+      else {
+        ROS_ERROR("COULDN'T START RECOGNITION");
+      }
+    }
+    else if (this->recognized_words.size() > 1) {
+      ROS_INFO("MIDDLE BUTTON RELEASED");
+      std_srvs::Empty srv;
+      if (recog_stop_srv.call(srv)) {
+        ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION");
+        std::string say;
+        for (int i = 0; i < this->recognized_words.size(); i++) {
+          say += this->recognized_words[i] + " ";
+        }
+        naoqi_bridge_msgs::SpeechWithFeedbackActionGoal msg;
+        msg.goal_id.id = std::to_string(this->speech_id_ctr);
+        this->speech_id_ctr += 1;
+        msg.goal.say = say;
+        this->speech_pub.publish(msg);
+        this->recognized_words.clear();
+      }
+      else {
+        ROS_ERROR("COULDN'T STOP RECOGNITION");
+      }
+    }
+    else{
+      ROS_INFO("NO BUTTON PRESSED");
+    }
+  }
+
+};
+
+int main(int argc, char** argv) {
+  ros::init(argc, argv, "speech");
+
+  Nao_control TermiNAOtor;
+  ros::spin();
+  return 0;
+}