Work on speech recognition: add vocabulary setup, recognition callbacks, and spoken feedback for NAO

This commit is contained in:
HRS_D
2019-01-22 11:04:37 +01:00
parent becede0124
commit b77c97afe0
6 changed files with 433 additions and 9 deletions

3
.gitignore vendored
View File

@@ -1,2 +1,5 @@
# Vim stuff
.*.sw*
# Gedit stuff
*~

View File

@@ -16,6 +16,7 @@ find_package(catkin REQUIRED COMPONENTS
std_msgs
tf
aruco
naoqi_bridge_msgs
)
## System dependencies are found with CMake's conventions
@@ -66,17 +67,17 @@ find_package(catkin REQUIRED COMPONENTS
# )
## Generate actions in the 'action' folder
# add_action_files(
# FILES
# Action1.action
# Action2.action
# )
#add_action_files(
# FILES
# Blink.action
#)
## Generate added messages and services with any dependencies listed here
# generate_messages(
# DEPENDENCIES
# sensor_msgs# std_msgs
# )
generate_messages(
DEPENDENCIES
sensor_msgs
actionlib_msgs
)
################################################
## Declare ROS dynamic reconfigure parameters ##
@@ -112,6 +113,8 @@ catkin_package(
# LIBRARIES project_aruco
# CATKIN_DEPENDS cv_bridge image_transport roscpp sensor_msgs std_msgs
# DEPENDS system_lib
#CATKIN_DEPENDS actionlib_msgs
)
###########
@@ -157,8 +160,10 @@ include_directories(
add_executable(aruco_detector src/aruco_detector.cpp)
add_executable(aruco_effector src/aruco_effector.cpp)
add_executable(speech src/speech.cpp)
target_link_libraries(aruco_detector ${catkin_LIBRARIES} ${aruco_LIB})
target_link_libraries(aruco_effector ${catkin_LIBRARIES} ${aruco_LIB})
target_link_libraries(speech ${catkin_LIBRARIES} ${aruco_LIB})
# add_dependencies(aruco tutorial_4_generate_messages_cpp)
#############

View File

@@ -0,0 +1,9 @@
# Goal: The new vocabulary to be set in the speech recognition module
# Result: True if the vocabulary was set
# Feedback: None
string[] words
---
bool success
---

View File

@@ -0,0 +1,11 @@
# Purpose: Provide feedback on when NAO starts and stops talking
# Goal: The sentence for NAO to say
# Result: NAO has finished speaking
# Feedback: When NAO starts speaking
string say
---
# Empty result
---
# Empty feedback

219
src/speech.cpp Normal file
View File

@@ -0,0 +1,219 @@
#include <iostream>
#include <fstream>
#include <iomanip>
#include <cmath>
#include <stdlib.h>
#include <ros/ros.h>
#include <sensor_msgs/image_encodings.h>
#include "sensor_msgs/JointState.h"
#include "sensor_msgs/Imu.h"
#include "message_filters/subscriber.h"
#include <string.h>
#include <naoqi_bridge_msgs/JointAnglesWithSpeed.h>
#include <naoqi_bridge_msgs/Bumper.h>
#include <naoqi_bridge_msgs/HeadTouch.h>
#include <naoqi_bridge_msgs/HandTouch.h>
#include <naoqi_bridge_msgs/JointAnglesWithSpeedAction.h>
#include <std_srvs/Empty.h>
#include <boost/algorithm/string.hpp>
#include <boost/date_time.hpp>
#include <naoqi_bridge_msgs/SpeechWithFeedbackActionGoal.h>
#include <actionlib_msgs/GoalStatusArray.h>
#include <naoqi_bridge_msgs/BlinkActionGoal.h>
#include <naoqi_bridge_msgs/SetSpeechVocabularyActionGoal.h>
#include <std_msgs/ColorRGBA.h>
#include <naoqi_bridge_msgs/WordRecognized.h>
#include <geometry_msgs/PoseStamped.h>
#include <geometry_msgs/Pose2D.h>
#include <std_msgs/Bool.h>
#include <std_msgs/String.h>
#include "actionlib/client/simple_action_client.h"
using namespace std;
// Controller node that runs NAO's voice-command loop: it publishes a fixed
// vocabulary, starts speech recognition, reacts to recognized words with
// spoken feedback, and forwards accepted commands on a topic.
class Nao_control {
protected:
    // ROS node handler
    ros::NodeHandle nh_;
    // Publisher for NAO speech (goals for the speech action server)
    ros::Publisher speech_pub;
    // Publisher for NAO vocabulary parameters
    ros::Publisher voc_params_pub;
    // Client for starting speech recognition
    ros::ServiceClient recog_start_srv;
    // Client for stopping speech recognition
    ros::ServiceClient recog_stop_srv;
    // Subscriber to speech recognition results
    ros::Subscriber recog_sub;
    // Publisher for recognized (accepted) commands
    ros::Publisher command_pub;
    // Monotonically increasing counter used to build unique action goal ids.
    int speech_id_ctr = 1;
    std::vector<std::string> recognized_words;
    bool commandfound = false;
    bool speech_flag = false;

public:
    Nao_control() {
        ROS_INFO("Constructor");
        speech_pub = nh_.advertise<naoqi_bridge_msgs::SpeechWithFeedbackActionGoal>(
            "/speech_action/goal", 1);
        voc_params_pub = nh_.advertise<naoqi_bridge_msgs::SetSpeechVocabularyActionGoal>(
            "/speech_vocabulary_action/goal", 1);
        recog_start_srv = nh_.serviceClient<std_srvs::Empty>("/start_recognition");
        recog_stop_srv = nh_.serviceClient<std_srvs::Empty>("/stop_recognition");
        recog_sub = nh_.subscribe("/word_recognized", 1,
                                  &Nao_control::speechRecognitionCallback, this);
        // NOTE(review): topic name contains a typo ("recieved"), but it is kept
        // as-is because other nodes may already subscribe to this exact name.
        command_pub = nh_.advertise<std_msgs::String>("/recieved_commands", 5);
    }

    ~Nao_control() {
        ROS_INFO("Destructor");
        // Stop recognition so the robot is not left listening after shutdown.
        std_srvs::Empty srv;
        if (recog_stop_srv.call(srv)) {
            ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION");
        }
        else {
            ROS_ERROR("COULDN'T STOP RECOGNITION");
        }
    }

    // Handles a recognized-word event: stops recognition, gives spoken
    // feedback depending on the confidence of the best match, publishes the
    // accepted command, and restarts the recognition cycle.
    void speechRecognitionCallback(const naoqi_bridge_msgs::WordRecognized::ConstPtr& msg)
    {
        ROS_INFO("A WORD WAS RECOGNIZED");
        // FIX: the original indexed confidence_values[0] before checking that
        // anything was recognized, which is undefined behavior on an empty
        // message. Bail out early on empty results instead.
        if (msg->words.empty() || msg->confidence_values.empty()) {
            ROS_INFO("EMPTY RECOGNITION RESULT - IGNORED");
            return;
        }
        std_srvs::Empty srv;
        ROS_INFO("Confidence Value:");
        std::cout << std::to_string(msg->confidence_values[0]) << std::endl;
        for (std::size_t i = 0; i < msg->words.size(); i++) {
            std::cout << msg->words[i] << std::endl;
        }
        if (recog_stop_srv.call(srv)) {
            ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION");
            // Use confidence level to decide whether the recognized word
            // should be published (thresholds chosen empirically).
            if (msg->confidence_values[0] > 0.35) {
                ROS_INFO("SPEECH STARTING");
                std::string say = "Ok I understood";
                naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg;
                s_msg.goal_id.id = std::to_string(this->speech_id_ctr);
                this->speech_id_ctr += 1;
                s_msg.goal.say = say;
                this->speech_pub.publish(s_msg);
                this->recognized_words.clear();
                // Forward the accepted command to the rest of the system.
                ROS_INFO("PUBLISH");
                std_msgs::String c_msg;
                c_msg.data = msg->words[0];
                this->command_pub.publish(c_msg);
            }
            else if (msg->confidence_values[0] > 0.3) {
                // Borderline confidence: ask the user to repeat instead of
                // publishing a possibly wrong command.
                ROS_INFO("SPEECH STARTING");
                std::string say = "I did not understand. Could you repeat that please";
                naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg;
                s_msg.goal_id.id = std::to_string(this->speech_id_ctr);
                this->speech_id_ctr += 1;
                s_msg.goal.say = say;
                this->speech_pub.publish(s_msg);
                this->recognized_words.clear();
            }
            this->recognized_words.clear();
            ros::Rate loop_rate(10000);
            loop_rate.sleep();
            // TODO: program stops here; a better loop structure, where
            // recognition is started again, has to be added.
            commandRecognition();
        }
        else {
            ROS_ERROR("COULDN'T STOP RECOGNITION");
        }
    }

    // Publishes the fixed command vocabulary to the vocabulary action server.
    void initializeVocabulary()
    {
        std::vector<std::string> vocabulary;
        vocabulary.push_back("Start");// Operation ");
        vocabulary.push_back("Stop");// Operation ");
        //vocabulary.push_back("hello ");
        vocabulary.push_back("Open your hands ");
        vocabulary.push_back("Close your hands ");
        naoqi_bridge_msgs::SetSpeechVocabularyActionGoal msg;
        msg.goal.words = vocabulary;
        msg.goal_id.id = std::to_string(speech_id_ctr);
        std::cout << msg << std::endl;
        speech_id_ctr += 1;
        voc_params_pub.publish(msg);
        ROS_INFO("VOCABULARY INITIALIZED");
    }

    // Sets the vocabulary and starts the speech-recognition service.
    void commandRecognition()
    {
        // Recognition has to be started, and ended once a valid command
        // was found.
        initializeVocabulary();
        std_srvs::Empty srv;
        if (recog_start_srv.call(srv)) {
            ROS_INFO("SUCCESSFULLY STARTED RECOGNITION");
        }
        else {
            ROS_ERROR("COULDN'T START RECOGNITION");
        }
    }
};
int main(int argc, char** argv) {
ros::init(argc, argv, "speech");
Nao_control TermiNAOtor;
ros::Rate loop_rate(10);
loop_rate.sleep();
TermiNAOtor.commandRecognition();
ros::spin();
return 0;
}

177
src/speech_2.cpp Normal file
View File

@@ -0,0 +1,177 @@
#include <iostream>
#include <fstream>
#include <iomanip>
#include <cmath>
#include <stdlib.h>
#include <ros/ros.h>
#include <sensor_msgs/image_encodings.h>
#include "sensor_msgs/JointState.h"
#include "sensor_msgs/Imu.h"
#include "message_filters/subscriber.h"
#include <string.h>
#include <naoqi_bridge_msgs/JointAnglesWithSpeed.h>
#include <naoqi_bridge_msgs/Bumper.h>
#include <naoqi_bridge_msgs/HeadTouch.h>
#include <naoqi_bridge_msgs/HandTouch.h>
#include <naoqi_bridge_msgs/JointAnglesWithSpeedAction.h>
#include <std_srvs/Empty.h>
#include <boost/algorithm/string.hpp>
#include <boost/date_time.hpp>
#include <naoqi_bridge_msgs/SpeechWithFeedbackActionGoal.h>
#include <actionlib_msgs/GoalStatusArray.h>
#include <naoqi_bridge_msgs/BlinkActionGoal.h>
#include <naoqi_bridge_msgs/SetSpeechVocabularyActionGoal.h>
#include <std_msgs/ColorRGBA.h>
#include <naoqi_bridge_msgs/WordRecognized.h>
#include <geometry_msgs/PoseStamped.h>
#include <geometry_msgs/Pose2D.h>
#include <std_msgs/Bool.h>
#include "actionlib/client/simple_action_client.h"
using namespace std;
// Earlier prototype of the speech controller (speech_2.cpp): collects
// high-confidence recognized words and speaks them back. The tactile-button
// handling is stubbed out (see tactileCallback), so this currently runs the
// "start recognition" path once from the constructor.
class Nao_control {
protected:
    // ROS node handler
    ros::NodeHandle nh_;
    // Subscriber to head tactile states (currently unused — see the
    // commented-out subscription in the constructor)
    ros::Subscriber tactile_sub;
    // Publisher for nao speech
    ros::Publisher speech_pub;
    // Publisher for nao vocabulary parameters
    ros::Publisher voc_params_pub;
    // Client for starting speech recognition
    ros::ServiceClient recog_start_srv;
    // Client for stopping speech recognition
    ros::ServiceClient recog_stop_srv;
    // Subscriber to speech recognition
    ros::Subscriber recog_sub;
    // Counter used to build unique action goal ids.
    int speech_id_ctr = 0;
    // Words accepted so far (confidence above the cutoff in the callback).
    std::vector<std::string> recognized_words;
public:
    Nao_control(){
        // Subscribe to topic bumper and specify that
        // all data will be processed by function bumperCallback
        //bumper_sub=nh_.subscribe("/bumper",1, &Nao_control::bumperCallback,
        //                         this);
        // Subscribe to topic tactile_touch and specify
        // that all data will be processed by function tactileCallback
        //tactile_sub=nh_.subscribe("/tactile_touch", 1,
        //                          &Nao_control::tactileCallback, this);
        speech_pub = nh_.advertise<naoqi_bridge_msgs::SpeechWithFeedbackActionGoal>(
            "/speech_action/goal", 1);
        voc_params_pub= nh_.advertise<naoqi_bridge_msgs::SetSpeechVocabularyActionGoal>(
            "/speech_vocabulary_action/goal", 1);
        recog_start_srv=nh_.serviceClient<std_srvs::Empty>("/start_recognition");
        recog_stop_srv=nh_.serviceClient<std_srvs::Empty>("/stop_recognition");
        recog_sub=nh_.subscribe("/word_recognized", 1,
            &Nao_control::speechRecognitionCallback, this);
        ROS_INFO("Constructor");
        // Invoked directly because the tactile subscription above is disabled;
        // this immediately runs the "start recognition" branch.
        tactileCallback();
    }
    ~Nao_control() {
        ROS_INFO("Destructor");
    }
    // Stores every recognized word whose confidence exceeds the hard-coded
    // 0.42 cutoff into recognized_words.
    // NOTE(review): assumes words and confidence_values are parallel arrays
    // of equal length — verify against the WordRecognized message definition.
    void speechRecognitionCallback(const naoqi_bridge_msgs::WordRecognized::ConstPtr& msg)
    {
        for (int i = 0; i < msg->words.size(); i++) {
            ROS_INFO("RECOGNIZED %s WITH CONFIDENCE %f",
                msg->words[i].c_str(), msg->confidence_values[i]);
            if (msg->confidence_values[i] > 0.42) {
                this->recognized_words.push_back(msg->words[i]);
            }
        }
    }
    // Intended to react to head-button touches; the real signature is kept
    // below for when the tactile subscription is re-enabled.
    //void tactileCallback(#const naoqi_bridge_msgs::HeadTouch::ConstPtr& tactileState)
    void tactileCallback()
    {
        // NOTE(review): `if (1)` is a placeholder for the front-button check,
        // so the branch below always runs and the two else-branches are
        // currently unreachable dead code.
        if (1 ) {
            ROS_INFO("FRONT BUTTON RELEASED");
            // Publish a demo vocabulary, then start recognition.
            std::vector<std::string> vocabulary;
            vocabulary.push_back("hello");
            vocabulary.push_back("world");
            vocabulary.push_back("humans");
            vocabulary.push_back("robots");
            vocabulary.push_back("kill");
            naoqi_bridge_msgs::SetSpeechVocabularyActionGoal msg;
            msg.goal.words = vocabulary;
            msg.goal_id.id = std::to_string(speech_id_ctr);
            std::cout << msg << std::endl;
            speech_id_ctr += 1;
            voc_params_pub.publish(msg);
            // TODO: somehow the message does not get published here —
            // possibly because subscribers are not yet connected this early.
            std_srvs::Empty srv;
            if (recog_start_srv.call(srv)) {
                ROS_INFO("SUCCESSFULLY STARTED RECOGNITION");
            }
            else {
                ROS_ERROR("COULDN'T START RECOGNITION");
            }
        }
        // Unreachable while the placeholder above is `if (1)`: stop
        // recognition and have NAO say all collected words.
        else if (this->recognized_words.size() > 1) {
            ROS_INFO("MIDDLE BUTTON RELEASED");
            std_srvs::Empty srv;
            if (recog_stop_srv.call(srv)) {
                ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION");
                // Concatenate every accepted word into one sentence.
                std::string say;
                for (int i = 0; i < this->recognized_words.size(); i++) {
                    say += this->recognized_words[i] + " ";
                }
                naoqi_bridge_msgs::SpeechWithFeedbackActionGoal msg;
                msg.goal_id.id = std::to_string(this->speech_id_ctr);
                this->speech_id_ctr += 1;
                msg.goal.say = say;
                this->speech_pub.publish(msg);
                this->recognized_words.clear();
            }
            else {
                ROS_ERROR("COULDN'T STOP RECOGNITION");
            }
        }
        // Also unreachable while the placeholder is in place.
        else{
            ROS_INFO("NO BUTTON PRESSED");
        }
    }
};
// Entry point: initialize the node, construct the controller (its
// constructor wires up all publishers/subscribers and starts recognition),
// then spin until shutdown.
int main(int argc, char** argv) {
    ros::init(argc, argv, "speech");
    Nao_control controller;
    ros::spin();
    return 0;
}