[audio, microphone] - Allow MicrophoneSource to passively capture/optimization by kahrendt · Pull Request #8732 · esphome/esphome · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

[audio, microphone] - Allow MicrophoneSource to passively capture/optimization #8732

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion esphome/components/audio/audio.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ const char *audio_file_type_to_string(AudioFileType file_type);
void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor,
size_t samples_to_scale);

/// @brief Unpacks a quantized audio sample into a Q31 fixed point number.
/// @brief Unpacks a quantized audio sample into a Q31 fixed-point number.
/// @param data Pointer to uint8_t array containing the audio sample
/// @param bytes_per_sample The number of bytes per sample
/// @return Q31 sample
Expand All @@ -160,5 +160,28 @@ inline int32_t unpack_audio_sample_to_q31(const uint8_t *data, size_t bytes_per_
return sample;
}

/// @brief Stores the most significant bytes of a Q31 fixed-point sample into a byte buffer.
/// Bytes are written least significant first, so ``data[bytes_per_sample - 1]`` always receives
/// bits 31..24 of ``sample``. The discarded low-order bits are truncated; no dithering is applied.
/// @param sample Q31 fixed-point number to pack
/// @param data Pointer to the destination array; at least ``bytes_per_sample`` bytes are written
/// @param bytes_per_sample Width of the packed sample in bytes (1 to 4). Any other value leaves
///                         ``data`` untouched.
inline void pack_q31_as_audio_sample(int32_t sample, uint8_t *data, size_t bytes_per_sample) {
  // Unsupported widths are ignored rather than partially written
  if (bytes_per_sample < 1 || bytes_per_sample > 4) {
    return;
  }

  // Bit position of the lowest byte that is kept: 24 for 1 byte, 16 for 2, 8 for 3, 0 for 4
  unsigned shift = static_cast<unsigned>(8 * (4 - bytes_per_sample));

  for (size_t byte_index = 0; byte_index < bytes_per_sample; ++byte_index) {
    data[byte_index] = static_cast<uint8_t>(sample >> shift);
    shift += 8;
  }
}

} // namespace audio
} // namespace esphome
11 changes: 10 additions & 1 deletion esphome/components/microphone/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,13 +162,22 @@ def _validate_audio_compatability(config):
return _validate_audio_compatability


async def microphone_source_to_code(config):
async def microphone_source_to_code(config, passive=False):
"""Creates a MicrophoneSource variable for codegen.

If passive is true, the MicrophoneSource never starts or stops the microphone itself; it only receives audio while another component has actively started the Microphone. If false, then the microphone needs to be explicitly started/stopped.

Args:
config (Schema): Created with `microphone_source_schema` specifying bits per sample, channels, and gain factor
passive (bool): Enable passive mode for the MicrophoneSource
"""
mic = await cg.get_variable(config[CONF_MICROPHONE])
mic_source = cg.new_Pvariable(
config[CONF_ID],
mic,
config[CONF_BITS_PER_SAMPLE],
config[CONF_GAIN_FACTOR],
passive,
)
for channel in config[CONF_CHANNELS]:
cg.add(mic_source.add_channel(channel))
Expand Down
38 changes: 11 additions & 27 deletions esphome/components/microphone/microphone_source.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@ namespace microphone {
static const int32_t Q25_MAX_VALUE = (1 << 25) - 1;
static const int32_t Q25_MIN_VALUE = ~Q25_MAX_VALUE;

static const uint32_t HISTORY_VALUES = 32;

void MicrophoneSource::add_data_callback(std::function<void(const std::vector<uint8_t> &)> &&data_callback) {
std::function<void(const std::vector<uint8_t> &)> filtered_callback =
[this, data_callback](const std::vector<uint8_t> &data) {
if (this->enabled_) {
if (this->enabled_ || this->passive_) {
if (this->processed_samples_.use_count() == 0) {
// Create vector if it's unused
this->processed_samples_ = std::make_shared<std::vector<uint8_t>>();
Expand All @@ -32,13 +30,14 @@ audio::AudioStreamInfo MicrophoneSource::get_audio_stream_info() {
}

void MicrophoneSource::start() {
if (!this->enabled_) {
if (!this->enabled_ && !this->passive_) {
this->enabled_ = true;
this->mic_->start();
}
}

void MicrophoneSource::stop() {
if (this->enabled_) {
if (this->enabled_ && !this->passive_) {
this->enabled_ = false;
this->mic_->stop();
this->processed_samples_.reset();
Expand All @@ -63,8 +62,9 @@ void MicrophoneSource::process_audio_(const std::vector<uint8_t> &data, std::vec
const size_t target_bytes_per_sample = (this->bits_per_sample_ + 7) / 8;
const size_t target_bytes_per_frame = target_bytes_per_sample * this->channels_.count();

filtered_data.reserve(target_bytes_per_frame * total_frames);
filtered_data.resize(0);
filtered_data.resize(target_bytes_per_frame * total_frames);

uint8_t *current_data = filtered_data.data();

for (uint32_t frame_index = 0; frame_index < total_frames; ++frame_index) {
for (uint32_t channel_index = 0; channel_index < source_channels; ++channel_index) {
Expand All @@ -82,26 +82,10 @@ void MicrophoneSource::process_audio_(const std::vector<uint8_t> &data, std::vec
// Clamp ``sample`` in case gain multiplication overflows 25 bits
sample = clamp<int32_t>(sample, Q25_MIN_VALUE, Q25_MAX_VALUE); // Q25

// Copy ``target_bytes_per_sample`` bytes to the output buffer.
if (target_bytes_per_sample == 1) {
sample >>= 18; // Q25 -> Q7
filtered_data.push_back(static_cast<uint8_t>(sample));
} else if (target_bytes_per_sample == 2) {
sample >>= 10; // Q25 -> Q15
filtered_data.push_back(static_cast<uint8_t>(sample));
filtered_data.push_back(static_cast<uint8_t>(sample >> 8));
} else if (target_bytes_per_sample == 3) {
sample >>= 2; // Q25 -> Q23
filtered_data.push_back(static_cast<uint8_t>(sample));
filtered_data.push_back(static_cast<uint8_t>(sample >> 8));
filtered_data.push_back(static_cast<uint8_t>(sample >> 16));
} else {
sample *= (1 << 6); // Q25 -> Q31
filtered_data.push_back(static_cast<uint8_t>(sample));
filtered_data.push_back(static_cast<uint8_t>(sample >> 8));
filtered_data.push_back(static_cast<uint8_t>(sample >> 16));
filtered_data.push_back(static_cast<uint8_t>(sample >> 24));
}
sample *= (1 << 6); // Q25 -> Q31

audio::pack_q31_as_audio_sample(sample, current_data, target_bytes_per_sample);
current_data = current_data + target_bytes_per_sample;
}
}
}
Expand Down
9 changes: 5 additions & 4 deletions esphome/components/microphone/microphone_source.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ class MicrophoneSource {
* Note that this class cannot convert sample rates!
*/
public:
MicrophoneSource(Microphone *mic, uint8_t bits_per_sample, int32_t gain_factor)
: mic_(mic), bits_per_sample_(bits_per_sample), gain_factor_(gain_factor) {}
MicrophoneSource(Microphone *mic, uint8_t bits_per_sample, int32_t gain_factor, bool passive)
: mic_(mic), bits_per_sample_(bits_per_sample), gain_factor_(gain_factor), passive_(passive) {}

/// @brief Enables a channel to be processed through the callback.
///
Expand All @@ -59,8 +59,8 @@ class MicrophoneSource {

void start();
void stop();
bool is_running() const { return (this->mic_->is_running() && this->enabled_); }
bool is_stopped() const { return !this->enabled_; }
bool is_running() const { return (this->mic_->is_running() && (this->enabled_ || this->passive_)); }
bool is_stopped() const { return !this->is_running(); };

protected:
void process_audio_(const std::vector<uint8_t> &data, std::vector<uint8_t> &filtered_data);
Expand All @@ -72,6 +72,7 @@ class MicrophoneSource {
std::bitset<8> channels_;
int32_t gain_factor_;
bool enabled_{false};
bool passive_{false};
};

} // namespace microphone
Expand Down
0