diff --git a/posthog/models/error_tracking/hogvm_stl.py b/posthog/models/error_tracking/hogvm_stl.py index 17043d1924ab1..eba207917e29b 100644 --- a/posthog/models/error_tracking/hogvm_stl.py +++ b/posthog/models/error_tracking/hogvm_stl.py @@ -15,9 +15,10 @@ "indexOf", "notEmpty", "match", - "arrayExists", + "JSONExtract", + "arrayReduce", + "arrayCount", "arrayFilter", + "arrayExists", "arrayMap", - "arrayCount", - "arrayReduce", ] diff --git a/rust/common/hogvm/src/bin/stl_dump.rs b/rust/common/hogvm/src/bin/stl_dump.rs index b39414312e386..4fbb633b8d369 100644 --- a/rust/common/hogvm/src/bin/stl_dump.rs +++ b/rust/common/hogvm/src/bin/stl_dump.rs @@ -9,7 +9,7 @@ pub fn main() { "RUST_HOGVM_STL = [\n {}\n]", hogvm::stl() .iter() - .map(|(name, _)| *name) + .map(|(name, _)| name.as_str()) .chain(hog_stl().functions().iter().map(|(name, _)| name.as_str())) .map(|n| format!("\"{}\"", n)) .collect::>() diff --git a/rust/common/hogvm/src/context.rs b/rust/common/hogvm/src/context.rs index 8134d2f02a34c..99d87e329856a 100644 --- a/rust/common/hogvm/src/context.rs +++ b/rust/common/hogvm/src/context.rs @@ -137,22 +137,8 @@ impl ExecutionContext { let Some(native_fn) = self.native_fns.get(name) else { return Err(VmError::UnknownFunction(name.to_string())); }; - let result = native_fn(vm, args); - match result { - Ok(HogValue::Ref(ptr)) => vm.push_stack(ptr), - Ok(HogValue::Lit(lit)) => match lit { - // Object types returned from native functions get heap allocated, just like ones declared - // in the bytecode, whereas other types are pushed directly onto the stack. The purity of - // native functions means we don't need to worry about memory management for these values, - // beyond what the heap internally manages. - HogLiteral::Array(_) | HogLiteral::Object(_) => { - let ptr = vm.heap.emplace(lit)?; - vm.push_stack(ptr) - } - _ => vm.push_stack(lit), - }, - Err(e) => Err(e), - } + let emplaced = walk_emplacing(vm, native_fn(vm, args)?)?; + vm.push_stack(emplaced) } pub fn get_bytecode(&self, ip: usize, symbol: &Option) -> Result<&JsonValue, VmError> { @@ -187,3 +173,59 @@ impl std::fmt::Display for Symbol { write!(f, "{}/{}", self.module, self.name) } } + +/// Walk a HogValue and its children recursively to ensure all indexable types (arrays and objects) are heap allocated, +/// and then return the now-properly-allocated value. This is useful if, for example, you've constructed a HogValue +/// from a JSON object without mutable access to a VM's heap, and now need to push it into the VM's memory space for the +/// program to use. +/// +/// This is exposed as a utility, but generally ExecutionContext::execute_native_function_call should do what you need. +fn walk_emplacing(vm: &mut HogVM, value: HogValue) -> Result { + // Chase the pointer, if this is one, and clone out of it. We hold on to the original pointer + // so we can swap the walked value back into it after we're done. + let (literal, existing_location) = match value { + HogValue::Lit(lit) => (lit, None), + HogValue::Ref(ptr) => { + let val = vm.heap.get(ptr)?.clone(); + (val, Some(ptr)) + } + }; + + match literal { + HogLiteral::Array(arr) => { + let emplaced_arr: Result, _> = + arr.into_iter().map(|i| walk_emplacing(vm, i)).collect(); + let emplaced_arr = HogLiteral::Array(emplaced_arr?); + + if let Some(ptr) = existing_location { + // If this was already a heap-allocated array, replace it with the new one + *vm.heap.get_mut(ptr)? = emplaced_arr; + Ok(ptr.into()) + } else { + // Otherwise heap allocate it and return the pointer + vm.heap.emplace(emplaced_arr).map(|ptr| ptr.into()) + } + } + HogLiteral::Object(obj) => { + let emplaced_obj: Result, _> = obj + .into_iter() + .map(|(k, v)| Ok((k, walk_emplacing(vm, v)?))) + .collect(); + let emplaced_obj = HogLiteral::Object(emplaced_obj?); + + if let Some(ptr) = existing_location { + // As above, if this was already heap allocated, replace it with the new one + *vm.heap.get_mut(ptr)? = emplaced_obj; + Ok(ptr.into()) + } else { + // Otherwise heap allocate it and return the pointer + vm.heap.emplace(emplaced_obj).map(|ptr| ptr.into()) + } + } + // If we're looking at a non-indexable type, just return it, or the reference to it, + // if it was already heap allocated. + _ => Ok(existing_location + .map(|ptr| ptr.into()) + .unwrap_or(literal.into())), + } +} diff --git a/rust/common/hogvm/src/error.rs b/rust/common/hogvm/src/error.rs index d6d5adc115b00..3c631be3cbd13 100644 --- a/rust/common/hogvm/src/error.rs +++ b/rust/common/hogvm/src/error.rs @@ -3,6 +3,7 @@ use thiserror::Error; // TBH this is probably need to be broken up somehow #[derive(Debug, Error, Clone)] +#[non_exhaustive] pub enum VmError { #[error("Expected operation, got {0:?}")] NotAnOperation(Value), diff --git a/rust/common/hogvm/src/lib.rs b/rust/common/hogvm/src/lib.rs index 09223a0465b1b..db783fdea4828 100644 --- a/rust/common/hogvm/src/lib.rs +++ b/rust/common/hogvm/src/lib.rs @@ -24,11 +24,13 @@ pub use vm::VmFailure; // STL - again, we expose a lot, because we want to make it easy to extend this pub use stl::hog_stl; +pub use stl::native_func; pub use stl::stl; pub use stl::stl_map; pub use stl::NativeFunction; // Values - We expose almost everything here for the sake of native function extension authors +pub use values::construct_free_standing; pub use values::Callable; pub use values::Closure; pub use values::FromHogLiteral; diff --git a/rust/common/hogvm/src/stl.rs b/rust/common/hogvm/src/stl.rs index bac4cab9b4db8..27114ca616735 100644 --- a/rust/common/hogvm/src/stl.rs +++ b/rust/common/hogvm/src/stl.rs @@ -4,10 +4,11 @@ use std::collections::HashMap; use serde_json::{json, Value as JsonValue}; use crate::{ + construct_free_standing, error::VmError, memory::VmHeap, program::Module, - util::regex_match, + util::{get_json_nested, regex_match}, values::{HogLiteral, HogValue, Num}, vm::HogVM, ExportedFunction, @@ -17,10 +18,10 @@ pub const TO_STRING_RECURSION_LIMIT: usize = 32; // A "native function" is a function that can be called from within the VM. It takes a list // of arguments, and returns either a value, or null. It's pure (cannot modify the VM state). -pub type NativeFunction = fn(&HogVM, Vec) -> Result; +pub type NativeFunction = Box) -> Result>; pub fn stl_map() -> HashMap { - stl().iter().map(|(a, b)| (a.to_string(), *b)).collect() + stl().into_iter().collect() } pub fn hog_stl_map() -> HashMap { @@ -30,239 +31,316 @@ pub fn hog_stl_map() -> HashMap { } // NOTE - if you make changes to this, be sure to re-run `bin/dump_hogvmrs_stl` -pub const fn stl() -> &'static [(&'static str, NativeFunction)] { - &[ - ("toString", |vm, args| { - // Can't just use a ToString trait implementation, because ToString requires heap access to chase - // references in arrays and dicts - assert_argc(&args, 1, "toString")?; - to_string(&vm.heap, &args[0], 0).map(|s| HogLiteral::String(s).into()) - }), - ("typeof", |vm, args| { - assert_argc(&args, 1, "typeof")?; - let arg = args[0].deref(&vm.heap)?; - // TODO - tuples, dates, datetimes, errors are all just duck-typed "objects" or "arrays", but we should - // still support them I guess - match arg { - HogLiteral::Number(_) => Ok(HogLiteral::String("number".to_string()).into()), - HogLiteral::Boolean(_) => Ok(HogLiteral::String("boolean".to_string()).into()), - HogLiteral::String(_) => Ok(HogLiteral::String("string".to_string()).into()), - HogLiteral::Array(_) => Ok(HogLiteral::String("array".to_string()).into()), - HogLiteral::Object(_) => Ok(HogLiteral::String("object".to_string()).into()), - HogLiteral::Callable(_) => Ok(HogLiteral::String("function".to_string()).into()), - HogLiteral::Closure(_) => Ok(HogLiteral::String("function".to_string()).into()), - HogLiteral::Null => Ok(HogLiteral::String("null".to_string()).into()), - } - }), - ("values", |vm, args| { - assert_argc(&args, 1, "values")?; - let arg = args[0].deref(&vm.heap)?; - match arg { - HogLiteral::Array(_) => Ok(arg.clone().into()), - HogLiteral::Object(obj) => { - Ok(HogLiteral::Array(obj.values().cloned().collect()).into()) +pub fn stl() -> Vec<(String, NativeFunction)> { + [ + ( + "toString", + native_func(|vm, args| { + // Can't just use a ToString trait implementation, because ToString requires heap access to chase + // references in arrays and dicts + assert_argc(&args, 1, "toString")?; + to_string(&vm.heap, &args[0], 0).map(|s| HogLiteral::String(s).into()) + }), + ), + ( + "typeof", + native_func(|vm, args| { + assert_argc(&args, 1, "typeof")?; + let arg = args[0].deref(&vm.heap)?; + // TODO - tuples, dates, datetimes, errors are all just duck-typed "objects" or "arrays", but we should + // still support them I guess + match arg { + HogLiteral::Number(_) => Ok(HogLiteral::String("number".to_string()).into()), + HogLiteral::Boolean(_) => Ok(HogLiteral::String("boolean".to_string()).into()), + HogLiteral::String(_) => Ok(HogLiteral::String("string".to_string()).into()), + HogLiteral::Array(_) => Ok(HogLiteral::String("array".to_string()).into()), + HogLiteral::Object(_) => Ok(HogLiteral::String("object".to_string()).into()), + HogLiteral::Callable(_) => { + Ok(HogLiteral::String("function".to_string()).into()) + } + HogLiteral::Closure(_) => Ok(HogLiteral::String("function".to_string()).into()), + HogLiteral::Null => Ok(HogLiteral::String("null".to_string()).into()), } - _ => Err(VmError::NativeCallFailed( - "values() only supports arrays and objects".to_string(), - )), - } - }), - ("length", |vm, args| { - assert_argc(&args, 1, "length")?; - let arg = args[0].deref(&vm.heap)?; - match arg { - HogLiteral::Array(arr) => Ok(HogLiteral::Number(arr.len().into()).into()), - HogLiteral::Object(obj) => Ok(HogLiteral::Number(obj.len().into()).into()), - HogLiteral::String(str) => Ok(HogLiteral::Number(str.len().into()).into()), - _ => Err(VmError::NativeCallFailed( - "length() only supports arrays, objects and strings".to_string(), - )), - } - }), - ("arrayPushBack", |vm, args| { - // notably, due to all native functions being pure, we don't mutate these arrays in place - assert_argc(&args, 2, "arrayPushBack")?; - let array = args[0].deref(&vm.heap)?; - let value = args[1].clone(); - match array { - HogLiteral::Array(arr) => { - let mut arr = arr.clone(); - arr.push(value); - Ok(HogLiteral::Array(arr).into()) + }), + ), + ( + "values", + native_func(|vm, args| { + assert_argc(&args, 1, "values")?; + let arg = args[0].deref(&vm.heap)?; + match arg { + HogLiteral::Array(_) => Ok(arg.clone().into()), + HogLiteral::Object(obj) => { + Ok(HogLiteral::Array(obj.values().cloned().collect()).into()) + } + _ => Err(VmError::NativeCallFailed( + "values() only supports arrays and objects".to_string(), + )), } - _ => Err(VmError::NativeCallFailed( - "arrayPushBack() only supports arrays".to_string(), - )), - } - }), - ("arrayPushFront", |vm, args| { - assert_argc(&args, 2, "arrayPushFront")?; - let array = args[0].deref(&vm.heap)?; - let value = args[1].clone(); - match array { - HogLiteral::Array(arr) => { - let mut arr = arr.clone(); - arr.insert(0, value); - Ok(HogLiteral::Array(arr).into()) + }), + ), + ( + "length", + native_func(|vm, args| { + assert_argc(&args, 1, "length")?; + let arg = args[0].deref(&vm.heap)?; + match arg { + HogLiteral::Array(arr) => Ok(HogLiteral::Number(arr.len().into()).into()), + HogLiteral::Object(obj) => Ok(HogLiteral::Number(obj.len().into()).into()), + HogLiteral::String(str) => Ok(HogLiteral::Number(str.len().into()).into()), + _ => Err(VmError::NativeCallFailed( + "length() only supports arrays, objects and strings".to_string(), + )), } - _ => Err(VmError::NativeCallFailed( - "arrayPushFront() only supports arrays".to_string(), - )), - } - }), - ("arrayPopBack", |vm, args| { - assert_argc(&args, 1, "arrayPopBack")?; - let array = args[0].deref(&vm.heap)?; - match array { - HogLiteral::Array(arr) => { - let mut arr = arr.clone(); - arr.pop(); - Ok(HogLiteral::Array(arr).into()) + }), + ), + ( + "arrayPushBack", + native_func(|vm, args| { + // notably, due to all native functions being pure, we don't mutate these arrays in place + assert_argc(&args, 2, "arrayPushBack")?; + let array = args[0].deref(&vm.heap)?; + let value = args[1].clone(); + match array { + HogLiteral::Array(arr) => { + let mut arr = arr.clone(); + arr.push(value); + Ok(HogLiteral::Array(arr).into()) + } + _ => Err(VmError::NativeCallFailed( + "arrayPushBack() only supports arrays".to_string(), + )), } - _ => Err(VmError::NativeCallFailed( - "arrayPopBack() only supports arrays".to_string(), - )), - } - }), - ("arrayPopFront", |vm, args| { - assert_argc(&args, 1, "arrayPopFront")?; - let array = args[0].deref(&vm.heap)?; - match array { - HogLiteral::Array(arr) => { - let mut arr = arr.clone(); - // TODO - lol, lmao. This is silly, google the right function to actually use - arr.reverse(); - arr.pop(); - arr.reverse(); - Ok(HogLiteral::Array(arr).into()) + }), + ), + ( + "arrayPushFront", + native_func(|vm, args| { + assert_argc(&args, 2, "arrayPushFront")?; + let array = args[0].deref(&vm.heap)?; + let value = args[1].clone(); + match array { + HogLiteral::Array(arr) => { + let mut arr = arr.clone(); + arr.insert(0, value); + Ok(HogLiteral::Array(arr).into()) + } + _ => Err(VmError::NativeCallFailed( + "arrayPushFront() only supports arrays".to_string(), + )), } - _ => Err(VmError::NativeCallFailed( - "arrayPopFront() only supports arrays".to_string(), - )), - } - }), - ("arraySort", |vm, args| { - assert_argc(&args, 1, "arraySort")?; - let array = args[0].deref(&vm.heap)?; - match array { - HogLiteral::Array(arr) => { - let (vals, errs): (Vec<_>, Vec<_>) = arr - .iter() - .map(|v| v.deref(&vm.heap).and_then(|v| v.try_as::()).cloned()) - .partition(Result::is_ok); - if errs.is_empty() { - let mut vals = vals.into_iter().map(|v| v.unwrap()).collect::>(); - vals.sort_unstable_by(|a, b| a.compare(b)); - Ok(HogLiteral::Array(vals.into_iter().map(|v| v.into()).collect()).into()) - } else { - Err(VmError::NativeCallFailed( - "arraySort() only supports arrays of numbers".to_string(), - )) + }), + ), + ( + "arrayPopBack", + native_func(|vm, args| { + assert_argc(&args, 1, "arrayPopBack")?; + let array = args[0].deref(&vm.heap)?; + match array { + HogLiteral::Array(arr) => { + let mut arr = arr.clone(); + arr.pop(); + Ok(HogLiteral::Array(arr).into()) } + _ => Err(VmError::NativeCallFailed( + "arrayPopBack() only supports arrays".to_string(), + )), } - _ => Err(VmError::NativeCallFailed( - "arraySort() only supports arrays".to_string(), - )), - } - }), - ("arrayReverse", |vm, args| { - assert_argc(&args, 1, "arrayReverse")?; - let array = args[0].deref(&vm.heap)?; - match array { - HogLiteral::Array(arr) => { - let mut arr = arr.clone(); - arr.reverse(); - Ok(HogLiteral::Array(arr).into()) + }), + ), + ( + "arrayPopFront", + native_func(|vm, args| { + assert_argc(&args, 1, "arrayPopFront")?; + let array = args[0].deref(&vm.heap)?; + match array { + HogLiteral::Array(arr) => { + let mut arr = arr.clone(); + if !arr.is_empty() { + arr.remove(0); + } + Ok(HogLiteral::Array(arr).into()) + } + _ => Err(VmError::NativeCallFailed( + "arrayPopFront() only supports arrays".to_string(), + )), } - _ => Err(VmError::NativeCallFailed( - "arrayReverse() only supports arrays".to_string(), - )), - } - }), - ("arrayReverseSort", |vm, args| { - assert_argc(&args, 1, "arrayReverseSort")?; - let array = args[0].deref(&vm.heap)?; - match array { - HogLiteral::Array(arr) => { - let (vals, errs): (Vec<_>, Vec<_>) = arr - .iter() - .map(|v| v.deref(&vm.heap).and_then(|v| v.try_as::()).cloned()) - .partition(Result::is_ok); - if errs.is_empty() { - let mut vals = vals.into_iter().map(|v| v.unwrap()).collect::>(); - vals.sort_unstable_by(|a, b| a.compare(b)); - vals.reverse(); - Ok(HogLiteral::Array(vals.into_iter().map(|v| v.into()).collect()).into()) - } else { - Err(VmError::NativeCallFailed( - "arrayReverseSort() only supports arrays of numbers".to_string(), - )) + }), + ), + ( + "arraySort", + native_func(|vm, args| { + assert_argc(&args, 1, "arraySort")?; + let array = args[0].deref(&vm.heap)?; + match array { + HogLiteral::Array(arr) => { + let (vals, errs): (Vec<_>, Vec<_>) = arr + .iter() + .map(|v| v.deref(&vm.heap).and_then(|v| v.try_as::()).cloned()) + .partition(Result::is_ok); + if errs.is_empty() { + let mut vals = vals.into_iter().map(|v| v.unwrap()).collect::>(); + vals.sort_unstable_by(|a, b| a.compare(b)); + Ok( + HogLiteral::Array(vals.into_iter().map(|v| v.into()).collect()) + .into(), + ) + } else { + Err(VmError::NativeCallFailed( + "arraySort() only supports arrays of numbers".to_string(), + )) + } } + _ => Err(VmError::NativeCallFailed( + "arraySort() only supports arrays".to_string(), + )), } - _ => Err(VmError::NativeCallFailed( - "arrayReverseSort() only supports arrays".to_string(), - )), - } - }), - ("arrayStringConcat", |vm, args| { - assert_argc(&args, 2, "arrayStringConcat")?; - let vals = args[0].deref(&vm.heap)?; - let sep = args[1].deref(&vm.heap)?.try_as::()?; - let HogLiteral::Array(vals) = vals else { - return Err(VmError::NativeCallFailed( - "arrayStringConcat() only supports arrays".to_string(), - )); - }; - let mut parts = Vec::with_capacity(vals.len()); - for val in vals.iter() { - parts.push(to_string(&vm.heap, val, 0)?); - } - Ok(HogLiteral::String(parts.join(sep)).into()) - }), - ("has", |vm, args| { - assert_argc(&args, 2, "has")?; - let haystack = &args[0]; - let needle = &args[1]; - haystack.contains(needle, &vm.heap).map(|res| res.into()) - }), - ("indexOf", |vm, args| { - assert_argc(&args, 2, "indexOf")?; - let haystack = &args[0].deref(&vm.heap)?; - let needle = &args[1]; - match haystack { - HogLiteral::Array(vals) => { - for (i, val) in vals.iter().enumerate() { - if *needle.equals(val, &vm.heap)?.try_as()? { - return Ok((i as i64).saturating_add(1).into()); + }), + ), + ( + "arrayReverse", + native_func(|vm, args| { + assert_argc(&args, 1, "arrayReverse")?; + let array = args[0].deref(&vm.heap)?; + match array { + HogLiteral::Array(arr) => { + let mut arr = arr.clone(); + arr.reverse(); + Ok(HogLiteral::Array(arr).into()) + } + _ => Err(VmError::NativeCallFailed( + "arrayReverse() only supports arrays".to_string(), + )), + } + }), + ), + ( + "arrayReverseSort", + native_func(|vm, args| { + assert_argc(&args, 1, "arrayReverseSort")?; + let array = args[0].deref(&vm.heap)?; + match array { + HogLiteral::Array(arr) => { + let (vals, errs): (Vec<_>, Vec<_>) = arr + .iter() + .map(|v| v.deref(&vm.heap).and_then(|v| v.try_as::()).cloned()) + .partition(Result::is_ok); + if errs.is_empty() { + let mut vals = vals.into_iter().map(|v| v.unwrap()).collect::>(); + vals.sort_unstable_by(|a, b| a.compare(b)); + vals.reverse(); + Ok( + HogLiteral::Array(vals.into_iter().map(|v| v.into()).collect()) + .into(), + ) + } else { + Err(VmError::NativeCallFailed( + "arrayReverseSort() only supports arrays of numbers".to_string(), + )) } } - Ok(HogLiteral::Null.into()) + _ => Err(VmError::NativeCallFailed( + "arrayReverseSort() only supports arrays".to_string(), + )), } - _ => Err(VmError::NativeCallFailed( - "indexOf() only supports arrays".to_string(), - )), - } - }), - ("notEmpty", |vm, args| { - assert_argc(&args, 1, "notEmpty")?; - let val = &args[0]; - match val.deref(&vm.heap)? { - HogLiteral::Array(a) => Ok(HogLiteral::Boolean(!a.is_empty()).into()), - HogLiteral::String(s) => Ok(HogLiteral::Boolean(!s.is_empty()).into()), - HogLiteral::Object(o) => Ok(HogLiteral::Boolean(!o.is_empty()).into()), - _ => Err(VmError::NativeCallFailed(format!( - "{} not supported by notEmpty", - val.type_name() - ))), - } - }), - ("match", |vm, args| { - assert_argc(&args, 2, "match")?; - let value = args[0].deref(&vm.heap)?.try_as::()?; - let regex = args[1].deref(&vm.heap)?.try_as::()?; - Ok(HogLiteral::Boolean(regex_match(value, regex, true)?).into()) - }), + }), + ), + ( + "arrayStringConcat", + native_func(|vm, args| { + assert_argc(&args, 2, "arrayStringConcat")?; + let vals = args[0].deref(&vm.heap)?; + let sep = args[1].deref(&vm.heap)?.try_as::()?; + let HogLiteral::Array(vals) = vals else { + return Err(VmError::NativeCallFailed( + "arrayStringConcat() only supports arrays".to_string(), + )); + }; + let mut parts = Vec::with_capacity(vals.len()); + for val in vals.iter() { + parts.push(to_string(&vm.heap, val, 0)?); + } + Ok(HogLiteral::String(parts.join(sep)).into()) + }), + ), + ( + "has", + native_func(|vm, args| { + assert_argc(&args, 2, "has")?; + let haystack = &args[0]; + let needle = &args[1]; + haystack.contains(needle, &vm.heap).map(|res| res.into()) + }), + ), + ( + "indexOf", + native_func(|vm, args| { + assert_argc(&args, 2, "indexOf")?; + let haystack = &args[0].deref(&vm.heap)?; + let needle = &args[1]; + match haystack { + HogLiteral::Array(vals) => { + for (i, val) in vals.iter().enumerate() { + if *needle.equals(val, &vm.heap)?.try_as()? { + return Ok((i as i64).saturating_add(1).into()); + } + } + Ok(HogLiteral::Null.into()) + } + _ => Err(VmError::NativeCallFailed( + "indexOf() only supports arrays".to_string(), + )), + } + }), + ), + ( + "notEmpty", + native_func(|vm, args| { + assert_argc(&args, 1, "notEmpty")?; + let val = &args[0]; + match val.deref(&vm.heap)? { + HogLiteral::Array(a) => Ok(HogLiteral::Boolean(!a.is_empty()).into()), + HogLiteral::String(s) => Ok(HogLiteral::Boolean(!s.is_empty()).into()), + HogLiteral::Object(o) => Ok(HogLiteral::Boolean(!o.is_empty()).into()), + _ => Err(VmError::NativeCallFailed(format!( + "{} not supported by notEmpty", + val.type_name() + ))), + } + }), + ), + ( + "match", + native_func(|vm, args| { + assert_argc(&args, 2, "match")?; + let value = args[0].deref(&vm.heap)?.try_as::()?; + let regex = args[1].deref(&vm.heap)?.try_as::()?; + Ok(HogLiteral::Boolean(regex_match(value, regex, true)?).into()) + }), + ), + ( + "JSONExtract", + native_func(err_to_null(|vm, args| { + assert( + !args.is_empty(), + "JSONExtract requires at least one argument", + )?; + let json = args[0].deref(&vm.heap)?.try_as::()?; + // Technically JSONExtract can be used simply to parse a string as json + let path = if args.len() > 1 { &args[1..] } else { &[] }; + let json: JsonValue = serde_json::from_str(json) + .map_err(|e| VmError::NativeCallFailed(e.to_string()))?; + let res = get_json_nested(&json, path, vm)?; + let Some(res) = res else { + return Ok(HogLiteral::Null.into()); + }; + construct_free_standing(res, 0) + })), + ), ] + .into_iter() + .map(|(name, func)| (name.to_string(), func)) + .collect() } pub fn hog_stl() -> Module { @@ -343,3 +421,17 @@ fn assert_argc(args: &[HogValue], count: usize, name: impl AsRef) -> Result format!("{} takes exactly {} arguments", name.as_ref(), count), ) } + +fn err_to_null( + func: impl Fn(&HogVM, Vec) -> Result, +) -> impl Fn(&HogVM, Vec) -> Result { + move |vm, args| func(vm, args).or(Ok(HogLiteral::Null.into())) +} + +/// Helper to construct a HogVM native function from a closure. +pub fn native_func(func: F) -> NativeFunction +where + F: Fn(&HogVM, Vec) -> Result + 'static, +{ + Box::new(func) +} diff --git a/rust/common/hogvm/src/values.rs b/rust/common/hogvm/src/values.rs index a282057d1606b..4462403745775 100644 --- a/rust/common/hogvm/src/values.rs +++ b/rust/common/hogvm/src/values.rs @@ -1,9 +1,12 @@ use std::{cmp::Ordering, collections::HashMap, fmt::Display, str::FromStr}; +use serde_json::Value as JsonValue; + use crate::{ context::Symbol, error::VmError, memory::{HeapReference, VmHeap}, + vm::MAX_JSON_SERDE_DEPTH, }; #[derive(Debug, Clone, PartialEq)] @@ -614,3 +617,41 @@ impl Display for Closure { write!(f, "closure of {}", self.callable) } } + +/// Construct a free-standing HogValue from a JSON value. This Value is NOT +/// correctly laid out in VM-memory space, and pushing it directly onto the +/// stack is undefined behavior. It's designed for use within native function +/// extensions, where you don't have mutable access to a VM's heap, but still +/// need to construct a HogValue from a JSON value. +/// +/// `ExecutionContext::execute_native_function_call` correctly maps the return +/// value of the native function call to the VM's memory space, making values +/// constructed with this method safe to return from native extensions. +pub fn construct_free_standing(current: JsonValue, depth: usize) -> Result { + if depth > MAX_JSON_SERDE_DEPTH { + return Err(VmError::OutOfResource( + "json->hog deserialization depth".to_string(), + )); + } + + match current { + JsonValue::Null => Ok(HogLiteral::Null.into()), + JsonValue::Bool(b) => Ok(HogLiteral::Boolean(b).into()), + JsonValue::Number(n) => Ok(HogLiteral::Number(n.into()).into()), + JsonValue::String(s) => Ok(HogLiteral::String(s).into()), + JsonValue::Array(arr) => { + let mut values = Vec::new(); + for value in arr { + values.push(construct_free_standing(value, depth + 1)?); + } + Ok(HogLiteral::Array(values).into()) + } + JsonValue::Object(obj) => { + let mut map = HashMap::new(); + for (key, value) in obj { + map.insert(key, construct_free_standing(value, depth + 1)?); + } + Ok(HogLiteral::Object(map).into()) + } + } +} diff --git a/rust/common/hogvm/src/vm.rs b/rust/common/hogvm/src/vm.rs index 00b8374a37d27..5a5be57b4a5f8 100644 --- a/rust/common/hogvm/src/vm.rs +++ b/rust/common/hogvm/src/vm.rs @@ -12,7 +12,7 @@ use crate::{ values::{Callable, Closure, FromHogLiteral, HogLiteral, HogValue, LocalCallable, Num, NumOp}, }; -const MAX_JSON_SERDE_DEPTH: usize = 64; +pub const MAX_JSON_SERDE_DEPTH: usize = 64; /// The outcome of a virtual machine step. #[derive(Debug, Clone)] @@ -121,7 +121,15 @@ impl<'a> HogVM<'a> { self.push_stack(val)?; } else if let Ok(closure) = self.get_fn_reference(&chain) { self.push_stack(closure)?; + } else if get_json_nested(&self.context.globals, &chain[..1], self)?.is_some() { + // If the first element of the chain is a global, push null onto the stack, e.g. + // if a program is looking for "properties.blah", and "properties" exists, but + // "blah" doesn't, push null onto the stack. + self.push_stack(HogLiteral::Null)?; } else { + // But if the first element in the chain didn't exist, this is an error (the mental model here + // comes from SQL, where a missing column is an error, but a missing field in a column is, or + // at least can be, treated as a null value). return Err(VmError::UnknownGlobal(format!("{:?}", chain))); } } diff --git a/rust/common/hogvm/tests/static/test_programs/globals.hog b/rust/common/hogvm/tests/static/test_programs/globals.hog index b84ae597ae85b..e42af4096d32e 100644 --- a/rust/common/hogvm/tests/static/test_programs/globals.hog +++ b/rust/common/hogvm/tests/static/test_programs/globals.hog @@ -18,4 +18,7 @@ assert_eq(a_number, 42) assert_eq(a_null, null) assert_eq(a_nested_object.nested_key, 'nested_value') +// Nested globals not found are treated as nulls, rather than throwing an error. +assert_eq(a_nested_object.an_unknown_global, null) + return true diff --git a/rust/common/hogvm/tests/static/test_programs/globals.hoge b/rust/common/hogvm/tests/static/test_programs/globals.hoge index df21d528f3f9d..5bfcffde25966 100644 --- a/rust/common/hogvm/tests/static/test_programs/globals.hoge +++ b/rust/common/hogvm/tests/static/test_programs/globals.hoge @@ -5,4 +5,5 @@ 2, "assert_eq", 2, 35, 32, "an_array", 1, 1, 33, 1, 33, 2, 33, 3, 43, 3, 2, "assert_eq", 2, 35, 32, "a_string", 1, 1, 32, "Hello, World!", 2, "assert_eq", 2, 35, 32, "a_boolean", 1, 1, 29, 2, "assert_eq", 2, 35, 32, "a_number", 1, 1, 33, 42, 2, "assert_eq", 2, 35, 32, "a_null", 1, 1, 31, 2, "assert_eq", 2, 35, 32, "nested_key", 32, "a_nested_object", 1, 2, -32, "nested_value", 2, "assert_eq", 2, 35, 29, 38] +32, "nested_value", 2, "assert_eq", 2, 35, 32, "an_unknown_global", 32, "a_nested_object", 1, 2, 31, 2, "assert_eq", 2, +35, 29, 38] diff --git a/rust/common/hogvm/tests/static/test_programs/json_extract.hog b/rust/common/hogvm/tests/static/test_programs/json_extract.hog new file mode 100644 index 0000000000000..79558241e63b7 --- /dev/null +++ b/rust/common/hogvm/tests/static/test_programs/json_extract.hog @@ -0,0 +1,15 @@ +let test_data := '{"name": "John", "age": 30, "city": "New York"}' + +let name := JSONExtract(test_data, 'name') +assert_eq(name, 'John'); +let age := JSONExtract(test_data, 'age') +assert_eq(age, 30); +let city := JSONExtract(test_data, 'city') +assert_eq(city, 'New York'); +let unknown := JSONExtract(test_data, 'unknown') +assert_eq(unknown, null); + +// JSONExtract returns null on error +assert_eq(JSONExtract(test_data, 1, 2, 3), null); + +return true diff --git a/rust/common/hogvm/tests/static/test_programs/json_extract.hoge b/rust/common/hogvm/tests/static/test_programs/json_extract.hoge new file mode 100644 index 0000000000000..c59a825ffd982 --- /dev/null +++ b/rust/common/hogvm/tests/static/test_programs/json_extract.hoge @@ -0,0 +1,5 @@ +["_H", 1, 32, "{\"name\": \"John\", \"age\": 30, \"city\": \"New York\"}", 36, 0, 32, "name", 2, "JSONExtract", 2, 36, +1, 32, "John", 2, "assert_eq", 2, 35, 36, 0, 32, "age", 2, "JSONExtract", 2, 36, 2, 33, 30, 2, "assert_eq", 2, 35, 36, +0, 32, "city", 2, "JSONExtract", 2, 36, 3, 32, "New York", 2, "assert_eq", 2, 35, 36, 0, 32, "unknown", 2, +"JSONExtract", 2, 36, 4, 31, 2, "assert_eq", 2, 35, 36, 0, 33, 1, 33, 2, 33, 3, 2, "JSONExtract", 4, 31, 2, "assert_eq", +2, 35, 29, 38, 35, 35, 35, 35, 35] diff --git a/rust/common/hogvm/tests/vm.rs b/rust/common/hogvm/tests/vm.rs index 73082b78ab121..59b49787cdccf 100644 --- a/rust/common/hogvm/tests/vm.rs +++ b/rust/common/hogvm/tests/vm.rs @@ -1,44 +1,51 @@ use std::collections::HashMap; -use hogvm::{sync_execute, ExecutionContext, HogLiteral, NativeFunction, Program}; +use hogvm::{native_func, sync_execute, ExecutionContext, HogLiteral, NativeFunction, Program}; use serde_json::{json, Value}; -const fn stl_test_extensions() -> &'static [(&'static str, NativeFunction)] { - &[ - ("print", |_, args| { - println!("{:?}", args); - Ok(HogLiteral::Null.into()) - }), - ("assert_eq", |vm, args| { - // Used in test programs - let lhs = args.first().unwrap(); - let rhs = args.get(1).unwrap(); - if lhs - .equals(rhs, &vm.heap) - .expect("Could compare") - .try_into() - .expect("Could convert") - { +fn stl_test_extensions() -> HashMap { + [ + ( + "print", + native_func(|_, args| { + println!("{:?}", args); Ok(HogLiteral::Null.into()) - } else { - panic!("{:?} did not equal {:?}", lhs, rhs) - } - }), - ("assert", |vm, args| { - // Used in test programs - let condition = args.first().unwrap().deref(&vm.heap).unwrap(); - if *condition.try_as().expect("Could convert") { - Ok(HogLiteral::Null.into()) - } else { - panic!("Assertion failed") - } - }), + }), + ), + ( + "assert_eq", + native_func(|vm, args| { + // Used in test programs + let lhs = args.first().unwrap(); + let rhs = args.get(1).unwrap(); + if lhs + .equals(rhs, &vm.heap) + .expect("Could compare") + .try_into() + .expect("Could convert") + { + Ok(HogLiteral::Null.into()) + } else { + panic!("{:?} did not equal {:?}", lhs, rhs) + } + }), + ), + ( + "assert", + native_func(|vm, args| { + // Used in test programs + let condition = args.first().unwrap().deref(&vm.heap).unwrap(); + if *condition.try_as().expect("Could convert") { + Ok(HogLiteral::Null.into()) + } else { + panic!("Assertion failed") + } + }), + ), ] -} - -// This could maybe be moved to the stl module, it seems useful -fn to_extension(ext: &'static [(&'static str, NativeFunction)]) -> HashMap { - ext.iter().map(|(a, b)| (a.to_string(), *b)).collect() + .into_iter() + .map(|(name, func)| (name.to_string(), func)) + .collect() } fn load_test_programs() -> Vec<(String, String)> { @@ -90,7 +97,7 @@ pub fn test_vm() { let parsed: Vec = serde_json::from_str(&code).unwrap(); let program = Program::new(parsed).unwrap(); let ctx = ExecutionContext::with_defaults(program) - .with_ext_fns(to_extension(stl_test_extensions())) + .with_ext_fns(stl_test_extensions()) .with_globals(test_globals()); let res = sync_execute(&ctx, false); println!("{:?}", res);