From cb60059a0eedc120e1f346fc18ba0aafebdc448e Mon Sep 17 00:00:00 2001
From: Andrey Arutiunian <andreylzmw@gmail.com>
Date: Sat, 8 Apr 2023 02:44:43 +0300
Subject: [PATCH 01/27] move -Wno-redundant-move from init-compilation-flags.cmake to external-libraries.cmake

---
 cmake/external-libraries.cmake     | 2 ++
 cmake/init-compilation-flags.cmake | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/cmake/external-libraries.cmake b/cmake/external-libraries.cmake
index 44a6734f28..4ec35b9cb3 100644
--- a/cmake/external-libraries.cmake
+++ b/cmake/external-libraries.cmake
@@ -54,6 +54,8 @@ else()
     add_link_options(-L${kphp-timelib_SOURCE_DIR}/objs)
 endif()
 
+add_compile_options(-Wno-redundant-move)
+
 if(APPLE)
     if (DEFINED ENV{EPOLL_SHIM_REPO})
         FetchContent_Declare(
diff --git a/cmake/init-compilation-flags.cmake b/cmake/init-compilation-flags.cmake
index 5d62ceabe6..a3273c84fd 100644
--- a/cmake/init-compilation-flags.cmake
+++ b/cmake/init-compilation-flags.cmake
@@ -118,7 +118,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
 endif()
 
 add_compile_options(-Werror -Wall -Wextra -Wunused-function -Wfloat-conversion -Wno-sign-compare
-                    -Wuninitialized -Wno-redundant-move -Wno-missing-field-initializers)
+                    -Wuninitialized -Wno-missing-field-initializers)
 
 if(NOT APPLE)
     check_cxx_compiler_flag(-gz=zlib DEBUG_COMPRESSION_IS_FOUND)

From 4c156df3341e827e416ac216b59138d51bce6df0 Mon Sep 17 00:00:00 2001
From: Andrey Arutiunian <andreylzmw@gmail.com>
Date: Mon, 10 Apr 2023 23:33:59 +0300
Subject: [PATCH 02/27] add mb_convert_encoding based on libmbfl; drop p75/p90 percentiles from server stats

---
 runtime/mbstring/mbstring.cpp | 33 +++++++++++++++++++++++++++++++++
 runtime/mbstring/mbstring.h   | 19 +++++++++++++++++++
 server/server-stats.cpp       |  8 --------
 3 files changed, 52 insertions(+), 8 deletions(-)
 create mode 100644 runtime/mbstring/mbstring.cpp
 create mode 100644 runtime/mbstring/mbstring.h

diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
new file mode 100644
index 0000000000..a73c42e83c
--- /dev/null
+++ b/runtime/mbstring/mbstring.cpp
@@ -0,0 +1,33 @@
+#include "mbstring.h"
+
+string f$mb_convert_encoding(const string &str, const string &to, const string &from) {
+
+	/* preparing */
+	const char *c_str = str.c_str();
+	const char *c_from = from.c_str();
+	const char *c_to = to.c_str();
+	enum mbfl_no_encoding from_encoding, to_encoding;
+	mbfl_buffer_converter *convd = NULL;
+	mbfl_string tmp, result, *ret;
+
+	/* from internal to mbfl */
+	from_encoding = mbfl_name2no_encoding(c_from);
+	to_encoding = mbfl_name2no_encoding(c_to);
+
+	/* init buffer mbfl strings */
+	long int len = strlen(c_str);
+	mbfl_string_init(&tmp);
+	mbfl_string_init(&result);
+	tmp.no_encoding = from_encoding;
+	tmp.len = len;
+	tmp.val = (unsigned char*)c_str;
+
+	/* converting */
+	convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
+	ret = mbfl_buffer_converter_feed_result(convd, &tmp, &result);
+	mbfl_buffer_converter_delete(convd);
+
+	/* returning kphp's string */
+	string res((const char*)ret->val, strlen((const char*)ret->val));
+	return res;
+}
\ No newline at end of file
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
new file mode 100644
index 0000000000..87f2d1c53a
--- /dev/null
+++ b/runtime/mbstring/mbstring.h
@@ -0,0 +1,19 @@
+#pragma once
+
+extern "C" {
+	#include <libmbfl/mbfl/mbfilter.h>
+}
+
+#include "runtime/kphp_core.h"
+
+/**
+ * Convert a string from one character encoding to another
+ * @param str The string to be converted
+ * @param from The desired encoding of the result
+ * @param to The current encoding used to interpret string
+ * @return The encoded string
+ * TODO!: mb_check_encoding(str, from) inside
+ * TODO!: own constants for encodings
+ * TODO: issue for timelib
+ */
+string f$mb_convert_encoding(const string &str, const string &to, const string &from);
\ No newline at end of file
diff --git a/server/server-stats.cpp b/server/server-stats.cpp
index 6406e09ed7..a45fcf1f4f 100644
--- a/server/server-stats.cpp
+++ b/server/server-stats.cpp
@@ -143,8 +143,6 @@ struct EnumTable : std::array<T, static_cast<size_t>(E::Key::types_count)> {
 template<class T>
 struct Percentiles {
   T p50{};
-  T p75{};
-  T p90{};
   T p95{};
   T p99{};
   T max{};
@@ -154,8 +152,6 @@ struct Percentiles {
   void update_percentiles(I first, I last, const Mapper &mapper = {}) noexcept {
     const auto size = last - first;
     set_percentile<50>(p50, first, size, mapper);
-    set_percentile<75>(p75, first, size, mapper);
-    set_percentile<90>(p90, first, size, mapper);
     set_percentile<95>(p95, first, size, mapper);
     set_percentile<99>(p99, first, size, mapper);
     set_percentile<100>(max, first, size, mapper);
@@ -702,8 +698,6 @@ template<typename T, typename Mapper = vk::identity>
 void write_to(stats_t *stats, const char *prefix, const char *suffix, const AggregatedSamples<T> &samples, const Mapper &mapper = {}) {
   if (stats->need_aggregated_stats()) {
     stats->add_gauge_stat(mapper(samples.percentiles.p50), prefix, suffix, ".p50");
-    stats->add_gauge_stat(mapper(samples.percentiles.p75), prefix, suffix, ".p75");
-    stats->add_gauge_stat(mapper(samples.percentiles.p90), prefix, suffix, ".p90");
     stats->add_gauge_stat(mapper(samples.percentiles.p95), prefix, suffix, ".p95");
     stats->add_gauge_stat(mapper(samples.percentiles.p99), prefix, suffix, ".p99");
     stats->add_gauge_stat(mapper(samples.percentiles.max), prefix, suffix, ".max");
@@ -714,8 +708,6 @@ template<typename T, typename Mapper = vk::identity>
 void write_to(stats_t *stats, const char *prefix, const char *suffix, const WorkerSamples<T> &samples, const Mapper &mapper = {}) {
   if (stats->need_aggregated_stats()) {
     stats->add_gauge_stat(mapper(samples.percentiles.p50), prefix, suffix, ".p50");
-    stats->add_gauge_stat(mapper(samples.percentiles.p75), prefix, suffix, ".p75");
-    stats->add_gauge_stat(mapper(samples.percentiles.p90), prefix, suffix, ".p90");
     stats->add_gauge_stat(mapper(samples.percentiles.p95), prefix, suffix, ".p95");
     stats->add_gauge_stat(mapper(samples.percentiles.p99), prefix, suffix, ".p99");
     stats->add_gauge_stat(mapper(samples.percentiles.max), prefix, suffix, ".max");

From 8121ae947f1174013a0294e61cd65e71cb73c944 Mon Sep 17 00:00:00 2001
From: Andrey Arutiunian <andreylzmw@gmail.com>
Date: Mon, 17 Apr 2023 14:57:23 +0300
Subject: [PATCH 03/27] build libmbfl from source

---
 builtin-functions/_functions.txt |   2 +
 cmake/external-libraries.cmake   |  15 ++
 compiler/compiler-settings.cpp   |  11 +-
 runtime/interface.cpp            |   1 -
 runtime/mbstring.cpp             | 341 +------------------------------
 runtime/mbstring.h               |  20 +-
 runtime/mbstring/mbstring.cpp    |   4 +-
 runtime/mbstring/mbstring.h      |   7 +-
 runtime/runtime.cmake            |  13 ++
 9 files changed, 49 insertions(+), 365 deletions(-)

diff --git a/builtin-functions/_functions.txt b/builtin-functions/_functions.txt
index 9e6e139f39..c1c2616ff2 100644
--- a/builtin-functions/_functions.txt
+++ b/builtin-functions/_functions.txt
@@ -1620,3 +1620,5 @@ class DateTimeImmutable implements DateTimeInterface {
 }
 
 function getenv(string $varname = '', bool $local_only = false): mixed;
+
+function mb_convert_encoding(string $str, string $to, string $from): string;
diff --git a/cmake/external-libraries.cmake b/cmake/external-libraries.cmake
index 44a6734f28..70a03bcd36 100644
--- a/cmake/external-libraries.cmake
+++ b/cmake/external-libraries.cmake
@@ -1,5 +1,7 @@
 option(DOWNLOAD_MISSING_LIBRARIES "download and build missing libraries if needed" OFF)
+option(MBFL, OFF)
 cmake_print_variables(DOWNLOAD_MISSING_LIBRARIES)
+cmake_print_variables(MBFL)
 function(handle_missing_library LIB_NAME)
     message(STATUS "------${LIB_NAME}---------")
     if(DOWNLOAD_MISSING_LIBRARIES)
@@ -54,6 +56,19 @@ else()
     add_link_options(-L${kphp-timelib_SOURCE_DIR}/objs)
 endif()
 
+if(MBFL)
+#     add_library(libmbfl STATIC IMPORTED ${MBFL})
+# else()
+    add_compile_options(-Wno-unused-parameter -Wno-logical-op-parentheses -Wno-unused-variable -Wno-return-type -Wno-unused-function)
+    handle_missing_library("libmbfl")
+    FetchContent_Declare(libmbfl GIT_REPOSITORY https://github.com/andreylzmw/libmbfl)
+    message(STATUS "---------------------")
+    FetchContent_MakeAvailable(libmbfl)
+    include_directories(${libmbfl_SOURCE_DIR}/include)
+    add_definitions(-DLIBMBFL_LIB_DIR="${libmbfl_SOURCE_DIR}/objs")
+    add_link_options(-L${libmbfl_SOURCE_DIR}/objs)
+endif()
+
 if(APPLE)
     if (DEFINED ENV{EPOLL_SHIM_REPO})
         FetchContent_Declare(
diff --git a/compiler/compiler-settings.cpp b/compiler/compiler-settings.cpp
index 3733401f06..b6cc784042 100644
--- a/compiler/compiler-settings.cpp
+++ b/compiler/compiler-settings.cpp
@@ -320,7 +320,7 @@ void CompilerSettings::init() {
   ld_flags.value_ = extra_ld_flags.get();
   append_curl(cxx_default_flags, ld_flags.value_);
   append_apple_options(cxx_default_flags, ld_flags.value_);
-  std::vector<vk::string_view> external_static_libs{"pcre", "re2", "yaml-cpp", "h3", "z", "zstd", "nghttp2", "kphp-timelib"};
+  std::vector<vk::string_view> external_static_libs{"pcre", "re2", "yaml-cpp", "h3", "z", "zstd", "nghttp2", "kphp-timelib", "libmbfl"};
 
 #ifdef KPHP_TIMELIB_LIB_DIR
   ld_flags.value_ += " -L" KPHP_TIMELIB_LIB_DIR;
@@ -331,6 +331,15 @@ void CompilerSettings::init() {
   ld_flags.value_ += " -L /usr/local/lib";
 #endif
 
+#ifdef LIBMBFL_LIB_DIR
+  ld_flags.value_ += " -L" LIBMBFL_LIB_DIR;
+#else
+  // kphp-timelib is usually installed in /usr/local/lib;
+  // LDD may not find a library in /usr/local/lib if we don't add it here
+  // TODO: can we avoid this hardcoded library path?
+  ld_flags.value_ += " -L /usr/local/lib";
+#endif
+
 #if defined(__APPLE__) && defined(__arm64__)
   // for development under M1, manual installation of libucontext is needed
   // see the docs: https://vkcom.github.io/kphp/kphp-internals/developing-and-extending-kphp/compiling-kphp-from-sources.html
diff --git a/runtime/interface.cpp b/runtime/interface.cpp
index 2837919e53..c48e2e9a27 100644
--- a/runtime/interface.cpp
+++ b/runtime/interface.cpp
@@ -2371,7 +2371,6 @@ static void free_runtime_libs() {
   free_kphp_backtrace();
 
   free_migration_php8();
-  free_detect_incorrect_encoding_names();
 
   vk::singleton<JsonLogger>::get().reset_buffers();
 #ifdef PDO_DRIVER_MYSQL
diff --git a/runtime/mbstring.cpp b/runtime/mbstring.cpp
index 8fa5a03be6..bbf8231b47 100644
--- a/runtime/mbstring.cpp
+++ b/runtime/mbstring.cpp
@@ -7,86 +7,6 @@
 #include "common/unicode/unicode-utils.h"
 #include "common/unicode/utf8-utils.h"
 
-static bool is_detect_incorrect_encoding_names_warning{false};
-
-void f$set_detect_incorrect_encoding_names_warning(bool show) {
-  is_detect_incorrect_encoding_names_warning = show;
-}
-
-void free_detect_incorrect_encoding_names() {
-  is_detect_incorrect_encoding_names_warning = false;
-}
-
-static int mb_detect_encoding_new(const string &encoding) {
-  const auto encoding_name = f$strtolower(encoding).c_str();
-
-  if (!strcmp(encoding_name, "cp1251") || !strcmp(encoding_name, "cp-1251") || !strcmp(encoding_name, "windows-1251")) {
-    return 1251;
-  }
-
-  if (!strcmp(encoding_name, "utf8") || !strcmp(encoding_name, "utf-8")) {
-    return 8;
-  }
-
-  return -1;
-}
-
-static int mb_detect_encoding(const string &encoding) {
-  const int result_new = mb_detect_encoding_new(encoding);
-
-  if (strstr(encoding.c_str(), "1251")) {
-    if (is_detect_incorrect_encoding_names_warning && 1251 != result_new) {
-      php_warning("mb_detect_encoding returns 1251, but new will return %d, encoding %s", result_new, encoding.c_str());
-    }
-    return 1251;
-  }
-  if (strstr(encoding.c_str(), "-8")) {
-    if (is_detect_incorrect_encoding_names_warning && 8 != result_new) {
-      php_warning("mb_detect_encoding returns 8, but new will return %d, encoding %s", result_new, encoding.c_str());
-    }
-    return 8;
-  }
-
-  if (is_detect_incorrect_encoding_names_warning && -1 != result_new) {
-    php_warning("mb_detect_encoding returns -1, but new will return %d, encoding %s", result_new, encoding.c_str());
-  }
-  return -1;
-}
-
-static int64_t mb_UTF8_strlen(const char *s) {
-  int64_t res = 0;
-  for (int64_t i = 0; s[i]; i++) {
-    if ((((unsigned char)s[i]) & 0xc0) != 0x80) {
-      res++;
-    }
-  }
-  return res;
-}
-
-static int64_t mb_UTF8_advance(const char *s, int64_t cnt) {
-  php_assert (cnt >= 0);
-  int64_t i;
-  for (i = 0; s[i] && cnt >= 0; i++) {
-    if ((((unsigned char)s[i]) & 0xc0) != 0x80) {
-      cnt--;
-    }
-  }
-  if (cnt < 0) {
-    i--;
-  }
-  return i;
-}
-
-static int64_t mb_UTF8_get_offset(const char *s, int64_t pos) {
-  int64_t res = 0;
-  for (int64_t i = 0; i < pos && s[i]; i++) {
-    if ((((unsigned char)s[i]) & 0xc0) != 0x80) {
-      res++;
-    }
-  }
-  return res;
-}
-
 bool mb_UTF8_check(const char *s) {
   do {
 #define CHECK(condition) if (!(condition)) {return false;}
@@ -128,263 +48,4 @@ bool mb_UTF8_check(const char *s) {
   } while (true);
 
   php_assert (0);
-}
-
-bool f$mb_check_encoding(const string &str, const string &encoding) {
-  int encoding_num = mb_detect_encoding(encoding);
-  if (encoding_num < 0) {
-    php_critical_error ("encoding \"%s\" doesn't supported in mb_check_encoding", encoding.c_str());
-    return !str.empty();
-  }
-
-  if (encoding_num == 1251) {
-    return true;
-  }
-
-  return mb_UTF8_check(str.c_str());
-}
-
-
-int64_t f$mb_strlen(const string &str, const string &encoding) {
-  int encoding_num = mb_detect_encoding(encoding);
-  if (encoding_num < 0) {
-    php_critical_error ("encoding \"%s\" doesn't supported in mb_strlen", encoding.c_str());
-    return str.size();
-  }
-
-  if (encoding_num == 1251) {
-    return str.size();
-  }
-
-  return mb_UTF8_strlen(str.c_str());
-}
-
-
-string f$mb_strtolower(const string &str, const string &encoding) {
-  int encoding_num = mb_detect_encoding(encoding);
-  if (encoding_num < 0) {
-    php_critical_error ("encoding \"%s\" doesn't supported in mb_strtolower", encoding.c_str());
-    return str;
-  }
-
-  int len = str.size();
-  if (encoding_num == 1251) {
-    string res(len, false);
-    for (int i = 0; i < len; i++) {
-      switch ((unsigned char)str[i]) {
-        case 'A' ... 'Z':
-          res[i] = (char)(str[i] + 'a' - 'A');
-          break;
-        case 0xC0 ... 0xDF:
-          res[i] = (char)(str[i] + 32);
-          break;
-        case 0x81:
-          res[i] = (char)0x83;
-          break;
-        case 0xA3:
-          res[i] = (char)0xBC;
-          break;
-        case 0xA5:
-          res[i] = (char)0xB4;
-          break;
-        case 0xA1:
-        case 0xB2:
-        case 0xBD:
-          res[i] = (char)(str[i] + 1);
-          break;
-        case 0x80:
-        case 0x8A:
-        case 0x8C ... 0x8F:
-        case 0xA8:
-        case 0xAA:
-        case 0xAF:
-          res[i] = (char)(str[i] + 16);
-          break;
-        default:
-          res[i] = str[i];
-      }
-    }
-
-    return res;
-  } else {
-    string res(len * 3, false);
-    const char *s = str.c_str();
-    int res_len = 0;
-    int p;
-    int ch;
-    while ((p = get_char_utf8(&ch, s)) > 0) {
-      s += p;
-      res_len += put_char_utf8(unicode_tolower(ch), &res[res_len]);
-    }
-    if (p < 0) {
-      php_warning("Incorrect UTF-8 string \"%s\" in function mb_strtolower", str.c_str());
-    }
-    res.shrink(res_len);
-
-    return res;
-  }
-}
-
-string f$mb_strtoupper(const string &str, const string &encoding) {
-  int encoding_num = mb_detect_encoding(encoding);
-  if (encoding_num < 0) {
-    php_critical_error ("encoding \"%s\" doesn't supported in mb_strtoupper", encoding.c_str());
-    return str;
-  }
-
-  int len = str.size();
-  if (encoding_num == 1251) {
-    string res(len, false);
-    for (int i = 0; i < len; i++) {
-      switch ((unsigned char)str[i]) {
-        case 'a' ... 'z':
-          res[i] = (char)(str[i] + 'A' - 'a');
-          break;
-        case 0xE0 ... 0xFF:
-          res[i] = (char)(str[i] - 32);
-          break;
-        case 0x83:
-          res[i] = (char)(0x81);
-          break;
-        case 0xBC:
-          res[i] = (char)(0xA3);
-          break;
-        case 0xB4:
-          res[i] = (char)(0xA5);
-          break;
-        case 0xA2:
-        case 0xB3:
-        case 0xBE:
-          res[i] = (char)(str[i] - 1);
-          break;
-        case 0x98:
-        case 0xA0:
-        case 0xAD:
-          res[i] = ' ';
-          break;
-        case 0x90:
-        case 0x9A:
-        case 0x9C ... 0x9F:
-        case 0xB8:
-        case 0xBA:
-        case 0xBF:
-          res[i] = (char)(str[i] - 16);
-          break;
-        default:
-          res[i] = str[i];
-      }
-    }
-
-    return res;
-  } else {
-    string res(len * 3, false);
-    const char *s = str.c_str();
-    int res_len = 0;
-    int p;
-    int ch;
-    while ((p = get_char_utf8(&ch, s)) > 0) {
-      s += p;
-      res_len += put_char_utf8(unicode_toupper(ch), &res[res_len]);
-    }
-    if (p < 0) {
-      php_warning("Incorrect UTF-8 string \"%s\" in function mb_strtoupper", str.c_str());
-    }
-    res.shrink(res_len);
-
-    return res;
-  }
-}
-
-namespace {
-
-int check_strpos_agrs(const char *func_name, const string &needle, int64_t offset, const string &encoding) noexcept {
-  if (unlikely(offset < 0)) {
-    php_warning("Wrong offset = %" PRIi64 " in function %s()", offset, func_name);
-    return 0;
-  }
-  if (unlikely(needle.empty())) {
-    php_warning("Parameter needle is empty in function %s()", func_name);
-    return 0;
-  }
-
-  const int encoding_num = mb_detect_encoding(encoding);
-  if (unlikely(encoding_num < 0)) {
-    php_critical_error ("encoding \"%s\" doesn't supported in %s()", encoding.c_str(), func_name);
-    return 0;
-  }
-  return encoding_num;
-}
-
-Optional<int64_t> mp_strpos_impl(const string &haystack, const string &needle, int64_t offset, int encoding_num) noexcept {
-  if (encoding_num == 1251) {
-    return f$strpos(haystack, needle, offset);
-  }
-
-  int64_t UTF8_offset = mb_UTF8_advance(haystack.c_str(), offset);
-  const char *s = static_cast<const char *>(memmem(haystack.c_str() + UTF8_offset, haystack.size() - UTF8_offset, needle.c_str(), needle.size()));
-  if (unlikely(s == nullptr)) {
-    return false;
-  }
-  return mb_UTF8_get_offset(haystack.c_str() + UTF8_offset, s - (haystack.c_str() + UTF8_offset)) + offset;
-}
-
-} // namespace
-
-Optional<int64_t> f$mb_strpos(const string &haystack, const string &needle, int64_t offset, const string &encoding) noexcept {
-  if (const int encoding_num = check_strpos_agrs("mb_strpos", needle, offset, encoding)) {
-    return mp_strpos_impl(haystack, needle, offset, encoding_num);
-  }
-  return false;
-}
-
-Optional<int64_t> f$mb_stripos(const string &haystack, const string &needle, int64_t offset, const string &encoding) noexcept {
-  if (const int encoding_num = check_strpos_agrs("mb_stripos", needle, offset, encoding)) {
-    return mp_strpos_impl(f$mb_strtolower(haystack, encoding), f$mb_strtolower(needle, encoding), offset, encoding_num);
-  }
-  return false;
-}
-
-string f$mb_substr(const string &str, int64_t start, const mixed &length_var, const string &encoding) {
-  int encoding_num = mb_detect_encoding(encoding);
-  if (encoding_num < 0) {
-    php_critical_error ("encoding \"%s\" doesn't supported in mb_substr", encoding.c_str());
-    return str;
-  }
-
-  int64_t length;
-  if (length_var.is_null()) {
-    length = std::numeric_limits<int64_t>::max();
-  } else {
-    length = length_var.to_int();
-  }
-
-  if (encoding_num == 1251) {
-    Optional<string> res = f$substr(str, start, length);
-    if (!res.has_value()) {
-      return {};
-    }
-    return res.val();
-  }
-
-  int64_t len = mb_UTF8_strlen(str.c_str());
-  if (start < 0) {
-    start += len;
-  }
-  if (start > len) {
-    start = len;
-  }
-  if (length < 0) {
-    length = len - start + length;
-  }
-  if (length <= 0 || start < 0) {
-    return {};
-  }
-  if (len - start < length) {
-    length = len - start;
-  }
-
-  int64_t UTF8_start = mb_UTF8_advance(str.c_str(), start);
-  int64_t UTF8_length = mb_UTF8_advance(str.c_str() + UTF8_start, length);
-
-  return {str.c_str() + UTF8_start, static_cast<string::size_type>(UTF8_length)};
-}
+}
\ No newline at end of file
diff --git a/runtime/mbstring.h b/runtime/mbstring.h
index 9685f4be76..be9aef5b0c 100644
--- a/runtime/mbstring.h
+++ b/runtime/mbstring.h
@@ -9,22 +9,4 @@
 #include "runtime/kphp_core.h"
 #include "runtime/string_functions.h"
 
-bool mb_UTF8_check(const char *s);
-
-bool f$mb_check_encoding(const string &str, const string &encoding = CP1251);
-
-int64_t f$mb_strlen(const string &str, const string &encoding = CP1251);
-
-string f$mb_strtolower(const string &str, const string &encoding = CP1251);
-
-string f$mb_strtoupper(const string &str, const string &encoding = CP1251);
-
-Optional<int64_t> f$mb_strpos(const string &haystack, const string &needle, int64_t offset = 0, const string &encoding = CP1251) noexcept;
-
-Optional<int64_t> f$mb_stripos(const string &haystack, const string &needle, int64_t offset = 0, const string &encoding = CP1251) noexcept;
-
-string f$mb_substr(const string &str, int64_t start, const mixed &length = std::numeric_limits<int64_t>::max(), const string &encoding = CP1251);
-
-void f$set_detect_incorrect_encoding_names_warning(bool show);
-
-void free_detect_incorrect_encoding_names();
+bool mb_UTF8_check(const char *s);
\ No newline at end of file
diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
index a73c42e83c..52cff09775 100644
--- a/runtime/mbstring/mbstring.cpp
+++ b/runtime/mbstring/mbstring.cpp
@@ -6,6 +6,7 @@ string f$mb_convert_encoding(const string &str, const string &to, const string &
 	const char *c_str = str.c_str();
 	const char *c_from = from.c_str();
 	const char *c_to = to.c_str();
+
 	enum mbfl_no_encoding from_encoding, to_encoding;
 	mbfl_buffer_converter *convd = NULL;
 	mbfl_string tmp, result, *ret;
@@ -28,6 +29,5 @@ string f$mb_convert_encoding(const string &str, const string &to, const string &
 	mbfl_buffer_converter_delete(convd);
 
 	/* returning kphp's string */
-	string res((const char*)ret->val, strlen((const char*)ret->val));
-	return res;
+	return string((const char*)ret->val, ret->len);
 }
\ No newline at end of file
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index 87f2d1c53a..3668a6fda0 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -1,11 +1,14 @@
 #pragma once
 
+#include "runtime/kphp_core.h"
+
 extern "C" {
+	// FIXME
+	// #include <kphp/libmbfl/mbfl/mbfilter.h>
+	// #include "../../build/_deps/libmbfl-src/include/kphp/libmbfl/mbfl/mbfilter.h"
 	#include <libmbfl/mbfl/mbfilter.h>
 }
 
-#include "runtime/kphp_core.h"
-
 /**
  * Convert a string from one character encoding to another
  * @param str The string to be converted
diff --git a/runtime/runtime.cmake b/runtime/runtime.cmake
index dce7d62cb8..2572f6f2c8 100644
--- a/runtime/runtime.cmake
+++ b/runtime/runtime.cmake
@@ -49,7 +49,13 @@ prepend(KPHP_RUNTIME_PDO_PGSQL_SOURCES pdo/pgsql/
         pgsql_pdo_emulated_statement.cpp)
 endif()
 
+if (MBFL)
+prepend(KPHP_RUNTIME_MBSTRING_SOURCES mbstring/
+        mbstring.cpp)
+endif()
+
 prepend(KPHP_RUNTIME_SOURCES ${BASE_DIR}/runtime/
+        ${KPHP_RUNTIME_MBSTRING_SOURCES}
         ${KPHP_RUNTIME_DATETIME_SOURCES}
         ${KPHP_RUNTIME_MEMORY_RESOURCE_SOURCES}
         ${KPHP_RUNTIME_MSGPACK_SOURCES}
@@ -139,6 +145,9 @@ vk_add_library(kphp_runtime OBJECT ${KPHP_RUNTIME_ALL_SOURCES})
 target_include_directories(kphp_runtime PUBLIC ${BASE_DIR} /opt/curl7600/include)
 
 add_dependencies(kphp_runtime kphp-timelib)
+if (MBFL)
+    add_dependencies(kphp_runtime libmbfl)
+endif()
 
 prepare_cross_platform_libs(RUNTIME_LIBS yaml-cpp re2 zstd h3) # todo: linking between static libs is no-op, is this redundant? do we need to add mysqlclient here?
 set(RUNTIME_LIBS vk::kphp_runtime vk::kphp_server vk::popular_common vk::unicode vk::common_src vk::binlog_src vk::net_src ${RUNTIME_LIBS} OpenSSL::Crypto m z pthread)
@@ -157,6 +166,10 @@ if (PDO_DRIVER_PGSQL)
     list(APPEND RUNTIME_LINK_TEST_LIBS PostgreSQL::PostgreSQL)
 endif()
 
+if (MBFL)
+    list(APPEND RUNTIME_LINK_TEST_LIBS libmbfl)
+endif()
+
 file(GLOB_RECURSE KPHP_RUNTIME_ALL_HEADERS
      RELATIVE ${BASE_DIR}
      CONFIGURE_DEPENDS

From 406b0c9c332a6d33b87b01c617186bf41762d65b Mon Sep 17 00:00:00 2001
From: Andrey Arutiunian <andreylzmw@gmail.com>
Date: Mon, 17 Apr 2023 15:19:33 +0300
Subject: [PATCH 04/27] fix Ubuntu build: pass -Wno-logical-op-parentheses only on Apple

---
 cmake/external-libraries.cmake     | 11 ++++++++---
 cmake/init-compilation-flags.cmake |  2 +-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/cmake/external-libraries.cmake b/cmake/external-libraries.cmake
index 70a03bcd36..6d9295f370 100644
--- a/cmake/external-libraries.cmake
+++ b/cmake/external-libraries.cmake
@@ -57,9 +57,12 @@ else()
 endif()
 
 if(MBFL)
-#     add_library(libmbfl STATIC IMPORTED ${MBFL})
-# else()
-    add_compile_options(-Wno-unused-parameter -Wno-logical-op-parentheses -Wno-unused-variable -Wno-return-type -Wno-unused-function)
+    add_library(libmbfl STATIC IMPORTED ${MBFL})
+else()
+    if (APPLE)
+        add_compile_options(-Wno-logical-op-parentheses)
+    endif()
+    add_compile_options(-Wno-unused-parameter -Wno-unused-variable -Wno-return-type -Wno-unused-function)
     handle_missing_library("libmbfl")
     FetchContent_Declare(libmbfl GIT_REPOSITORY https://github.com/andreylzmw/libmbfl)
     message(STATUS "---------------------")
@@ -69,6 +72,8 @@ if(MBFL)
     add_link_options(-L${libmbfl_SOURCE_DIR}/objs)
 endif()
 
+add_compile_options(-Wno-redundant-move)
+
 if(APPLE)
     if (DEFINED ENV{EPOLL_SHIM_REPO})
         FetchContent_Declare(
diff --git a/cmake/init-compilation-flags.cmake b/cmake/init-compilation-flags.cmake
index 9ba0d676d3..c41f116a61 100644
--- a/cmake/init-compilation-flags.cmake
+++ b/cmake/init-compilation-flags.cmake
@@ -115,7 +115,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
 endif()
 
 add_compile_options(-Werror -Wall -Wextra -Wunused-function -Wfloat-conversion -Wno-sign-compare
-                    -Wuninitialized -Wno-redundant-move -Wno-missing-field-initializers)
+                    -Wuninitialized -Wno-missing-field-initializers)
 
 if(NOT APPLE)
     check_cxx_compiler_flag(-gz=zlib DEBUG_COMPRESSION_IS_FOUND)

From 35bcc8f7714c0ea09b3e568b0e33236029e03198 Mon Sep 17 00:00:00 2001
From: Andrey Arutiunian <andreylzmw@gmail.com>
Date: Mon, 17 Apr 2023 16:02:40 +0300
Subject: [PATCH 05/27] clean up libmbfl block in external-libraries.cmake and includes in mbstring.h

---
 cmake/external-libraries.cmake | 8 --------
 runtime/mbstring/mbstring.h    | 3 +--
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/cmake/external-libraries.cmake b/cmake/external-libraries.cmake
index 6d9295f370..f32e04b0fd 100644
--- a/cmake/external-libraries.cmake
+++ b/cmake/external-libraries.cmake
@@ -57,15 +57,7 @@ else()
 endif()
 
 if(MBFL)
-    add_library(libmbfl STATIC IMPORTED ${MBFL})
-else()
-    if (APPLE)
-        add_compile_options(-Wno-logical-op-parentheses)
-    endif()
-    add_compile_options(-Wno-unused-parameter -Wno-unused-variable -Wno-return-type -Wno-unused-function)
-    handle_missing_library("libmbfl")
     FetchContent_Declare(libmbfl GIT_REPOSITORY https://github.com/andreylzmw/libmbfl)
-    message(STATUS "---------------------")
     FetchContent_MakeAvailable(libmbfl)
     include_directories(${libmbfl_SOURCE_DIR}/include)
     add_definitions(-DLIBMBFL_LIB_DIR="${libmbfl_SOURCE_DIR}/objs")
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index 3668a6fda0..7fc83931e7 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -4,9 +4,8 @@
 
 extern "C" {
 	// FIXME
+	#include "/../../build/_deps/libmbfl-src/include/kphp/libmbfl/mbfl/mbfilter.h"
 	// #include <kphp/libmbfl/mbfl/mbfilter.h>
-	// #include "../../build/_deps/libmbfl-src/include/kphp/libmbfl/mbfl/mbfilter.h"
-	#include <libmbfl/mbfl/mbfilter.h>
 }
 
 /**

From e951d867e51850cf21e19b73f2790f46ded469db Mon Sep 17 00:00:00 2001
From: Andrey Arutiunian <andreylzmw@gmail.com>
Date: Mon, 17 Apr 2023 16:12:46 +0300
Subject: [PATCH 06/27] fix libmbfl header include path in mbstring.h

---
 runtime/mbstring/mbstring.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index 7fc83931e7..f4e8c3d527 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -4,7 +4,7 @@
 
 extern "C" {
 	// FIXME
-	#include "/../../build/_deps/libmbfl-src/include/kphp/libmbfl/mbfl/mbfilter.h"
+	#include "build/_deps/libmbfl-src/include/kphp/libmbfl/mbfl/mbfilter.h"
 	// #include <kphp/libmbfl/mbfl/mbfilter.h>
 }
 

From fac68a63182d5c1e84092bc564d8d99c8422d9fd Mon Sep 17 00:00:00 2001
From: Andrey Arutiunian <andreylzmw@gmail.com>
Date: Mon, 17 Apr 2023 16:46:19 +0300
Subject: [PATCH 07/27] include mbfilter.h as <kphp/libmbfl/mbfl/mbfilter.h>; log when MBFL is enabled

---
 cmake/external-libraries.cmake | 1 +
 runtime/mbstring/mbstring.h    | 4 +---
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/cmake/external-libraries.cmake b/cmake/external-libraries.cmake
index f32e04b0fd..fc77046abe 100644
--- a/cmake/external-libraries.cmake
+++ b/cmake/external-libraries.cmake
@@ -57,6 +57,7 @@ else()
 endif()
 
 if(MBFL)
+    message(STATUS "MBFL=On, libmbfl will be downloaded and built")
     FetchContent_Declare(libmbfl GIT_REPOSITORY https://github.com/andreylzmw/libmbfl)
     FetchContent_MakeAvailable(libmbfl)
     include_directories(${libmbfl_SOURCE_DIR}/include)
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index f4e8c3d527..5a5a0732bf 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -3,9 +3,7 @@
 #include "runtime/kphp_core.h"
 
 extern "C" {
-	// FIXME
-	#include "build/_deps/libmbfl-src/include/kphp/libmbfl/mbfl/mbfilter.h"
-	// #include <kphp/libmbfl/mbfl/mbfilter.h>
+	#include <kphp/libmbfl/mbfl/mbfilter.h>
 }
 
 /**

From 3c0c54e20a24b7b5edcd391ba30bb08651e8e919 Mon Sep 17 00:00:00 2001
From: Andrey Arutiunian <andreylzmw@gmail.com>
Date: Mon, 17 Apr 2023 18:15:50 +0300
Subject: [PATCH 08/27] move mbfilter.h include from mbstring.h to mbstring.cpp

---
 runtime/mbstring/mbstring.cpp | 4 ++++
 runtime/mbstring/mbstring.h   | 4 ----
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
index 52cff09775..bfb39529dc 100644
--- a/runtime/mbstring/mbstring.cpp
+++ b/runtime/mbstring/mbstring.cpp
@@ -1,5 +1,9 @@
 #include "mbstring.h"
 
+extern "C" {
+	#include <kphp/libmbfl/mbfl/mbfilter.h>
+}
+
 string f$mb_convert_encoding(const string &str, const string &to, const string &from) {
 
 	/* preparing */
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index 5a5a0732bf..c82873127a 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -2,10 +2,6 @@
 
 #include "runtime/kphp_core.h"
 
-extern "C" {
-	#include <kphp/libmbfl/mbfl/mbfilter.h>
-}
-
 /**
  * Convert a string from one character encoding to another
  * @param str The string to be converted

From 9332f451516deb9296e45119d4e71809dbc84313 Mon Sep 17 00:00:00 2001
From: Andrey Arutiunian <andreylzmw@gmail.com>
Date: Thu, 20 Apr 2023 02:18:43 +0300
Subject: [PATCH 09/27] finish basic mbstring functions and building libmbfl

---
 cmake/external-libraries.cmake        |  3 +-
 runtime/mbstring/mbstring.cpp         | 93 ++++++++++++++++++++++-----
 runtime/mbstring/mbstring.h           | 11 +++-
 tests/cpp/runtime/mbstring-test.cpp   | 18 ++++++
 tests/cpp/runtime/runtime-tests.cmake | 45 ++++++-------
 5 files changed, 127 insertions(+), 43 deletions(-)
 create mode 100644 tests/cpp/runtime/mbstring-test.cpp

diff --git a/cmake/external-libraries.cmake b/cmake/external-libraries.cmake
index fc77046abe..8266b44b7b 100644
--- a/cmake/external-libraries.cmake
+++ b/cmake/external-libraries.cmake
@@ -1,5 +1,5 @@
 option(DOWNLOAD_MISSING_LIBRARIES "download and build missing libraries if needed" OFF)
-option(MBFL, OFF)
+option(MBFL "download and build libmbfl" OFF)
 cmake_print_variables(DOWNLOAD_MISSING_LIBRARIES)
 cmake_print_variables(MBFL)
 function(handle_missing_library LIB_NAME)
@@ -58,6 +58,7 @@ endif()
 
 if(MBFL)
     message(STATUS "MBFL=On, libmbfl will be downloaded and built")
+    add_compile_options(-DMBFL)
     FetchContent_Declare(libmbfl GIT_REPOSITORY https://github.com/andreylzmw/libmbfl)
     FetchContent_MakeAvailable(libmbfl)
     include_directories(${libmbfl_SOURCE_DIR}/include)
diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
index bfb39529dc..4d1637d59a 100644
--- a/runtime/mbstring/mbstring.cpp
+++ b/runtime/mbstring/mbstring.cpp
@@ -4,34 +4,93 @@ extern "C" {
 	#include <kphp/libmbfl/mbfl/mbfilter.h>
 }
 
-string f$mb_convert_encoding(const string &str, const string &to, const string &from) {
-
-	/* preparing */
-	const char *c_str = str.c_str();
-	const char *c_from = from.c_str();
-	const char *c_to = to.c_str();
+mbfl_string *convert_encoding(const char *str, const char *to, const char *from) {
 
+	int len = strlen(str);
 	enum mbfl_no_encoding from_encoding, to_encoding;
 	mbfl_buffer_converter *convd = NULL;
-	mbfl_string tmp, result, *ret;
+	mbfl_string _string, result, *ret;
 
 	/* from internal to mbfl */
-	from_encoding = mbfl_name2no_encoding(c_from);
-	to_encoding = mbfl_name2no_encoding(c_to);
+	from_encoding = mbfl_name2no_encoding(from);
+	to_encoding = mbfl_name2no_encoding(to);
 
 	/* init buffer mbfl strings */
-	long int len = strlen(c_str);
-	mbfl_string_init(&tmp);
+	mbfl_string_init(&_string);
 	mbfl_string_init(&result);
-	tmp.no_encoding = from_encoding;
-	tmp.len = len;
-	tmp.val = (unsigned char*)c_str;
+	_string.no_encoding = from_encoding;
+	_string.len = len;
+	_string.val = (unsigned char*)str;
 
 	/* converting */
 	convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
-	ret = mbfl_buffer_converter_feed_result(convd, &tmp, &result);
+	ret = mbfl_buffer_converter_feed_result(convd, &_string, &result);
 	mbfl_buffer_converter_delete(convd);
 
-	/* returning kphp's string */
-	return string((const char*)ret->val, ret->len);
+	/* fix converting with multibyte encodings */
+	if (len % 2 != 0 && ret->len % 2 == 0 && len < ret->len) {
+		ret->len++;
+		ret->val[ret->len-1] = 63;
+	}
+	
+	return ret;
+}
+
+bool check_encoding(const char *value, const char *encoding) {
+
+	/* init buffer mbfl strins */
+	mbfl_string _string;
+	mbfl_string_init(&_string);
+	_string.val = (unsigned char*)value;
+	_string.len = strlen((char*)value);
+
+	/* from internal to mbfl */
+	const mbfl_encoding *enc = mbfl_name2encoding(encoding);
+
+	/* get all supported encodings */
+	const mbfl_encoding **encs = mbfl_get_supported_encodings();
+	int len = sizeof(**encs);
+
+	/* identify encoding of input string */
+	/* Warning! String can be represented in different encodings, so check needed */
+	const mbfl_encoding *i_enc = mbfl_identify_encoding2(&_string, encs, len, 1);
+
+	/* perform convering */
+	const char *i_enc_str = (const char*)convert_encoding(value, i_enc->name, enc->name)->val;
+	const char *enc_str = (const char*)convert_encoding(i_enc_str, enc->name, i_enc->name)->val;
+
+	/* check equality */
+	/* Warning! strcmp not working, because of different encodings */
+	bool res = true;
+	for (int i = 0; i < strlen(enc_str); i++)
+		if (enc_str[i] != value[i]) {
+			res = false;
+			break;
+		}
+
+	free((void*)i_enc_str);
+	free((void*)enc_str);
+	return res;
+}
+
+bool f$mb_check_encoding(const string &value, const string &encoding) {
+	const char *c_value = value.c_str();
+	const char *c_encoding = encoding.c_str();
+	return check_encoding(c_value, c_encoding);
+}
+
+string f$mb_convert_encoding(const string &str, const string &to_encoding, const string &from_encoding) {
+
+	const char *c_string = str.c_str();
+	const char *c_to_encoding = to_encoding.c_str();
+	const char *c_from_encoding = from_encoding.c_str();
+
+	/* perform convertion */
+	mbfl_string *ret = convert_encoding(c_string, c_to_encoding, c_from_encoding);
+	string res = string((const char*)ret->val, ret->len);
+
+	/* check if string represents in from_encoding, magic number 63 - '?' in ASCII */
+	if (!check_encoding(c_string, c_from_encoding)) res = string(strlen(c_string), (char)63);
+	
+	return res;
 }
\ No newline at end of file
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index c82873127a..a2154176a5 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -2,14 +2,19 @@
 
 #include "runtime/kphp_core.h"
 
+/**
+ * Check if strings are valid for the specified encoding
+ * @param value The byte stream
+ * @param encoding The expected encoding
+ * @return Returns true on success or false on failure
+ */
+bool f$mb_check_encoding(const string &value, const string &encoding);
+
 /**
  * Convert a string from one character encoding to another
  * @param str The string to be converted
  * @param from The desired encoding of the result
  * @param to The current encoding used to interpret string
  * @return The encoded string
- * TODO!: mb_check_encoding(str, from) inside
- * TODO!: own constants for encodings
- * TODO: issue for timelib
  */
 string f$mb_convert_encoding(const string &str, const string &to, const string &from);
\ No newline at end of file
diff --git a/tests/cpp/runtime/mbstring-test.cpp b/tests/cpp/runtime/mbstring-test.cpp
new file mode 100644
index 0000000000..42ab014a48
--- /dev/null
+++ b/tests/cpp/runtime/mbstring-test.cpp
@@ -0,0 +1,18 @@
+#include <gtest/gtest.h>
+#include "runtime/mbstring/mbstring.h"
+
+#ifdef MBFL
+/* TODO: make fun strings for tests */
+TEST(mbstring_test, test_mb_check_encoding) {
+	ASSERT_TRUE(f$mb_check_encoding(string("sdf"), string("Windows-1251")));
+	ASSERT_TRUE(f$mb_check_encoding(string("ыва"), string("Windows-1251")));
+	ASSERT_TRUE(f$mb_check_encoding(string("Ä°nanÃ§ EsaslarÄ±"), string("UTF-8")));
+	ASSERT_TRUE(f$mb_check_encoding(string("Ä°nanÃ§ EsaslarÄ±"), string("Windows-1251")));
+	ASSERT_FALSE(f$mb_check_encoding(string("Ä°nanÃ§ EsaslarÄ±"), string("ASCII")));
+}
+TEST(mbstring_test, test_mb_convert_encoding) {
+	ASSERT_STREQ(f$mb_convert_encoding(string("Hello"), string("UTF-8"), string("EUC-KR")).c_str(), "Hello");
+	ASSERT_STREQ(f$mb_convert_encoding(string("ыавыа"), string("UTF-8"), string("Windows-1251")).c_str(), "С‹Р°РІС‹Р°");
+	ASSERT_STREQ(f$mb_convert_encoding(string("ыва"), string("UTF-8"), string("ASCII")).c_str(), "??????");
+}
+#endif
\ No newline at end of file
diff --git a/tests/cpp/runtime/runtime-tests.cmake b/tests/cpp/runtime/runtime-tests.cmake
index 88d4255228..aea2f10c3d 100644
--- a/tests/cpp/runtime/runtime-tests.cmake
+++ b/tests/cpp/runtime/runtime-tests.cmake
@@ -1,26 +1,27 @@
 prepend(RUNTIME_TESTS_SOURCES ${BASE_DIR}/tests/cpp/runtime/
-        _runtime-tests-env.cpp
-        allocator-malloc-replacement-test.cpp
-        array-test.cpp
-        common-php-functions-test.cpp
-        confdata-functions-test.cpp
-        confdata-key-maker-test.cpp
-        confdata-predefined-wildcards-test.cpp
-        flex-test.cpp
-        inter-process-mutex-test.cpp
-        inter-process-resource-test.cpp
-        json-writer-test.cpp
-        number-string-comparison.cpp
-        kphp-type-traits-test.cpp
-        msgpack-test.cpp
-        memory_resource/details/memory_chunk_list-test.cpp
-        memory_resource/details/memory_chunk_tree-test.cpp
-        memory_resource/details/memory_ordered_chunk_list-test.cpp
-        memory_resource/extra-memory-pool-test.cpp
-        memory_resource/unsynchronized_pool_resource-test.cpp
-        string-list-test.cpp
-        string-test.cpp
-        zstd-test.cpp)
+		_runtime-tests-env.cpp
+		allocator-malloc-replacement-test.cpp
+		array-test.cpp
+		common-php-functions-test.cpp
+		confdata-functions-test.cpp
+		confdata-key-maker-test.cpp
+		confdata-predefined-wildcards-test.cpp
+		flex-test.cpp
+		inter-process-mutex-test.cpp
+		inter-process-resource-test.cpp
+		json-writer-test.cpp
+		number-string-comparison.cpp
+		kphp-type-traits-test.cpp
+		msgpack-test.cpp
+		memory_resource/details/memory_chunk_list-test.cpp
+		memory_resource/details/memory_chunk_tree-test.cpp
+		memory_resource/details/memory_ordered_chunk_list-test.cpp
+		memory_resource/extra-memory-pool-test.cpp
+		memory_resource/unsynchronized_pool_resource-test.cpp
+		string-list-test.cpp
+		string-test.cpp
+		zstd-test.cpp
+		mbstring-test.cpp)
 
 allow_deprecated_declarations_for_apple(${BASE_DIR}/tests/cpp/runtime/inter-process-mutex-test.cpp)
 vk_add_unittest(runtime "${RUNTIME_LIBS};${RUNTIME_LINK_TEST_LIBS}" ${RUNTIME_TESTS_SOURCES})

From 8b3d40e04d17e5fb8e4bf54ccc18c0626b7478a4 Mon Sep 17 00:00:00 2001
From: Andrey Arutiunian <andreylzmw@gmail.com>
Date: Thu, 20 Apr 2023 03:51:32 +0300
Subject: [PATCH 10/27] fix external libs

---
 builtin-functions/_functions.txt | 9 +--------
 compiler/compiler-settings.cpp   | 8 ++------
 2 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/builtin-functions/_functions.txt b/builtin-functions/_functions.txt
index c1c2616ff2..b789a477a7 100644
--- a/builtin-functions/_functions.txt
+++ b/builtin-functions/_functions.txt
@@ -730,14 +730,6 @@ function setlocale ($category ::: int, $locale ::: string) ::: string | false;
 
 function iconv ($input_encoding ::: string, $output_encoding ::: string, $input_str ::: string) ::: string | false;
 
-function mb_check_encoding ($str ::: string, $encoding ::: string = "1251") ::: bool;
-function mb_strlen ($str ::: string, $encoding ::: string = "1251") ::: int;
-function mb_strpos ($haystack ::: string, $needle ::: string, $offset ::: int = 0, $encoding ::: string = "1251") ::: int | false;
-function mb_stripos ($haystack ::: string, $needle ::: string, $offset ::: int = 0, $encoding ::: string = "1251") ::: int | false;
-function mb_strtolower ($str ::: string, $encoding ::: string = "1251") ::: string;
-function mb_strtoupper ($str ::: string, $encoding ::: string = "1251") ::: string;
-function mb_substr ($str ::: string, $start ::: int, $length ::: mixed = PHP_INT_MAX, $encoding ::: string = "1251") ::: string;
-
 define('PHP_ROUND_HALF_UP', 123423141);
 define('PHP_ROUND_HALF_DOWN', 123423144);
 define('PHP_ROUND_HALF_EVEN', 123423145);
@@ -1622,3 +1614,4 @@ class DateTimeImmutable implements DateTimeInterface {
 function getenv(string $varname = '', bool $local_only = false): mixed;
 
 function mb_convert_encoding(string $str, string $to, string $from): string;
+function mb_check_encoding(string $str, string $encoding): bool;
\ No newline at end of file
diff --git a/compiler/compiler-settings.cpp b/compiler/compiler-settings.cpp
index b6cc784042..0368c3d946 100644
--- a/compiler/compiler-settings.cpp
+++ b/compiler/compiler-settings.cpp
@@ -320,7 +320,7 @@ void CompilerSettings::init() {
   ld_flags.value_ = extra_ld_flags.get();
   append_curl(cxx_default_flags, ld_flags.value_);
   append_apple_options(cxx_default_flags, ld_flags.value_);
-  std::vector<vk::string_view> external_static_libs{"pcre", "re2", "yaml-cpp", "h3", "z", "zstd", "nghttp2", "kphp-timelib", "libmbfl"};
+  std::vector<vk::string_view> external_static_libs{"pcre", "re2", "yaml-cpp", "h3", "z", "zstd", "nghttp2", "kphp-timelib"};
 
 #ifdef KPHP_TIMELIB_LIB_DIR
   ld_flags.value_ += " -L" KPHP_TIMELIB_LIB_DIR;
@@ -332,12 +332,8 @@ void CompilerSettings::init() {
 #endif
 
 #ifdef LIBMBFL_LIB_DIR
+  external_static_libs.emplace_back("libmbfl");
   ld_flags.value_ += " -L" LIBMBFL_LIB_DIR;
-#else
-  // kphp-timelib is usually installed in /usr/local/lib;
-  // LDD may not find a library in /usr/local/lib if we don't add it here
-  // TODO: can we avoid this hardcoded library path?
-  ld_flags.value_ += " -L /usr/local/lib";
 #endif
 
 #if defined(__APPLE__) && defined(__arm64__)

From 7e137cb9d8115e18b077dee873538f26e6b7e8bc Mon Sep 17 00:00:00 2001
From: Andrey Arutiunian <andreylzmw@gmail.com>
Date: Thu, 20 Apr 2023 15:18:44 +0300
Subject: [PATCH 11/27] move kphp-timelib installation to the top and add
 explanatory comment

---
 cmake/external-libraries.cmake | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/cmake/external-libraries.cmake b/cmake/external-libraries.cmake
index 4ec35b9cb3..715f4e6028 100644
--- a/cmake/external-libraries.cmake
+++ b/cmake/external-libraries.cmake
@@ -9,6 +9,23 @@ function(handle_missing_library LIB_NAME)
     endif()
 endfunction()
 
+find_library(KPHP_TIMELIB kphp-timelib)
+if(KPHP_TIMELIB)
+    add_library(kphp-timelib STATIC IMPORTED ${KPHP_TIMELIB})
+else()
+    handle_missing_library("kphp-timelib")
+    FetchContent_Declare(kphp-timelib GIT_REPOSITORY https://github.com/VKCOM/timelib)
+    message(STATUS "---------------------")
+    FetchContent_MakeAvailable(kphp-timelib)
+    include_directories(${kphp-timelib_SOURCE_DIR}/include)
+    add_definitions(-DKPHP_TIMELIB_LIB_DIR="${kphp-timelib_SOURCE_DIR}/objs")
+    add_link_options(-L${kphp-timelib_SOURCE_DIR}/objs)
+endif()
+
+# '-Wno-redundant-move' flag works for C++/ObjC++ but not for C, 
+# so build C libraries above
+add_compile_options(-Wno-redundant-move)
+
 find_package(fmt QUIET)
 if(NOT fmt_FOUND)
     handle_missing_library("fmtlib")
@@ -41,21 +58,6 @@ if(KPHP_TESTS)
     endif()
 endif()
 
-find_library(KPHP_TIMELIB kphp-timelib)
-if(KPHP_TIMELIB)
-    add_library(kphp-timelib STATIC IMPORTED ${KPHP_TIMELIB})
-else()
-    handle_missing_library("kphp-timelib")
-    FetchContent_Declare(kphp-timelib GIT_REPOSITORY https://github.com/VKCOM/timelib)
-    message(STATUS "---------------------")
-    FetchContent_MakeAvailable(kphp-timelib)
-    include_directories(${kphp-timelib_SOURCE_DIR}/include)
-    add_definitions(-DKPHP_TIMELIB_LIB_DIR="${kphp-timelib_SOURCE_DIR}/objs")
-    add_link_options(-L${kphp-timelib_SOURCE_DIR}/objs)
-endif()
-
-add_compile_options(-Wno-redundant-move)
-
 if(APPLE)
     if (DEFINED ENV{EPOLL_SHIM_REPO})
         FetchContent_Declare(

From 5aff222bd91751758bde852a0f15b843b38f00f3 Mon Sep 17 00:00:00 2001
From: Andrey Arutiunian <andreylzmw@gmail.com>
Date: Thu, 20 Apr 2023 15:23:55 +0300
Subject: [PATCH 12/27] move kphp-timelib and libmbfl installation to the top
 and add explanatory comment

---
 cmake/external-libraries.cmake | 52 ++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/cmake/external-libraries.cmake b/cmake/external-libraries.cmake
index 8266b44b7b..cac16d83af 100644
--- a/cmake/external-libraries.cmake
+++ b/cmake/external-libraries.cmake
@@ -11,6 +11,33 @@ function(handle_missing_library LIB_NAME)
     endif()
 endfunction()
 
+find_library(KPHP_TIMELIB kphp-timelib)
+if(KPHP_TIMELIB)
+    add_library(kphp-timelib STATIC IMPORTED ${KPHP_TIMELIB})
+else()
+    handle_missing_library("kphp-timelib")
+    FetchContent_Declare(kphp-timelib GIT_REPOSITORY https://github.com/VKCOM/timelib)
+    message(STATUS "---------------------")
+    FetchContent_MakeAvailable(kphp-timelib)
+    include_directories(${kphp-timelib_SOURCE_DIR}/include)
+    add_definitions(-DKPHP_TIMELIB_LIB_DIR="${kphp-timelib_SOURCE_DIR}/objs")
+    add_link_options(-L${kphp-timelib_SOURCE_DIR}/objs)
+endif()
+
+if(MBFL)
+    message(STATUS "MBFL=On, libmbfl will be downloaded and built")
+    add_compile_options(-DMBFL)
+    FetchContent_Declare(libmbfl GIT_REPOSITORY https://github.com/andreylzmw/libmbfl)
+    FetchContent_MakeAvailable(libmbfl)
+    include_directories(${libmbfl_SOURCE_DIR}/include)
+    add_definitions(-DLIBMBFL_LIB_DIR="${libmbfl_SOURCE_DIR}/objs")
+    add_link_options(-L${libmbfl_SOURCE_DIR}/objs)
+endif()
+
+# '-Wno-redundant-move' flag works for C++/ObjC++ but not for C, 
+# so build C libraries above
+add_compile_options(-Wno-redundant-move)
+
 find_package(fmt QUIET)
 if(NOT fmt_FOUND)
     handle_missing_library("fmtlib")
@@ -43,31 +70,6 @@ if(KPHP_TESTS)
     endif()
 endif()
 
-find_library(KPHP_TIMELIB kphp-timelib)
-if(KPHP_TIMELIB)
-    add_library(kphp-timelib STATIC IMPORTED ${KPHP_TIMELIB})
-else()
-    handle_missing_library("kphp-timelib")
-    FetchContent_Declare(kphp-timelib GIT_REPOSITORY https://github.com/VKCOM/timelib)
-    message(STATUS "---------------------")
-    FetchContent_MakeAvailable(kphp-timelib)
-    include_directories(${kphp-timelib_SOURCE_DIR}/include)
-    add_definitions(-DKPHP_TIMELIB_LIB_DIR="${kphp-timelib_SOURCE_DIR}/objs")
-    add_link_options(-L${kphp-timelib_SOURCE_DIR}/objs)
-endif()
-
-if(MBFL)
-    message(STATUS "MBFL=On, libmbfl will be downloaded and built")
-    add_compile_options(-DMBFL)
-    FetchContent_Declare(libmbfl GIT_REPOSITORY https://github.com/andreylzmw/libmbfl)
-    FetchContent_MakeAvailable(libmbfl)
-    include_directories(${libmbfl_SOURCE_DIR}/include)
-    add_definitions(-DLIBMBFL_LIB_DIR="${libmbfl_SOURCE_DIR}/objs")
-    add_link_options(-L${libmbfl_SOURCE_DIR}/objs)
-endif()
-
-add_compile_options(-Wno-redundant-move)
-
 if(APPLE)
     if (DEFINED ENV{EPOLL_SHIM_REPO})
         FetchContent_Declare(

From 07af7a09361cdc52ea82782243bc979bf50dc9d9 Mon Sep 17 00:00:00 2001
From: Andrey Arutiunian <andreylzmw@gmail.com>
Date: Sun, 30 Apr 2023 23:24:48 +0300
Subject: [PATCH 13/27] add MBFL flag to runtime, restore mbstring functions
 for only UTF-8 and Windows-1251 encodings and add runtime declarations of all
 mbstring functions

---
 builtin-functions/_functions.txt    |  61 ++-
 cmake/init-compilation-flags.cmake  |   1 +
 compiler/compiler-settings.cpp      |   3 +
 runtime/interface.cpp               |   3 +
 runtime/mbstring.cpp                |  51 --
 runtime/mbstring.h                  |  12 -
 runtime/mbstring/mbstring.cpp       | 473 +++++++++++++++++-
 runtime/mbstring/mbstring.h         | 744 +++++++++++++++++++++++++++-
 runtime/regexp.h                    |   2 +-
 runtime/runtime.cmake               |   4 +-
 tests/cpp/runtime/mbstring-test.cpp |   9 +-
 11 files changed, 1266 insertions(+), 97 deletions(-)
 delete mode 100644 runtime/mbstring.cpp
 delete mode 100644 runtime/mbstring.h

diff --git a/builtin-functions/_functions.txt b/builtin-functions/_functions.txt
index febed462ab..34536b2e98 100644
--- a/builtin-functions/_functions.txt
+++ b/builtin-functions/_functions.txt
@@ -1617,5 +1617,62 @@ class DateTimeImmutable implements DateTimeInterface {
 
 function getenv(string $varname = '', bool $local_only = false): mixed;
 
-function mb_convert_encoding(string $str, string $to, string $from): string;
-function mb_check_encoding(string $str, string $encoding): bool;
\ No newline at end of file
+function mb_check_encoding(array|string $value, ?string $encoding = null): bool;
+function mb_convert_encoding(array|string $string, string $to_encoding, array|string|null $from_encoding = null): array|string|false;
+function mb_strlen(string $string, ?string $encoding = null): int;
+function mb_strpos(string $haystack, string $needle, int $offset = 0, ?string $encoding = null): int|false;
+function mb_stripos(string $haystack, string $needle, int $offset = 0, ?string $encoding = null): int|false;
+function mb_strtolower(string $string, ?string $encoding = null): string;
+function mb_strtoupper(string $string, ?string $encoding = null): string;
+function mb_substr(string $string, int $start, ?int $length = null, ?string $encoding = null): string;
+function mb_chr(int $codepoint, ?string $encoding = null): string|false;
+function mb_convert_case(string $string, int $mode, ?string $encoding = null): string;
+function mb_convert_kana(string $string, string $mode = "KV", ?string $encoding = null): string;
+function mb_convert_variables(string $to_encoding, array|string $from_encoding, mixed &$vars): string|false; // ??? (change variable bytes + kwargs)
+function mb_decode_mimeheader(string $string): string;
+function mb_decode_numericentity(string $string, array $map, ?string $encoding = null): string;
+function mb_detect_encoding(string $string, array|string|null $encodings = null, bool $strict = false): string|false;
+function mb_detect_order(array|string|null $encoding = null): array|bool;
+function mb_encode_mimeheader(string $string, ?string $charset = null, ?string $transfer_encoding = null, string $newline = "\r\n", int $indent = 0): string;
+function mb_encode_numericentity(string $string, array $map, ?string $encoding = null, bool $hex = false): string;
+function mb_encoding_aliases(string $encoding): array;
+function mb_ereg_match(string $pattern, string $string, ?string $options = null): bool;
+function mb_ereg_replace_callback(string $pattern, callable $callback, string $string, ?string $options = null): string|false|null;
+function mb_ereg_replace(string $pattern, string $replacement, string $string, ?string $options = null): string|false|null;
+function mb_ereg_search_getpos(): int;
+function mb_ereg_search_getregs(): array|false;
+function mb_ereg_search_init(string $string, ?string $pattern = null, ?string $options = null): bool;
+function mb_ereg_search_pos(?string $pattern = null, ?string $options = null): array|false;
+function mb_ereg_search_regs(?string $pattern = null, ?string $options = null): array|false;
+function mb_ereg_search_setpos(int $offset): bool;
+function mb_ereg_search(?string $pattern = null, ?string $options = null): bool;
+function mb_ereg(string $pattern, string $string, array &$matches = null): bool;
+function mb_eregi_replace(string $pattern, string $replacement, string $string, ?string $options = null): string|false|null;
+function mb_eregi(string $pattern, string $string, array &$matches = null): bool;
+function mb_get_info(string $type = "all"): array|string|int|false;
+function mb_http_input(?string $type = null): array|string|false;
+function mb_http_output(?string $encoding = null): string|false;
+function mb_internal_encoding(?string $encoding = null): string|false;
+function mb_language(?string $language = null): string|false;
+function mb_list_encodings(): array;
+function mb_ord(string $string, ?string $encoding = null): int|false;
+function mb_output_handler(string $string, int $status): string;
+function mb_parse_str(string $string, array &$result): bool;
+function mb_preferred_mime_name(string $encoding): string|false;
+function mb_regex_encoding(?string $encoding = null): string|false;
+function mb_regex_set_options(?string $options = null): string;
+function mb_scrub(string $string, ?string $encoding = null): string;
+function mb_send_mail(string $to, string $subject, string $message, array|string $additional_headers = [], ?string $additional_params = null): bool;
+function mb_split(string $pattern, string $string, int $limit = -1): array|false;
+function mb_str_split(string $string, int $length = 1, ?string $encoding = null): array;
+function mb_strcut(string $string, int $start, ?int $length = null, ?string $encoding = null): string;
+function mb_strimwidth(string $string, int $start, int $width, string $trim_marker = "", ?string $encoding = null): string;
+function mb_stristr(string $haystack, string $needle, bool $before_needle = false, ?string $encoding = null): string|false;
+function mb_strrchr(string $haystack, string $needle, bool $before_needle = false, ?string $encoding = null): string|false;
+function mb_strrichr(string $haystack, string $needle, bool $before_needle = false, ?string $encoding = null): string|false;
+function mb_strripos(string $haystack, string $needle, int $offset = 0, ?string $encoding = null): int|false;
+function mb_strrpos(string $haystack, string $needle, int $offset = 0, string $encoding = null): int|false;
+function mb_strstr(string $haystack, string $needle, bool $before_needle = false, ?string $encoding = null): string|false;
+function mb_strwidth(string $string, ?string $encoding = null): int;
+function mb_substitute_character(string|int|null $substitute_character = null): string|int|false;
+function mb_substr_count(string $haystack, string $needle, ?string $encoding = null): int;
\ No newline at end of file
diff --git a/cmake/init-compilation-flags.cmake b/cmake/init-compilation-flags.cmake
index a3273c84fd..5ed14874bb 100644
--- a/cmake/init-compilation-flags.cmake
+++ b/cmake/init-compilation-flags.cmake
@@ -76,6 +76,7 @@ if (PDO_DRIVER_PGSQL)
     add_definitions(-DPDO_DRIVER_PGSQL)
     add_compile_definitions(PDO_DRIVER_PGSQL_VERSION=${PostgreSQL_VERSION})
 endif()
+
 cmake_print_variables(PDO_DRIVER_PGSQL)
 
 option(KPHP_TESTS "Build the tests" ON)
diff --git a/compiler/compiler-settings.cpp b/compiler/compiler-settings.cpp
index 0368c3d946..5a9ab2ede7 100644
--- a/compiler/compiler-settings.cpp
+++ b/compiler/compiler-settings.cpp
@@ -283,6 +283,9 @@ void CompilerSettings::init() {
 
   remove_extra_spaces(extra_cxx_flags.value_);
   std::stringstream ss;
+  #ifdef MBFL
+  ss << " -DMBFL ";
+  #endif
   ss << extra_cxx_flags.get();
   ss << " -iquote" << kphp_src_path.get()
      << " -iquote " << kphp_src_path.get() << "objs/generated/auto/runtime";
diff --git a/runtime/interface.cpp b/runtime/interface.cpp
index fea37c2618..93522425e8 100644
--- a/runtime/interface.cpp
+++ b/runtime/interface.cpp
@@ -2380,6 +2380,9 @@ static void free_runtime_libs() {
 
   free_migration_php8();
 
+  #ifndef MBFL
+  free_detect_incorrect_encoding_names();
+  #endif
   vk::singleton<JsonLogger>::get().reset_buffers();
 #ifdef PDO_DRIVER_MYSQL
   database_drivers::free_mysql_lib();
diff --git a/runtime/mbstring.cpp b/runtime/mbstring.cpp
deleted file mode 100644
index bbf8231b47..0000000000
--- a/runtime/mbstring.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-// Compiler for PHP (aka KPHP)
-// Copyright (c) 2020 LLC «V Kontakte»
-// Distributed under the GPL v3 License, see LICENSE.notice.txt
-
-#include "runtime/mbstring.h"
-
-#include "common/unicode/unicode-utils.h"
-#include "common/unicode/utf8-utils.h"
-
-bool mb_UTF8_check(const char *s) {
-  do {
-#define CHECK(condition) if (!(condition)) {return false;}
-    unsigned int a = (unsigned char)(*s++);
-    if ((a & 0x80) == 0) {
-      if (a == 0) {
-        return true;
-      }
-      continue;
-    }
-
-    CHECK ((a & 0x40) != 0);
-
-    unsigned int b = (unsigned char)(*s++);
-    CHECK((b & 0xc0) == 0x80);
-    if ((a & 0x20) == 0) {
-      CHECK((a & 0x1e) > 0);
-      continue;
-    }
-
-    unsigned int c = (unsigned char)(*s++);
-    CHECK((c & 0xc0) == 0x80);
-    if ((a & 0x10) == 0) {
-      int x = (((a & 0x0f) << 6) | (b & 0x20));
-      CHECK(x != 0 && x != 0x360);//surrogates
-      continue;
-    }
-
-    unsigned int d = (unsigned char)(*s++);
-    CHECK((d & 0xc0) == 0x80);
-    if ((a & 0x08) == 0) {
-      int t = (((a & 0x07) << 6) | (b & 0x30));
-      CHECK(0 < t && t < 0x110);//end of unicode
-      continue;
-    }
-
-    return false;
-#undef CHECK
-  } while (true);
-
-  php_assert (0);
-}
\ No newline at end of file
diff --git a/runtime/mbstring.h b/runtime/mbstring.h
deleted file mode 100644
index be9aef5b0c..0000000000
--- a/runtime/mbstring.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Compiler for PHP (aka KPHP)
-// Copyright (c) 2020 LLC «V Kontakte»
-// Distributed under the GPL v3 License, see LICENSE.notice.txt
-
-#pragma once
-
-#include <climits>
-
-#include "runtime/kphp_core.h"
-#include "runtime/string_functions.h"
-
-bool mb_UTF8_check(const char *s);
\ No newline at end of file
diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
index 4d1637d59a..82c966f0ad 100644
--- a/runtime/mbstring/mbstring.cpp
+++ b/runtime/mbstring/mbstring.cpp
@@ -1,5 +1,49 @@
 #include "mbstring.h"
 
+bool mb_UTF8_check(const char *s) {
+  do {
+#define CHECK(condition) if (!(condition)) {return false;}
+    unsigned int a = (unsigned char)(*s++);
+    if ((a & 0x80) == 0) {
+      if (a == 0) {
+        return true;
+      }
+      continue;
+    }
+
+    CHECK ((a & 0x40) != 0);
+
+    unsigned int b = (unsigned char)(*s++);
+    CHECK((b & 0xc0) == 0x80);
+    if ((a & 0x20) == 0) {
+      CHECK((a & 0x1e) > 0);
+      continue;
+    }
+
+    unsigned int c = (unsigned char)(*s++);
+    CHECK((c & 0xc0) == 0x80);
+    if ((a & 0x10) == 0) {
+      int x = (((a & 0x0f) << 6) | (b & 0x20));
+      CHECK(x != 0 && x != 0x360);//surrogates
+      continue;
+    }
+
+    unsigned int d = (unsigned char)(*s++);
+    CHECK((d & 0xc0) == 0x80);
+    if ((a & 0x08) == 0) {
+      int t = (((a & 0x07) << 6) | (b & 0x30));
+      CHECK(0 < t && t < 0x110);//end of unicode
+      continue;
+    }
+
+    return false;
+#undef CHECK
+  } while (true);
+
+  php_assert (0);
+}
+
+#ifdef MBFL
 extern "C" {
 	#include <kphp/libmbfl/mbfl/mbfilter.h>
 }
@@ -73,24 +117,423 @@ bool check_encoding(const char *value, const char *encoding) {
 	return res;
 }
 
-bool f$mb_check_encoding(const string &value, const string &encoding) {
-	const char *c_value = value.c_str();
-	const char *c_encoding = encoding.c_str();
+// Converts str from from_encoding to to_encoding. TODO: array values and encoding lists are not handled yet.
+mixed f$mb_convert_encoding(const mixed &str, const string &to_encoding, const mixed &from_encoding) {
+	// only the string/string form is implemented; everything else falls through to `return false`
+	if (str.is_string() && from_encoding.is_string()) {
+		const string &s = str.to_string();
+		const string &from = from_encoding.to_string();
+
+		const char *c_string = s.c_str();
+		const char *c_to_encoding = to_encoding.c_str();
+		const char *c_from_encoding = from.c_str();
+
+		/* perform conversion; NOTE(review): assumes failure is reported as nullptr and that ret is not owned here - confirm */
+		mbfl_string *ret = convert_encoding(c_string, c_to_encoding, c_from_encoding);
+		if (ret == nullptr || ret->val == nullptr) return false;
+		string res = string((const char*)ret->val, ret->len);
+
+		/* input that is not valid in from_encoding is replaced by one '?' per input byte */
+		if (!check_encoding(c_string, c_from_encoding)) res = string(strlen(c_string), '?');
+
+		return res;
+	}
+	return false;
+}
+
+// Checks value (converted to a string) against encoding. TODO: handle a null encoding (val() is read unchecked) and array values
+bool f$mb_check_encoding(const mixed &value, const Optional<string> &encoding) {
+	const string &val = value.to_string();
+	const string &enc = encoding.val(); // no has_value() check yet, see TODO above
+	const char *c_value = val.c_str();
+	const char *c_encoding = enc.c_str();
 	return check_encoding(c_value, c_encoding);
 }
 
-string f$mb_convert_encoding(const string &str, const string &to_encoding, const string &from_encoding) {
+#else
 
-	const char *c_string = str.c_str();
-	const char *c_to_encoding = to_encoding.c_str();
-	const char *c_from_encoding = from_encoding.c_str();
+#include "common/unicode/unicode-utils.h"
+#include "common/unicode/utf8-utils.h"
 
-	/* perform convertion */
-	mbfl_string *ret = convert_encoding(c_string, c_to_encoding, c_from_encoding);
-	string res = string((const char*)ret->val, ret->len);
+static bool is_detect_incorrect_encoding_names_warning{false};
 
-	/* check if string represents in from_encoding, magic number 63 - '?' in ASCII */
-	if (!check_encoding(c_string, c_from_encoding)) res = string(strlen(c_string), (char)63);
-	
-	return res;
-}
\ No newline at end of file
+void f$set_detect_incorrect_encoding_names_warning(bool show) { // toggles the mismatch warnings emitted by mb_detect_encoding()
+  is_detect_incorrect_encoding_names_warning = show;
+}
+
+void free_detect_incorrect_encoding_names() { // resets the warning flag to its initial value (false)
+  is_detect_incorrect_encoding_names_warning = false;
+}
+
+static int mb_detect_encoding_new(const string &encoding) {
+  // Strict, case-insensitive name match: returns 1251, 8 (UTF-8) or -1 when the name is not recognized.
+  const string lowered_encoding = f$strtolower(encoding); // must outlive encoding_name: c_str() of a temporary dangles
+  const char *encoding_name = lowered_encoding.c_str();
+  if (!strcmp(encoding_name, "cp1251") || !strcmp(encoding_name, "cp-1251") || !strcmp(encoding_name, "windows-1251")) {
+    return 1251;
+  }
+
+  if (!strcmp(encoding_name, "utf8") || !strcmp(encoding_name, "utf-8")) {
+    return 8;
+  }
+  return -1;
+}
+
+static int mb_detect_encoding(const string &encoding) {
+  const int result_new = mb_detect_encoding_new(encoding); // strict matcher, used here only to report disagreements
+  // Substring matching decides the result: a name containing "1251" gives 1251, one containing "-8" gives 8, else -1.
+  if (strstr(encoding.c_str(), "1251")) {
+    if (is_detect_incorrect_encoding_names_warning && 1251 != result_new) {
+      php_warning("mb_detect_encoding returns 1251, but new will return %d, encoding %s", result_new, encoding.c_str());
+    }
+    return 1251;
+  }
+  if (strstr(encoding.c_str(), "-8")) {
+    if (is_detect_incorrect_encoding_names_warning && 8 != result_new) {
+      php_warning("mb_detect_encoding returns 8, but new will return %d, encoding %s", result_new, encoding.c_str());
+    }
+    return 8;
+  }
+
+  if (is_detect_incorrect_encoding_names_warning && -1 != result_new) {
+    php_warning("mb_detect_encoding returns -1, but new will return %d, encoding %s", result_new, encoding.c_str());
+  }
+  return -1;
+}
+
+static int64_t mb_UTF8_strlen(const char *s) { // number of bytes in s (up to the NUL) that are not 10xxxxxx continuation bytes
+  int64_t res = 0;
+  for (int64_t i = 0; s[i]; i++) {
+    if ((((unsigned char)s[i]) & 0xc0) != 0x80) { // every non-continuation byte starts a character
+      res++;
+    }
+  }
+  return res;
+}
+
+static int64_t mb_UTF8_advance(const char *s, int64_t cnt) { // byte index where character number cnt (0-based) starts
+  php_assert (cnt >= 0);
+  int64_t i;
+  for (i = 0; s[i] && cnt >= 0; i++) {
+    if ((((unsigned char)s[i]) & 0xc0) != 0x80) { // non-continuation byte: one more character start consumed
+      cnt--;
+    }
+  }
+  if (cnt < 0) { // the loop stepped one byte past the wanted start; step back onto it
+    i--;
+  }
+  return i; // when s has at most cnt characters this is the byte length of s
+}
+
+static int64_t mb_UTF8_get_offset(const char *s, int64_t pos) { // number of characters that start within the first pos bytes of s
+  int64_t res = 0;
+  for (int64_t i = 0; i < pos && s[i]; i++) {
+    if ((((unsigned char)s[i]) & 0xc0) != 0x80) { // count only non-continuation bytes
+      res++;
+    }
+  }
+  return res;
+}
+
+bool mb_UTF8_check(const char *s) { // true when the NUL-terminated string s is well-formed UTF-8
+  do {
+#define CHECK(condition) if (!(condition)) {return false;}
+    unsigned int a = (unsigned char)(*s++);
+    if ((a & 0x80) == 0) { // single-byte (ASCII) character
+      if (a == 0) {
+        return true;
+      }
+      continue;
+    }
+
+    CHECK ((a & 0x40) != 0); // a lone 10xxxxxx continuation byte cannot start a sequence
+
+    unsigned int b = (unsigned char)(*s++);
+    CHECK((b & 0xc0) == 0x80);
+    if ((a & 0x20) == 0) {
+      CHECK((a & 0x1e) > 0); // 2-byte form whose value fits in 7 bits is overlong
+      continue;
+    }
+
+    unsigned int c = (unsigned char)(*s++);
+    CHECK((c & 0xc0) == 0x80);
+    if ((a & 0x10) == 0) {
+      int x = (((a & 0x0f) << 6) | (b & 0x20));
+      CHECK(x != 0 && x != 0x360);//surrogates
+      continue;
+    }
+
+    unsigned int d = (unsigned char)(*s++);
+    CHECK((d & 0xc0) == 0x80);
+    if ((a & 0x08) == 0) {
+      int t = (((a & 0x07) << 6) | (b & 0x30));
+      CHECK(0 < t && t < 0x110);//end of unicode
+      continue;
+    }
+
+    return false; // lead bytes 0xF8 and above are never valid
+#undef CHECK
+  } while (true);
+
+  php_assert (0);
+}
+
+bool f$mb_check_encoding(const string &str, const string &encoding) { // validates str for the 1251 or UTF-8 encoding
+  int encoding_num = mb_detect_encoding(encoding);
+  if (encoding_num < 0) {
+    php_critical_error ("encoding \"%s\" doesn't supported in mb_check_encoding", encoding.c_str());
+    return !str.empty();
+  }
+
+  if (encoding_num == 1251) { // no validation is performed for 1251: every input is accepted
+    return true;
+  }
+
+  return mb_UTF8_check(str.c_str());
+}
+
+
+int64_t f$mb_strlen(const string &str, const string &encoding) { // length of str in characters for the given encoding
+  int encoding_num = mb_detect_encoding(encoding);
+  if (encoding_num < 0) {
+    php_critical_error ("encoding \"%s\" doesn't supported in mb_strlen", encoding.c_str());
+    return str.size();
+  }
+
+  if (encoding_num == 1251) { // handled as one byte per character
+    return str.size();
+  }
+
+  return mb_UTF8_strlen(str.c_str());
+}
+
+
+string f$mb_strtolower(const string &str, const string &encoding) { // lower-cases str; returns str unchanged for an unknown encoding
+  int encoding_num = mb_detect_encoding(encoding);
+  if (encoding_num < 0) {
+    php_critical_error ("encoding \"%s\" doesn't supported in mb_strtolower", encoding.c_str());
+    return str;
+  }
+
+  int len = str.size();
+  if (encoding_num == 1251) { // byte-for-byte mapping, output has the same length as the input
+    string res(len, false);
+    for (int i = 0; i < len; i++) {
+      switch ((unsigned char)str[i]) {
+        case 'A' ... 'Z':
+          res[i] = (char)(str[i] + 'a' - 'A');
+          break;
+        case 0xC0 ... 0xDF:
+          res[i] = (char)(str[i] + 32);
+          break;
+        case 0x81:
+          res[i] = (char)0x83;
+          break;
+        case 0xA3:
+          res[i] = (char)0xBC;
+          break;
+        case 0xA5:
+          res[i] = (char)0xB4;
+          break;
+        case 0xA1:
+        case 0xB2:
+        case 0xBD:
+          res[i] = (char)(str[i] + 1);
+          break;
+        case 0x80:
+        case 0x8A:
+        case 0x8C ... 0x8F:
+        case 0xA8:
+        case 0xAA:
+        case 0xAF:
+          res[i] = (char)(str[i] + 16);
+          break;
+        default:
+          res[i] = str[i];
+      }
+    }
+
+    return res;
+  } else {
+    string res(len * 3, false); // NOTE(review): presumably the worst-case size after case mapping - confirm
+    const char *s = str.c_str();
+    int res_len = 0;
+    int p;
+    int ch;
+    while ((p = get_char_utf8(&ch, s)) > 0) { // decode one code point, map it, re-encode into res
+      s += p;
+      res_len += put_char_utf8(unicode_tolower(ch), &res[res_len]);
+    }
+    if (p < 0) { // a negative result from get_char_utf8 is reported as malformed input
+      php_warning("Incorrect UTF-8 string \"%s\" in function mb_strtolower", str.c_str());
+    }
+    res.shrink(res_len);
+
+    return res;
+  }
+}
+
+string f$mb_strtoupper(const string &str, const string &encoding) { // upper-cases str; returns str unchanged for an unknown encoding
+  int encoding_num = mb_detect_encoding(encoding);
+  if (encoding_num < 0) {
+    php_critical_error ("encoding \"%s\" doesn't supported in mb_strtoupper", encoding.c_str());
+    return str;
+  }
+
+  int len = str.size();
+  if (encoding_num == 1251) { // byte-for-byte mapping, output has the same length as the input
+    string res(len, false);
+    for (int i = 0; i < len; i++) {
+      switch ((unsigned char)str[i]) {
+        case 'a' ... 'z':
+          res[i] = (char)(str[i] + 'A' - 'a');
+          break;
+        case 0xE0 ... 0xFF:
+          res[i] = (char)(str[i] - 32);
+          break;
+        case 0x83:
+          res[i] = (char)(0x81);
+          break;
+        case 0xBC:
+          res[i] = (char)(0xA3);
+          break;
+        case 0xB4:
+          res[i] = (char)(0xA5);
+          break;
+        case 0xA2:
+        case 0xB3:
+        case 0xBE:
+          res[i] = (char)(str[i] - 1);
+          break;
+        case 0x98:
+        case 0xA0:
+        case 0xAD:
+          res[i] = ' '; // these three bytes are replaced by a plain space
+          break;
+        case 0x90:
+        case 0x9A:
+        case 0x9C ... 0x9F:
+        case 0xB8:
+        case 0xBA:
+        case 0xBF:
+          res[i] = (char)(str[i] - 16);
+          break;
+        default:
+          res[i] = str[i];
+      }
+    }
+
+    return res;
+  } else {
+    string res(len * 3, false); // NOTE(review): presumably the worst-case size after case mapping - confirm
+    const char *s = str.c_str();
+    int res_len = 0;
+    int p;
+    int ch;
+    while ((p = get_char_utf8(&ch, s)) > 0) { // decode one code point, map it, re-encode into res
+      s += p;
+      res_len += put_char_utf8(unicode_toupper(ch), &res[res_len]);
+    }
+    if (p < 0) { // a negative result from get_char_utf8 is reported as malformed input
+      php_warning("Incorrect UTF-8 string \"%s\" in function mb_strtoupper", str.c_str());
+    }
+    res.shrink(res_len);
+
+    return res;
+  }
+}
+
+namespace {
+// Validates the arguments shared by mb_strpos()/mb_stripos(); returns the encoding number, or 0 when a check fails.
+int check_strpos_agrs(const char *func_name, const string &needle, int64_t offset, const string &encoding) noexcept {
+  if (unlikely(offset < 0)) {
+    php_warning("Wrong offset = %" PRIi64 " in function %s()", offset, func_name);
+    return 0;
+  }
+  if (unlikely(needle.empty())) {
+    php_warning("Parameter needle is empty in function %s()", func_name);
+    return 0;
+  }
+
+  const int encoding_num = mb_detect_encoding(encoding);
+  if (unlikely(encoding_num < 0)) {
+    php_critical_error ("encoding \"%s\" doesn't supported in %s()", encoding.c_str(), func_name);
+    return 0;
+  }
+  return encoding_num;
+}
+// Finds needle in haystack starting at character offset; the result is a character position, or false when absent.
+Optional<int64_t> mp_strpos_impl(const string &haystack, const string &needle, int64_t offset, int encoding_num) noexcept {
+  if (encoding_num == 1251) { // handled as one byte per character, so the byte-based strpos is used
+    return f$strpos(haystack, needle, offset);
+  }
+
+  int64_t UTF8_offset = mb_UTF8_advance(haystack.c_str(), offset); // character offset -> byte offset
+  const char *s = static_cast<const char *>(memmem(haystack.c_str() + UTF8_offset, haystack.size() - UTF8_offset, needle.c_str(), needle.size()));
+  if (unlikely(s == nullptr)) {
+    return false;
+  }
+  return mb_UTF8_get_offset(haystack.c_str() + UTF8_offset, s - (haystack.c_str() + UTF8_offset)) + offset; // byte distance -> characters
+}
+
+} // namespace
+
+Optional<int64_t> f$mb_strpos(const string &haystack, const string &needle, int64_t offset, const string &encoding) noexcept {
+  if (const int encoding_num = check_strpos_agrs("mb_strpos", needle, offset, encoding)) { // 0 means the arguments were rejected
+    return mp_strpos_impl(haystack, needle, offset, encoding_num);
+  }
+  return false;
+}
+
+Optional<int64_t> f$mb_stripos(const string &haystack, const string &needle, int64_t offset, const string &encoding) noexcept {
+  if (const int encoding_num = check_strpos_agrs("mb_stripos", needle, offset, encoding)) { // 0 means the arguments were rejected
+    return mp_strpos_impl(f$mb_strtolower(haystack, encoding), f$mb_strtolower(needle, encoding), offset, encoding_num); // case-insensitive via lower-casing both
+  }
+  return false;
+}
+
+string f$mb_substr(const string &str, int64_t start, const mixed &length_var, const string &encoding) { // substring measured in characters
+  int encoding_num = mb_detect_encoding(encoding);
+  if (encoding_num < 0) {
+    php_critical_error ("encoding \"%s\" doesn't supported in mb_substr", encoding.c_str());
+    return str;
+  }
+
+  int64_t length;
+  if (length_var.is_null()) { // a null length means "up to the end of the string"
+    length = std::numeric_limits<int64_t>::max();
+  } else {
+    length = length_var.to_int();
+  }
+
+  if (encoding_num == 1251) { // handled as one byte per character, so the byte-based substr is used
+    Optional<string> res = f$substr(str, start, length);
+    if (!res.has_value()) {
+      return {};
+    }
+    return res.val();
+  }
+
+  int64_t len = mb_UTF8_strlen(str.c_str());
+  if (start < 0) { // negative start counts from the end
+    start += len;
+  }
+  if (start > len) {
+    start = len;
+  }
+  if (length < 0) { // negative length leaves that many characters off the end
+    length = len - start + length;
+  }
+  if (length <= 0 || start < 0) {
+    return {};
+  }
+  if (len - start < length) { // clamp to the characters that are actually available
+    length = len - start;
+  }
+
+  int64_t UTF8_start = mb_UTF8_advance(str.c_str(), start); // character positions -> byte positions
+  int64_t UTF8_length = mb_UTF8_advance(str.c_str() + UTF8_start, length);
+
+  return {str.c_str() + UTF8_start, static_cast<string::size_type>(UTF8_length)};
+}
+
+#endif
\ No newline at end of file
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index a2154176a5..52f295e3d4 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -1,20 +1,744 @@
 #pragma once
 
 #include "runtime/kphp_core.h"
+#include "common/type_traits/function_traits.h"
+#include "common/vector-product.h"
+
+#include "runtime/kphp_core.h"
+#include "runtime/math_functions.h"
+#include "runtime/string_functions.h"
+
+bool mb_UTF8_check(const char *s);
+
+#ifdef MBFL
 
 /**
  * Check if strings are valid for the specified encoding
- * @param value The byte stream
- * @param encoding The expected encoding
- * @return Returns true on success or false on failure
+ * Checks if the specified byte stream is valid for the specified encoding. If value is of type array, all keys and values are validated recursively.
+ * It is useful to prevent so-called "Invalid Encoding Attack".
+ * @param array|string value The byte stream
+ * @param ?string encoding (default = null) The expected encoding
+ * @return bool Returns true on success or false on failure
+ */
+bool f$mb_check_encoding(const mixed &value, const Optional<string> &encoding);
+
+/**
+ * Returns a string containing the character specified by the Unicode code point value, encoded in the specified encoding 
+ * @param int codepoint A Unicode codepoint value, e.g. 128024 for U+1F418 ELEPHANT
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding. If it is omitted or null,
+ * the internal character encoding value will be used.
+ * @return string|false A string containing the requested character, if it can be represented in the specified encoding or false on failure.
+ */
+Optional<string> f$mb_chr(const int64_t codepoint, const Optional<string> &encoding);
+
+/**
+ * Perform case folding on a string
+ * @param string str The string being converted
+ * @param int mode The mode of the conversion. It can be one of MB_CASE_UPPER, MB_CASE_LOWER, MB_CASE_TITLE, MB_CASE_FOLD,
+ * MB_CASE_UPPER_SIMPLE, MB_CASE_LOWER_SIMPLE, MB_CASE_TITLE_SIMPLE, MB_CASE_FOLD_SIMPLE
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding. If it is omitted or null,
+ * the internal character encoding value will be used.
+ * @return string A case folded version of string converted in the way specified by mode
+ */
+string f$mb_convert_case(const string &str, const int64_t mode, const Optional<string> &encoding);
+
+/**
+ * Convert from one character encoding to another
+ * @param array|string str The string or array to be converted
+ * @param string to_encoding The desired encoding of the result
+ * @param array|string|null from_encoding (default = null) The current encoding used to interpret string.
+ * Multiple encodings may be specified as an array or comma separated list,
+ * in which case the correct encoding will be guessed using the same algorithm as mb_detect_encoding().
+ * If from_encoding is null or not specified, the mbstring.internal_encoding setting will be used if set, otherwise the default_charset setting.
+ * @return array|string|false The encoded string
+ */
+mixed f$mb_convert_encoding(const mixed &str, const string &to_encoding, const mixed &from_encoding);
+
+/**
+ * Convert "kana" one from another ("zen-kaku", "han-kaku" and more)
+ * @param string str The string being converted
+ * @param string mode The conversion option (default = "KV")
+ * r - Convert "zen-kaku" alphabets to "han-kaku"
+ * R - Convert "han-kaku" alphabets to "zen-kaku"
+ * n - Convert "zen-kaku" numbers to "han-kaku"
+ * N - Convert "han-kaku" numbers to "zen-kaku"
+ * a - Convert "zen-kaku" alphabets and numbers to "han-kaku"
+ * A - Convert "han-kaku" alphabets and numbers to "zen-kaku" 
+ * (Characters included in "a", "A" options are U+0021 - U+007E excluding U+0022, U+0027, U+005C, U+007E)
+ * s - Convert "zen-kaku" space to "han-kaku" (U+3000 -> U+0020)
+ * S - Convert "han-kaku" space to "zen-kaku" (U+0020 -> U+3000)
+ * k - Convert "zen-kaku kata-kana" to "han-kaku kata-kana"
+ * K - Convert "han-kaku kata-kana" to "zen-kaku kata-kana"
+ * h - Convert "zen-kaku hira-gana" to "han-kaku kata-kana"
+ * H - Convert "han-kaku kata-kana" to "zen-kaku hira-gana"
+ * c - Convert "zen-kaku kata-kana" to "zen-kaku hira-gana"
+ * C - Convert "zen-kaku hira-gana" to "zen-kaku kata-kana"
+ * V - Collapse voiced sound notation and convert them into a character. Use with "K","H"
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding.
+ * If it is omitted or null, the internal character encoding value will be used.
+ * @return string The converted string
+ */
+string f$mb_convert_kana(const string &str, const string &mode, const Optional<string> &encoding);
+
+/**
+ * Convert character code in variable(s)
+ * @param string to_encoding The encoding that the string is being converted to
+ * @param array|string from_encoding If specified as an array or comma separated string, the source encoding is detected
+ * from the listed encodings. When from_encoding is omitted, detect_order is used.
+ * @param mixed &vars References to the variable being converted. String, Array are accepted. mb_convert_variables() assumes
+ * all parameters have the same encoding.
+ * @return string|false The character encoding before conversion for success, or false for failure
+ */
+Optional<string> f$mb_convert_variables(const string &to_encoding, const mixed &from_encoding, const mixed &vars); // ???
+
+/**
+ * Decode string in MIME header field
+ * @param string str The string being decoded
+ * @return string The decoded string in internal character encoding
+ */
+string f$mb_decode_mimeheader(const string &string);
+
+/**
+ * Decode HTML numeric string reference to character
+ * @param string str The string being decoded
+ * @param array map An array that specifies the code area to convert
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding.
+ * If it is omitted or null, the internal character encoding value will be used.
+ * @return string The converted string
+ */
+string f$mb_decode_numericentity(const string &str, const array<int> &map, const Optional<string> &encoding);
+
+/**
+ * Detect character encoding
+ * Detects the most likely character encoding for string string from an ordered list of candidates. Automatic detection of the intended character encoding
+ * can never be entirely reliable; without some additional information, it is similar to decoding an encrypted string without the key. It is always preferable
+ * to use an indication of character encoding stored or transmitted with the data, such as a "Content-Type" HTTP header. This function is most useful with
+ * multibyte encodings, where not all sequences of bytes form a valid string. If the input string contains such a sequence, that encoding will be rejected,
+ * and the next encoding checked.
+ * @param string str The string being inspected
+ * @param array|string|null encodings (default = null) A list of character encodings to try, in order. The list may be specified as an array of strings,
+ * or a single string separated by commas. If encodings is omitted or null, the current detect_order (set with the mbstring.detect_order configuration option,
+ * or mb_detect_order() function) will be used.
+ * @param bool strict (default = false) Controls the behaviour when string is not valid in any of the listed encodings.
+ * If strict is set to false, the closest matching encoding will be returned; if strict is set to true, false will be returned.
+ * @return string|false The detected character encoding, or false if the string is not valid in any of the listed encodings.
+ * When strict is false the closest matching encoding is returned instead of false. The default value for strict can be set
+ * with the mbstring.strict_detection configuration option.
+ */
+Optional<string> f$mb_detect_encoding(const string &str, const mixed &encodings, const bool strict = false);
+
+/**
+ * Set/Get character encoding detection order
+ * @param array|string|null encoding (default = null) encoding is an array or comma separated list of character encoding. See supported encodings.
+ * If encoding is omitted or null, it returns the current character encoding detection order as array. This setting affects
+ * mb_detect_encoding() and mb_send_mail().
+ * @return array|bool When setting the encoding detection order, true is returned on success or false on failure.
+ * When getting the encoding detection order, an ordered array of the encodings is returned.
+ */
+mixed f$mb_detect_order(const mixed &encoding);
+
+/**
+ * Encode string for MIME header
+ * @param string str The string being encoded. Its encoding should be same as mb_internal_encoding()
+ * @param ?string charset (default = null) Specifies the name of the character set in which string is represented in.
+ * The default value is determined by the current NLS setting (mbstring.language)
+ * @param ?string transfer_encoding (default = null) Specifies the scheme of MIME encoding.
+ * It should be either "B" (Base64) or "Q" (Quoted-Printable). Falls back to "B" if not given.
+ * @param string newline (default = "\r\n") Specifies the EOL (end-of-line) marker with which mb_encode_mimeheader() performs line-folding 
+ * (a » RFC term, the act of breaking a line longer than a certain length into multiple lines. The length is currently hard-coded to 74 characters).
+ * Falls back to "\r\n" (CRLF) if not given.
+ * @param int indent (default = 0) Indentation of the first line (number of characters in the header before string)
+ * @return string A converted version of the string represented in ASCII
+ */
+string f$mb_encode_mimeheader(const string &str, const Optional<string> &charset, const Optional<string> &transfer_encoding, const string &newline, const int64_t indent);
+
+/**
+ * Encode character to HTML numeric string reference
+ * Converts specified character codes in string string from character code to HTML numeric character reference
+ * @param string str The string being encoded
+ * @param array map An array that specifies the code area to convert
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding. If it is omitted or null, the internal character encoding value will be used
+ * @param bool hex (default = false) Whether the returned entity reference should be in hexadecimal notation (otherwise it is in decimal notation)
+ * @return string The converted string
+ */
+string f$mb_encode_numericentity(const string &str, const array<int> &map, const Optional<string> &encoding, const bool hex = false);
+
+/**
+ * Get aliases of a known encoding type
+ * @param string encoding The encoding type being checked, for aliases
+ * @return array Returns a numerically indexed array of encoding aliases
+ */
+array<string> f$mb_encoding_aliases(const string &encoding);
+
+/**
+ * Regular expression match for multibyte string
+ * @param string pattern The regular expression pattern
+ * @param string str The string being evaluated
+ * @param ?string options (default = null) The search option. See mb_regex_set_options() for explanation
+ * @return bool Returns true if string matches the regular expression pattern, false if not
+ */
+bool f$mb_ereg_match(const string &pattern, const string &str, const Optional<string> &options);
+
+/**
+ * Perform a regular expression search and replace with multibyte support using a callback
+ * Scans string for matches to pattern, then replaces the matched text with the output of callback function.
+ * The behavior of this function is almost identical to mb_ereg_replace(), except for the fact that instead of replacement parameter,
+ * one should specify a callback.
+ * @param string pattern The regular expression pattern. Multibyte characters may be used in pattern.
+ * @param callable callback A callback that will be called and passed an array of matched elements in the subject string. 
+ * The callback should return the replacement string. You'll often need the callback function for a mb_ereg_replace_callback() in just one place. 
+ * In this case you can use an anonymous function to declare the callback within the call to mb_ereg_replace_callback(). 
+ * By doing it this way you have all information for the call in one place and do not clutter the function namespace with a callback
+ * function's name not used anywhere else.
+ * @param string str The string being checked
+ * @param ?string options (default = null) The search option. See mb_regex_set_options() for explanation
+ * @return string|false|null The resultant string on success, or false on error. If string is not valid for the current encoding, null is returned
  */
-bool f$mb_check_encoding(const string &value, const string &encoding);
+// Optional<string> f$mb_ereg_replace_callback(const string &pattern, const CallableT &callback, const string &str, const Optional<string> options); // callback
 
 /**
- * Convert a string from one character encoding to another
- * @param str The string to be converted
- * @param from The desired encoding of the result
- * @param to The current encoding used to interpret string
- * @return The encoded string
+ * Replace regular expression with multibyte support
+ * Scans string for matches to pattern, then replaces the matched text with replacement
+ * @param string pattern The regular expression pattern. Multibyte characters may be used in pattern
+ * @param string replacement The replacement text
+ * @param string str The string being checked
+ * @param ?string options (default = null) The search option. See mb_regex_set_options() for explanation
+ * @return string|false|null The resultant string on success, or false on error. If string is not valid for the current encoding, null is returned
  */
-string f$mb_convert_encoding(const string &str, const string &to, const string &from);
\ No newline at end of file
+Optional<string> f$mb_ereg_replace(const string &pattern, const string &replacement, const string &str, const Optional<string> &options);
+
+/**
+ * Returns start point for next regular expression match
+ * @return int mb_ereg_search_getpos() returns the point to start regular expression match for mb_ereg_search(), mb_ereg_search_pos(), mb_ereg_search_regs().
+ * The position is represented by bytes from the head of string.
+ */
+int64_t f$mb_ereg_search_getpos(void);
+
+/**
+ * Retrieve the result from the last multibyte regular expression match
+ * @return array|false An array including the sub-string of matched part by last mb_ereg_search(), mb_ereg_search_pos(), mb_ereg_search_regs().
+ * If there are some matches, the first element will have the matched sub-string, the second element will have the first part grouped with brackets,
+ * the third element will have the second part grouped with brackets, and so on. It returns false on error.
+ */
+Optional<array<string>> f$mb_ereg_search_getregs(void);
+
+/**
+ * Setup string and regular expression for a multibyte regular expression match
+ * mb_ereg_search_init() sets string and pattern for a multibyte regular expression.
+ * These values are used for mb_ereg_search(), mb_ereg_search_pos(),and mb_ereg_search_regs().
+ * @param string str The search string
+ * @param ?string pattern (default = null) The search pattern
+ * @param ?string options (default = null) The search option. See mb_regex_set_options() for explanation
+ * @return bool Returns true on success or false on failure
+ */
+bool f$mb_ereg_search_init(const string &str, const Optional<string> &pattern, const Optional<string> &options);
+
+/**
+ * Returns position and length of a matched part of the multibyte regular expression for a predefined multibyte string
+ * The string for match is specified by mb_ereg_search_init(). If it is not specified, the previous one will be used
+ * @param ?string pattern (default = null) The search pattern
+ * @param ?string options (default = null) The search option. See mb_regex_set_options() for explanation
+ * @return array|false An array containing two elements. The first element is the offset, in bytes, where the match begins relative to the start of
+ * the search string, and the second element is the length in bytes of the match. If an error occurs, false is returned.
+ */
+Optional<array<int64_t>> f$mb_ereg_search_pos(const Optional<string> &pattern, const Optional<string> &options);
+
+/**
+ * Returns the matched part of a multibyte regular expression
+ * @param ?string pattern (default = null) The search pattern
+ * @param ?string options (default = null) The search option. See mb_regex_set_options() for explanation
+ * @return array|false mb_ereg_search_regs() executes the multibyte regular expression match, and if there are some matched part,
+ * it returns an array including substring of matched part as first element, the first grouped part with brackets as second element,
+ * the second grouped part as third element, and so on. It returns false on error.
+ */
+Optional<array<string>> f$mb_ereg_search_regs(const Optional<string> &pattern, const Optional<string> &options);
+
+/**
+ * Set start point of next regular expression match
+ * mb_ereg_search_setpos() sets the starting point of a match for mb_ereg_search().
+ * @param int offset The position to set. If it is negative, it counts from the end of the string
+ * @return bool Returns true on success or false on failure
+ */
+bool f$mb_ereg_search_setpos(const int64_t offset);
+
+/**
+ * Multibyte regular expression match for predefined multibyte string
+ * @param ?string pattern (default = null) The search pattern
+ * @param ?string options (default = null) The search option. See mb_regex_set_options() for explanation
+ * @return bool mb_ereg_search() returns true if the multibyte string matches with the regular expression, or false otherwise. The string for matching
+ * is set by mb_ereg_search_init(). If pattern is not specified, the previous one is used.
+ */
+bool f$mb_ereg_search(const Optional<string> &pattern, const Optional<string> &options);
+
+/**
+ * Regular expression match with multibyte support
+ * @param string pattern The search pattern
+ * @param string str The search string
+ * @param array matches (default = null) If matches are found for parenthesized substrings of pattern and the function is called with the
+ * third argument matches, the matches will be stored in the elements of the array matches. If no matches are found, matches is set to an empty array.
+ * matches[1] will contain the substring which starts at the first left parenthesis; $matches[2] will contain the substring starting at the second,
+ * and so on. $matches[0] will contain a copy of the complete string matched.
+ * @return bool Returns whether pattern matches string
+ */
+bool f$mb_ereg(const string &pattern, const string &str, const array<string> &matches);
+
+/**
+ * Replace regular expression with multibyte support ignoring case
+ * Scans string for matches to pattern, then replaces the matched text with replacement
+ * @param string pattern The regular expression pattern. Multibyte characters may be used. The case will be ignored
+ * @param string replacement The replacement text
+ * @param string str The searched string
+ * @param ?string options (default = null) The search option. See mb_regex_set_options() for explanation
+ * @return string|false|null The resultant string or false on error. If string is not valid for the current encoding, null is returned
+ */
+Optional<string> f$mb_eregi_replace(const string &pattern, const string &replacement, const string &str, const Optional<string> &options);
+
+/**
+ * Regular expression match ignoring case with multibyte support
+ * @param string pattern The regular expression pattern
+ * @param string str The string being searched
+ * @param array matches (default = null) If matches are found for parenthesized substrings of pattern and the function is called with the third argument matches,
+ * the matches will be stored in the elements of the array matches. If no matches are found, matches is set to an empty array.
+ * $matches[1] will contain the substring which starts at the first left parenthesis; $matches[2] will contain the substring starting at the second,
+ * and so on. $matches[0] will contain a copy of the complete string matched.
+ * @return bool Returns whether pattern matches string
+ */
+bool f$mb_eregi(const string &pattern, const string &str, const array<string> &matches); // NOTE(review): matches is documented as an output but is taken by const reference - confirm the intended signature
+
+/**
+ * Get internal settings of mbstring
+ * @param string type (default = "all") If type is not specified or is specified as "all", "internal_encoding", "http_input", "http_output",
+ * "http_output_conv_mimetypes", "mail_charset", "mail_header_encoding", "mail_body_encoding", "illegal_chars", "encoding_translation", "language",
+ * "detect_order", "substitute_character" and "strict_detection" will be returned.
+ * If type is specified as "internal_encoding", "http_input", "http_output", "http_output_conv_mimetypes", "mail_charset", "mail_header_encoding",
+ * "mail_body_encoding", "illegal_chars", "encoding_translation", "language", "detect_order", "substitute_character" or "strict_detection"
+ * the specified setting parameter will be returned.
+ * @return array|string|int|false An array of type information if type is not specified, otherwise a specific type, or false on failure
+ */
+mixed f$mb_get_info(const string &type);
+
+/**
+ * Detect HTTP input character encoding
+ * @param ?string type (default = null) Input string specifies the input type. "G" for GET, "P" for POST, "C" for COOKIE, "S" for string,
+ * "L" for list, and "I" for the whole list (will return array). If type is omitted, it returns the last input type processed.
+ * @return array|string|false The character encoding name, as per the type, or an array of character encoding names, if type is "I".
+ * If mb_http_input() does not process specified HTTP input, it returns false.
+ */
+mixed f$mb_http_input(const Optional<string> &type);
+
+/**
+ * Set/Get the HTTP output character encoding. Output after this function is called will be converted from the set internal encoding to encoding
+ * @param ?string encoding (default = null) If encoding is set, mb_http_output() sets the HTTP output character encoding to encoding.
+ * If encoding is omitted, mb_http_output() returns the current HTTP output character encoding.
+ * @return string|bool If encoding is omitted, mb_http_output() returns the current HTTP output character encoding. Otherwise,
+ * returns true on success or false on failure.
+ */
+mixed f$mb_http_output(const Optional<string> &encoding);
+
+/**
+ * Set/Get internal character encoding
+ * @param ?string encoding (default = null) encoding is the character encoding name used for the HTTP input character encoding conversion,
+ * HTTP output character encoding conversion, and the default character encoding for string functions defined by the mbstring module.
+ * You should notice that the internal encoding is totally different from the one for multibyte regex.
+ * @return string|bool If encoding is set, then returns true on success or false on failure.
+ * In this case, the character encoding for multibyte regex is NOT changed.
+ * If encoding is omitted, then the current character encoding name is returned.
+ */
+mixed f$mb_internal_encoding(const Optional<string> &encoding);
+
+/**
+ * Set/Get the current language
+ * @param ?string language (default = null) Used for encoding e-mail messages. The valid languages are listed in the following table.
+ * mb_send_mail() uses this setting to encode e-mail.
+ * +---------------------------+-------------+------------------+-----------+
+ * | Language                  | Charset     | Encoding         | Alias     |
+ * +---------------------------+-------------+------------------+-----------+
+ * | German/de                 | ISO-8859-15 | Quoted-Printable | Deutsch   |
+ * | English/en                | ISO-8859-1  | Quoted-Printable |           |
+ * | Armenian/hy               | ArmSCII-8   | Quoted-Printable |           |
+ * | Japanese/ja               | ISO-2022-JP | BASE64           |           |
+ * | Korean/ko                 | ISO-2022-KR | BASE64           |           |
+ * | neutral                   | UTF-8       | BASE64           |           |
+ * | Russian/ru                | KOI8-R      | Quoted-Printable |           |
+ * | Turkish/tr                | ISO-8859-9  | Quoted-Printable |           |
+ * | Ukrainian/ua              | KOI8-U      | Quoted-Printable |           |
+ * | uni                       | UTF-8       | BASE64           | universal |
+ * | Simplified Chinese/zh-cn  | HZ          | BASE64           |           |
+ * | Traditional Chinese/zh-tw | BIG-5       | BASE64           |           |   
+ * +---------------------------+-------------+------------------+-----------+
+ * @return string|bool If language is set and language is valid, it returns true. Otherwise, it returns false. When language is omitted or null,
+ * it returns the language name as a string
+ */
+mixed f$mb_language(const Optional<string> &language);
+
+/**
+ * Returns an array of all supported encodings
+ * @return array Returns a numerically indexed array
+ */
+array<string> f$mb_list_encodings(void);
+
+/**
+ * Returns the Unicode code point value of the given character. This function complements mb_chr().
+ * @param string str A string
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding. If it is omitted or null,
+ * the internal character encoding value will be used.
+ * @return int|false The Unicode code point for the first character of string or false on failure.
+ */
+Optional<int> f$mb_ord(const string &str, const Optional<string> &encoding);
+
+/**
+ * mb_output_handler() is ob_start() callback function. mb_output_handler() converts characters in the output buffer from internal
+ * character encoding to HTTP output character encoding.
+ * @param string str The contents of the output buffer
+ * @param int status The status of the output buffer
+ * @return string The converted string
+ */
+string f$mb_output_handler(const string &str, const int64_t status);
+
+/**
+ * Parses GET/POST/COOKIE data and sets global variables. Since PHP does not provide raw POST/COOKIE data, it can only be used for GET data for now.
+ * It parses URL encoded data, detects encoding, converts coding to internal encoding and set values to the result array or global variables.
+ * @param string str The URL encoded data
+ * @param array result An array containing decoded and character encoded converted values
+ * @return bool Returns true on success or false on failure
+ */
+bool f$mb_parse_str(const string &str, const array<string> &result); // result = map<string, string>
+
+/**
+ * Get a MIME charset string for a specific encoding.
+ * @param string encoding The encoding being checked
+ * @return string|false The MIME charset string for character encoding encoding, or false if no charset is preferred for the given encoding
+ */
+Optional<string> f$mb_preferred_mime_name(const string &encoding);
+
+/**
+ * Set/Get character encoding for a multibyte regex
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding. If it is omitted or null,
+ * the internal character encoding value will be used
+ * @return string|bool If encoding is set, then returns true on success or false on failure. In this case, the internal character encoding is NOT changed.
+ * If encoding is omitted, then the current character encoding name for a multibyte regex is returned
+ */
+mixed f$mb_regex_encoding(const Optional<string> &encoding);
+
+/**
+ * Sets the default options described by options for multibyte regex functions
+ * @param ?string options (default = null) The options to set. This is a string where each character is an option.
+ * To set a mode, the mode character must be the last one set, however there can only be set one mode but multiple options
+ * 
+ * Regex options:
+ * +--------+----------------------------------+
+ * | Option | Meaning                          |
+ * +--------+----------------------------------+ 
+ * | i      | Ambiguity match on               |
+ * | x      | Enables extended pattern form    |
+ * | m      | '.' matches with newlines        |
+ * | s      | '^' -> '\A', '$' -> '\Z'         |
+ * | p      | Same as both the m and s options |
+ * | l      | Finds longest matches            |
+ * | n      | Ignores empty matches            |
+ * | e      | eval() resulting code            |
+ * +--------+----------------------------------+
+ * 
+ * Regex syntax modes:
+ * +------+----------------------------+
+ * | Mode | Meaning                    |
+ * +------+----------------------------+
+ * | j    | Java (Sun java.util.regex) |
+ * | u    | GNU regex                  | 
+ * | g    | grep                       |
+ * | c    | Emacs                      |
+ * | r    | Ruby                       |
+ * | z    | Perl                       |
+ * | b    | POSIX Basic regex          |
+ * | d    | POSIX Extended regex       |
+ * +------+----------------------------+
+ *
+ * @return string The previous options. If options is omitted or null, it returns the string that describes the current options
+ */
+string f$mb_regex_set_options(const Optional<string> &options);
+
+/**
+ * This function is currently not documented; only its argument list is available.
+ * @param string str
+ * @param ?string encoding (default = null)
+ * @return string
+ */
+string f$mb_scrub(const string &str, const Optional<string> &encoding);
+
+/**
+ * Sends email. Headers and messages are converted and encoded according to the mb_language() setting.
+ * It's a wrapper function for mail(), so see also mail() for detail
+ * @param string to The mail addresses being sent to. Multiple recipients may be specified by putting a comma between each address in to.
+ * This parameter is not automatically encoded
+ * @param string subject The subject of the mail
+ * @param string message The message of the mail
+ * @param array|string additional_headers (default = []) String or array to be inserted at the end of the email header.
+ * This is typically used to add extra headers (From, Cc, and Bcc). Multiple extra headers should be separated with a CRLF (\r\n).
+ * Validate parameter not to be injected unwanted headers by attackers. If an array is passed, its keys are the header names and its
+ * values are the respective header values
+ * Note:
+ * If messages are not received, try using a LF (\n) only. Some Unix mail transfer agents (most notably qmail) replace LF by CRLF automatically
+ * (which leads to doubling CR if CRLF is used). This should be a last resort, as it does not comply with RFC 2822.
+ * @param ?string additional_params (default = null) additional_params is a MTA command line parameter. It is useful when setting the correct Return-Path header
+ * when using sendmail. This parameter is escaped by escapeshellcmd() internally to prevent command execution. escapeshellcmd() prevents command execution,
+ * but allows to add additional parameters. For security reason, this parameter should be validated. Since escapeshellcmd() is applied automatically,
+ * some characters that are allowed as email addresses by internet RFCs cannot be used. mail() cannot be used by programs that are required to use these characters.
+ * The user that the webserver runs as should be added as a trusted user to the sendmail configuration to prevent a 'X-Warning' header from being added to
+ * the message when the envelope sender (-f) is set using this method. For sendmail users, this file is /etc/mail/trusted-users
+ * @return bool Returns true on success or false on failure
+ */
+bool f$mb_send_mail(const string &to, const string &subject, const string &message, const mixed &additional_headers, const Optional<string> &additional_params);
+
+/**
+ * Split a multibyte string using regular expression pattern and returns the result as an array
+ * @param string pattern The regular expression pattern
+ * @param string str The string being split
+ * @param int limit (default = -1) If optional parameter limit is specified, it will be split in limit elements as maximum
+ * @return array|false The result as an array, or false on failure
+ */
+Optional<array> f$mb_split(const string &pattern, const string &str, const int64_t limit = -1); // NOTE(review): array is used as a template elsewhere in this header (array<string>); the element type appears to be missing here - confirm
+
+/**
+ * This function will return an array of strings, it is a version of str_split() with support for encodings of variable character size as well
+ * as fixed-size encodings of 1,2 or 4 byte characters. If the length parameter is specified, the string is broken down into chunks of the specified
+ * length in characters (not bytes). The encoding parameter can be optionally specified and it is good practice to do so
+ * @param string str The string to split into characters or chunks
+ * @param int length (default = 1) If specified, each element of the returned array will be composed of multiple characters instead of a single character
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding. If it is omitted or null, the internal character encoding value
+ * will be used. A string specifying one of the supported encodings
+ * @return array mb_str_split() returns an array of strings
+ */
+array<string> f$mb_str_split(const string &str, const int64_t length, const Optional<string> &encoding);
+
+/**
+ * mb_strcut() extracts a substring from a string similarly to mb_substr(), but operates on bytes instead of characters.
+ * If the cut position happens to be between two bytes of a multi-byte character, the cut is performed starting from the first byte of that character.
+ * This is also the difference to the substr() function, which would simply cut the string between the bytes and thus result in a malformed byte sequence
+ * @param string str The string being cut
+ * @param int start If start is non-negative, the returned string will start at the start'th byte position in string, counting from zero.
+ * For instance, in the string 'abcdef', the byte at position 0 is 'a', the byte at position 2 is 'c', and so forth.
+ * If start is negative, the returned string will start at the start'th byte counting back from the end of string.
+ * However, if the magnitude of a negative start is greater than the length of the string, the returned portion will start from the beginning of string
+ * @param ?int length (default = null) Length in bytes. If omitted or NULL is passed, extract all bytes to the end of the string.
+ * If length is negative, the returned string will end at the length'th byte counting back from the end of string.
+ * However, if the magnitude of a negative length is greater than the number of characters after the start position, an empty string will be returned
+ * @param ?string encoding The encoding parameter is the character encoding. If it is omitted or null, the internal character encoding value will be used
+ * @return string mb_strcut() returns the portion of string specified by the start and length parameters
+ */
+string f$mb_strcut(const string &str, const int64_t start, const Optional<int64_t> &length, const Optional<string> &encoding);
+
+/**
+ * Truncates the string str to the specified width, where halfwidth characters count as 1, and fullwidth characters count as 2.
+ * See http://www.unicode.org/reports/tr11/ for details regarding East Asian character widths
+ * @param string str The string being decoded
+ * @param int start The start position offset. Number of characters from the beginning of string (first character is 0),
+ * or if start is negative, number of characters from the end of the string
+ * @param int width The width of the desired trim. Negative widths count from the end of the string
+ * @param string trim_marker (default = "") A string that is added to the end of string when string is truncated
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding. If it is omitted or null,
+ * the internal character encoding value will be used
+ * @return string The truncated string. If trim_marker is set, trim_marker replaces the last chars to match the width
+ */
+string f$mb_strimwidth(const string &str, const int64_t start, const int64_t width, const string &trim_marker, const Optional<string> &encoding);
+
+/**
+ * mb_stripos() returns the numeric position of the first occurrence of needle in the haystack string. Unlike mb_strpos(),
+ * mb_stripos() is case-insensitive. If needle is not found, it returns false
+ * @param string haystack The string from which to get the position of the first occurrence of needle
+ * @param string needle The string to find in haystack
+ * @param int offset (default = 0) The position in haystack to start searching. A negative offset counts from the end of the string
+ * @param ?string encoding (default = null) Character encoding name to use. If it is omitted, internal character encoding is used
+ * @return int|false Return the numeric position of the first occurrence of needle in the haystack string, or false if needle is not found
+ */
+Optional<int> f$mb_stripos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
+
+/**
+ * mb_stristr() finds the first occurrence of needle in haystack and returns the portion of haystack.
+ * Unlike mb_strstr(), mb_stristr() is case-insensitive. If needle is not found, it returns false
+ * @param string haystack The string from which to get the first occurrence of needle
+ * @param string needle The string to find in haystack
+ * @param bool before_needle (default = false) Determines which portion of haystack this function returns.
+ * If set to true, it returns all of haystack from the beginning to the first occurrence of needle (excluding needle).
+ * If set to false, it returns all of haystack from the first occurrence of needle to the end (including needle)
+ * @param ?string encoding (default = null) Character encoding name to use. If it is omitted, internal character encoding is used
+ * @return string|false Returns the portion of haystack, or false if needle is not found
+ */
+Optional<string> f$mb_stristr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
+
+/**
+ * Gets the length of a string
+ * @param string str The string being checked for length
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding.
+ * If it is omitted or null, the internal character encoding value will be used
+ * @return int Returns the number of characters in str when interpreted in the given encoding. A multi-byte character is counted as 1
+ */
+int64_t f$mb_strlen(const string &str, const Optional<string> &encoding);
+
+/**
+ * Finds position of the first occurrence of a string in a string. Performs a multi-byte safe strpos() operation based on number of characters.
+ * The first character's position is 0, the second character position is 1, and so on
+ * @param string haystack The string being checked
+ * @param string needle The string to find in haystack. In contrast with strpos(), numeric values are not applied as the ordinal value of a character
+ * @param int offset (default = 0) The search offset. If it is not specified, 0 is used. A negative offset counts from the end of the string
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding. If it is omitted or null,
+ * the internal character encoding value will be used
+ * @return int|false Returns the numeric position of the first occurrence of needle in the haystack string. If needle is not found, it returns false
+ */
+Optional<int> f$mb_strpos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
+
+/**
+ * mb_strrchr() finds the last occurrence of needle in haystack and returns the portion of haystack. If needle is not found, it returns false
+ * @param string haystack The string from which to get the last occurrence of needle
+ * @param string needle The string to find in haystack
+ * @param bool before_needle Determines which portion of haystack this function returns.
+ * If set to true, it returns all of haystack from the beginning to the last occurrence of needle.
+ * If set to false, it returns all of haystack from the last occurrence of needle to the end
+ * @param ?string encoding (default = null) Character encoding name to use. If it is omitted, internal character encoding is used
+ * @return string|false Returns the portion of haystack, or false if needle is not found
+ */
+Optional<string> f$mb_strrchr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
+
+/**
+ * mb_strrichr() finds the last occurrence of needle in haystack and returns the portion of haystack. Unlike mb_strrchr(), mb_strrichr() is case-insensitive.
+ * If needle is not found, it returns false
+ * @param string haystack The string from which to get the last occurrence of needle
+ * @param string needle The string to find in haystack
+ * @param bool before_needle Determines which portion of haystack this function returns.
+ * If set to true, it returns all of haystack from the beginning to the last occurrence of needle.
+ * If set to false, it returns all of haystack from the last occurrence of needle to the end
+ * @param ?string encoding (default = null) Character encoding name to use. If it is omitted, internal character encoding is used
+ * @return string|false Returns the portion of haystack, or false if needle is not found
+ */
+Optional<string> f$mb_strrichr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
+
+/**
+ * mb_strripos() performs multi-byte safe strripos() operation based on number of characters. needle position is counted from the beginning of haystack.
+ * First character's position is 0. Second character position is 1. Unlike mb_strrpos(), mb_strripos() is case-insensitive
+ * @param string haystack The string from which to get the position of the last occurrence of needle
+ * @param string needle The string to find in haystack
+ * @param int offset The position in haystack to start searching
+ * @param ?string encoding (default = null) Character encoding name to use. If it is omitted, internal character encoding is used
+ * @return int|false Return the numeric position of the last occurrence of needle in the haystack string, or false if needle is not found
+ */
+Optional<int> f$mb_strripos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
+
+/**
+ * Performs a multibyte safe strrpos() operation based on the number of characters. needle position is counted from the beginning of haystack.
+ * First character's position is 0. Second character position is 1
+ * @param string haystack The string being checked, for the last occurrence of needle
+ * @param string needle The string to find in haystack
+ * @param int offset (default = 0) May be specified to begin searching an arbitrary number of characters into the string. Negative values will stop searching at an arbitrary point prior to the end of the string
+ * @param ?string encoding The encoding parameter is the character encoding. If it is omitted or null, the internal character encoding value will be used
+ * @return int|false Returns the numeric position of the last occurrence of needle in the haystack string. If needle is not found, it returns false
+ */
+Optional<int> f$mb_strrpos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
+
+/**
+ * mb_strstr() finds the first occurrence of needle in haystack and returns the portion of haystack. If needle is not found, it returns false
+ * @param string haystack The string from which to get the first occurrence of needle
+ * @param string needle The string to find in haystack
+ * @param bool before_needle Determines which portion of haystack this function returns.
+ * If set to true, it returns all of haystack from the beginning to the first occurrence of needle (excluding needle).
+ * If set to false, it returns all of haystack from the first occurrence of needle to the end (including needle)
+ * @param ?string encoding (default = null) Character encoding name to use. If it is omitted, internal character encoding is used
+ * @return string|false Returns the portion of haystack, or false if needle is not found
+ */
+Optional<string> f$mb_strstr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
+
+/**
+ * Returns string with all alphabetic characters converted to lowercase
+ * @param string str The string being lowercased
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding.
+ * If it is omitted or null, the internal character encoding value will be used
+ * @return string string with all alphabetic characters converted to lowercase
+ */
+string f$mb_strtolower(const string &str, const Optional<string> &encoding);
+
+/**
+ * Returns string with all alphabetic characters converted to uppercase.
+ * @param string str The string being uppercased
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding.
+ * If it is omitted or null, the internal character encoding value will be used
+ * @return string string with all alphabetic characters converted to uppercase
+ */
+string f$mb_strtoupper(const string &str, const Optional<string> &encoding);
+
+/**
+ * Returns the width of the string str, where halfwidth characters count as 1, and fullwidth characters count as 2.
+ * See http://www.unicode.org/reports/tr11/ for details regarding East Asian character widths. The fullwidth characters are:
+ * U+1100-U+115F, U+11A3-U+11A7, U+11FA-U+11FF, U+2329-U+232A, U+2E80-U+2E99, U+2E9B-U+2EF3, U+2F00-U+2FD5, U+2FF0-U+2FFB, U+3000-U+303E, U+3041-U+3096,
+ * U+3099-U+30FF, U+3105-U+312D, U+3131-U+318E, U+3190-U+31BA, U+31C0-U+31E3, U+31F0-U+321E, U+3220-U+3247, U+3250-U+32FE, U+3300-U+4DBF, U+4E00-U+A48C,
+ * U+A490-U+A4C6, U+A960-U+A97C, U+AC00-U+D7A3, U+D7B0-U+D7C6, U+D7CB-U+D7FB, U+F900-U+FAFF, U+FE10-U+FE19, U+FE30-U+FE52, U+FE54-U+FE66, U+FE68-U+FE6B,
+ * U+FF01-U+FF60, U+FFE0-U+FFE6, U+1B000-U+1B001, U+1F200-U+1F202, U+1F210-U+1F23A, U+1F240-U+1F248, U+1F250-U+1F251, U+20000-U+2FFFD, U+30000-U+3FFFD.
+ * All other characters are halfwidth characters
+ * @param string str The string being decoded
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding.
+ * If it is omitted or null, the internal character encoding value will be used
+ * @return int The width of the string str
+ */
+int64_t f$mb_strwidth(const string &str, const Optional<string> &encoding);
+
+/**
+ * Specifies a substitution character when input character encoding is invalid or character code does not exist in output character encoding.
+ * Invalid characters may be substituted "none" (no output), string or int value (Unicode character code value).
+ * This setting affects mb_convert_encoding(), mb_convert_variables(), mb_output_handler(), and mb_send_mail()
+ * @param string|int|null substitute_character (default = null) Specify the Unicode value as an int, or as one of the following strings:
+ * "none": no output
+ * "long": Output character code value (Example: U+3000, JIS+7E7E)
+ * "entity": Output character entity (Example: &#x200;)
+ * @return string|int|bool If substitute_character is set, it returns true for success, otherwise returns false.
+ * If substitute_character is not set, it returns the current setting
+ */
+mixed f$mb_substitute_character(const mixed &substitute_character);
+
+/**
+ * Counts the number of times the needle substring occurs in the haystack string
+ * @param string haystack The string being checked
+ * @param string needle The string being found
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding.
+ * If it is omitted or null, the internal character encoding value will be used
+ * @return int The number of times the needle substring occurs in the haystack string
+ */
+int64_t f$mb_substr_count(const string &haystack, const string &needle, const Optional<string> &encoding);
+
+/**
+ * Performs a multi-byte safe substr() operation based on number of characters. Position is counted from the beginning of string.
+ * First character's position is 0. Second character position is 1, and so on
+ * @param string str The string to extract the substring from
+ * @param int start If start is non-negative, the returned string will start at the start'th position in string, counting from zero.
+ * For instance, in the string 'abcdef', the character at position 0 is 'a', the character at position 2 is 'c', and so forth.
+ * If start is negative, the returned string will start at the start'th character from the end of string
+ * @param ?int length (default = null) Maximum number of characters to use from string.
+ * If omitted or NULL is passed, extract all characters to the end of the string
+ * @param ?string encoding (default = null) The encoding parameter is the character encoding.
+ * If it is omitted or null, the internal character encoding value will be used
+ * @return string mb_substr() returns the portion of string specified by the start and length parameters
+ */
+string f$mb_substr(const string &str, const int64_t start, const Optional<int64_t> &length, const Optional<string> &encoding);
+
+#else
+
+#include <climits>
+
+#include "runtime/kphp_core.h"
+#include "runtime/string_functions.h"
+
+bool f$mb_check_encoding(const string &str, const string &encoding = CP1251);
+
+int64_t f$mb_strlen(const string &str, const string &encoding = CP1251);
+
+string f$mb_strtolower(const string &str, const string &encoding = CP1251);
+
+string f$mb_strtoupper(const string &str, const string &encoding = CP1251);
+
+Optional<int64_t> f$mb_strpos(const string &haystack, const string &needle, int64_t offset = 0, const string &encoding = CP1251) noexcept;
+
+Optional<int64_t> f$mb_stripos(const string &haystack, const string &needle, int64_t offset = 0, const string &encoding = CP1251) noexcept;
+
+string f$mb_substr(const string &str, int64_t start, const mixed &length = std::numeric_limits<int64_t>::max(), const string &encoding = CP1251); // NOTE(review): std::numeric_limits is declared in <limits>, but only <climits> is included directly here - presumably it arrives transitively; confirm
+
+void f$set_detect_incorrect_encoding_names_warning(bool show);
+
+void free_detect_incorrect_encoding_names();
+
+#endif
\ No newline at end of file
diff --git a/runtime/regexp.h b/runtime/regexp.h
index 8c20fe98ad..5eb579b447 100644
--- a/runtime/regexp.h
+++ b/runtime/regexp.h
@@ -9,7 +9,7 @@
 #include "common/mixin/not_copyable.h"
 
 #include "runtime/kphp_core.h"
-#include "runtime/mbstring.h"
+#include "runtime/mbstring/mbstring.h"
 
 namespace re2 {
 class RE2;
diff --git a/runtime/runtime.cmake b/runtime/runtime.cmake
index 16f8a55283..3df11e3680 100644
--- a/runtime/runtime.cmake
+++ b/runtime/runtime.cmake
@@ -49,10 +49,8 @@ prepend(KPHP_RUNTIME_PDO_PGSQL_SOURCES pdo/pgsql/
         pgsql_pdo_emulated_statement.cpp)
 endif()
 
-if (MBFL)
 prepend(KPHP_RUNTIME_MBSTRING_SOURCES mbstring/
         mbstring.cpp)
-endif()
 
 prepend(KPHP_RUNTIME_SOURCES ${BASE_DIR}/runtime/
         ${KPHP_RUNTIME_MBSTRING_SOURCES}
@@ -88,7 +86,6 @@ prepend(KPHP_RUNTIME_SOURCES ${BASE_DIR}/runtime/
         kphp-backtrace.cpp
         mail.cpp
         math_functions.cpp
-        mbstring.cpp
         memcache.cpp
         memory_usage.cpp
         migration_php8.cpp
@@ -150,6 +147,7 @@ if (MBFL)
     add_dependencies(kphp_runtime libmbfl)
 endif()
 
+
 prepare_cross_platform_libs(RUNTIME_LIBS yaml-cpp re2 zstd h3) # todo: linking between static libs is no-op, is this redundant? do we need to add mysqlclient here?
 set(RUNTIME_LIBS vk::kphp_runtime vk::kphp_server vk::popular_common vk::unicode vk::common_src vk::binlog_src vk::net_src ${RUNTIME_LIBS} OpenSSL::Crypto m z pthread)
 vk_add_library(kphp-full-runtime STATIC)
diff --git a/tests/cpp/runtime/mbstring-test.cpp b/tests/cpp/runtime/mbstring-test.cpp
index 42ab014a48..2a0a484302 100644
--- a/tests/cpp/runtime/mbstring-test.cpp
+++ b/tests/cpp/runtime/mbstring-test.cpp
@@ -3,6 +3,7 @@
 
 #ifdef MBFL
 /* TODO: make fun strings for tests */
+
 TEST(mbstring_test, test_mb_check_encoding) {
 	ASSERT_TRUE(f$mb_check_encoding(string("sdf"), string("Windows-1251")));
 	ASSERT_TRUE(f$mb_check_encoding(string("ыва"), string("Windows-1251")));
@@ -10,9 +11,11 @@ TEST(mbstring_test, test_mb_check_encoding) {
 	ASSERT_TRUE(f$mb_check_encoding(string("Ä°nanÃ§ EsaslarÄ±"), string("Windows-1251")));
 	ASSERT_FALSE(f$mb_check_encoding(string("Ä°nanÃ§ EsaslarÄ±"), string("ASCII")));
 }
+
 TEST(mbstring_test, test_mb_convert_encoding) {
-	ASSERT_STREQ(f$mb_convert_encoding(string("Hello"), string("UTF-8"), string("EUC-KR")).c_str(), "Hello");
-	ASSERT_STREQ(f$mb_convert_encoding(string("ыавыа"), string("UTF-8"), string("Windows-1251")).c_str(), "С‹Р°РІС‹Р°");
-	ASSERT_STREQ(f$mb_convert_encoding(string("ыва"), string("UTF-8"), string("ASCII")).c_str(), "??????");
+	ASSERT_STREQ(f$mb_convert_encoding(string("Hello"), string("UTF-8"), string("EUC-KR")).to_string().c_str(), "Hello");
+	ASSERT_STREQ(f$mb_convert_encoding(string("ыавыа"), string("UTF-8"), string("Windows-1251")).to_string().c_str(), "С‹Р°РІС‹Р°");
+	ASSERT_STREQ(f$mb_convert_encoding(string("ыва"), string("UTF-8"), string("ASCII")).to_string().c_str(), "??????");
 }
+
 #endif
\ No newline at end of file

From ac8f9975387c5a7d86827f289d7eb49e76b5851b Mon Sep 17 00:00:00 2001
From: Andrey Arutiunian <andreylzmw@gmail.com>
Date: Mon, 1 May 2023 01:27:12 +0300
Subject: [PATCH 14/27] small fixes

---
 builtin-functions/_functions.txt |  2 +-
 runtime/mbstring/mbstring.cpp    | 43 --------------------------------
 runtime/mbstring/mbstring.h      |  8 +++---
 3 files changed, 5 insertions(+), 48 deletions(-)

diff --git a/builtin-functions/_functions.txt b/builtin-functions/_functions.txt
index 34536b2e98..b7ccdecbfc 100644
--- a/builtin-functions/_functions.txt
+++ b/builtin-functions/_functions.txt
@@ -1632,7 +1632,7 @@ function mb_convert_variables(string $to_encoding, array|string $from_encoding,
 function mb_decode_mimeheader(string $string): string;
 function mb_decode_numericentity(string $string, array $map, ?string $encoding = null): string;
 function mb_detect_encoding(string $string, array|string|null $encodings = null, bool $strict = false): string|false;
-function mb_detect_order(array|string|null $encoding = null): array|bool;
+function mb_detect_order(array|string|null $encoding = null): mixed; // return array|bool
 function mb_encode_mimeheader(string $string, ?string $charset = null, ?string $transfer_encoding = null, string $newline = "\r\n", int $indent = 0): string;
 function mb_encode_numericentity(string $string, array $map, ?string $encoding = null, bool $hex = false): string;
 function mb_encoding_aliases(string $encoding): array;
diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
index 82c966f0ad..0e11898e04 100644
--- a/runtime/mbstring/mbstring.cpp
+++ b/runtime/mbstring/mbstring.cpp
@@ -234,49 +234,6 @@ static int64_t mb_UTF8_get_offset(const char *s, int64_t pos) {
   return res;
 }
 
-bool mb_UTF8_check(const char *s) {
-  do {
-#define CHECK(condition) if (!(condition)) {return false;}
-    unsigned int a = (unsigned char)(*s++);
-    if ((a & 0x80) == 0) {
-      if (a == 0) {
-        return true;
-      }
-      continue;
-    }
-
-    CHECK ((a & 0x40) != 0);
-
-    unsigned int b = (unsigned char)(*s++);
-    CHECK((b & 0xc0) == 0x80);
-    if ((a & 0x20) == 0) {
-      CHECK((a & 0x1e) > 0);
-      continue;
-    }
-
-    unsigned int c = (unsigned char)(*s++);
-    CHECK((c & 0xc0) == 0x80);
-    if ((a & 0x10) == 0) {
-      int x = (((a & 0x0f) << 6) | (b & 0x20));
-      CHECK(x != 0 && x != 0x360);//surrogates
-      continue;
-    }
-
-    unsigned int d = (unsigned char)(*s++);
-    CHECK((d & 0xc0) == 0x80);
-    if ((a & 0x08) == 0) {
-      int t = (((a & 0x07) << 6) | (b & 0x30));
-      CHECK(0 < t && t < 0x110);//end of unicode
-      continue;
-    }
-
-    return false;
-#undef CHECK
-  } while (true);
-
-  php_assert (0);
-}
-
 bool f$mb_check_encoding(const string &str, const string &encoding) {
   int encoding_num = mb_detect_encoding(encoding);
   if (encoding_num < 0) {
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index 52f295e3d4..2647d32163 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -220,7 +220,7 @@ int64_t f$mb_ereg_search_getpos(void);
  * If there are some matches, the first element will have the matched sub-string, the second element will have the first part grouped with brackets,
  * the third element will have the second part grouped with brackets, and so on. It returns false on error.
  */
-Optional<array> f$mb_ereg_search_getregs(void);
+mixed f$mb_ereg_search_getregs(void);
 
 /**
  * Setup string and regular expression for a multibyte regular expression match
@@ -241,7 +241,7 @@ bool f$mb_ereg_search_init(const string &str, const Optional<string> &pattern, c
  * @return array|false An array containing two elements. The first element is the offset, in bytes, where the match begins relative to the start of
  * the search string, and the second element is the length in bytes of the match. If an error occurs, false is returned.
  */
-Optional<array> f$mb_ereg_search_pos(const Optional<string> &pattern, const Optional<string> &options);
+mixed f$mb_ereg_search_pos(const Optional<string> &pattern, const Optional<string> &options);
 
 /**
  * Returns the matched part of a multibyte regular expression
@@ -251,7 +251,7 @@ Optional<array> f$mb_ereg_search_pos(const Optional<string> &pattern, const Opti
  * it returns an array including substring of matched part as first element, the first grouped part with brackets as second element,
  * the second grouped part as third element, and so on. It returns false on error.
  */
-Optional<array> f$mb_ereg_search_regs(const Optional<string> &pattern, const Optional<string> &options);
+mixed f$mb_ereg_search_regs(const Optional<string> &pattern, const Optional<string> &options);
 
 /**
  * Set start point of next regular expression match
@@ -496,7 +496,7 @@ bool f$mb_send_mail(const string &to, const string &subject, const string &messa
  * @param int limit (default = -1) If optional parameter limit is specified, it will be split in limit elements as maximum
  * @return array|false The result as an array, or false on failure
  */
-Optional<array> f$mb_split(const string &pattern, const string &str, const int64_t limit = -1);
+mixed f$mb_split(const string &pattern, const string &str, const int64_t limit = -1);
 
 /**
  * This function will return an array of strings, it is a version of str_split() with support for encodings of variable character size as well

From 0b967c41930a44b7ce60e257458ad9aa83ae2291 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=90=D1=80=D1=83=D1=82=D1=8E=D0=BD=D1=8F=D0=BD=20=D0=90?=
 =?UTF-8?q?=D0=BD=D0=B4=D1=80=D0=B5=D0=B9=20=D0=A0=D0=BE=D0=BC=D0=B0=D0=BD?=
 =?UTF-8?q?=D0=BE=D0=B2=D0=B8=D1=87?= <andreylzmw@gmail.com>
Date: Tue, 26 Dec 2023 17:48:38 +0300
Subject: [PATCH 15/27] add test workflow

---
 .github/workflows/linux-install.yml | 31 +++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 .github/workflows/linux-install.yml

diff --git a/.github/workflows/linux-install.yml b/.github/workflows/linux-install.yml
new file mode 100644
index 0000000000..39b54e0627
--- /dev/null
+++ b/.github/workflows/linux-install.yml
@@ -0,0 +1,31 @@
+name: linux-install
+
+on:
+  workflow_dispatch:
+
+env:
+  kphp_root_dir: /home/kitten/kphp
+  kphp_polyfills_dir: /home/kitten/kphp/kphp-polyfills
+  kphp_build_dir: /home/kitten/kphp/build
+
+jobs:
+  install-linux:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        include:
+          - os: buster
+          - os: focal
+          # - os: jammy # TODO: enable after release to artifactory servers
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Build and start Docker container
+      run: |
+        docker build -f $GITHUB_WORKSPACE/.github/workflows/Dockerfile.${{matrix.os}}.install $GITHUB_WORKSPACE -t kphp-build-img-${{matrix.os}}-install
+        docker run -dt --name kphp-build-container-${{matrix.os}}-install kphp-build-img-${{matrix.os}}-install
+
+    - name: Run php dummy script
+      run: docker exec -u kitten kphp-build-container-${{matrix.os}}-install bash -c 
+        "cd /home/kitten && echo 'hello world' > demo.php && kphp --mode=cli --cxx=g++ demo.php && ./kphp_out/cli -o --user kitten"

From 2cfb9629237d84ce406c0d3a4e8fdbd1384c7b6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=90=D1=80=D1=83=D1=82=D1=8E=D0=BD=D1=8F=D0=BD=20=D0=90?=
 =?UTF-8?q?=D0=BD=D0=B4=D1=80=D0=B5=D0=B9=20=D0=A0=D0=BE=D0=BC=D0=B0=D0=BD?=
 =?UTF-8?q?=D0=BE=D0=B2=D0=B8=D1=87?= <andreylzmw@gmail.com>
Date: Tue, 26 Dec 2023 17:48:38 +0300
Subject: [PATCH 16/27] add test workflow

---
 .github/workflows/Dockerfile.buster.install | 16 +++++++++++
 .github/workflows/Dockerfile.focal.install  | 13 +++++++++
 .github/workflows/Dockerfile.jammy.install  | 17 +++++++++++
 .github/workflows/linux-install.yml         | 31 +++++++++++++++++++++
 4 files changed, 77 insertions(+)
 create mode 100644 .github/workflows/Dockerfile.buster.install
 create mode 100644 .github/workflows/Dockerfile.focal.install
 create mode 100644 .github/workflows/Dockerfile.jammy.install
 create mode 100644 .github/workflows/linux-install.yml

diff --git a/.github/workflows/Dockerfile.buster.install b/.github/workflows/Dockerfile.buster.install
new file mode 100644
index 0000000000..598f191b55
--- /dev/null
+++ b/.github/workflows/Dockerfile.buster.install
@@ -0,0 +1,16 @@
+FROM debian:buster
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && \
+	apt-get install -y --no-install-recommends apt-utils ca-certificates gnupg wget lsb-release && \
+    echo "deb https://deb.debian.org/debian buster-backports main" >> /etc/apt/sources.list && \ 
+    wget -qO /etc/apt/trusted.gpg.d/vkpartner.asc https://artifactory-external.vkpartner.ru/artifactory/api/gpg/key/public && \
+    echo "deb https://artifactory-external.vkpartner.ru/artifactory/kphp buster main" >> /etc/apt/sources.list && \
+    wget -qO - https://packages.sury.org/php/apt.gpg | apt-key add - && \
+    echo "deb https://packages.sury.org/php/ buster main" >> /etc/apt/sources.list.d/php.list
+
+RUN apt-get update && apt-get install -y git cmake make g++ lld gperf netcat php7.4-vkext kphp vk-tl-tools && \
+	mkdir -p /var/www/vkontakte/data/www/vkontakte.com/tl/ && \
+	tl-compiler -e /var/www/vkontakte/data/www/vkontakte.com/tl/scheme.tlo /usr/share/vkontakte/examples/tl-files/common.tl /usr/share/vkontakte/examples/tl-files/tl.tl
+
+RUN useradd -ms /bin/bash kitten
\ No newline at end of file
diff --git a/.github/workflows/Dockerfile.focal.install b/.github/workflows/Dockerfile.focal.install
new file mode 100644
index 0000000000..2645446602
--- /dev/null
+++ b/.github/workflows/Dockerfile.focal.install
@@ -0,0 +1,13 @@
+FROM ubuntu:20.04
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && \
+	apt-get install -y --no-install-recommends apt-utils ca-certificates gnupg wget software-properties-common pkg-config && \
+	wget -qO /etc/apt/trusted.gpg.d/vkpartner.asc https://artifactory-external.vkpartner.ru/artifactory/api/gpg/key/public && \
+	echo "deb https://artifactory-external.vkpartner.ru/artifactory/kphp focal main" >> /etc/apt/sources.list
+
+RUN apt-get update && apt-get install -y git cmake make g++ lld gperf netcat php7.4-vkext kphp vk-tl-tools && \
+	mkdir -p /var/www/vkontakte/data/www/vkontakte.com/tl/ && \
+	tl-compiler -e /var/www/vkontakte/data/www/vkontakte.com/tl/scheme.tlo /usr/share/vkontakte/examples/tl-files/common.tl /usr/share/vkontakte/examples/tl-files/tl.tl
+
+RUN useradd -ms /bin/bash kitten
\ No newline at end of file
diff --git a/.github/workflows/Dockerfile.jammy.install b/.github/workflows/Dockerfile.jammy.install
new file mode 100644
index 0000000000..4c7fe6a654
--- /dev/null
+++ b/.github/workflows/Dockerfile.jammy.install
@@ -0,0 +1,17 @@
+FROM ubuntu:22.04
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt update && \
+	apt install -y --no-install-recommends apt-utils ca-certificates gnupg wget software-properties-common pkg-config && \
+	wget -qO /etc/apt/trusted.gpg.d/vkpartner.asc https://artifactory-external.vkpartner.ru/artifactory/api/gpg/key/public && \
+	echo "deb [arch=amd64] https://artifactory-external.vkpartner.ru/artifactory/kphp jammy main" | tee /etc/apt/sources.list.d/vkpartner.list
+
+RUN apt install -y software-properties-common && apt update && \
+	add-apt-repository ppa:ondrej/php -y && \
+	apt update
+
+RUN apt install -y git cmake make g++ lld gperf netcat php7.4-vkext kphp vk-tl-tools && \
+	mkdir -p /var/www/vkontakte/data/www/vkontakte.com/tl/ && \
+	tl-compiler -e /var/www/vkontakte/data/www/vkontakte.com/tl/scheme.tlo /usr/share/vkontakte/examples/tl-files/common.tl /usr/share/vkontakte/examples/tl-files/tl.tl
+
+RUN useradd -ms /bin/bash kitten
\ No newline at end of file
diff --git a/.github/workflows/linux-install.yml b/.github/workflows/linux-install.yml
new file mode 100644
index 0000000000..39b54e0627
--- /dev/null
+++ b/.github/workflows/linux-install.yml
@@ -0,0 +1,31 @@
+name: linux-install
+
+on:
+  workflow_dispatch:
+
+env:
+  kphp_root_dir: /home/kitten/kphp
+  kphp_polyfills_dir: /home/kitten/kphp/kphp-polyfills
+  kphp_build_dir: /home/kitten/kphp/build
+
+jobs:
+  install-linux:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        include:
+          - os: buster
+          - os: focal
+          # - os: jammy # TODO: enable after release to artifactory servers
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Build and start Docker container
+      run: |
+        docker build -f $GITHUB_WORKSPACE/.github/workflows/Dockerfile.${{matrix.os}}.install $GITHUB_WORKSPACE -t kphp-build-img-${{matrix.os}}-install
+        docker run -dt --name kphp-build-container-${{matrix.os}}-install kphp-build-img-${{matrix.os}}-install
+
+    - name: Run php dummy script
+      run: docker exec -u kitten kphp-build-container-${{matrix.os}}-install bash -c 
+        "cd /home/kitten && echo 'hello world' > demo.php && kphp --mode=cli --cxx=g++ demo.php && ./kphp_out/cli -o --user kitten"

From b6bec7e1815d1f0d73bf5da79f164417127446dc Mon Sep 17 00:00:00 2001
From: catnyan02 <catnyan02@gmail.com>
Date: Tue, 6 Feb 2024 18:42:20 +0000
Subject: [PATCH 17/27] Add 14 functions (mb_substr, mb_strlen,
 mb_substr_count, mb_strtolower, mb_strtoupper, mb_strwidth, mb_strpos,
 mb_stripos, mb_strripos, mb_strrpos, mb_stristr, mb_strrchr, mb_strrichr,
 mb_strstr) + php tests

---
 builtin-functions/_functions.txt            |  59 +-
 runtime/mbstring/mbstring.cpp               | 562 +++++++++++++++-
 runtime/mbstring/mbstring.h                 | 707 +-------------------
 tests/cpp/runtime/mbstring-test.cpp         |  67 ++
 tests/phpt/mbstring/001_mb_strlen.php       |  64 ++
 tests/phpt/mbstring/002_mb_substr.php       |  57 ++
 tests/phpt/mbstring/003_mb_substr_count.php |  57 ++
 tests/phpt/mbstring/004_mb_strwidth.php     |  63 ++
 tests/phpt/mbstring/005_mb_strtoupper.php   |  33 +
 tests/phpt/mbstring/006_mb_strtolower.php   |  33 +
 tests/phpt/mbstring/007_mb_strpos.php       |  51 ++
 tests/phpt/mbstring/008_mb_stripos.php      |  46 ++
 tests/phpt/mbstring/009_mb_strrpos.php      |  51 ++
 tests/phpt/mbstring/010_mb_strripos.php     |  51 ++
 tests/phpt/mbstring/011_mb_strstr.php       |  63 ++
 tests/phpt/mbstring/012_mb_stristr.php      |  63 ++
 tests/phpt/mbstring/013_mb_strrchr.php      |  63 ++
 tests/phpt/mbstring/014_mb_strrichr.php     |  63 ++
 18 files changed, 1411 insertions(+), 742 deletions(-)
 create mode 100644 tests/phpt/mbstring/001_mb_strlen.php
 create mode 100644 tests/phpt/mbstring/002_mb_substr.php
 create mode 100644 tests/phpt/mbstring/003_mb_substr_count.php
 create mode 100644 tests/phpt/mbstring/004_mb_strwidth.php
 create mode 100644 tests/phpt/mbstring/005_mb_strtoupper.php
 create mode 100644 tests/phpt/mbstring/006_mb_strtolower.php
 create mode 100644 tests/phpt/mbstring/007_mb_strpos.php
 create mode 100644 tests/phpt/mbstring/008_mb_stripos.php
 create mode 100644 tests/phpt/mbstring/009_mb_strrpos.php
 create mode 100644 tests/phpt/mbstring/010_mb_strripos.php
 create mode 100644 tests/phpt/mbstring/011_mb_strstr.php
 create mode 100644 tests/phpt/mbstring/012_mb_stristr.php
 create mode 100644 tests/phpt/mbstring/013_mb_strrchr.php
 create mode 100644 tests/phpt/mbstring/014_mb_strrichr.php

diff --git a/builtin-functions/_functions.txt b/builtin-functions/_functions.txt
index b7ccdecbfc..bb275e4751 100644
--- a/builtin-functions/_functions.txt
+++ b/builtin-functions/_functions.txt
@@ -1619,60 +1619,17 @@ function getenv(string $varname = '', bool $local_only = false): mixed;
 
 function mb_check_encoding(array|string $value, ?string $encoding = null): bool;
 function mb_convert_encoding(array|string $string, string $to_encoding, array|string|null $from_encoding = null): array|string|false;
+function mb_substr(string $string, int $start, ?int $length = null, ?string $encoding = null): string;
 function mb_strlen(string $string, ?string $encoding = null): int;
-function mb_strpos(string $haystack, string $needle, int $offset = 0, ?string $encoding = null): int|false;
-function mb_stripos(string $haystack, string $needle, int $offset = 0, ?string $encoding = null): int|false;
+function mb_substr_count(string $haystack, string $needle, ?string $encoding = null): int;
 function mb_strtolower(string $string, ?string $encoding = null): string;
 function mb_strtoupper(string $string, ?string $encoding = null): string;
-function mb_substr(string $string, int $start, ?int $length = null, ?string $encoding = null): string;
-function mb_chr(int $codepoint, ?string $encoding = null): string|false;
-function mb_convert_case(string $string, int $mode, ?string $encoding = null): string;
-function mb_convert_kana(string $string, string $mode = "KV", ?string $encoding = null): string;
-function mb_convert_variables(string $to_encoding, array|string $from_encoding, mixed &$vars): string|false; // ??? (change variable bytes + kwargs)
-function mb_decode_mimeheader(string $string): string;
-function mb_decode_numericentity(string $string, array $map, ?string $encoding = null): string;
-function mb_detect_encoding(string $string, array|string|null $encodings = null, bool $strict = false): string|false;
-function mb_detect_order(array|string|null $encoding = null): mixed; // return array|bool
-function mb_encode_mimeheader(string $string, ?string $charset = null, ?string $transfer_encoding = null, string $newline = "\r\n", int $indent = 0): string;
-function mb_encode_numericentity(string $string, array $map, ?string $encoding = null, bool $hex = false): string;
-function mb_encoding_aliases(string $encoding): array;
-function mb_ereg_match(string $pattern, string $string, ?string $options = null): bool;
-function mb_ereg_replace_callback(string $pattern, callable $callback, string $string, ?string $options = null): string|false|null;
-function mb_ereg_replace(string $pattern, string $replacement, string $string, ?string $options = null): string|false|null;
-function mb_ereg_search_getpos(): int;
-function mb_ereg_search_getregs(): array|false;
-function mb_ereg_search_init(string $string, ?string $pattern = null, ?string $options = null): bool;
-function mb_ereg_search_pos(?string $pattern = null, ?string $options = null): array|false;
-function mb_ereg_search_regs(?string $pattern = null, ?string $options = null): array|false;
-function mb_ereg_search_setpos(int $offset): bool;
-function mb_ereg_search(?string $pattern = null, ?string $options = null): bool;
-function mb_ereg(string $pattern, string $string, array &$matches = null): bool;
-function mb_eregi_replace(string $pattern, string $replacement, string $string, ?string $options = null): string|false|null;
-function mb_eregi(string $pattern, string $string, array &$matches = null): bool;
-function mb_get_info(string $type = "all"): array|string|int|false;
-function mb_http_input(?string $type = null): array|string|false;
-function mb_http_output(?string $encoding = null): string|false;
-function mb_internal_encoding(?string $encoding = null): string|false;
-function mb_language(?string $language = null): string|false;
-function mb_list_encodings(): array;
-function mb_ord(string $string, ?string $encoding = null): int|false;
-function mb_output_handler(string $string, int $status): string;
-function mb_parse_str(string $string, array &$result): bool;
-function mb_preferred_mime_name(string $encoding): string|false;
-function mb_regex_encoding(?string $encoding = null): string|false;
-function mb_regex_set_options(?string $options = null): string;
-function mb_scrub(string $string, ?string $encoding = null): string;
-function mb_send_mail(string $to, string $subject, string $message, array|string $additional_headers = [], ?string $additional_params = null): bool;
-function mb_split(string $pattern, string $string, int $limit = -1): array|false;
-function mb_str_split(string $string, int $length = 1, ?string $encoding = null): array;
-function mb_strcut(string $string, int $start, ?int $length = null, ?string $encoding = null): string;
-function mb_strimwidth(string $string, int $start, int $width, string $trim_marker = "", ?string $encoding = null): string;
+function mb_strwidth(string $string, ?string $encoding = null): int;
+function mb_strpos(string $haystack, string $needle, int $offset = 0, ?string $encoding = null): int|false;
+function mb_stripos(string $haystack, string $needle, int $offset = 0, ?string $encoding = null): int|false;
+function mb_strripos(string $haystack, string $needle, int $offset = 0, ?string $encoding = null): int|false;
+function mb_strrpos(string $haystack, string $needle, int $offset = 0, ?string $encoding = null): int|false;
 function mb_stristr(string $haystack, string $needle, bool $before_needle = false, ?string $encoding = null): string|false;
 function mb_strrchr(string $haystack, string $needle, bool $before_needle = false, ?string $encoding = null): string|false;
 function mb_strrichr(string $haystack, string $needle, bool $before_needle = false, ?string $encoding = null): string|false;
-function mb_strripos(string $haystack, string $needle, int $offset = 0, ?string $encoding = null): int|false;
-function mb_strrpos(string $haystack, string $needle, int $offset = 0, string $encoding = null): int|false;
-function mb_strstr(string $haystack, string $needle, bool $before_needle = false, ?string $encoding = null): string|false;
-function mb_strwidth(string $string, ?string $encoding = null): int;
-function mb_substitute_character(string|int|null $substitute_character = null): string|int|false;
-function mb_substr_count(string $haystack, string $needle, ?string $encoding = null): int;
\ No newline at end of file
+function mb_strstr(string $haystack, string $needle, bool $before_needle = false, ?string $encoding = null): string|false;
\ No newline at end of file
diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
index 0e11898e04..e42d999c34 100644
--- a/runtime/mbstring/mbstring.cpp
+++ b/runtime/mbstring/mbstring.cpp
@@ -1,4 +1,11 @@
 #include "mbstring.h"
+#include "runtime/exception.h"
+
+#include "common/unicode/unicode-utils.h"
+#include "common/unicode/utf8-utils.h"
+
+#define MIN(a, b)  (((a)<(b))?(a):(b))
+#define MBFL_SUBSTR_UNTIL_END ((size_t) -1)
 
 bool mb_UTF8_check(const char *s) {
   do {
@@ -46,6 +53,23 @@ bool mb_UTF8_check(const char *s) {
 #ifdef MBFL
 extern "C" {
 	#include <kphp/libmbfl/mbfl/mbfilter.h>
+#include <kphp/libmbfl/mbfl/mbfilter_wchar.h>
+}
+
+#define KPHP_UNICODE_CASE_UPPER        0
+#define KPHP_UNICODE_CASE_LOWER        1
+#define KPHP_UNICODE_CASE_TITLE        2
+#define KPHP_UNICODE_CASE_FOLD         3
+#define KPHP_UNICODE_CASE_UPPER_SIMPLE 4
+#define KPHP_UNICODE_CASE_LOWER_SIMPLE 5
+#define KPHP_UNICODE_CASE_TITLE_SIMPLE 6
+#define KPHP_UNICODE_CASE_FOLD_SIMPLE  7
+#define KPHP_UNICODE_CASE_MODE_MAX     7
+
+static const char * DEFAULT_ENCODING = "UTF-8" ;
+
+static inline int mbfl_is_error(size_t len) {
+  return len >= (size_t) -16;
 }
 
 mbfl_string *convert_encoding(const char *str, const char *to, const char *from) {
@@ -149,10 +173,544 @@ bool f$mb_check_encoding(const mixed &value, const Optional<string> &encoding) {
 	return check_encoding(c_value, c_encoding);
 }
 
+// Resolves an optional encoding name to its libmbfl descriptor.
+// A missing name falls back to DEFAULT_ENCODING. The result of
+// mbfl_name2encoding is returned as-is, so callers must handle a null
+// pointer for a name the lookup does not recognise.
+//
+// No caching, unlike the PHP version - can be changed if we're going to add
+// mbstring config.
+static const mbfl_encoding *mb_get_encoding(const Optional<string> &enc_name) {
+  if (!enc_name.has_value()) {
+    return mbfl_name2encoding(DEFAULT_ENCODING); // change if we are going to use current encoding
+  }
+  return mbfl_name2encoding(enc_name.val().c_str());
+}
+
+// Returns mbfl_strlen() of `str`, interpreted in `enc_name` (DEFAULT_ENCODING when null).
+// An unknown encoding or an mbfl error result is reported via php_critical_error.
+int64_t f$mb_strlen(const string &str, const Optional<string> &enc_name){
+  const mbfl_encoding *resolved = mb_get_encoding(enc_name);
+  if (resolved == NULL) {
+    php_critical_error ("encoding \"%s\" isn't supported in mb_strlen", enc_name.val().c_str());
+  }
+  // Non-owning mbfl_string view: `val` points straight at the KPHP string buffer.
+  mbfl_string input;
+  mbfl_string_init(&input);
+  input.no_encoding = resolved->no_encoding;
+  input.val = (unsigned char*)str.c_str();
+  input.len = str.size();
+
+  const size_t char_count = mbfl_strlen(&input);
+  if (mbfl_is_error(char_count)) {
+    php_critical_error ("error working with \"%s\" string", str.c_str());
+  }
+  return (int64_t) char_count;
+}
+
+
+// Returns the part of `str` selected by the character offset `start` and the character count
+// `length` (null = to the end; negative values count from the end), decoding `str` as `encoding`.
+string f$mb_substr(const string &str, const int64_t start, const Optional<int64_t> &length, const Optional<string> &encoding){
+  size_t real_start, real_len;
+  bool len_is_null = !length.has_value();
+  const mbfl_encoding *enc = mb_get_encoding(encoding);
+  if (!enc) {
+    php_critical_error ("encoding \"%s\" isn't supported in mb_substr", encoding.val().c_str());
+  }
+
+  mbfl_string _string, result, *ret;
+  mbfl_string_init(&_string);
+  _string.no_encoding = enc->no_encoding;
+  _string.len = str.size();
+  _string.val = (unsigned char*)str.c_str();
+
+  size_t mblen = 0;
+  if (start < 0 || (!len_is_null && val(length) < 0)) {
+    mblen = mbfl_strlen(&_string);
+  }
+  if (start >= 0) {
+    real_start = (size_t) start;
+  } else if (-start < mblen) {
+    real_start = mblen + start;
+  } else {
+    real_start = 0;
+  }
+  /* if "length" position is negative, set it to the length
+         * needed to stop that many chars from the end of the string */
+  if (len_is_null) {
+    real_len = mbfl_strlen(&_string) + 1;
+  } else if (val(length) >= 0) {
+    real_len = (size_t) val(length);
+  } else if (real_start < mblen && - val(length) < mblen - real_start) {
+    real_len = (mblen - real_start) + val(length);
+  } else {
+    real_len = 0;
+  }
+
+  ret = mbfl_substr(&_string, &result, real_start, real_len);
+  php_assert(ret != NULL);
+  string substring((const char*) ret->val, ret->len);
+  mbfl_string_clear(ret); // the output buffer is owned by libmbfl; release it once copied
+  return substring;
+}
+
+// Returns mbfl_substr_count() of `needle` in `haystack`, both interpreted in
+// `encoding` (DEFAULT_ENCODING when null). An empty needle only triggers a
+// warning here; execution still continues into libmbfl.
+int64_t f$mb_substr_count(const string &haystack, const string &needle, const Optional<string> &encoding){
+  const mbfl_encoding *enc = mb_get_encoding(encoding);
+  if (enc == NULL) {
+    php_critical_error ("encoding \"%s\" isn't supported in mb_substr_count", encoding.val().c_str());
+  }
+
+  // Non-owning mbfl_string views over the two KPHP string buffers.
+  mbfl_string mb_haystack;
+  mbfl_string_init(&mb_haystack);
+  mb_haystack.no_encoding = enc->no_encoding;
+  mb_haystack.val = (unsigned char*) haystack.c_str();
+  mb_haystack.len = haystack.size();
+
+  mbfl_string mb_needle;
+  mbfl_string_init(&mb_needle);
+  mb_needle.no_encoding = enc->no_encoding;
+  mb_needle.val = (unsigned char*) needle.c_str();
+  mb_needle.len = needle.size();
+
+  if (needle.size() <= 0) {
+    php_warning("empty substring");
+  }
+
+  const size_t occurrences = mbfl_substr_count(&mb_haystack, &mb_needle);
+  if (mbfl_is_error(occurrences)) {
+    php_critical_error ("internal error");
+  }
+
+  return (int64_t) occurrences;
+}
+
+// Converts `str` (given in `encoding`, DEFAULT_ENCODING when null) to upper or lower case:
+// the text is transcoded to UTF-8, each code point is mapped with unicode_toupper /
+// unicode_tolower, and the result is transcoded back. Other `mode` values are not implemented.
+string mb_convert_case(const string &str, const int64_t mode, const Optional<string> &encoding){
+  const string enc_name = encoding.has_value() ? encoding.val() : string(DEFAULT_ENCODING);
+  mixed unicode = f$mb_convert_encoding(str, string("UTF-8"), enc_name);
+  if (unicode.is_string()) {
+    // Case-map the UTF-8 form of the input, not its original bytes.
+    const string unicode_str = unicode.to_string();
+    int len = unicode_str.size();
+    string unicode_res(len * 3, false);
+    const char *s = unicode_str.c_str();
+    int p = 0, ch = 0, res_len = 0;
+
+    switch(mode) {
+      case KPHP_UNICODE_CASE_UPPER:
+        while ((p = get_char_utf8(&ch, s)) > 0) {
+          s += p;
+          res_len += put_char_utf8(unicode_toupper(ch), &unicode_res[res_len]);
+        }
+        break;
+
+      case KPHP_UNICODE_CASE_LOWER:
+        while ((p = get_char_utf8(&ch, s)) > 0) {
+          s += p;
+          res_len += put_char_utf8(unicode_tolower(ch), &unicode_res[res_len]);
+        }
+        break;
+    }
+
+    if (p < 0) {
+      php_warning("Incorrect UTF-8 string \"%s\" in function mb_convert_case", str.c_str());
+    }
+    unicode_res.shrink(res_len);
+    mixed res = f$mb_convert_encoding(unicode_res, enc_name, string("UTF-8"));
+
+    if (res.is_string()) {
+      return res.to_string();
+    }
+    else {
+        php_critical_error ("encoding \"%s\" isn't supported in mb_convert_case", enc_name.c_str());
+    }
+  }
+  else {
+    php_critical_error ("encoding \"%s\" isn't supported in mb_convert_case", enc_name.c_str());
+  }
+//  if (mode < 0 || mode > PHP_UNICODE_CASE_MODE_MAX) {
+//    php_critical_error ("case mode isn't supported");
+//  }
+
+//  if (mode != PHP_UNICODE_CASE_UPPER || mode != PHP_UNICODE_CASE_LOWER) {
+//    php_critical_error ("case mode isn't supported");
+//  }
+//
+//  struct convert_case_data data;
+//  mbfl_convert_filter *from_wchar, *to_wchar;
+//  mbfl_string result, *result_ptr;
+//
+//  mbfl_memory_device device;
+//  mbfl_memory_device_init(&device, str.size() + 1, 0);
+//
+//  /* encoding -> wchar filter */
+//  to_wchar = mbfl_convert_filter_new(enc->no_encoding,
+//                                     (&mbfl_encoding_wchar)->no_encoding, convert_case_filter, NULL, &data);
+//  if (to_wchar == NULL) {
+//    mbfl_memory_device_clear(&device);
+//    php_critical_error ("encoding isn't supported");
+//  }
+//
+//  /* wchar -> encoding filter */
+//  from_wchar = mbfl_convert_filter_new((&mbfl_encoding_wchar)->no_encoding, enc->no_encoding, mbfl_memory_device_output,
+//                                       NULL, &device);
+//  if (from_wchar == NULL) {
+//    mbfl_convert_filter_delete(to_wchar);
+//    mbfl_memory_device_clear(&device);
+//    php_critical_error ("encoding isn't supported");
+//  }
+//
+//  data.next_filter = from_wchar;
+//  data.no_encoding = enc->no_encoding;
+//  data.case_mode = mode;
+//  data.title_mode = 0;
+//
+//  {
+//    /* feed data */
+//    const unsigned char *p = (const unsigned char *) str.c_str();
+//    size_t n = str.size();
+//    while (n > 0) {
+//      if ((*to_wchar->filter_function)(*p++, to_wchar) < 0) {
+//        break;
+//      }
+//      n--;
+//    }
+//  }
+//
+//  mbfl_convert_filter_flush(to_wchar);
+//  mbfl_convert_filter_flush(from_wchar);
+//  result_ptr = mbfl_memory_device_result(&device, &result);
+//  mbfl_convert_filter_delete(to_wchar);
+//  mbfl_convert_filter_delete(from_wchar);
+//
+//  if (!result_ptr) {
+//    THROW_EXCEPTION (new_Exception(string(__FILE__), __LINE__, string("mbfl error", 10)));
+//  }
+//
+//  return string((const char*) result_ptr->val, result_ptr->len);
+}
+
+string f$mb_strtoupper(const string &str, const Optional<string> &encoding){
+  return mb_convert_case(str, KPHP_UNICODE_CASE_UPPER, encoding); // forwards to mb_convert_case in KPHP_UNICODE_CASE_UPPER mode
+}
+
+string f$mb_strtolower(const string &str, const Optional<string> &encoding){
+  return mb_convert_case(str, KPHP_UNICODE_CASE_LOWER, encoding); // forwards to mb_convert_case in KPHP_UNICODE_CASE_LOWER mode
+}
+
+// Returns the value mbfl_strwidth computes for str, interpreted in the requested encoding.
+int64_t f$mb_strwidth(const string &str, const Optional<string> &encoding){
+  const mbfl_encoding *width_enc = mb_get_encoding(encoding);
+  if (width_enc == nullptr) {
+    php_critical_error ("encoding \"%s\" isn't supported in mb_strwidth", encoding.val().c_str());
+  }
+
+  // Non-owning libmbfl view over the bytes of str; only the pointer and length are stored.
+  mbfl_string subject;
+  mbfl_string_init(&subject);
+  subject.no_encoding = width_enc->no_encoding;
+  subject.len = str.size();
+  subject.val = reinterpret_cast<unsigned char *>(const_cast<char *>(str.c_str()));
+
+  // The result is held as size_t and converted to int64_t by the return statement.
+  const size_t measured = mbfl_strwidth(&subject);
+
+  return measured;
+}
+
+// Forward search for needle in haystack through mbfl_strpos.
+// A negative offset counts back from the end of haystack (measured with mbfl_strlen).
+// Returns the position reported by mbfl_strpos, or false on failure (usually after a php_warning).
+Optional<int64_t> f$mb_strpos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding){
+  const mbfl_encoding *search_enc = mb_get_encoding(encoding);
+  if (search_enc == nullptr) {
+    php_critical_error ("encoding \"%s\" isn't supported in mb_strpos", encoding.val().c_str());
+  }
+
+  // Non-owning libmbfl views over both arguments.
+  mbfl_string hay_view;
+  mbfl_string_init(&hay_view);
+  hay_view.no_encoding = search_enc->no_encoding;
+  hay_view.len = haystack.size();
+  hay_view.val = reinterpret_cast<unsigned char *>(const_cast<char *>(haystack.c_str()));
+
+  mbfl_string needle_view;
+  mbfl_string_init(&needle_view);
+  needle_view.no_encoding = search_enc->no_encoding;
+  needle_view.len = needle.size();
+  needle_view.val = reinterpret_cast<unsigned char *>(const_cast<char *>(needle.c_str()));
+
+  // Kept unsigned: a negative offset wraps around here and wraps back once the length is added.
+  size_t start_at = offset;
+  if (start_at != 0) {
+    const size_t char_count = mbfl_strlen(&hay_view);
+    if (offset < 0) {
+      start_at += char_count;
+    }
+    if (start_at > char_count) {
+      php_warning ("offset not contained in string");
+      return false;
+    }
+  }
+
+  // The needle check runs after the offset check, so an out-of-range offset is reported first.
+  if (needle.size() <= 0) {
+    php_warning ("empty delimiter");
+    return false;
+  }
+
+  size_t found = mbfl_strpos(&hay_view, &needle_view, start_at, 0);
+  if (!mbfl_is_error(found)) {
+    return found;
+  }
+
+  // Failures come back as negated codes stored in the size_t; negate to read the code back.
+  const size_t error_code = -found;
+  if (error_code == 2) {
+    php_warning ("Needle has not positive length");
+  } else if (error_code == 4) {
+    php_warning ("Unknown encoding or conversion error");
+  } else if (error_code == 8) {
+    php_warning ("Argument is empty");
+  } else if (error_code != 1) {
+    php_warning ("Unknown error in mb_strpos");
+  }
+
+  // Code 1 is the only failure that stays silent.
+  return false;
+}
+
+// Reverse search: runs mbfl_strpos with its reverse flag set; returns the position or false.
+Optional<int64_t> f$mb_strrpos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding){
+  const mbfl_encoding *search_enc = mb_get_encoding(encoding);
+  if (search_enc == nullptr) {
+    php_critical_error ("encoding \"%s\" isn't supported in mb_strrpos", encoding.val().c_str());
+  }
+
+  mbfl_string hay_view;
+  mbfl_string_init(&hay_view);
+  hay_view.no_encoding = search_enc->no_encoding;
+  hay_view.len = haystack.size();
+  hay_view.val = reinterpret_cast<unsigned char *>(const_cast<char *>(haystack.c_str()));
+
+  mbfl_string needle_view;
+  mbfl_string_init(&needle_view);
+  needle_view.no_encoding = search_enc->no_encoding;
+  needle_view.len = needle.size();
+  needle_view.val = reinterpret_cast<unsigned char *>(const_cast<char *>(needle.c_str()));
+
+  if (offset != 0) {
+    // Either sign is accepted, but the magnitude may not exceed the character count.
+    const size_t char_count = mbfl_strlen(&hay_view);
+    const bool past_end = offset > 0 && offset > char_count;
+    const bool before_start = offset < 0 && -offset > char_count;
+    if (past_end || before_start) {
+      php_warning ("Offset is greater than the length of haystack string");
+      return false;
+    }
+  }
+
+  size_t found = mbfl_strpos(&hay_view, &needle_view, offset, 1);
+  if (mbfl_is_error(found)) { return false; }
+  return found;
+}
+
+/**
+ * Case-insensitive search for the last occurrence of needle in haystack.
+ * Both strings are lower-cased with f$mb_strtolower, then mbfl_strpos runs in reverse mode.
+ * @param haystack The string being searched
+ * @param needle The string to look for; an empty needle raises a warning and yields false
+ * @param offset Where to start; either sign is accepted, but its magnitude may not exceed
+ * the character length of the lower-cased haystack
+ * @param encoding Passed to mb_get_encoding and f$mb_strtolower
+ * @return The position reported by mbfl_strpos, or false on any failure
+ */
+Optional<int64_t> f$mb_strripos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding){
+  const mbfl_encoding *enc = mb_get_encoding(encoding);
+  if (!enc) {
+    // An unsupported encoding is a failure like any other, so report false. Returning the raw
+    // (size_t)-1 error marker from here would reach the caller as an integer instead.
+    return false;
+  }
+
+  if (needle.size() == 0) {
+    php_warning ("Empty delimiter");
+    return false;
+  }
+
+  /* We're using simple case-folding here, because we'd have to deal with remapping of
+   * offsets otherwise. */
+
+  // The lowered copies are function-scope locals, so the views below never outlive them.
+  string lower_haystack = f$mb_strtolower(haystack, encoding);
+  mbfl_string _haystack;
+  mbfl_string_init(&_haystack);
+  _haystack.no_encoding = enc->no_encoding;
+  _haystack.len = lower_haystack.size();
+  _haystack.val = (unsigned char*) lower_haystack.c_str();
+
+  if (!_haystack.val || _haystack.len == 0) {
+    return false; // nothing to search in
+  }
+
+  string lower_needle = f$mb_strtolower(needle, encoding);
+  mbfl_string _needle;
+  mbfl_string_init(&_needle);
+  _needle.no_encoding = enc->no_encoding;
+  _needle.len = lower_needle.size();
+  _needle.val = (unsigned char*) lower_needle.c_str();
+
+  if (!_needle.val || _needle.len == 0) {
+    return false; // lower-casing left nothing to search for
+  }
+
+  if (offset != 0) {
+    const size_t haystack_char_len = mbfl_strlen(&_haystack);
+    // A reverse search takes the offset as given (either sign); it only has to stay within
+    // the character length of the haystack.
+    if ((offset > 0 && (size_t)offset > haystack_char_len) ||
+        (offset < 0 && (size_t)(-offset) > haystack_char_len)) {
+      php_warning("Offset is greater than the length of haystack string");
+      return false;
+    }
+  }
+
+  // The last argument of mbfl_strpos is its reverse flag; it is always set for this function.
+  const int reverse = 1;
+  size_t n = mbfl_strpos(&_haystack, &_needle, offset, reverse);
+  if (mbfl_is_error(n)) {
+    return false;
+  }
+  return n;
+}
+
+/**
+ * Case-insensitive search for the first occurrence of needle in haystack.
+ * Both strings are lower-cased with f$mb_strtolower, then mbfl_strpos runs in forward mode.
+ * @param haystack The string being searched
+ * @param needle The string to look for; an empty needle raises a warning and yields false
+ * @param offset Character offset to start from; a negative value counts back from the end of
+ * the lower-cased haystack
+ * @param encoding Passed to mb_get_encoding and f$mb_strtolower
+ * @return The position reported by mbfl_strpos, or false on any failure
+ */
+Optional<int64_t> f$mb_stripos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding){
+  const mbfl_encoding *enc = mb_get_encoding(encoding);
+  if (!enc) {
+    // An unsupported encoding is a failure like any other, so report false. Returning the raw
+    // (size_t)-1 error marker from here would reach the caller as an integer instead.
+    return false;
+  }
+
+  if (needle.size() == 0) {
+    php_warning ("Empty delimiter");
+    return false;
+  }
+
+  /* We're using simple case-folding here, because we'd have to deal with remapping of
+   * offsets otherwise. */
+
+  // The lowered copies are function-scope locals, so the views below never outlive them.
+  string lower_haystack = f$mb_strtolower(haystack, encoding);
+  mbfl_string _haystack;
+  mbfl_string_init(&_haystack);
+  _haystack.no_encoding = enc->no_encoding;
+  _haystack.len = lower_haystack.size();
+  _haystack.val = (unsigned char*) lower_haystack.c_str();
+
+  if (!_haystack.val || _haystack.len == 0) {
+    return false; // nothing to search in
+  }
+
+  string lower_needle = f$mb_strtolower(needle, encoding);
+  mbfl_string _needle;
+  mbfl_string_init(&_needle);
+  _needle.no_encoding = enc->no_encoding;
+  _needle.len = lower_needle.size();
+  _needle.val = (unsigned char*) lower_needle.c_str();
+
+  if (!_needle.val || _needle.len == 0) {
+    return false; // lower-casing left nothing to search for
+  }
+
+  int64_t real_offset = offset;
+  if (offset != 0) {
+    const size_t haystack_char_len = mbfl_strlen(&_haystack);
+    if (offset < 0) {
+      real_offset += (int64_t )haystack_char_len; // negative offsets count from the end
+    }
+    if (real_offset < 0 || (size_t)real_offset > haystack_char_len) {
+      php_warning("Offset not contained in string");
+      return false;
+    }
+  }
+
+  const int reverse = 0;
+  size_t n = mbfl_strpos(&_haystack, &_needle, real_offset, reverse);
+  if (mbfl_is_error(n)) {
+    return false;
+  }
+  return n;
+}
+
+// Slices haystack around the position f$mb_strpos finds for needle; false when nothing is found.
+Optional<string> f$mb_strstr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding) {
+  Optional<int64_t> match_pos = f$mb_strpos(haystack, needle, 0, encoding);
+  if (!match_pos.has_value()) {
+    return false;
+  }
+  if (before_needle) {
+    return f$mb_substr(haystack, 0, val(match_pos), encoding); // the part in front of the match
+  }
+  return f$mb_substr(haystack, val(match_pos), false, encoding); // start at the match; length stays `false`
+}
+
+
+// Slices haystack around the position f$mb_stripos finds for needle; false when nothing is found.
+Optional<string> f$mb_stristr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding){
+  Optional<int64_t> match_pos = f$mb_stripos(haystack, needle, 0, encoding);
+  if (!match_pos.has_value()) {
+    return false;
+  }
+  if (before_needle) {
+    return f$mb_substr(haystack, 0, val(match_pos), encoding); // the part in front of the match
+  }
+  return f$mb_substr(haystack, val(match_pos), false, encoding); // start at the match; length stays `false`
+}
+
+// Slices haystack around the position f$mb_strrpos finds for needle; false when nothing is found.
+Optional<string> f$mb_strrchr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding){
+  Optional<int64_t> match_pos = f$mb_strrpos(haystack, needle, 0, encoding);
+  if (!match_pos.has_value()) {
+    return false;
+  }
+  if (before_needle) {
+    return f$mb_substr(haystack, 0, val(match_pos), encoding); // the part in front of the match
+  }
+  return f$mb_substr(haystack, val(match_pos), false, encoding); // start at the match; length stays `false`
+}
+
+// Slices haystack around the position f$mb_strripos finds for needle; false when nothing is found.
+Optional<string> f$mb_strrichr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding){
+  Optional<int64_t> match_pos = f$mb_strripos(haystack, needle, 0, encoding);
+  if (!match_pos.has_value()) {
+    return false;
+  }
+  if (before_needle) {
+    return f$mb_substr(haystack, 0, val(match_pos), encoding); // the part in front of the match
+  }
+  return f$mb_substr(haystack, val(match_pos), false, encoding); // start at the match; length stays `false`
+}
+
 #else
 
-#include "common/unicode/unicode-utils.h"
-#include "common/unicode/utf8-utils.h"
 
 static bool is_detect_incorrect_encoding_names_warning{false};
 
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index 2647d32163..bcf0da9c0b 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -12,709 +12,38 @@ bool mb_UTF8_check(const char *s);
 
 #ifdef MBFL
 
-/**
- * Check if strings are valid for the specified encoding
- * Checks if the specified byte stream is valid for the specified encoding. If value is of type array, all keys and values are validated recursively.
- * It is useful to prevent so-called "Invalid Encoding Attack".
- * @param array|string value The byte stream
- * @param ?string encoding (default = null) The expected encoding
- * @return bool Returns true on success or false on failure
- */
+
 bool f$mb_check_encoding(const mixed &value, const Optional<string> &encoding);
 
-/**
- * Returns a string containing the character specified by the Unicode code point value, encoded in the specified encoding 
- * @param int codepoint A Unicode codepoint value, e.g. 128024 for U+1F418 ELEPHANT
- * @param ?string encoding (default = null) The encoding parameter is the character encoding. If it is omitted or null,
- * the internal character encoding value will be used.
- * @return string|false A string containing the requested character, if it can be represented in the specified encoding or false on failure.
- */
-Optional<string> f$mb_chr(const int64_t codepoint, const Optional<string> &encoding);
-
-/**
- * Perform case folding on a string
- * @param string str The string being converted
- * @param int mode The mode of the conversion. It can be one of MB_CASE_UPPER, MB_CASE_LOWER, MB_CASE_TITLE, MB_CASE_FOLD,
- * MB_CASE_UPPER_SIMPLE, MB_CASE_LOWER_SIMPLE, MB_CASE_TITLE_SIMPLE, MB_CASE_FOLD_SIMPLE
- * @param ?string encoding (default = null) The encoding parameter is the character encoding. If it is omitted or null,
- * the internal character encoding value will be used.
- * @return string A case folded version of string converted in the way specified by mode
- */
-string f$mb_convert_case(const string &str, const int64_t mode, const Optional<string> &encoding);
-
-/**
- * Convert from one character encoding to another
- * @param array|string str The string or array to be converted
- * @param string to_encoding The desired encoding of the result
- * @param array|string|null from_encoding (default = null) The current encoding used to interpret string.
- * Multiple encodings may be specified as an array or comma separated list,
- * in which case the correct encoding will be guessed using the same algorithm as mb_detect_encoding().
- * If from_encoding is null or not specified, the mbstring.internal_encoding setting will be used if set, otherwise the default_charset setting.
- * @return array|string|false The encoded string
- */
 mixed f$mb_convert_encoding(const mixed &str, const string &to_encoding, const mixed &from_encoding);
 
-/**
- * Convert "kana" one from another ("zen-kaku", "han-kaku" and more)
- * @param string str The string being converted
- * @param string mode The conversion option (default = "KV")
- * r - Convert "zen-kaku" alphabets to "han-kaku"
- * R - Convert "han-kaku" alphabets to "zen-kaku"
- * n - Convert "zen-kaku" numbers to "han-kaku"
- * N - Convert "han-kaku" numbers to "zen-kaku"
- * a - Convert "zen-kaku" alphabets and numbers to "han-kaku"
- * A - Convert "han-kaku" alphabets and numbers to "zen-kaku" 
- * (Characters included in "a", "A" options are U+0021 - U+007E excluding U+0022, U+0027, U+005C, U+007E)
- * s - Convert "zen-kaku" space to "han-kaku" (U+3000 -> U+0020)
- * S - Convert "han-kaku" space to "zen-kaku" (U+0020 -> U+3000)
- * k - Convert "zen-kaku kata-kana" to "han-kaku kata-kana"
- * K - Convert "han-kaku kata-kana" to "zen-kaku kata-kana"
- * h - Convert "zen-kaku hira-gana" to "han-kaku kata-kana"
- * H - Convert "han-kaku kata-kana" to "zen-kaku hira-gana"
- * c - Convert "zen-kaku kata-kana" to "zen-kaku hira-gana"
- * C - Convert "zen-kaku hira-gana" to "zen-kaku kata-kana"
- * V - Collapse voiced sound notation and convert them into a character. Use with "K","H"
- * @param ?string encoding (default = null) The encoding parameter is the character encoding.
- * If it is omitted or null, the internal character encoding value will be used.
- * @return string The converted string
- */
-string f$mb_convert_kana(const string &str, const string &mode, const Optional<string> &encoding);
-
-/**
- * Convert character code in variable(s)
- * @param string to_encoding The encoding that the string is being converted to
- * @param array|string from_encoding is specified as an array or comma separated string, it tries to detect encoding from from-coding.
- * When from_encoding is omitted, detect_order is used.
- * @param mixed &vars References to the variable being converted. String, Array are accepted. mb_convert_variables() assumes
- * all parameters have the same encoding.
- * @return string|false The character encoding before conversion for success, or false for failure
- */
-Optional<string> f$mb_convert_variables(const string &to_encoding, const mixed &from_encoding, const mixed &vars); // ???
-
-/**
- * Decode string in MIME header field
- * @param string str The string being decoded
- * @return string The decoded string in internal character encoding
- */
-string f$mb_decode_mimeheader(const string &string);
-
-/**
- * Decode HTML numeric string reference to character
- * @param string str The string being decoded
- * @param array map An array that specifies the code area to convert
- * @param ?string encoding (default = null) The encoding parameter is the character encoding.
- * If it is omitted or null, the internal character encoding value will be used.
- * @return string The converted string
- */
-string f$mb_decode_numericentity(const string &str, const array<int> &map, const Optional<string> &encoding);
-
-/**
- * Detect character encoding
- * Detects the most likely character encoding for string string from an ordered list of candidates. Automatic detection of the intended character encoding
- * can never be entirely reliable; without some additional information, it is similar to decoding an encrypted string without the key. It is always preferable
- * to use an indication of character encoding stored or transmitted with the data, such as a "Content-Type" HTTP header. This function is most useful with
- * multibyte encodings, where not all sequences of bytes form a valid string. If the input string contains such a sequence, that encoding will be rejected,
- * and the next encoding checked.
- * @param string str The string being inspected
- * @param array|string|null encodings (default = null) A list of character encodings to try, in order. The list may be specified as an array of strings,
- * or a single string separated by commas. If encodings is omitted or null, the current detect_order (set with the mbstring.detect_order configuration option,
- * or mb_detect_order() function) will be used.
- * @param bool strict (default = false) Controls the behaviour when string is not valid in any of the listed encodings.
- * If strict is set to false, the closest matching encoding will be returned; if strict is set to true, false will be returned.
- * @return string|false Controls the behaviour when string is not valid in any of the listed encodings. If strict is set to false,
- * the closest matching encoding will be returned; if strict is set to true, false will be returned. The default value for strict can be set
- * with the mbstring.strict_detection configuration option.
- */
-Optional<string> f$mb_detect_encoding(const string &str, const mixed &encodings, const bool strict = false);
-
-/**
- * Set/Get character encoding detection order
- * @param array|string|null encoding (default = null) encoding is an array or comma separated list of character encoding. See supported encodings.
- * If encoding is omitted or null, it returns the current character encoding detection order as array. This setting affects
- * mb_detect_encoding() and mb_send_mail().
- * @return array|bool When setting the encoding detection order, true is returned on success or false on failure.
- * When getting the encoding detection order, an ordered array of the encodings is returned.
- */
-mixed f$mb_detect_order(const mixed &encoding);
-
-/**
- * Encode string for MIME header
- * @param string str The string being encoded. Its encoding should be same as mb_internal_encoding()
- * @param ?string charset (default = null) Specifies the name of the character set in which string is represented in.
- * The default value is determined by the current NLS setting (mbstring.language)
- * @param ?string transfer_encoding (default = null) Specifies the scheme of MIME encoding.
- * It should be either "B" (Base64) or "Q" (Quoted-Printable). Falls back to "B" if not given.
- * @param string newline (default = "\r\n") Specifies the EOL (end-of-line) marker with which mb_encode_mimeheader() performs line-folding 
- * (a » RFC term, the act of breaking a line longer than a certain length into multiple lines. The length is currently hard-coded to 74 characters).
- * Falls back to "\r\n" (CRLF) if not given.
- * @param int indent (default = 0) Indentation of the first line (number of characters in the header before string)
- * @return string A converted version of the string represented in ASCII
- */
-string f$mb_encode_mimeheader(const string &str, const Optional<string> &charset, const Optional<string> &transfer_encoding, const string &newline, const int64_t indent);
-
-/**
- * Encode character to HTML numeric string reference
- * Converts specified character codes in string string from character code to HTML numeric character reference
- * @param string str The string being encoded
- * @param array map Aarray specifies code area to convert
- * @param ?string encding (default = null) The encoding parameter is the character encoding. If it is omitted or null, the internal character encoding value will be used
- * @param bool hex (default = false) Whether the returned entity reference should be in hexadecimal notation (otherwise it is in decimal notation)
- * @return string The converted string
- */
-string f$mb_encode_numericentity(const string &str, const array<int> &map, const Optional<string> &encoding, const bool hex = false);
-
-/**
- * Get aliases of a known encoding type
- * @param string encoding The encoding type being checked, for aliases
- * @return array Returns a numerically indexed array of encoding aliases
- */
-array<string> f$mb_encoding_aliases(const string &encoding);
-
-/**
- * Regular expression match for multibyte string
- * @param string pattern The regular expression pattern
- * @param string str The string being evaluated
- * @param ?string options (default = null) The search option. See mb_regex_set_options() for explanation
- * @return bool Returns true if string matches the regular expression pattern, false if not
- */
-bool f$mb_ereg_match(const string &pattern, const string &str, const Optional<string> &options);
-
-/**
- * Perform a regular expression search and replace with multibyte support using a callback
- * Scans string for matches to pattern, then replaces the matched text with the output of callback function.
- * The behavior of this function is almost identical to mb_ereg_replace(), except for the fact that instead of replacement parameter,
- * one should specify a callback.
- * @param string pattern The regular expression pattern. Multibyte characters may be used in pattern.
- * @param callable callback A callback that will be called and passed an array of matched elements in the subject string. 
- * The callback should return the replacement string. You'll often need the callback function for a mb_ereg_replace_callback() in just one place. 
- * In this case you can use an anonymous function to declare the callback within the call to mb_ereg_replace_callback(). 
- * By doing it this way you have all information for the call in one place and do not clutter the function namespace with a callback
- * function's name not used anywhere else.
- * @param string str The string being checked
- * @param ?string options (default = null) The search option. See mb_regex_set_options() for explanation
- * @return string|false|null The resultant string on success, or false on error. If string is not valid for the current encoding, null is returned
- */
-// Optional<string> f$mb_ereg_replace_callback(const string &pattern, const CallableT &callback, const string &str, const Optional<string> options); // callback
-
-/**
- * Replace regular expression with multibyte support
- * Scans string for matches to pattern, then replaces the matched text with replacement
- * @param string pattern The regular expression pattern. Multibyte characters may be used in pattern
- * @param string replacement The replacement text
- * @param string str The string being checked
- * @param ?string options (default = null) The search option. See mb_regex_set_options() for explanation
- * @return string|false|null The resultant string on success, or false on error. If string is not valid for the current encoding, null is returned
- */
-Optional<string> f$mb_ereg_replace(const string &pattern, const string &replacement, const string &str, const Optional<string> &options);
-
-/**
- * Returns start point for next regular expression match
- * @return int mb_ereg_search_getpos() returns the point to start regular expression match for mb_ereg_search(), mb_ereg_search_pos(), mb_ereg_search_regs().
- * The position is represented by bytes from the head of string.
- */
-int64_t f$mb_ereg_search_getpos(void);
-
-/**
- * Retrieve the result from the last multibyte regular expression match
- * @return array|false An array including the sub-string of matched part by last mb_ereg_search(), mb_ereg_search_pos(), mb_ereg_search_regs().
- * If there are some matches, the first element will have the matched sub-string, the second element will have the first part grouped with brackets,
- * the third element will have the second part grouped with brackets, and so on. It returns false on error.
- */
-mixed f$mb_ereg_search_getregs(void);
-
-/**
- * Setup string and regular expression for a multibyte regular expression match
- * mb_ereg_search_init() sets string and pattern for a multibyte regular expression.
- * These values are used for mb_ereg_search(), mb_ereg_search_pos(),and mb_ereg_search_regs().
- * @param string str The search string
- * @param ?string pattern (default = null) The search pattern
- * @param ?string options (default = null) The search option. See mb_regex_set_options() for explanation
- * @return bool Returns true on success or false on failure
- */
-bool f$mb_ereg_search_init(const string &str, const Optional<string> &pattern, const Optional<string> &options);
-
-/**
- * Returns position and length of a matched part of the multibyte regular expression for a predefined multibyte string
- * The string for match is specified by mb_ereg_search_init(). If it is not specified, the previous one will be used
- * @param ?string pattern (default = null) The search pattern
- * @param ?string options (default = null) The search option. See mb_regex_set_options() for explanation
- * @return array|false An array containing two elements. The first element is the offset, in bytes, where the match begins relative to the start of
- * the search string, and the second element is the length in bytes of the match. If an error occurs, false is returned.
- */
-mixed f$mb_ereg_search_pos(const Optional<string> &pattern, const Optional<string> &options);
-
-/**
- * Returns the matched part of a multibyte regular expression
- * @param ?string pattern (default = null) The search pattern
- * @param ?string options (deafult = null) The search option. See mb_regex_set_options() for explanation
- * @return array|false mb_ereg_search_regs() executes the multibyte regular expression match, and if there are some matched part,
- * it returns an array including substring of matched part as first element, the first grouped part with brackets as second element,
- * the second grouped part as third element, and so on. It returns false on error.
- */
-mixed f$mb_ereg_search_regs(const Optional<string> &pattern, const Optional<string> &options);
-
-/**
- * Set start point of next regular expression match
- * mb_ereg_search_setpos() sets the starting point of a match for mb_ereg_search().
- * @param int offset The position to set. If it is negative, it counts from the end of the string
- * @return bool Returns true on success or false on failure
- */
-bool f$mb_ereg_search_setpos(const int64_t offset);
-
-/**
- * Multibyte regular expression match for predefined multibyte string
- * @param ?string pattern (default = null) The search pattern
- * @param ?string options (default = null) The search option. See mb_regex_set_options() for explanation
- * @return bool mb_ereg_search() returns true if the multibyte string matches with the regular expression, or false otherwise. The string for matching
- * is set by mb_ereg_search_init(). If pattern is not specified, the previous one is used.
- */
-bool f$mb_ereg_search(const Optional<string> &pattern, const Optional<string> &options);
-
-/**
- * Regular expression match with multibyte support
- * @param string pattern The search pattern
- * @param string str The search string
- * @param array matches (default = null) If matches are found for parenthesized substrings of pattern and the function is called with the
- * third argument matches, the matches will be stored in the elements of the array matches. If no matches are found, matches is set to an empty array.
- * matches[1] will contain the substring which starts at the first left parenthesis; $matches[2] will contain the substring starting at the second,
- * and so on. $matches[0] will contain a copy of the complete string matched.
- * @return bool Returns whether pattern matches string
- */
-bool f$mb_ereg(const string &pattern, const string &str, const array<string> &matches);
-
-/**
- * Replace regular expression with multibyte support ignoring case
- * Scans string for matches to pattern, then replaces the matched text with replacement
- * @param string pattern The regular expression pattern. Multibyte characters may be used. The case will be ignored
- * @param string replacement The replacement text
- * @param string str The searched string
- * @param ?string options (default = null) The search option. See mb_regex_set_options() for explanation
- * @return string|false|null The resultant string or false on error. If string is not valid for the current encoding, null is returned
- */
-Optional<string> f$mb_eregi_replace(const string &pattern, const string &replacement, const string &str, const Optional<string> &options);
-
-/**
- * Regular expression match ignoring case with multibyte support
- * @param string pattern The regular expression pattern
- * @param string str The string being searched
- * @param array matches (default = null) If matches are found for parenthesized substrings of pattern and the function is called with the third argument matches,
- * the matches will be stored in the elements of the array matches. If no matches are found, matches is set to an empty array.
- * matches[1] will contain the substring which starts at the first left parenthesis; $matches[2] will contain the substring starting at the second,
- * and so on. $matches[0] will contain a copy of the complete string matched.
- * @return bool Returns whether pattern matches string
- */
-bool f$mb_eregi(const string &pattern, const string &str, const array<string> &matches);
-
-/**
- * Get internal settings of mbstring
- * @param string type (default = "all") If type is not specified or is specified as "all", "internal_encoding", "http_input", "http_output",
- * "http_output_conv_mimetypes", "mail_charset", "mail_header_encoding", "mail_body_encoding", "illegal_chars", "encoding_translation", "language",
- * "detect_order", "substitute_character" and "strict_detection" will be returned.
- * If type is specified as "internal_encoding", "http_input", "http_output", "http_output_conv_mimetypes", "mail_charset", "mail_header_encoding",
- * "mail_body_encoding", "illegal_chars", "encoding_translation", "language", "detect_order", "substitute_character" or "strict_detection"
- * the specified setting parameter will be returned.
- * @return array|string|int|false An array of type information if type is not specified, otherwise a specific type, or false on failure
- */
-mixed f$mb_get_info(const string &type);
-
-/**
- * Detect HTTP input character encoding
- * @param ?string type (default = null) Input string specifies the input type. "G" for GET, "P" for POST, "C" for COOKIE, "S" for string,
- * "L" for list, and "I" for the whole list (will return array). If type is omitted, it returns the last input type processed.
- * @return array|string|false The character encoding name, as per the type, or an array of character encoding names, if type is "I".
- * If mb_http_input() does not process specified HTTP input, it returns false.
- */
-mixed f$mb_http_input(const Optional<string> &type);
-
-/**
- * Set/Get the HTTP output character encoding. Output after this function is called will be converted from the set internal encoding to encoding
- * @param ?string encoding (default = null) If encoding is set, mb_http_output() sets the HTTP output character encoding to encoding.
- * If encoding is omitted, mb_http_output() returns the current HTTP output character encoding.
- * @return string|bool If encoding is omitted, mb_http_output() returns the current HTTP output character encoding. Otherwise,
- * Returns true on success or false on failure.
- */
-mixed f$mb_http_output(const Optional<string> &encoding);
-
-/**
- * Set/Get internal character encoding
- * @param ?string encoding (default = null) encoding is the character encoding name used for the HTTP input character encoding conversion,
- * HTTP output character encoding conversion, and the default character encoding for string functions defined by the mbstring module.
- * You should notice that the internal encoding is totally different from the one for multibyte regex.
- * @return string|bool If encoding is set, then Returns true on success or false on failure.
- * In this case, the character encoding for multibyte regex is NOT changed.
- * If encoding is omitted, then the current character encoding name is returned.
- */
-mixed f$mb_internal_encoding(const Optional<string> &encoding);
-
-/**
- * Set/Get the current language
- * @param ?string language (default = null) Used for encoding e-mail messages. The valid languages are listed in the following table.
- * mb_send_mail() uses this setting to encode e-mail.
- * +---------------------------+-------------+------------------+-----------+
- * | Language                  | Charset     | Encoding         | Alias     |
- * +---------------------------+-------------+------------------+-----------+
- * | German/de                 | ISO-8859-15 | Quoted-Printable | Deutsch   |
- * | English/en                | ISO-8859-1  | Quoted-Printable |           |
- * | Armenian/hy               | ArmSCII-8   | Quoted-Printable |           |
- * | Japanese/ja               | ISO-2022-JP | BASE64           |           |
- * | Korean/ko                 | ISO-2022-KR | BASE64           |           |
- * | neutral                   | UTF-8       | BASE64           |           |
- * | Russian/ru                | KOI8-R      | Quoted-Printable |           |
- * | Turkish/tr                | ISO-8859-9  | Quoted-Printable |           |
- * | Ukrainian/ua              | KOI8-U      | Quoted-Printable |           |
- * | uni                       | UTF-8       | BASE64           | universal |
- * | Simplified Chinese/zh-cn  | HZ          | BASE64           |           |
- * | Traditional Chinese/zh-tw | BIG-5       | BASE64           |           |   
- * +---------------------------+-------------+------------------+-----------+
- * @return string|bool If language is set and language is valid, it returns true. Otherwise, it returns false. When language is omitted or null,
- * it returns the language name as a string
- */
-mixed f$mb_language(const Optional<string> &language);
-
-/**
- * Returns an array of all supported encodings
- * @return array Returns a numerically indexed array
- */
-array<string> f$mb_list_encodings(void);
-
-/**
- * Returns the Unicode code point value of the given character. This function complements mb_chr().
- * @param string str A string
- * @param string? encoding (default = null) The encoding parameter is the character encoding. If it is omitted or null,
- * the internal character encoding value will be used.
- * @return int|false The Unicode code point for the first character of string or false on failure.
- */
-Optional<int> f$mb_ord(const string &str, const Optional<string> &encoding);
-
-/**
- * mb_output_handler() is ob_start() callback function. mb_output_handler() converts characters in the output buffer from internal
- * character encoding to HTTP output character encoding.
- * @param string str The contents of the output buffer
- * @param int status The status of the output buffer
- * @return string The converted string
- */
-string f$mb_output_handler(const string &str, const int64_t status);
-
-/**
- * Parses GET/POST/COOKIE data and sets global variables. Since PHP does not provide raw POST/COOKIE data, it can only be used for GET data for now.
- * It parses URL encoded data, detects encoding, converts coding to internal encoding and set values to the result array or global variables.
- * @param string str The URL encoded data
- * @param array result An array containing decoded and character encoded converted values
- * @return bool Returns true on success or false on failure
- */
-bool f$mb_parse_str(const string &str, const array<string> &result); // result = map<string, string>
-
-/**
- * Get a MIME charset string for a specific encoding.
- * @param string encoding The encoding being checked
- * @return string|false The MIME charset string for character encoding encoding, or false if no charset is preferred for the given encoding
- */
-Optional<string> f$mb_preferred_mime_name(const string &encoding);
-
-/**
- * Set/Get character encoding for a multibyte regex
- * @param ?string encoding (default = null) The encoding parameter is the character encoding. If it is omitted or null,
- * the internal character encoding value will be used
- * @return string|bool If encoding is set, then Returns true on success or false on failure. In this case, the internal character encoding is NOT changed.
- * If encoding is omitted, then the current character encoding name for a multibyte regex is returned
- */
-mixed f$mb_regex_encoding(const Optional<string> &encoding);
-
-/**
- * Sets the default options described by options for multibyte regex functions
- * @param ?string options (default = null) The options to set. This is a string where each character is an option.
- * To set a mode, the mode character must be the last one set, however there can only be set one mode but multiple options
- * 
- * Regex options:
- * +--------+----------------------------------+
- * | Option | Meaning                          |
- * +--------+----------------------------------+ 
- * | i      | Ambiguity match on               |
- * | x      | Enables extended pattern form    |
- * | m      | '.' matches with newlines        |
- * | s      | '^' -> '\A', '$' -> '\Z'         |
- * | p      | Same as both the m and s options |
- * | l      | Finds longest matches            |
- * | n      | Ignores empty matches            |
- * | e      | eval() resulting code            |
- * +--------+----------------------------------+
- * 
- * Regex syntax modes:
- * +------+----------------------------+
- * | Mode | Meaning                    |
- * +------+----------------------------+
- * | j    | Java (Sun java.util.regex) |
- * | u    | GNU regex                  | 
- * | g    | grep                       |
- * | c    | Emacs                      |
- * | r    | Ruby                       |
- * | z    | Perl                       |
- * | b    | POSIX Basic regex          |
- * | d    | POSIX Extended regex       |
- * +------+----------------------------+
- *
- * @return string The previous options. If options is omitted or null, it returns the string that describes the current options
- */
-string f$mb_regex_set_options(const Optional<string> &options);
-
-/**
- * This function is currently not documented; only its argument list is available.
- * @param string str
- * @param ?string encoding (default = null)
- * @return string
- */
-string f$mb_scrub(const string &str, const Optional<string> &encoding);
-
-/**
- * Sends email. Headers and messages are converted and encoded according to the mb_language() setting.
- * It's a wrapper function for mail(), so see also mail() for detail
- * @param string to The mail addresses being sent to. Multiple recipients may be specified by putting a comma between each address in to.
- * This parameter is not automatically encoded
- * @param string subject The subject of the mail
- * @param string message The message of the mail
- * @param array|string additional_headers (default = []) String or array to be inserted at the end of the email header.
- * This is typically used to add extra headers (From, Cc, and Bcc). Multiple extra headers should be separated with a CRLF (\r\n).
- * Validate parameter not to be injected unwanted headers by attackers. If an array is passed, its keys are the header names and its
- * values are the respective header values
- * Note:
- * If messages are not received, try using a LF (\n) only. Some Unix mail transfer agents (most notably » qmail) replace LF by CRLF automatically
- * (which leads to doubling CR if CRLF is used). This should be a last resort, as it does not comply with » RFC 2822.
- * @param ?string additional_params (default = null) additional_params is a MTA command line parameter. It is useful when setting the correct Return-Path header
- * when using sendmail. This parameter is escaped by escapeshellcmd() internally to prevent command execution. escapeshellcmd() prevents command execution,
- * but allows to add additional parameters. For security reason, this parameter should be validated. Since escapeshellcmd() is applied automatically,
- * some characters that are allowed as email addresses by internet RFCs cannot be used. Programs that are required to use these characters mail() cannot be used.
- * The user that the webserver runs as should be added as a trusted user to the sendmail configuration to prevent a 'X-Warning' header from being added to
- * the message when the envelope sender (-f) is set using this method. For sendmail users, this file is /etc/mail/trusted-users
- * @return bool Returns true on success or false on failure
- */
-bool f$mb_send_mail(const string &to, const string &subject, const string &message, const mixed &additional_headers, const Optional<string> &additional_params);
-
-/**
- * Split a multibyte string using regular expression pattern and returns the result as an array
- * @param string pattern The regular expression pattern
- * @param string str The string being split
- * @param int limit (default = -1) If optional parameter limit is specified, it will be split in limit elements as maximum
- * @return array|false The result as an array, or false on failure
- */
-mixed f$mb_split(const string &pattern, const string &str, const int64_t limit = -1);
-
-/**
- * This function will return an array of strings, it is a version of str_split() with support for encodings of variable character size as well
- * as fixed-size encodings of 1,2 or 4 byte characters. If the length parameter is specified, the string is broken down into chunks of the specified
- * length in characters (not bytes). The encoding parameter can be optionally specified and it is good practice to do so
- * @param string str The string to split into characters or chunks
- * @param int length (default = 1) If specified, each element of the returned array will be composed of multiple characters instead of a single character
- * @param ?string encoding (default = null) The encoding parameter is the character encoding. If it is omitted or null, the internal character encoding value
- * will be used. A string specifying one of the supported encodings
- * @return array mb_str_split() returns an array of strings
- */
-array<string> f$mb_str_split(const string &str, const int64_t length, const Optional<string> &encoding);
-
-/**
- * mb_strcut() extracts a substring from a string similarly to mb_substr(), but operates on bytes instead of characters.
- * If the cut position happens to be between two bytes of a multi-byte character, the cut is performed starting from the first byte of that character.
- * This is also the difference to the substr() function, which would simply cut the string between the bytes and thus result in a malformed byte sequence
- * @param string str The string being cut
- * @param int start If start is non-negative, the returned string will start at the start'th byte position in string, counting from zero.
- * For instance, in the string 'abcdef', the byte at position 0 is 'a', the byte at position 2 is 'c', and so forth.
- * If start is negative, the returned string will start at the start'th byte counting back from the end of string.
- * However, if the magnitude of a negative start is greater than the length of the string, the returned portion will start from the beginning of string
- * @param ?int length (default = null) Length in bytes. If omitted or NULL is passed, extract all bytes to the end of the string.
- * If length is negative, the returned string will end at the length'th byte counting back from the end of string.
- * However, if the magnitude of a negative length is greater than the number of characters after the start position, an empty string will be returned
- * @param ?string encoding The encoding parameter is the character encoding. If it is omitted or null, the internal character encoding value will be used
- * @return string mb_strcut() returns the portion of string specified by the start and length parameters
- */
-string f$mb_strcut(const string &str, const int64_t start, const Optional<int64_t> &length, const Optional<string> &encoding);
-
-/**
- * Truncates string string to specified width, where halfwidth characters count as 1, and fullwidth characters count as 2.
- * See » http://www.unicode.org/reports/tr11/ for details regarding East Asian character widths
- * @param string str The string being decoded
- * @param int start The start position offset. Number of characters from the beginning of string (first character is 0),
- * or if start is negative, number of characters from the end of the string
- * @param int width The width of the desired trim. Negative widths count from the end of the string
- * @param string trim_marker (default = "") A string that is added to the end of string when string is truncated
- * @param ?string encoding (default = null) The encoding parameter is the character encoding. If it is omitted or null,
- * the internal character encoding value will be used
- * @return string The truncated string. If trim_marker is set, trim_marker replaces the last chars to match the width
- */
-string f$mb_strimwidth(const string &str, const int64_t start, const int64_t width, const string &trim_marker, const Optional<string> &encoding);
-
-/**
- * mb_stripos() returns the numeric position of the first occurrence of needle in the haystack string. Unlike mb_strpos(),
- * mb_stripos() is case-insensitive. If needle is not found, it returns false
- * @param string haystack The string from which to get the position of the first occurrence of needl
- * @param string needle The string to find in haystack
- * @param int offset (default = 0) The position in haystack to start searching. A negative offset counts from the end of the string
- * @param ?string encoding (default = null) Character encoding name to use. If it is omitted, internal character encoding is used
- * @return int|false Return the numeric position of the first occurrence of needle in the haystack string, or false if needle is not found
- */
-Optional<int> f$mb_stripos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
-
-/**
- * mb_stristr() finds the first occurrence of needle in haystack and returns the portion of haystack.
- * Unlike mb_strstr(), mb_stristr() is case-insensitive. If needle is not found, it returns false
- * @param string haystack The string from which to get the first occurrence of needle
- * @param string needle The string to find in haystack
- * @param bool before_needle (default = false) Determines which portion of haystack this function returns.
- * If set to true, it returns all of haystack from the beginning to the first occurrence of needle (excluding needle).
- * If set to false, it returns all of haystack from the first occurrence of needle to the end (including needle)
- * @param ?string encoding (default = null) Character encoding name to use. If it is omitted, internal character encoding is used
- * @return string|false Returns the portion of haystack, or false if needle is not found
- */
-Optional<string> f$mb_stristr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
-
-/**
- * Gets the length of a string
- * @param string str The string being checked for length
- * @param ?string encoding (default = null) The encoding parameter is the character encoding.
- * If it is omitted or null, the internal character encoding value will be used
- * @return int Returns the number of characters in string string having character encoding encoding. A multi-byte character is counted as 1
- */
 int64_t f$mb_strlen(const string &str, const Optional<string> &encoding);
 
-/**
- * Finds position of the first occurrence of a string in a string. Performs a multi-byte safe strpos() operation based on number of characters.
- * The first character's position is 0, the second character position is 1, and so on
- * @param string haystack The string being checked
- * @param string needle The string to find in haystack. In contrast with strpos(), numeric values are not applied as the ordinal value of a character
- * @param int offset (default = 0) The search offset. If it is not specified, 0 is used. A negative offset counts from the end of the string
- * @param ?string encoding (default = null) The encoding parameter is the character encoding. If it is omitted or null,
- * the internal character encoding value will be used
- * @return int|false Returns the numeric position of the first occurrence of needle in the haystack string. If needle is not found, it returns false
- */
-Optional<int> f$mb_strpos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
-
-/**
- * mb_strrchr() finds the last occurrence of needle in haystack and returns the portion of haystack. If needle is not found, it returns false
- * @param string haystack The string from which to get the last occurrence of needle
- * @param string needle The string to find in haystack
- * @param bool before_needle Determines which portion of haystack this function returns.
- * If set to true, it returns all of haystack from the beginning to the last occurrence of needle.
- * If set to false, it returns all of haystack from the last occurrence of needle to the end
- * @param ?string encoding (default = null) Character encoding name to use. If it is omitted, internal character encoding is used
- * @return string|false Returns the portion of haystack. or false if needle is not found
- */
-Optional<string> f$mb_strrchr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
+string f$mb_substr(const string &str, const int64_t start, const Optional<int64_t> &length, const Optional<string> &encoding);
 
-/**
- * mb_strrichr() finds the last occurrence of needle in haystack and returns the portion of haystack. Unlike mb_strrchr(), mb_strrichr() is case-insensitive.
- * If needle is not found, it returns false
- * @param string haystack The string from which to get the last occurrence of needle
- * @param string needle The string to find in haystack
- * @param bool before_needle Determines which portion of haystack this function returns.
- * If set to true, it returns all of haystack from the beginning to the last occurrence of needle.
- * If set to false, it returns all of haystack from the last occurrence of needle to the end
- * @param ?string encoding (default = null)
- * @return string|false Character encoding name to use. If it is omitted, internal character encoding is used
- */
-Optional<string> f$mb_strrichr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
+int64_t f$mb_substr_count(const string &haystack, const string &needle, const Optional<string> &encoding);
 
-/**
- * mb_strripos() performs multi-byte safe strripos() operation based on number of characters. needle position is counted from the beginning of haystack.
- * First character's position is 0. Second character position is 1. Unlike mb_strrpos(), mb_strripos() is case-insensitive
- * @param string haystack The string from which to get the position of the last occurrence of needle
- * @param string needle The string to find in haystack
- * @param int offset The position in haystack to start searching
- * @param ?string encoding (default = null) Character encoding name to use. If it is omitted, internal character encoding is used
- * @return int|false Return the numeric position of the last occurrence of needle in the haystack string, or false if needle is not found
- */
-Optional<int> f$mb_strripos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
-
-/**
- * Performs a multibyte safe strrpos() operation based on the number of characters. needle position is counted from the beginning of haystack.
- * First character's position is 0. Second character position is 1
- * @param string haystack The string being checked, for the last occurrence of needle
- * @param string needle The string to find in haystack
- * @param int offset (default = 0) May be specified to begin searching an arbitrary number of characters into the string. Negative values will stop searching at an arbitrary point prior to the end of the string
- * @param ?string encoding The encoding parameter is the character encoding. If it is omitted or null, the internal character encoding value will be used
- * @return int|false Returns the numeric position of the last occurrence of needle in the haystack string. If needle is not found, it returns false
- */
-Optional<int> f$mb_strrpos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
-
-/**
- * mb_strstr() finds the first occurrence of needle in haystack and returns the portion of haystack. If needle is not found, it returns false
- * @param string haystack The string from which to get the first occurrence of needle
- * @param string needle The string to find in haystack
- * @param bool before_needle Determines which portion of haystack this function returns.
- * If set to true, it returns all of haystack from the beginning to the first occurrence of needle (excluding needle).
- * If set to false, it returns all of haystack from the first occurrence of needle to the end (including needle)
- * @param ?string encoding (default = null) Character encoding name to use. If it is omitted, internal character encoding is used
- * @return string|false Returns the portion of haystack, or false if needle is not found
- */
-Optional<string> f$mb_strstr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
+string f$mb_strtoupper(const string &str, const Optional<string> &encoding);
 
-/**
- * Returns string with all alphabetic characters converted to lowercase
- * @param string str The string being lowercased
- * @param ?string encoding (default = null) The encoding parameter is the character encoding.
- * If it is omitted or null, the internal character encoding value will be used
- * @return string string with all alphabetic characters converted to lowercase
- */
 string f$mb_strtolower(const string &str, const Optional<string> &encoding);
 
-/**
- * Returns string with all alphabetic characters converted to uppercase.
- * @param string str The string being uppercased
- * @param ?string encoding (default = null) The encoding parameter is the character encoding.
- * If it is omitted or null, the internal character encoding value will be used
- * @return string string with all alphabetic characters converted to uppercase
- */
-string f$mb_strtoupper(const string &str, const Optional<string> &encoding);
-
-/**
- * Returns the width of string string, where halfwidth characters count as 1, and fullwidth characters count as 2.
- * See » http://www.unicode.org/reports/tr11/ for details regarding East Asian character widths. The fullwidth characters are:
- * U+1100-U+115F, U+11A3-U+11A7, U+11FA-U+11FF, U+2329-U+232A, U+2E80-U+2E99, U+2E9B-U+2EF3, U+2F00-U+2FD5, U+2FF0-U+2FFB, U+3000-U+303E, U+3041-U+3096,
- * U+3099-U+30FF, U+3105-U+312D, U+3131-U+318E, U+3190-U+31BA, U+31C0-U+31E3, U+31F0-U+321E, U+3220-U+3247, U+3250-U+32FE, U+3300-U+4DBF, U+4E00-U+A48C,
- * U+A490-U+A4C6, U+A960-U+A97C, U+AC00-U+D7A3, U+D7B0-U+D7C6, U+D7CB-U+D7FB, U+F900-U+FAFF, U+FE10-U+FE19, U+FE30-U+FE52, U+FE54-U+FE66, U+FE68-U+FE6B,
- * U+FF01-U+FF60, U+FFE0-U+FFE6, U+1B000-U+1B001, U+1F200-U+1F202, U+1F210-U+1F23A, U+1F240-U+1F248, U+1F250-U+1F251, U+20000-U+2FFFD, U+30000-U+3FFFD.
- * All other characters are halfwidth characters
- * @param string str The string being decoded
- * @param ?string encoding (default = null) The encoding parameter is the character encoding.
- * If it is omitted or null, the internal character encoding value will be used
- * @return int The width of string string
- */
 int64_t f$mb_strwidth(const string &str, const Optional<string> &encoding);
 
-/**
- * Specifies a substitution character when input character encoding is invalid or character code does not exist in output character encoding.
- * Invalid characters may be substituted "none" (no output), string or int value (Unicode character code value).
- * This setting affects mb_convert_encoding(), mb_convert_variables(), mb_output_handler(), and mb_send_mail()
- * @param string|int|null substitute_character (default = null) Specify the Unicode value as an int, or as one of the following strings:
- * "none": no output
- * "long": Output character code value (Example: U+3000, JIS+7E7E)
- * "entity": Output character entity (Example: &#x200;)
- * @return string|int|bool If substitute_character is set, it returns true for success, otherwise returns false.
- * If substitute_character is not set, it returns the current setting
- */
-mixed f$mb_substitute_character(const mixed &substitute_character);
-
-/**
- * Counts the number of times the needle substring occurs in the haystack string
- * @param string haystack The string being checked
- * @param string needle The string being found
- * @param ?string encoding (default = null) The encoding parameter is the character encoding.
- * If it is omitted or null, the internal character encoding value will be used
- * @return int The number of times the needle substring occurs in the haystack string
- */
-int64_t f$mb_substr_count(const string &haystack, const string &needle, const Optional<string> &encoding);
+Optional<int64_t> f$mb_strpos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
 
-/**
- * Performs a multi-byte safe substr() operation based on number of characters. Position is counted from the beginning of string.
- * First character's position is 0. Second character position is 1, and so on
- * @param string str The string to extract the substring from
- * @param int start If start is non-negative, the returned string will start at the start'th position in string, counting from zero.
- * For instance, in the string 'abcdef', the character at position 0 is 'a', the character at position 2 is 'c', and so forth.
- * If start is negative, the returned string will start at the start'th character from the end of string
- * @param ?int length (default = null) Maximum number of characters to use from string.
- * If omitted or NULL is passed, extract all characters to the end of the string
- * @param ?string encoding (default = null) The encoding parameter is the character encoding.
- * If it is omitted or null, the internal character encoding value will be used
- * @return string mb_substr() returns the portion of string specified by the start and length parameters
- */
-string f$mb_substr(const string &str, const int64_t start, const Optional<int64_t> &length, const Optional<string> &encoding);
+Optional<int64_t> f$mb_strrpos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
+
+Optional<int64_t> f$mb_strripos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
+
+Optional<int64_t> f$mb_stripos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
+
+Optional<string> f$mb_stristr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
+
+Optional<string> f$mb_strstr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
+
+Optional<string> f$mb_strrchr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
+
+Optional<string> f$mb_strrichr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
 
 #else
 
diff --git a/tests/cpp/runtime/mbstring-test.cpp b/tests/cpp/runtime/mbstring-test.cpp
index 2a0a484302..b2c663dbdc 100644
--- a/tests/cpp/runtime/mbstring-test.cpp
+++ b/tests/cpp/runtime/mbstring-test.cpp
@@ -18,4 +18,71 @@ TEST(mbstring_test, test_mb_convert_encoding) {
 	ASSERT_STREQ(f$mb_convert_encoding(string("ыва"), string("UTF-8"), string("ASCII")).to_string().c_str(), "??????");
 }
 
+TEST(mbstring_test, test_mb_strlen) {
+  // "Hello world!" is 12 characters long; the string is passed with an explicit "UTF-8" encoding.
+  // Keep the expectation as int64_t (the declared return type) instead of narrowing the result to int.
+  const int64_t expected_length = 12;
+  ASSERT_EQ(f$mb_strlen(string("Hello world!"), string("UTF-8")), expected_length);
+}
+
+TEST(mbstring_test, test_mb_substr) { // start = 2, length = 3 is expected to select "llo"
+  ASSERT_STREQ(f$mb_substr(string("Hello world"), 2, 3, string("UTF-8")).c_str(), "llo");
+}
+
+TEST(mbstring_test, test_mb_substr_null_length) { // false is passed for the Optional length; everything from index 3 onward is expected
+  ASSERT_STREQ(f$mb_substr(string("Hello world"), 3, false, string("UTF-8")).c_str(), "lo world");
+}
+
+TEST(mbstring_test, test_mb_substr_count) { // "l" occurs 3 times in "Hello world"; ASSERT_EQ prints both values on failure
+  ASSERT_EQ(f$mb_substr_count(string("Hello world"), string("l"), string("UTF-8")), 3);
+}
+
+TEST(mbstring_test, test_mb_strpos) { // "test" starts at character index 10; ASSERT_EQ prints both values on failure
+  ASSERT_EQ(val(f$mb_strpos(string("This is a test string"), string("test"), 0, string("UTF-8"))), 10);
+}
+
+TEST(mbstring_test, test_mb_strrpos) { // last "ol" is at character index 9 (ñ is two bytes in UTF-8, so the byte offset would differ)
+  ASSERT_EQ(val(f$mb_strrpos(string("españololol"), string("ol"), 0, string("UTF-8"))), 9);
+}
+
+TEST(mbstring_test, test_mb_strtoupper) { // input contains the non-ASCII letter ñ, which is expected to become Ñ
+  ASSERT_STREQ(f$mb_strtoupper(string("españololol"), string("UTF-8")).c_str(), "ESPAÑOLOLOL");
+}
+
+TEST(mbstring_test, test_mb_strtolower) { // input contains the non-ASCII letter Ñ, which is expected to become ñ
+  ASSERT_STREQ(f$mb_strtolower(string("ESPAÑOLOLOL"), string("UTF-8")).c_str(), "españololol");
+}
+
+TEST(mbstring_test, test_mb_stripos) { // needle "TeSt" must match "tEsT" at character index 10 regardless of case
+  ASSERT_EQ(val(f$mb_stripos(string("This is a tEsT string"), string("TeSt"), 0, string("UTF-8"))), 10);
+}
+
+TEST(mbstring_test, test_mb_strripos) { // case-insensitive "ol" matches at character indices 5, 7 and 9; the last one is expected
+  ASSERT_EQ(val(f$mb_strripos(string("espaÑOLOlol"), string("oL"), 0, string("UTF-8"))), 9);
+}
+
+TEST(mbstring_test, test_mb_strwidth) { // two characters expected to count as width 2 each; f$mb_strwidth returns a plain int64_t, so no val()
+  ASSERT_EQ(f$mb_strwidth(string("現現"), string("UTF-8")), 4);
+}
+
+TEST(mbstring_test, test_mb_strstr) { // 3rd argument is before_needle: true -> text before "test", false -> "test" onward
+  ASSERT_STREQ(f$mb_strstr(string("This is a test string"), string("test"), true, string("UTF-8")).val().c_str(), "This is a ");
+  ASSERT_STREQ(f$mb_strstr(string("This is a test string"), string("test"), false, string("UTF-8")).val().c_str(), "test string");
+}
+
+TEST(mbstring_test, test_mb_stristr_before_needle) { // needle "TeSt" differs in case from "tEsT"; both before_needle values are checked
+  ASSERT_STREQ(val(f$mb_stristr(string("This is a tEsT string"), string("TeSt"), true, string("UTF-8"))).c_str(), "This is a ");
+  ASSERT_STREQ(val(f$mb_stristr(string("This is a tEsT string"), string("TeSt"), false, string("UTF-8"))).c_str(), "tEsT string");
+}
+
+TEST(mbstring_test, test_mb_strrchr) { // NOTE(review): "test" occurs only once here, so first vs. last occurrence is not distinguished
+  ASSERT_STREQ(f$mb_strrchr(string("This is a test string"), string("test"), true, string("UTF-8")).val().c_str(), "This is a ");
+  ASSERT_STREQ(f$mb_strrchr(string("This is a test string"), string("test"), false, string("UTF-8")).val().c_str(), "test string");
+}
+
+TEST(mbstring_test, test_mb_strrichr) { // NOTE(review): same-case needle, single occurrence: case-insensitivity and "last" are not exercised
+  ASSERT_STREQ(f$mb_strrichr(string("This is a test string"), string("test"), true, string("UTF-8")).val().c_str(), "This is a ");
+  ASSERT_STREQ(f$mb_strrichr(string("This is a test string"), string("test"), false, string("UTF-8")).val().c_str(), "test string");
+}
+
 #endif
\ No newline at end of file
diff --git a/tests/phpt/mbstring/001_mb_strlen.php b/tests/phpt/mbstring/001_mb_strlen.php
new file mode 100644
index 0000000000..8d2f990cf0
--- /dev/null
+++ b/tests/phpt/mbstring/001_mb_strlen.php
@@ -0,0 +1,64 @@
+@ok
+<?php
+
+// Test case 1: Basic test with ASCII string
+function test_mb_strlen_basic_ascii() {
+  var_dump(mb_strlen("Hello", "ASCII"));
+}
+
+// Test case 2: Basic test with multibyte characters (UTF-8)
+function test_mb_strlen_basic_utf_8() {
+  var_dump(mb_strlen("こんにちは", "UTF-8"));
+}
+
+// Test case 3: Testing with empty string
+function test_mb_strlen_empty_string() {
+  var_dump(mb_strlen("", "UTF-8"));
+}
+
+// Test case 4: Testing with null encoding parameter (should use internal encoding)
+function test_mb_strlen_null_encoding() {
+  var_dump(mb_strlen("你好"));
+}
+
+// Test case 5: Testing with specific encoding (UTF-16)
+function test_mb_strlen_utf_16_encoding() {
+  var_dump(mb_strlen("안녕하세요", "UTF-16"));
+}
+
+// Test case 6: Testing with HTML entities
+function test_mb_strlen_html_entities() {
+  var_dump(mb_strlen("&lt;p&gt;This is a test&lt;/p&gt;", "UTF-8"));
+}
+
+// Test case 7: Testing with whitespace characters
+function test_mb_strlen_whitespaces() {
+  var_dump(mb_strlen("   ", "UTF-8"));
+}
+
+// Test case 8: Testing with control characters
+function test_mb_strlen_control_characters() {
+  var_dump(mb_strlen("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", "UTF-8"));
+}
+
+// Test case 9: Testing with a mixture of characters
+function test_mb_strlen_mixed_characters() {
+  var_dump(mb_strlen("Hello こんにちは 你好", "UTF-8"));
+}
+
+// Test case 10: Testing with a very large string
+function test_mb_strlen_long_string() {
+  $string = str_repeat("a", 100000);
+  var_dump(mb_strlen($string, "UTF-8"));
+}
+
+test_mb_strlen_basic_ascii();
+test_mb_strlen_basic_utf_8();
+test_mb_strlen_empty_string();
+// test_mb_strlen_null_encoding(); // doesn't put null through for some reason
+test_mb_strlen_utf_16_encoding();
+test_mb_strlen_html_entities();
+test_mb_strlen_whitespaces();
+test_mb_strlen_control_characters();
+test_mb_strlen_mixed_characters();
+test_mb_strlen_long_string();
diff --git a/tests/phpt/mbstring/002_mb_substr.php b/tests/phpt/mbstring/002_mb_substr.php
new file mode 100644
index 0000000000..b0a0a779fb
--- /dev/null
+++ b/tests/phpt/mbstring/002_mb_substr.php
@@ -0,0 +1,57 @@
+@ok
+<?php
+
+// Test case 1: Extract all characters from start position to the end of the string
+function test_mb_substr_full_str() {
+  var_dump(mb_substr("abcdef", 0, null, "UTF-8"));
+}
+
+// Test case 2: Extract characters starting from the specified position
+function test_mb_substr_with_start() {
+  var_dump(mb_substr("abcdef", 2, null, "UTF-8"));
+}
+
+// Test case 3: Extract characters using negative start position
+function test_mb_substr_with_neg_start() {
+  var_dump(mb_substr("abcdef", -3, null, "UTF-8"));
+}
+
+// Test case 4: Extract a specific number of characters
+function test_mb_substr_with_length() {
+  var_dump(mb_substr("abcdef", 1, 3, "UTF-8"));
+}
+
+// Test case 5: Test with different encoding
+function test_mb_substr_different_encoding() {
+  var_dump(mb_substr("abcdef", 1, 3, "ISO-8859-1"));
+}
+
+// Test case 6: Test with non-ASCII characters
+function test_mb_substr_non_ascii_characters() {
+  var_dump(mb_substr("你好，世界！", 0, 3, "UTF-8"));
+}
+
+// Test case 7: Test with start position beyond string length
+function test_mb_substr_start_after_string_ends() {
+  var_dump(mb_substr("你好，世界！", 20, 3, "UTF-8"));
+}
+
+// Test case 8: Test with negative start position beyond string length
+function test_mb_substr_neg_start_after_string_ends() {
+  var_dump(mb_substr("你好，世界！", -20, 3, "UTF-8"));
+}
+
+// Test case 9: Test with empty string (an empty input must yield an empty result)
+function test_mb_substr_empty_string() {
+  var_dump(mb_substr("", 0, 3, "UTF-8"));
+}
+
+test_mb_substr_full_str();
+test_mb_substr_with_start();
+test_mb_substr_with_neg_start();
+test_mb_substr_with_length();
+test_mb_substr_different_encoding();
+test_mb_substr_non_ascii_characters();
+test_mb_substr_start_after_string_ends();
+test_mb_substr_neg_start_after_string_ends();
+test_mb_substr_empty_string();
diff --git a/tests/phpt/mbstring/003_mb_substr_count.php b/tests/phpt/mbstring/003_mb_substr_count.php
new file mode 100644
index 0000000000..661609a480
--- /dev/null
+++ b/tests/phpt/mbstring/003_mb_substr_count.php
@@ -0,0 +1,57 @@
+@ok
+<?php
+
+// Test case 1: Basic test with single occurrence
+function test_mb_substr_count_basic() {
+  var_dump(mb_substr_count("hello world", "world", "UTF-8"));
+}
+
+// Test case 2: Test with multiple occurrences
+function test_mb_substr_count_basic_multiple() {
+  var_dump(mb_substr_count("hello world, hello universe", "hello", "UTF-8"));
+}
+
+// Test case 3: Test with empty haystack
+function test_mb_substr_count_empty_haystack() {
+  var_dump(mb_substr_count("", "hello", "UTF-8"));
+}
+
+// Test case 4: Test with ru letters
+function test_mb_substr_count_basic_ru() {
+  var_dump(mb_substr_count("привет мир", "мир", "UTF-8"));
+}
+
+// Test case 5: Test with non-UTF-8 encoding
+function test_mb_substr_count_non_utf_8() {
+  var_dump(mb_substr_count("пこんにちは世界", "世界", "SJIS"));
+}
+
+// Test case 6: Test with haystack and needle being identical
+function test_mb_substr_count_haystack_equals_needle() {
+  var_dump(mb_substr_count("hello", "hello", "UTF-8"));
+}
+
+// Test case 7: Test with haystack containing repeating needle
+function test_mb_substr_count_haystack_is_repeated_needle() {
+  var_dump(mb_substr_count("aaaaaa", "aa", "UTF-8"));
+}
+
+// Test case 8: Test with Unicode characters
+function test_mb_substr_count_unicode() {
+  var_dump(mb_substr_count("🎉🎉🎉", "🎉", "Unicode"));
+}
+
+// Test case 9: Test with needle not found (expects a count of zero)
+function test_mb_substr_count_not_found() {
+  var_dump(mb_substr_count("hello world", "foo", "UTF-8"));
+}
+
+test_mb_substr_count_basic();
+test_mb_substr_count_basic_multiple();
+test_mb_substr_count_empty_haystack();
+test_mb_substr_count_basic_ru();
+test_mb_substr_count_non_utf_8();
+test_mb_substr_count_haystack_equals_needle();
+test_mb_substr_count_haystack_is_repeated_needle();
+test_mb_substr_count_unicode();
+test_mb_substr_count_not_found();
diff --git a/tests/phpt/mbstring/004_mb_strwidth.php b/tests/phpt/mbstring/004_mb_strwidth.php
new file mode 100644
index 0000000000..a1d7ce13b3
--- /dev/null
+++ b/tests/phpt/mbstring/004_mb_strwidth.php
@@ -0,0 +1,63 @@
+@ok
+<?php
+
+// Test case 1: Basic test with ASCII characters
+function test_mb_strwidth_basic() {
+  var_dump(mb_strwidth('Hello', "ASCII"));
+}
+
+// Test case 2: Test with hiragana, which are wide (two-column) characters despite the function name
+function test_mb_strwidth_halfwidth() {
+  var_dump(mb_strwidth('こんにちは', "UTF-8"));
+}
+
+// Test case 3: Test with fullwidth characters
+function test_mb_strwidth_fullwidth() {
+  var_dump(mb_strwidth('Ｈｅｌｌｏ', "UTF-8"));
+}
+
+// Test case 4: Test with mixed halfwidth and fullwidth characters
+function test_mb_strwidth_mixed() {
+  var_dump(mb_strwidth('Ｈｅｌｌｏ, こんにちは', "UTF-8"));
+}
+
+// Test case 5: Test with emoji (characters outside the Basic Multilingual Plane)
+function test_mb_strwidth_special_characters() {
+  var_dump(mb_strwidth('🙂👍', "UTF-8"));
+}
+
+// Test case 6: Test with specified encoding (Shift_JIS)
+function test_mb_strwidth_encoding() {
+  var_dump(mb_strwidth('こんにちは', "Shift_JIS"));
+}
+
+// Test case 7: Test with string containing whitespace
+function test_mb_strwidth_whitespaces() {
+  var_dump(mb_strwidth('  ', "UTF-8"));
+}
+
+// Test case 8: Test with string containing newlines
+function test_mb_strwidth_newlines() {
+  var_dump(mb_strwidth("Hello\nWorld", "UTF-8"));
+}
+
+// Test case 9: Test with string containing tabs
+function test_mb_strwidth_tabs() {
+  var_dump(mb_strwidth("Hello\tWorld", "UTF-8"));
+}
+
+// Test case 10: Test with string control characters
+function test_mb_strwidth_control_characters() {
+  var_dump(mb_strwidth("\x00\x01\x02", "UTF-8"));
+}
+
+test_mb_strwidth_basic();
+test_mb_strwidth_halfwidth();
+test_mb_strwidth_fullwidth();
+test_mb_strwidth_mixed();
+test_mb_strwidth_special_characters();
+test_mb_strwidth_encoding();
+test_mb_strwidth_whitespaces();
+test_mb_strwidth_newlines();
+test_mb_strwidth_tabs();
+test_mb_strwidth_control_characters();
diff --git a/tests/phpt/mbstring/005_mb_strtoupper.php b/tests/phpt/mbstring/005_mb_strtoupper.php
new file mode 100644
index 0000000000..c6450f18b7
--- /dev/null
+++ b/tests/phpt/mbstring/005_mb_strtoupper.php
@@ -0,0 +1,33 @@
+@ok
+<?php
+
+// Test case 1: Basic test with ASCII characters
+function test_mb_strtoupper_basic_ascii() {
+  var_dump(mb_strtoupper("hello world", "ASCII"));
+}
+
+// Test Case 2: Basic test with non-ASCII characters
+function test_mb_strtoupper_basic_utf_8() {
+  var_dump(mb_strtoupper("héllø wørld", "UTF-8"));
+}
+
+// Test Case 3: Test with an empty string
+function test_mb_strtoupper_empty() {
+  var_dump(mb_strtoupper("", "UTF-8"));
+}
+
+// Test Case 4: Test with numbers and special characters
+function test_mb_strtoupper_numbers() {
+  var_dump(mb_strtoupper("123!@#", "UTF-8"));
+}
+
+// Test Case 5: Test with a mix of upper and lower case characters
+function test_mb_strtoupper_mixed() {
+  var_dump(mb_strtoupper("Hello WoRlD", "UTF-8"));
+}
+
+test_mb_strtoupper_basic_ascii();
+test_mb_strtoupper_basic_utf_8();
+test_mb_strtoupper_empty();
+test_mb_strtoupper_numbers();
+test_mb_strtoupper_mixed();
diff --git a/tests/phpt/mbstring/006_mb_strtolower.php b/tests/phpt/mbstring/006_mb_strtolower.php
new file mode 100644
index 0000000000..9a2b579361
--- /dev/null
+++ b/tests/phpt/mbstring/006_mb_strtolower.php
@@ -0,0 +1,33 @@
+@ok
+<?php
+
+// Test case 1: Basic test with ASCII characters
+function test_mb_strtolower_basic_ascii() {
+  var_dump(mb_strtolower("HELLO WORLD", "ASCII"));
+}
+
+// Test Case 2: Basic test with non-ASCII characters
+function test_mb_strtolower_basic_utf_8() {
+  var_dump(mb_strtolower("HÉLLØ WØRLD", "UTF-8"));
+}
+
+// Test Case 3: Test with an empty string
+function test_mb_strtolower_empty() {
+  var_dump(mb_strtolower("", "UTF-8"));
+}
+
+// Test Case 4: Test with numbers and special characters
+function test_mb_strtolower_numbers() {
+  var_dump(mb_strtolower("123!@#", "UTF-8"));
+}
+
+// Test Case 5: Test with a mix of upper and lower case characters
+function test_mb_strtolower_mixed() {
+  var_dump(mb_strtolower("Hello WoRlD", "UTF-8"));
+}
+
+test_mb_strtolower_basic_ascii();
+test_mb_strtolower_basic_utf_8();
+test_mb_strtolower_empty();
+test_mb_strtolower_numbers();
+test_mb_strtolower_mixed();
diff --git a/tests/phpt/mbstring/007_mb_strpos.php b/tests/phpt/mbstring/007_mb_strpos.php
new file mode 100644
index 0000000000..4ad3f11b6d
--- /dev/null
+++ b/tests/phpt/mbstring/007_mb_strpos.php
@@ -0,0 +1,51 @@
+@ok
+<?php
+
+// Test Case 1: Basic test with a simple string
+function test_mb_strpos_basic() {
+  var_dump(mb_strpos("hello world", "world", 0, "UTF-8"));
+}
+
+// Test Case 2: Basic test with offset
+function test_mb_strpos_with_offset() {
+  var_dump(mb_strpos("hello world", "o", 5, "UTF-8"));
+}
+
+// Test Case 3: Test with negative offset
+function test_mb_strpos_with_neg_offset() {
+  var_dump(mb_strpos("hello world", "o", -5, "UTF-8"));
+}
+
+// Test Case 4: Test with multibyte characters
+function test_mb_strpos_multibyte() {
+  var_dump(mb_strpos("こんにちは世界", "世界", 5, "UTF-8"));
+}
+
+// Test Case 5: Test with empty needle
+function test_mb_strpos_empty_needle() {
+  var_dump(mb_strpos("hello world", "", 0, "UTF-8"));
+}
+
+// Test Case 6: Test with needle not found
+function test_mb_strpos_needle_not_found() {
+  var_dump(mb_strpos("hello world", "foo", 0, "UTF-8"));
+}
+
+// Test Case 7: Test with offset greater than haystack length
+function test_mb_strpos_big_offset() {
+  var_dump(mb_strpos("hello world", "world", 100, "UTF-8"));
+}
+
+// Test Case 8: Test with case-sensitive search
+function test_mb_strpos_case_sensitive() {
+  var_dump(mb_strpos("Hello World", "world", 0, "UTF-8"));
+}
+
+test_mb_strpos_basic();
+test_mb_strpos_with_offset();
+test_mb_strpos_with_neg_offset();
+test_mb_strpos_multibyte();
+test_mb_strpos_empty_needle();
+test_mb_strpos_needle_not_found();
+test_mb_strpos_big_offset();
+test_mb_strpos_case_sensitive();
diff --git a/tests/phpt/mbstring/008_mb_stripos.php b/tests/phpt/mbstring/008_mb_stripos.php
new file mode 100644
index 0000000000..eded3c8dae
--- /dev/null
+++ b/tests/phpt/mbstring/008_mb_stripos.php
@@ -0,0 +1,46 @@
+@ok
+
+<?php
+
+// Test Case 1: Basic test with a simple string
+function test_mb_stripos_basic() {
+  var_dump(mb_stripos("hello world", "WORLD", 0, "UTF-8"));
+}
+
+// Test Case 2: Basic test with offset
+function test_mb_stripos_with_offset() {
+  var_dump(mb_stripos("hello world", "O", 5, "UTF-8"));
+}
+
+// Test Case 3: Test with negative offset
+function test_mb_stripos_with_neg_offset() {
+  var_dump(mb_stripos("hello world", "O", -5, "UTF-8"));
+}
+
+// Test Case 4: Test with multibyte characters
+function test_mb_stripos_multibyte() {
+  var_dump(mb_stripos("こんにちは世界", "世界", 5, "UTF-8"));
+}
+
+// Test Case 5: Test with empty needle
+function test_mb_stripos_empty_needle() {
+  var_dump(mb_stripos("hello world", "", 0, "UTF-8"));
+}
+
+// Test Case 6: Test with needle not found
+function test_mb_stripos_needle_not_found() {
+  var_dump(mb_stripos("hello world", "FOO", 0, "UTF-8"));
+}
+
+// Test Case 7: Test with offset greater than haystack length
+function test_mb_stripos_big_offset() {
+  var_dump(mb_stripos("hello world", "WORLD", 100, "UTF-8"));
+}
+
+test_mb_stripos_basic();
+test_mb_stripos_with_offset();
+test_mb_stripos_with_neg_offset();
+test_mb_stripos_multibyte();
+test_mb_stripos_empty_needle();
+test_mb_stripos_needle_not_found();
+test_mb_stripos_big_offset();
diff --git a/tests/phpt/mbstring/009_mb_strrpos.php b/tests/phpt/mbstring/009_mb_strrpos.php
new file mode 100644
index 0000000000..b648d5f53e
--- /dev/null
+++ b/tests/phpt/mbstring/009_mb_strrpos.php
@@ -0,0 +1,51 @@
+@ok
+<?php
+
+// Test Case 1: Basic test with a simple string
+function test_mb_strrpos_basic() {
+  var_dump(mb_strrpos("hello world", "world", 0, "UTF-8"));
+}
+
+// Test Case 2: Basic test with offset
+function test_mb_strrpos_with_offset() {
+  var_dump(mb_strrpos("hello world", "o", 5, "UTF-8"));
+}
+
+// Test Case 3: Test with negative offset
+function test_mb_strrpos_with_neg_offset() {
+  var_dump(mb_strrpos("hello world", "o", -5, "UTF-8"));
+}
+
+// Test Case 4: Test with multibyte characters
+function test_mb_strrpos_multibyte() {
+  var_dump(mb_strrpos("こんにちは世界", "世界", 5, "UTF-8"));
+}
+
+// Test Case 5: Test with empty needle
+function test_mb_strrpos_empty_needle() {
+  var_dump(mb_strrpos("hello world", "", 0, "UTF-8"));
+}
+
+// Test Case 6: Test with needle not found
+function test_mb_strrpos_needle_not_found() {
+  var_dump(mb_strrpos("hello world", "foo", 0, "UTF-8"));
+}
+
+// Test Case 7: Test with offset greater than haystack length
+function test_mb_strrpos_big_offset() {
+  var_dump(mb_strrpos("hello world", "world", 100, "UTF-8"));
+}
+
+// Test Case 8: Test with case-sensitive search
+function test_mb_strrpos_case_sensitive() {
+  var_dump(mb_strrpos("Hello World", "world", 0, "UTF-8"));
+}
+
+test_mb_strrpos_basic();
+test_mb_strrpos_with_offset();
+test_mb_strrpos_with_neg_offset();
+test_mb_strrpos_multibyte();
+test_mb_strrpos_empty_needle();
+test_mb_strrpos_needle_not_found();
+test_mb_strrpos_big_offset();
+test_mb_strrpos_case_sensitive();
diff --git a/tests/phpt/mbstring/010_mb_strripos.php b/tests/phpt/mbstring/010_mb_strripos.php
new file mode 100644
index 0000000000..ed5719d335
--- /dev/null
+++ b/tests/phpt/mbstring/010_mb_strripos.php
@@ -0,0 +1,51 @@
+@ok
+<?php
+
+// Test Case 1: Basic test with a simple string
+function test_mb_strripos_basic() {
+  var_dump(mb_strripos("hello world", "o", 0, "UTF-8"));
+}
+
+// Test Case 2: Basic test with offset
+function test_mb_strripos_with_offset() {
+  var_dump(mb_strripos("hello world", "o", 5, "UTF-8"));
+}
+
+// Test Case 3: Test with negative offset
+function test_mb_strripos_with_neg_offset() {
+  var_dump(mb_strripos("hello world", "o", -5, "UTF-8"));
+}
+
+// Test Case 4: Test with multibyte characters
+function test_mb_strripos_multibyte() {
+  var_dump(mb_strripos("こんにちは世界", "世界", 5, "UTF-8"));
+}
+
+// Test Case 5: Test with empty needle
+function test_mb_strripos_empty_needle() {
+  var_dump(mb_strripos("hello world", "", 0, "UTF-8"));
+}
+
+// Test Case 6: Test with needle not found
+function test_mb_strripos_needle_not_found() {
+  var_dump(mb_strripos("hello world", "foo", 0, "UTF-8"));
+}
+
+// Test Case 7: Test with offset greater than haystack length
+function test_mb_strripos_big_offset() {
+  var_dump(mb_strripos("hello world", "world", 100, "UTF-8"));
+}
+
+// Test Case 8: Test with case-insensitive search
+function test_mb_strripos_case_insensitive() {
+  var_dump(mb_strripos("Hello World", "world", 0, "UTF-8"));
+}
+
+test_mb_strripos_basic();
+test_mb_strripos_with_offset();
+test_mb_strripos_with_neg_offset();
+test_mb_strripos_multibyte();
+test_mb_strripos_empty_needle();
+test_mb_strripos_needle_not_found();
+test_mb_strripos_big_offset();
+test_mb_strripos_case_insensitive();
diff --git a/tests/phpt/mbstring/011_mb_strstr.php b/tests/phpt/mbstring/011_mb_strstr.php
new file mode 100644
index 0000000000..40c3030057
--- /dev/null
+++ b/tests/phpt/mbstring/011_mb_strstr.php
@@ -0,0 +1,63 @@
+@ok
+<?php
+
+// Test Case 1: Basic test with needle found
+function test_mb_strstr_basic() {
+  var_dump(mb_strstr("hello world", "world", false, "UTF-8"));
+}
+
+// Test Case 2: Test with needle found and before_needle set to true
+function test_mb_strstr_before_needle_true() {
+  var_dump(mb_strstr("hello world", "world", true, "UTF-8"));
+}
+
+// Test Case 3: Test with needle found and before_needle set to false
+function test_mb_strstr_before_needle_false() {
+  var_dump(mb_strstr("hello world", "world", false, "UTF-8"));
+}
+
+// Test Case 4: Test with needle found and encoding specified
+function test_mb_strstr_encoding_specified() {
+  var_dump(mb_strstr("résumé", "é", false, "UTF-8"));
+}
+
+// Test Case 5: Test with needle found, before_needle set to true, and encoding specified
+function test_mb_strstr_before_needle_true_encoding_specified() {
+  var_dump(mb_strstr("résumé", "é", true, "UTF-8"));
+}
+
+// Test Case 6: Test with needle found, before_needle set to false, and encoding specified
+function test_mb_strstr_before_needle_false_encoding_specified() {
+  var_dump(mb_strstr("résumé", "é", false, "UTF-8"));
+}
+
+// Test Case 7: Test with needle not found
+function test_mb_strstr_needle_not_found() {
+  var_dump(mb_strstr("hello world", "universe", false, "UTF-8"));
+}
+
+// Test Case 8: Test with needle found at the beginning of haystack
+function test_mb_strstr_needle_at_beginning() {
+  var_dump(mb_strstr("hello world", "hello", false, "UTF-8"));
+}
+
+// Test Case 9: Test with empty needle
+function test_mb_strstr_empty_needle() {
+  var_dump(mb_strstr("hello world", "", false, "UTF-8"));
+}
+
+// Test Case 10: Test with empty haystack
+function test_mb_strstr_empty_haystack() {
+  var_dump(mb_strstr("", "world", false, "UTF-8"));
+}
+
+test_mb_strstr_basic();
+test_mb_strstr_before_needle_true();
+test_mb_strstr_before_needle_false();
+test_mb_strstr_encoding_specified();
+test_mb_strstr_before_needle_true_encoding_specified();
+test_mb_strstr_before_needle_false_encoding_specified();
+test_mb_strstr_needle_not_found();
+test_mb_strstr_needle_at_beginning();
+test_mb_strstr_empty_needle();
+test_mb_strstr_empty_haystack();
diff --git a/tests/phpt/mbstring/012_mb_stristr.php b/tests/phpt/mbstring/012_mb_stristr.php
new file mode 100644
index 0000000000..dc95056367
--- /dev/null
+++ b/tests/phpt/mbstring/012_mb_stristr.php
@@ -0,0 +1,63 @@
+@ok
+<?php
+
+// Test Case 1: Basic test with needle found
+function test_mb_stristr_basic() {
+  var_dump(mb_stristr("hello world", "WORLD", false, "UTF-8"));
+}
+
+// Test Case 2: Test with needle found and before_needle set to true
+function test_mb_stristr_before_needle_true() {
+  var_dump(mb_stristr("hello world", "WORLD", true, "UTF-8"));
+}
+
+// Test Case 3: Test with needle found and before_needle set to false
+function test_mb_stristr_before_needle_false() {
+  var_dump(mb_stristr("hello world", "WORLD", false, "UTF-8"));
+}
+
+// Test Case 4: Test with needle found and encoding specified
+function test_mb_stristr_encoding_specified() {
+  var_dump(mb_stristr("Résumé", "É", false, "UTF-8"));
+}
+
+// Test Case 5: Test with needle found, before_needle set to true, and encoding specified
+function test_mb_stristr_before_needle_true_encoding_specified() {
+  var_dump(mb_stristr("Résumé", "É", true, "UTF-8"));
+}
+
+// Test Case 6: Test with needle found, before_needle set to false, and encoding specified
+function test_mb_stristr_before_needle_false_encoding_specified() {
+  var_dump(mb_stristr("Résumé", "É", false, "UTF-8"));
+}
+
+// Test Case 7: Test with needle not found
+function test_mb_stristr_needle_not_found() {
+  var_dump(mb_stristr("hello world", "UNIVERSE", false, "UTF-8"));
+}
+
+// Test Case 8: Test with needle found at the beginning of haystack
+function test_mb_stristr_needle_at_beginning() {
+  var_dump(mb_stristr("hello world", "HELLO", false, "UTF-8"));
+}
+
+// Test Case 9: Test with empty needle
+function test_mb_stristr_empty_needle() {
+  var_dump(mb_stristr("hello world", "", false, "UTF-8"));
+}
+
+// Test Case 10: Test with empty haystack
+function test_mb_stristr_empty_haystack() {
+  var_dump(mb_stristr("", "world", false, "UTF-8"));
+}
+
+test_mb_stristr_basic();
+test_mb_stristr_before_needle_true();
+test_mb_stristr_before_needle_false();
+test_mb_stristr_encoding_specified();
+test_mb_stristr_before_needle_true_encoding_specified();
+test_mb_stristr_before_needle_false_encoding_specified();
+test_mb_stristr_needle_not_found();
+test_mb_stristr_needle_at_beginning();
+test_mb_stristr_empty_needle();
+test_mb_stristr_empty_haystack();
diff --git a/tests/phpt/mbstring/013_mb_strrchr.php b/tests/phpt/mbstring/013_mb_strrchr.php
new file mode 100644
index 0000000000..599981de6b
--- /dev/null
+++ b/tests/phpt/mbstring/013_mb_strrchr.php
@@ -0,0 +1,63 @@
+@ok
+<?php
+
+// Test Case 1: Basic test with needle found
+function test_mb_strrchr_basic() {
+  var_dump(mb_strrchr("hello world", "o", false, "UTF-8"));
+}
+
+// Test Case 2: Test with needle found and before_needle set to true
+function test_mb_strrchr_before_needle_true() {
+  var_dump(mb_strrchr("hello world", "o", true, "UTF-8"));
+}
+
+// Test Case 3: Test with needle found and before_needle set to false
+function test_mb_strrchr_before_needle_false() {
+  var_dump(mb_strrchr("hello world", "o", false, "UTF-8"));
+}
+
+// Test Case 4: Test with needle found and encoding specified
+function test_mb_strrchr_encoding_specified() {
+  var_dump(mb_strrchr("résumé", "é", false, "UTF-8"));
+}
+
+// Test Case 5: Test with needle found, before_needle set to true, and encoding specified
+function test_mb_strrchr_before_needle_true_encoding_specified() {
+  var_dump(mb_strrchr("résumé", "é", true, "UTF-8"));
+}
+
+// Test Case 6: Test with needle found, before_needle set to false, and encoding specified
+function test_mb_strrchr_before_needle_false_encoding_specified() {
+  var_dump(mb_strrchr("résumé", "é", false, "UTF-8"));
+}
+
+// Test Case 7: Test with needle not found
+function test_mb_strrchr_needle_not_found() {
+  var_dump(mb_strrchr("hello world", "x", false, "UTF-8"));
+}
+
+// Test Case 8: Test with needle found at the beginning of haystack
+function test_mb_strrchr_needle_at_beginning() {
+  var_dump(mb_strrchr("hello world", "h", false, "UTF-8"));
+}
+
+// Test Case 9: Test with empty needle
+function test_mb_strrchr_empty_needle() {
+  var_dump(mb_strrchr("hello world", "", false, "UTF-8"));
+}
+
+// Test Case 10: Test with empty haystack
+function test_mb_strrchr_empty_haystack() {
+  var_dump(mb_strrchr("", "world", false, "UTF-8"));
+}
+
+test_mb_strrchr_basic();
+test_mb_strrchr_before_needle_true();
+test_mb_strrchr_before_needle_false();
+test_mb_strrchr_encoding_specified();
+test_mb_strrchr_before_needle_true_encoding_specified();
+test_mb_strrchr_before_needle_false_encoding_specified();
+test_mb_strrchr_needle_not_found();
+test_mb_strrchr_needle_at_beginning();
+test_mb_strrchr_empty_needle();
+test_mb_strrchr_empty_haystack();
diff --git a/tests/phpt/mbstring/014_mb_strrichr.php b/tests/phpt/mbstring/014_mb_strrichr.php
new file mode 100644
index 0000000000..2936548693
--- /dev/null
+++ b/tests/phpt/mbstring/014_mb_strrichr.php
@@ -0,0 +1,63 @@
+@ok
+<?php
+
+// Test Case 1: Basic test with needle found
+function test_mb_strrichr_basic() {
+  var_dump(mb_strrichr("hello World", "world", false, "UTF-8"));
+}
+
+// Test Case 2: Test with needle found and before_needle set to true
+function test_mb_strrichr_before_needle_true() {
+  var_dump(mb_strrichr("hello World", "world", true, "UTF-8"));
+}
+
+// Test Case 3: Test with needle found and before_needle set to false
+function test_mb_strrichr_before_needle_false() {
+  var_dump(mb_strrichr("hello World", "world", false, "UTF-8"));
+}
+
+// Test Case 4: Test with needle found and encoding specified
+function test_mb_strrichr_encoding_specified() {
+  var_dump(mb_strrichr("résumé", "É", false, "UTF-8"));
+}
+
+// Test Case 5: Test with needle found, before_needle set to true, and encoding specified
+function test_mb_strrichr_before_needle_true_encoding_specified() {
+  var_dump(mb_strrichr("résumé", "é", true, "UTF-8"));
+}
+
+// Test Case 6: Test with needle found, before_needle set to false, and encoding specified
+function test_mb_strrichr_before_needle_false_encoding_specified() {
+  var_dump(mb_strrichr("résumé", "é", false, "UTF-8"));
+}
+
+// Test Case 7: Test with needle not found
+function test_mb_strrichr_needle_not_found() {
+  var_dump(mb_strrichr("hello World", "universe", false, "UTF-8"));
+}
+
+// Test Case 8: Test with needle found at the beginning of haystack
+function test_mb_strrichr_needle_at_beginning() {
+  var_dump(mb_strrichr("hello World", "hello", false, "UTF-8"));
+}
+
+// Test Case 9: Test with empty needle
+function test_mb_strrichr_empty_needle() {
+  var_dump(mb_strrichr("hello World", "", false, "UTF-8"));
+}
+
+// Test Case 10: Test with empty haystack
+function test_mb_strrichr_empty_haystack() {
+  var_dump(mb_strrichr("", "World", false, "UTF-8"));
+}
+
+test_mb_strrichr_basic();
+test_mb_strrichr_before_needle_true();
+test_mb_strrichr_before_needle_false();
+test_mb_strrichr_encoding_specified();
+test_mb_strrichr_before_needle_true_encoding_specified();
+test_mb_strrichr_before_needle_false_encoding_specified();
+test_mb_strrichr_needle_not_found();
+test_mb_strrichr_needle_at_beginning();
+test_mb_strrichr_empty_needle();
+test_mb_strrichr_empty_haystack();

From 482d782b74284bc30b0105a78012563a456d5325 Mon Sep 17 00:00:00 2001
From: catnyan02 <catnyan02@gmail.com>
Date: Wed, 7 Feb 2024 19:29:25 +0000
Subject: [PATCH 18/27] Fix small compilation errors

---
 cmake/external-libraries.cmake | 10 ++++++++++
 runtime/mbstring/mbstring.cpp  | 12 +++++++++++-
 runtime/mbstring/mbstring.h    |  4 +++-
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/cmake/external-libraries.cmake b/cmake/external-libraries.cmake
index b040ca6f18..0fec2a0a56 100644
--- a/cmake/external-libraries.cmake
+++ b/cmake/external-libraries.cmake
@@ -11,6 +11,16 @@ function(handle_missing_library LIB_NAME)
     endif()
 endfunction()
 
+if(MBFL) # opt-in: this block only runs when the MBFL variable is set to a true value
+    message(STATUS "MBFL=On, libmbfl will be downloaded and built")
+    add_compile_options(-DMBFL) # defines the MBFL macro tested by `#ifdef MBFL` in runtime/mbstring/mbstring.cpp
+    FetchContent_Declare(libmbfl GIT_REPOSITORY https://github.com/andreylzmw/libmbfl) # NOTE(review): no GIT_TAG is given, so the fetched revision is not pinned; consider pinning a commit for reproducible builds
+    FetchContent_MakeAvailable(libmbfl)
+    include_directories(${libmbfl_SOURCE_DIR}/include)
+    add_definitions(-DLIBMBFL_LIB_DIR="${libmbfl_SOURCE_DIR}/objs")
+    add_link_options(-L${libmbfl_SOURCE_DIR}/objs) # adds the objs directory to the linker search path
+endif()
+
 find_package(fmt QUIET)
 if(NOT fmt_FOUND)
     handle_missing_library("fmtlib")
diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
index e42d999c34..588f37a14b 100644
--- a/runtime/mbstring/mbstring.cpp
+++ b/runtime/mbstring/mbstring.cpp
@@ -50,6 +50,8 @@ bool mb_UTF8_check(const char *s) {
   php_assert (0);
 }
 
+static bool is_detect_incorrect_encoding_names_warning{false}; // file-local flag: written by f$set_detect_incorrect_encoding_names_warning(), reset to false by free_detect_incorrect_encoding_names()
+
 #ifdef MBFL
 extern "C" {
 	#include <kphp/libmbfl/mbfl/mbfilter.h>
@@ -1051,4 +1053,12 @@ string f$mb_substr(const string &str, int64_t start, const mixed &length_var, co
   return {str.c_str() + UTF8_start, static_cast<string::size_type>(UTF8_length)};
 }
 
-#endif
\ No newline at end of file
+#endif
+
+void f$set_detect_incorrect_encoding_names_warning(bool show) { // stores `show` in the file-local warning flag; defined after #endif, so it is compiled regardless of MBFL
+  is_detect_incorrect_encoding_names_warning = show;
+}
+
+void free_detect_incorrect_encoding_names() { // puts the warning flag back to its initial value (false)
+  f$set_detect_incorrect_encoding_names_warning(false);
+}
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index bcf0da9c0b..3a3b3ec4b2 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -70,4 +70,6 @@ void f$set_detect_incorrect_encoding_names_warning(bool show);
 
 void free_detect_incorrect_encoding_names();
 
-#endif
\ No newline at end of file
+#endif
+
+void free_detect_incorrect_encoding_names(); // NOTE(review): the same prototype already appears just above the preceding #endif, so this repeats it whenever that branch is compiled

From c6be4b33c2da8b5946d8203c7492c6251724b6fb Mon Sep 17 00:00:00 2001
From: catnyan02 <catnyan02@gmail.com>
Date: Sat, 24 Feb 2024 07:34:31 +0000
Subject: [PATCH 19/27] Fix null encoding input, small fix mb_convert_case

---
 runtime/mbstring/mbstring.cpp         | 15 ++++++++------
 runtime/mbstring/mbstring.h           | 28 +++++++++++++--------------
 tests/cpp/runtime/mbstring-test.cpp   |  7 +++++++
 tests/phpt/mbstring/001_mb_strlen.php |  2 +-
 4 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
index 588f37a14b..c69020ca63 100644
--- a/runtime/mbstring/mbstring.cpp
+++ b/runtime/mbstring/mbstring.cpp
@@ -293,14 +293,17 @@ int64_t f$mb_substr_count(const string &haystack, const string &needle, const Op
 
 string mb_convert_case(const string &str, const int64_t mode, const Optional<string> &encoding){
 
-  mixed unicode = f$mb_convert_encoding(str, string("UTF_8"), encoding.val());
+  mixed utf_8 = f$mb_convert_encoding(str, string("UTF_8"), encoding.val());
 
-  if (unicode.is_string()) {
-    const string &unicode_str = unicode.to_string();
+  if (utf_8.is_string()) {
+    string utf_8_str = utf_8.to_string();
+    if (strcmp(encoding.val().c_str(), "UTF_8")){
+      utf_8_str = str;
+    }
 
-    int len = str.size();
+    int len = utf_8_str.size();
     string unicode_res(len * 3, false);
-    const char *s = str.c_str();
+    const char *s = utf_8_str.c_str();
     int p = 0, ch = 0, res_len = 0;
 
     switch(mode) {
@@ -320,7 +323,7 @@ string mb_convert_case(const string &str, const int64_t mode, const Optional<str
     }
 
     if (p < 0) {
-      php_warning("Incorrect UTF-8 string \"%s\" in function mb_convert_case", str.c_str());
+      php_warning("Incorrect UTF-8 string \"%s\" in function mb_convert_case", utf_8_str.c_str());
     }
     unicode_res.shrink(res_len);
 
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index 3a3b3ec4b2..b3c153de9e 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -17,33 +17,33 @@ bool f$mb_check_encoding(const mixed &value, const Optional<string> &encoding);
 
 mixed f$mb_convert_encoding(const mixed &str, const string &to_encoding, const mixed &from_encoding);
 
-int64_t f$mb_strlen(const string &str, const Optional<string> &encoding);
+int64_t f$mb_strlen(const string &str, const Optional<string> &encoding=false);
 
-string f$mb_substr(const string &str, const int64_t start, const Optional<int64_t> &length, const Optional<string> &encoding);
+string f$mb_substr(const string &str, const int64_t start, const Optional<int64_t> &length=0, const Optional<string> &encoding=false);
 
-int64_t f$mb_substr_count(const string &haystack, const string &needle, const Optional<string> &encoding);
+int64_t f$mb_substr_count(const string &haystack, const string &needle, const Optional<string> &encoding=false);
 
-string f$mb_strtoupper(const string &str, const Optional<string> &encoding);
+string f$mb_strtoupper(const string &str, const Optional<string> &encoding=false);
 
-string f$mb_strtolower(const string &str, const Optional<string> &encoding);
+string f$mb_strtolower(const string &str, const Optional<string> &encoding=false);
 
-int64_t f$mb_strwidth(const string &str, const Optional<string> &encoding);
+int64_t f$mb_strwidth(const string &str, const Optional<string> &encoding=false);
 
-Optional<int64_t> f$mb_strpos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
+Optional<int64_t> f$mb_strpos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding=false);
 
-Optional<int64_t> f$mb_strrpos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
+Optional<int64_t> f$mb_strrpos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding=false);
 
-Optional<int64_t> f$mb_strripos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
+Optional<int64_t> f$mb_strripos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding=false);
 
-Optional<int64_t> f$mb_stripos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding);
+Optional<int64_t> f$mb_stripos(const string &haystack, const string &needle, const int64_t offset, const Optional<string> &encoding=false);
 
-Optional<string> f$mb_stristr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
+Optional<string> f$mb_stristr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding=false);
 
-Optional<string> f$mb_strstr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
+Optional<string> f$mb_strstr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding=false);
 
-Optional<string> f$mb_strrchr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
+Optional<string> f$mb_strrchr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding=false);
 
-Optional<string> f$mb_strrichr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding);
+Optional<string> f$mb_strrichr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding=false);
 
 #else
 
diff --git a/tests/cpp/runtime/mbstring-test.cpp b/tests/cpp/runtime/mbstring-test.cpp
index b2c663dbdc..ec7506da61 100644
--- a/tests/cpp/runtime/mbstring-test.cpp
+++ b/tests/cpp/runtime/mbstring-test.cpp
@@ -25,6 +25,13 @@ TEST(mbstring_test, test_mb_strlen) {
   ASSERT_TRUE(real == predicted);
 }
 
+TEST(mbstring_test, test_mb_strlen_no_encoding) {
+  const int predicted = 12;
+  int real = 0;
+  real = f$mb_strlen(string("Hello world!"));
+  ASSERT_TRUE(real == predicted);
+}
+
 TEST(mbstring_test, test_mb_substr) {
   ASSERT_STREQ(f$mb_substr(string("Hello world"), 2, 3, string("UTF-8")).c_str(), "llo");
 }
diff --git a/tests/phpt/mbstring/001_mb_strlen.php b/tests/phpt/mbstring/001_mb_strlen.php
index 8d2f990cf0..2c395957d4 100644
--- a/tests/phpt/mbstring/001_mb_strlen.php
+++ b/tests/phpt/mbstring/001_mb_strlen.php
@@ -55,7 +55,7 @@ function test_mb_strlen_long_string() {
 test_mb_strlen_basic_ascii();
 test_mb_strlen_basic_utf_8();
 test_mb_strlen_empty_string();
-// test_mb_strlen_null_encoding(); // doesn't put null through for some reason
+test_mb_strlen_null_encoding(); // doesn't put null through for some reason
 test_mb_strlen_utf_16_encoding();
 test_mb_strlen_html_entities();
 test_mb_strlen_whitespaces();

From ad2584146c378c0ec2a61e8d9909f0401c863c46 Mon Sep 17 00:00:00 2001
From: Marat Omarov <mtomarov@miem.hse.ru>
Date: Fri, 8 Mar 2024 19:17:52 +0300
Subject: [PATCH 20/27] Add mb_strcut function with simple tests

---
 builtin-functions/_functions.txt      |  1 +
 runtime/mbstring/mbstring.cpp         | 45 +++++++++++++++++++++
 runtime/mbstring/mbstring.h           |  2 +
 tests/cpp/runtime/mbstring-test.cpp   |  8 ++++
 tests/phpt/mbstring/015_mb_strcut.php | 57 +++++++++++++++++++++++++++
 5 files changed, 113 insertions(+)
 create mode 100644 tests/phpt/mbstring/015_mb_strcut.php

diff --git a/builtin-functions/_functions.txt b/builtin-functions/_functions.txt
index 9e3af0c621..145b1d7254 100644
--- a/builtin-functions/_functions.txt
+++ b/builtin-functions/_functions.txt
@@ -1649,6 +1649,7 @@ function getenv(string $varname = '', bool $local_only = false): mixed;
 
 function mb_check_encoding(array|string $value, ?string $encoding = null): bool;
 function mb_convert_encoding(array|string $string, string $to_encoding, array|string|null $from_encoding = null): array|string|false;
+function mb_strcut(string $string, int $start, ?int $length = null, ?string $encoding = null): string;
 function mb_substr(string $string, int $start, ?int $length = null, ?string $encoding = null): string;
 function mb_strlen(string $string, ?string $encoding = null): int;
 function mb_substr_count(string $haystack, string $needle, ?string $encoding = null): int;
diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
index c69020ca63..a3e38a42ee 100644
--- a/runtime/mbstring/mbstring.cpp
+++ b/runtime/mbstring/mbstring.cpp
@@ -210,6 +210,51 @@ int64_t f$mb_strlen(const string &str, const Optional<string> &enc_name){
 
 }
 
+// Implements PHP's mb_strcut() on top of mbfl_strcut(): `start` and `length` are byte offsets,
+// negative values count from the end of `str`, and a null `length` means "up to the end".
+// An unsupported `encoding` is reported through php_critical_error.
+string f$mb_strcut(const string &str, const int64_t start, const Optional<int64_t> &length, const Optional<string> &encoding){
+  const mbfl_encoding *enc = mb_get_encoding(encoding);
+  if (!enc) {
+    php_critical_error ("encoding \"%s\" isn't supported in mb_strcut", encoding.val().c_str());
+  }
+
+  mbfl_string _string, result, *ret;
+  mbfl_string_init(&_string);
+  _string.no_encoding = enc->no_encoding;
+  _string.len = str.size();
+  _string.val = (unsigned char*)str.c_str();
+  const int64_t byte_len = str.size();
+
+  // a negative start is relative to the end of the string and clamped to its beginning
+  int64_t _start = start;
+  if (_start < 0) {
+    _start = byte_len + _start;
+    if (_start < 0) {
+      _start = 0;
+    }
+  }
+
+  // Only a start beyond the end of the string gives an empty result. It is checked against the
+  // string size, not the requested length: mb_strcut("abcdef", 4, 1) has to return "e".
+  if (_start > byte_len) {
+    return string();
+  }
+
+  // a null length takes everything after start; a negative one drops bytes from the end
+  int64_t _length = length.has_value() ? length.val() : byte_len;
+  if (_length < 0) {
+    _length = (byte_len - _start) + _length;
+    if (_length < 0) {
+      _length = 0;
+    }
+  }
+
+  ret = mbfl_strcut(&_string, &result, _start, _length);
+  php_assert(ret != NULL);
+  return string((const char*) ret->val, ret->len);
+}
+
 
 string f$mb_substr(const string &str, const int64_t start, const Optional<int64_t> &length, const Optional<string> &encoding){
   size_t real_start, real_len;
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index b3c153de9e..5d8ce56849 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -19,6 +19,8 @@ mixed f$mb_convert_encoding(const mixed &str, const string &to_encoding, const m
 
 int64_t f$mb_strlen(const string &str, const Optional<string> &encoding=false);
 
+string f$mb_strcut(const string &str, const int64_t start, const Optional<int64_t> &length=0, const Optional<string> &encoding=false);
+
 string f$mb_substr(const string &str, const int64_t start, const Optional<int64_t> &length=0, const Optional<string> &encoding=false);
 
 int64_t f$mb_substr_count(const string &haystack, const string &needle, const Optional<string> &encoding=false);
diff --git a/tests/cpp/runtime/mbstring-test.cpp b/tests/cpp/runtime/mbstring-test.cpp
index ec7506da61..17ec3ef9dd 100644
--- a/tests/cpp/runtime/mbstring-test.cpp
+++ b/tests/cpp/runtime/mbstring-test.cpp
@@ -32,6 +32,14 @@ TEST(mbstring_test, test_mb_strlen_no_encoding) {
   ASSERT_TRUE(real == predicted);
 }
 
+TEST(mbstring_test, test_mb_strcut) {
+  ASSERT_STREQ(f$mb_strcut(string("Hello world"), 2, 3, string("UTF-8")).c_str(), "llo");
+}
+
+TEST(mbstring_test, test_mb_strcut_null_length) {
+  ASSERT_STREQ(f$mb_strcut(string("Hello world"), 3, false, string("UTF-8")).c_str(), "lo world");
+}
+
 TEST(mbstring_test, test_mb_substr) {
   ASSERT_STREQ(f$mb_substr(string("Hello world"), 2, 3, string("UTF-8")).c_str(), "llo");
 }
diff --git a/tests/phpt/mbstring/015_mb_strcut.php b/tests/phpt/mbstring/015_mb_strcut.php
new file mode 100644
index 0000000000..93b42e8ecf
--- /dev/null
+++ b/tests/phpt/mbstring/015_mb_strcut.php
@@ -0,0 +1,57 @@
+@ok
+<?php
+
+// Test case 1: Extract all characters from start position to the end of the string
+function test_mb_strcut_full_string() {
+  var_dump(mb_strcut("abcdef", 0, null, "UTF-8"));
+}
+
+// Test case 2: Extract characters starting from the specified position
+function test_mb_strcut_with_start() {
+  var_dump(mb_strcut("abcdef", 2, null, "UTF-8"));
+}
+
+// Test case 3: Extract characters using negative start position
+function test_mb_strcut_with_neg_start() {
+  var_dump(mb_strcut("abcdef", -3, null, "UTF-8"));
+}
+
+// Test case 4: Extract a specific number of characters
+function test_mb_strcut_with_length() {
+  var_dump(mb_strcut("abcdef", 1, 3, "UTF-8"));
+}
+
+// Test case 5: Test with different encoding
+function test_mb_strcut_different_encoding() {
+  var_dump(mb_strcut("abcdef", 1, 3, "ISO-8859-1"));
+}
+
+// Test case 6: Test with non-ASCII characters
+function test_mb_strcut_non_ascii_characters() {
+  var_dump(mb_strcut("你好，世界！", 0, 3, "UTF-8"));
+}
+
+// Test case 7: Test with start position beyond string length
+function test_mb_strcut_start_after_string_ends() {
+  var_dump(mb_strcut("你好，世界！", 20, 3, "UTF-8"));
+}
+
+// Test case 8: Test with negative start position beyond string length
+function test_mb_strcut_neg_start_after_string_ends() {
+  var_dump(mb_strcut("你好，世界！", -20, 3, "UTF-8"));
+}
+
+// Test case 9: Test with empty string
+function test_mb_strcut_empty_string() {
+  var_dump(mb_strcut("", 0, 3, "UTF-8"));
+}
+
+test_mb_strcut_full_string();
+test_mb_strcut_with_start();
+test_mb_strcut_with_neg_start();
+test_mb_strcut_with_length();
+test_mb_strcut_different_encoding();
+test_mb_strcut_non_ascii_characters();
+test_mb_strcut_start_after_string_ends();
+test_mb_strcut_neg_start_after_string_ends();
+test_mb_strcut_empty_string();

From 600e393363df773d6a7cdbeec928fc4913053f11 Mon Sep 17 00:00:00 2001
From: Marat Omarov <mtomarov@miem.hse.ru>
Date: Sat, 9 Mar 2024 17:13:33 +0300
Subject: [PATCH 21/27] Add mb_str_split function

---
 builtin-functions/_functions.txt |  2 ++
 runtime/mbstring/mbstring.cpp    | 35 ++++++++++++++++++++++++++++++++
 runtime/mbstring/mbstring.h      |  2 ++
 3 files changed, 39 insertions(+)

diff --git a/builtin-functions/_functions.txt b/builtin-functions/_functions.txt
index 145b1d7254..e94b93fa7c 100644
--- a/builtin-functions/_functions.txt
+++ b/builtin-functions/_functions.txt
@@ -1649,6 +1649,8 @@ function getenv(string $varname = '', bool $local_only = false): mixed;
 
 function mb_check_encoding(array|string $value, ?string $encoding = null): bool;
 function mb_convert_encoding(array|string $string, string $to_encoding, array|string|null $from_encoding = null): array|string|false;
+
+function mb_str_split(string $string, int $length = 1, ?string $encoding = null): array;
 function mb_strcut(string $string, int $start, ?int $length = null, ?string $encoding = null): string;
 function mb_substr(string $string, int $start, ?int $length = null, ?string $encoding = null): string;
 function mb_strlen(string $string, ?string $encoding = null): int;
diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
index a3e38a42ee..c2c3c6446a 100644
--- a/runtime/mbstring/mbstring.cpp
+++ b/runtime/mbstring/mbstring.cpp
@@ -189,6 +189,41 @@ static const mbfl_encoding *mb_get_encoding(const Optional<string> &enc_name) {
   return mbfl_name2encoding(DEFAULT_ENCODING); // change if we are going to use current encoding
 }
 
+array<string> f$mb_str_split(const string &str, const int64_t &length, const Optional<string> &encoding){
+  if (length <= 0) {
+    php_critical_error ("mb_str_split(): Argument #2 ($length) must be greater than 0");
+  } else if (length > INT_MAX / 4) {
+    php_critical_error ("mb_str_split(): Argument #2 ($length) is too large");
+  }
+
+  const mbfl_encoding *enc = mb_get_encoding(encoding);
+  if (!enc) {
+    php_critical_error ("encoding \"%s\" isn't supported in mb_strlen", encoding.val().c_str());
+  }
+
+  array<string> result = array<string>();
+
+  if (!str.size()) {
+    return result;
+  }
+  mbfl_string _string;
+  mbfl_string_init(&_string);
+  _string.no_encoding = enc->no_encoding;
+  _string.len = str.size();
+  _string.val = (unsigned char*)str.c_str();
+  // Chunks hold `length` characters each and are addressed by character index: a byte step of
+  // (byte size / character count) is only right for fixed-width encodings and splits UTF-8 characters.
+  const size_t n = mbfl_strlen(&_string);
+  if (mbfl_is_error(n)) {
+    php_warning("mb_str_split(): failed to count characters of the input string");
+    return result;
+  }
+  for (size_t i = 0; i < n; i += (size_t)length) {
+    result.push_back(f$mb_substr(str, i, length, encoding));
+  }
+  return result;
+}
+
 int64_t f$mb_strlen(const string &str, const Optional<string> &enc_name){
   const mbfl_encoding *encoding = mb_get_encoding(enc_name);
   if (!encoding) {
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index 5d8ce56849..89e119cd15 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -17,6 +17,8 @@ bool f$mb_check_encoding(const mixed &value, const Optional<string> &encoding);
 
 mixed f$mb_convert_encoding(const mixed &str, const string &to_encoding, const mixed &from_encoding);
 
+array<string> f$mb_str_split(const string &str, const int64_t &length=1, const Optional<string> &encoding=false);
+
 int64_t f$mb_strlen(const string &str, const Optional<string> &encoding=false);
 
 string f$mb_strcut(const string &str, const int64_t start, const Optional<int64_t> &length=0, const Optional<string> &encoding=false);

From cfbe230f76aaf524487a79526c0a49226cde1cc6 Mon Sep 17 00:00:00 2001
From: Marat Omarov <mtomarov@miem.hse.ru>
Date: Sat, 9 Mar 2024 21:24:27 +0300
Subject: [PATCH 22/27] Add mb_preferred_mime_name function and simple
 php-tests

---
 builtin-functions/_functions.txt              |  2 +-
 runtime/mbstring/mbstring.cpp                 | 21 +++++++++++++++---
 runtime/mbstring/mbstring.h                   |  2 ++
 tests/phpt/mbstring/016_mb_str_split.php      |  0
 .../mbstring/017_mb_preferred_mime_name.php   | 22 +++++++++++++++++++
 5 files changed, 43 insertions(+), 4 deletions(-)
 create mode 100644 tests/phpt/mbstring/016_mb_str_split.php
 create mode 100644 tests/phpt/mbstring/017_mb_preferred_mime_name.php

diff --git a/builtin-functions/_functions.txt b/builtin-functions/_functions.txt
index e94b93fa7c..57a5e9957b 100644
--- a/builtin-functions/_functions.txt
+++ b/builtin-functions/_functions.txt
@@ -1649,7 +1649,7 @@ function getenv(string $varname = '', bool $local_only = false): mixed;
 
 function mb_check_encoding(array|string $value, ?string $encoding = null): bool;
 function mb_convert_encoding(array|string $string, string $to_encoding, array|string|null $from_encoding = null): array|string|false;
-
+function mb_preferred_mime_name(string $encoding): string|false;
 function mb_str_split(string $string, int $length = 1, ?string $encoding = null): array;
 function mb_strcut(string $string, int $start, ?int $length = null, ?string $encoding = null): string;
 function mb_substr(string $string, int $start, ?int $length = null, ?string $encoding = null): string;
diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
index c2c3c6446a..8821d0c72c 100644
--- a/runtime/mbstring/mbstring.cpp
+++ b/runtime/mbstring/mbstring.cpp
@@ -189,16 +189,31 @@ static const mbfl_encoding *mb_get_encoding(const Optional<string> &enc_name) {
   return mbfl_name2encoding(DEFAULT_ENCODING); // change if we are going to use current encoding
 }
 
+Optional<string> f$mb_preferred_mime_name(const string &enc_name) {
+  const mbfl_encoding *encoding;
+  encoding = mbfl_name2encoding(enc_name.c_str());
+  if (encoding == nullptr) {
+    php_critical_error("encoding must be a valid encoding, \"%s\" given", enc_name.c_str());
+  }
+  // a null or empty mime_name means there is no MIME name to report: warn and return false
+  const char *mime = encoding->mime_name;
+  if (mime != nullptr && mime[0] != '\0') {
+    return string(mime);
+  }
+  php_warning("No MIME preferred name corresponding to \"%s\"", enc_name.c_str());
+  return false;
+}
+
 array<string> f$mb_str_split(const string &str, const int64_t &length, const Optional<string> &encoding){
   if (length <= 0) {
-    php_critical_error ("mb_str_split(): Argument #2 ($length) must be greater than 0");
+    php_critical_error("length argument must be greater than 0");
   } else if (length > INT_MAX / 4) {
-    php_critical_error ("mb_str_split(): Argument #2 ($length) is too large");
+    php_critical_error("length argument is too large");
   }
 
   const mbfl_encoding *enc = mb_get_encoding(encoding);
   if (!enc) {
-    php_critical_error ("encoding \"%s\" isn't supported in mb_strlen", encoding.val().c_str());
+    php_critical_error("encoding \"%s\" isn't supported in mb_strlen", encoding.val().c_str());
   }
 
   array<string> result = array<string>();
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index 89e119cd15..59c2d5e4f4 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -17,6 +17,8 @@ bool f$mb_check_encoding(const mixed &value, const Optional<string> &encoding);
 
 mixed f$mb_convert_encoding(const mixed &str, const string &to_encoding, const mixed &from_encoding);
 
+Optional<string> f$mb_preferred_mime_name(const string &$encoding);
+
 array<string> f$mb_str_split(const string &str, const int64_t &length=1, const Optional<string> &encoding=false);
 
 int64_t f$mb_strlen(const string &str, const Optional<string> &encoding=false);
diff --git a/tests/phpt/mbstring/016_mb_str_split.php b/tests/phpt/mbstring/016_mb_str_split.php
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/phpt/mbstring/017_mb_preferred_mime_name.php b/tests/phpt/mbstring/017_mb_preferred_mime_name.php
new file mode 100644
index 0000000000..7d7df3cc8b
--- /dev/null
+++ b/tests/phpt/mbstring/017_mb_preferred_mime_name.php
@@ -0,0 +1,25 @@
+@ok
+<?php
+
+function test_preferred_mime_valid_name() {
+	var_dump(mb_preferred_mime_name('sjis-win'));
+	var_dump(mb_preferred_mime_name('SJIS'));
+	var_dump(mb_preferred_mime_name('EUC-JP'));
+	var_dump(mb_preferred_mime_name('UTF-8'));
+	var_dump(mb_preferred_mime_name('ISO-2022-JP'));
+	var_dump(mb_preferred_mime_name('JIS'));
+	var_dump(mb_preferred_mime_name('ISO-8859-1'));
+	var_dump(mb_preferred_mime_name('UCS2'));
+	var_dump(mb_preferred_mime_name('UCS4'));
+}
+
+// function test_preferred_mime_invalid_name() {
+// 	try {
+// 		var_dump(mb_preferred_mime_name('BAD_NAME'));
+// 	} catch (\ValueError $e) {
+// 		echo $e->getMessage() . \PHP_EOL;
+// 	}
+// }
+
+// Run the valid-name cases; without this call the file produces no output to compare.
+test_preferred_mime_valid_name();
\ No newline at end of file

From 07588d3f8fa9aa00454c836b1b60874ee97170c8 Mon Sep 17 00:00:00 2001
From: Marat Omarov <mtomarov@miem.hse.ru>
Date: Mon, 11 Mar 2024 22:32:26 +0300
Subject: [PATCH 23/27] Add mb_list_encodings function

---
 builtin-functions/_functions.txt |  1 +
 runtime/mbstring/mbstring.cpp    | 19 +++++++++++++++++++
 runtime/mbstring/mbstring.h      |  2 ++
 3 files changed, 22 insertions(+)

diff --git a/builtin-functions/_functions.txt b/builtin-functions/_functions.txt
index 57a5e9957b..f3cb3b9c56 100644
--- a/builtin-functions/_functions.txt
+++ b/builtin-functions/_functions.txt
@@ -1649,6 +1649,7 @@ function getenv(string $varname = '', bool $local_only = false): mixed;
 
 function mb_check_encoding(array|string $value, ?string $encoding = null): bool;
 function mb_convert_encoding(array|string $string, string $to_encoding, array|string|null $from_encoding = null): array|string|false;
+function mb_list_encodings(): array;
 function mb_preferred_mime_name(string $encoding): string|false;
 function mb_str_split(string $string, int $length = 1, ?string $encoding = null): array;
 function mb_strcut(string $string, int $start, ?int $length = null, ?string $encoding = null): string;
diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
index 8821d0c72c..acd199156d 100644
--- a/runtime/mbstring/mbstring.cpp
+++ b/runtime/mbstring/mbstring.cpp
@@ -68,8 +68,15 @@ extern "C" {
 #define KPHP_UNICODE_CASE_FOLD_SIMPLE  7
 #define KPHP_UNICODE_CASE_MODE_MAX     7
 
+// TO-DO:
+// #define MBFL_BAD_INPUT (-1)
+
 static const char * DEFAULT_ENCODING = "UTF-8" ;
 
+static inline array<string> get_supported_encodings();
+
+static const array<string> supported_encodings_list = get_supported_encodings();
+
 static inline int mbfl_is_error(size_t len) {
   return len >= (size_t) -16;
 }
@@ -189,6 +196,18 @@ static const mbfl_encoding *mb_get_encoding(const Optional<string> &enc_name) {
   return mbfl_name2encoding(DEFAULT_ENCODING); // change if we are going to use current encoding
 }
 
+static inline array<string> get_supported_encodings() {
+  array<string> result;
+  for (const mbfl_encoding **encodings = mbfl_get_supported_encodings(); *encodings; encodings++) {
+    result.push_back(string((*encodings)->name));
+  }
+  return result;
+}
+
+array<string> f$mb_list_encodings() {
+  return supported_encodings_list;
+}
+
 Optional<string> f$mb_preferred_mime_name(const string &enc_name) {
   const mbfl_encoding *encoding;
   encoding = mbfl_name2encoding(enc_name.c_str());
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index 59c2d5e4f4..ef126abc66 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -17,6 +17,8 @@ bool f$mb_check_encoding(const mixed &value, const Optional<string> &encoding);
 
 mixed f$mb_convert_encoding(const mixed &str, const string &to_encoding, const mixed &from_encoding);
 
+array<string> f$mb_list_encodings();
+
 Optional<string> f$mb_preferred_mime_name(const string &$encoding);
 
 array<string> f$mb_str_split(const string &str, const int64_t &length=1, const Optional<string> &encoding=false);

From 2ecb94d1807086031b940a2f65448e48ec3bf509 Mon Sep 17 00:00:00 2001
From: Marat Omarov <mtomarov@miem.hse.ru>
Date: Tue, 12 Mar 2024 15:24:04 +0300
Subject: [PATCH 24/27] Add mb_encoding_aliases function

---
 builtin-functions/_functions.txt |  1 +
 runtime/mbstring/mbstring.cpp    | 18 +++++++++++++++++-
 runtime/mbstring/mbstring.h      |  2 ++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/builtin-functions/_functions.txt b/builtin-functions/_functions.txt
index f3cb3b9c56..9adb08de4c 100644
--- a/builtin-functions/_functions.txt
+++ b/builtin-functions/_functions.txt
@@ -1649,6 +1649,7 @@ function getenv(string $varname = '', bool $local_only = false): mixed;
 
 function mb_check_encoding(array|string $value, ?string $encoding = null): bool;
 function mb_convert_encoding(array|string $string, string $to_encoding, array|string|null $from_encoding = null): array|string|false;
+function mb_encoding_aliases(string $encoding): array;
 function mb_list_encodings(): array;
 function mb_preferred_mime_name(string $encoding): string|false;
 function mb_str_split(string $string, int $length = 1, ?string $encoding = null): array;
diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
index acd199156d..f8ef898908 100644
--- a/runtime/mbstring/mbstring.cpp
+++ b/runtime/mbstring/mbstring.cpp
@@ -196,9 +196,25 @@ static const mbfl_encoding *mb_get_encoding(const Optional<string> &enc_name) {
   return mbfl_name2encoding(DEFAULT_ENCODING); // change if we are going to use current encoding
 }
 
+// Collects every entry of the alias table of `encoding`; an unknown name is a critical error.
+array<string> f$mb_encoding_aliases(const string &encoding) {
+  const mbfl_encoding *found = mb_get_encoding(encoding);
+  if (found == nullptr) {
+    php_critical_error("encoding \"%s\" isn't supported in mb_encoding_aliases", encoding.c_str());
+  }
+
+  array<string> names;
+  if (found->aliases) {
+    for (const char *const *alias = *found->aliases; *alias; ++alias) { // stops at the null entry
+      names.push_back(string(*alias));
+    }
+  }
+  return names;
+}
+
 static inline array<string> get_supported_encodings() {
   array<string> result;
-  for (const mbfl_encoding **encodings = mbfl_get_supported_encodings(); *encodings; encodings++) {
+  for (const mbfl_encoding **encodings = mbfl_get_supported_encodings(); *encodings; ++encodings) {
     result.push_back(string((*encodings)->name));
   }
   return result;
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index ef126abc66..ec8b2b5822 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -17,6 +17,8 @@ bool f$mb_check_encoding(const mixed &value, const Optional<string> &encoding);
 
 mixed f$mb_convert_encoding(const mixed &str, const string &to_encoding, const mixed &from_encoding);
 
+array<string> f$mb_encoding_aliases(const string &encoding);
+
 array<string> f$mb_list_encodings();
 
 Optional<string> f$mb_preferred_mime_name(const string &$encoding);

From 24400a0ff5d5cfe1d47e6803962269f432f6ab12 Mon Sep 17 00:00:00 2001
From: catnyan02 <catnyan02@gmail.com>
Date: Wed, 10 Jul 2024 21:23:41 +0100
Subject: [PATCH 25/27] Add oniguruma and regular expressions

---
 .idea/misc.xml                            |   3 +
 builtin-functions/_functions.txt          |   8 +-
 cmake/external-libraries.cmake            |  13 +
 compiler/compiler-settings.cpp            |   5 +
 runtime/mbstring/mbstring.cpp             | 672 +++++++++++++++++++++-
 runtime/mbstring/mbstring.h               |  12 +
 runtime/runtime.cmake                     |   2 +
 tests/cpp/runtime/mbstring-test.cpp       |  13 +
 tests/phpt/mbstring/001_mb_strlen.php     |   2 +-
 tests/phpt/mbstring/015_mb_strimwidth.php |  58 ++
 10 files changed, 785 insertions(+), 3 deletions(-)
 create mode 100644 tests/phpt/mbstring/015_mb_strimwidth.php

diff --git a/.idea/misc.xml b/.idea/misc.xml
index bdd226825f..9f883b3c5b 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,5 +1,8 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
+  <component name="CMakePythonSetting">
+    <option name="pythonIntegrationState" value="YES" />
+  </component>
   <component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$" />
   <component name="CidrRootsConfiguration">
     <sourceRoots>
diff --git a/builtin-functions/_functions.txt b/builtin-functions/_functions.txt
index 9e3af0c621..5c700db035 100644
--- a/builtin-functions/_functions.txt
+++ b/builtin-functions/_functions.txt
@@ -1650,6 +1650,7 @@ function getenv(string $varname = '', bool $local_only = false): mixed;
 function mb_check_encoding(array|string $value, ?string $encoding = null): bool;
 function mb_convert_encoding(array|string $string, string $to_encoding, array|string|null $from_encoding = null): array|string|false;
 function mb_substr(string $string, int $start, ?int $length = null, ?string $encoding = null): string;
+function mb_substitute_character(string|int|null $substitute_character = null): string|int|false;
 function mb_strlen(string $string, ?string $encoding = null): int;
 function mb_substr_count(string $haystack, string $needle, ?string $encoding = null): int;
 function mb_strtolower(string $string, ?string $encoding = null): string;
@@ -1662,4 +1663,9 @@ function mb_strrpos(string $haystack, string $needle, int $offset = 0, string $e
 function mb_stristr(string $haystack, string $needle, bool $before_needle = false, ?string $encoding = null): string|false;
 function mb_strrchr(string $haystack, string $needle, bool $before_needle = false, ?string $encoding = null): string|false;
 function mb_strrichr(string $haystack, string $needle, bool $before_needle = false, ?string $encoding = null): string|false;
-function mb_strstr(string $haystack, string $needle, bool $before_needle = false, ?string $encoding = null): string|false;
\ No newline at end of file
+function mb_strstr(string $haystack, string $needle, bool $before_needle = false, ?string $encoding = null): string|false;
+function mb_strimwidth(string $string, int $start, int $width, string $trim_marker = "", ?string $encoding = null): string;
+function mb_scrub(string $string, ?string $encoding = null): string;
+function mb_regex_encoding(?string $encoding = null): string|false;
+function mb_regex_set_options(?string $options = null): string;
+function mb_ereg_match(string $pattern, string $string, ?string $options = null): bool;
diff --git a/cmake/external-libraries.cmake b/cmake/external-libraries.cmake
index 0fec2a0a56..169fb14ae4 100644
--- a/cmake/external-libraries.cmake
+++ b/cmake/external-libraries.cmake
@@ -19,6 +19,19 @@ if(MBFL)
     include_directories(${libmbfl_SOURCE_DIR}/include)
     add_definitions(-DLIBMBFL_LIB_DIR="${libmbfl_SOURCE_DIR}/objs")
     add_link_options(-L${libmbfl_SOURCE_DIR}/objs)
+
+    find_package(onig QUIET)
+    if(NOT onig_FOUND)
+        handle_missing_library("onig")
+        FetchContent_Declare(
+                onig
+                GIT_REPOSITORY https://github.com/kkos/oniguruma/
+                GIT_TAG        v6.9.9
+        )
+        FetchContent_MakeAvailable(onig)
+        include_directories(${onig_SOURCE_DIR}/src)
+        message(STATUS "---------------------")
+    endif()
 endif()
 
 find_package(fmt QUIET)
diff --git a/compiler/compiler-settings.cpp b/compiler/compiler-settings.cpp
index 9e761f3fcf..e18ea72368 100644
--- a/compiler/compiler-settings.cpp
+++ b/compiler/compiler-settings.cpp
@@ -342,6 +342,11 @@ void CompilerSettings::init() {
   ld_flags.value_ += " -L" LIBMBFL_LIB_DIR;
 #endif
 
+#ifdef ONIG_LIB_DIR
+  external_static_libs.emplace_back("onig");
+  ld_flags.value_ += " -lonig";
+#endif
+
 #ifdef KPHP_H3_LIB_DIR
   ld_flags.value_ += " -L" KPHP_H3_LIB_DIR;
 #else
diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
index c69020ca63..fe62de73bd 100644
--- a/runtime/mbstring/mbstring.cpp
+++ b/runtime/mbstring/mbstring.cpp
@@ -58,6 +58,8 @@ extern "C" {
 #include <kphp/libmbfl/mbfl/mbfilter_wchar.h>
 }
 
+#include <oniguruma.h>
+
 #define KPHP_UNICODE_CASE_UPPER        0
 #define KPHP_UNICODE_CASE_LOWER        1
 #define KPHP_UNICODE_CASE_TITLE        2
@@ -68,7 +70,16 @@ extern "C" {
 #define KPHP_UNICODE_CASE_FOLD_SIMPLE  7
 #define KPHP_UNICODE_CASE_MODE_MAX     7
 
-static const char * DEFAULT_ENCODING = "UTF-8" ;
+#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE 0
+#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR 1
+#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG 2
+#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY 3
+#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_BADUTF8 4
+
+static const char * DEFAULT_ENCODING = "UTF-8";
+
+int current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
+int current_filter_illegal_substchar = '?';
 
 static inline int mbfl_is_error(size_t len) {
   return len >= (size_t) -16;
@@ -94,6 +105,8 @@ mbfl_string *convert_encoding(const char *str, const char *to, const char *from)
 
 	/* converting */
 	convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
+  mbfl_buffer_converter_illegal_mode(convd, current_filter_illegal_mode);
+  mbfl_buffer_converter_illegal_substchar(convd, current_filter_illegal_substchar);
 	ret = mbfl_buffer_converter_feed_result(convd, &_string, &result);
 	mbfl_buffer_converter_delete(convd);
 
@@ -257,6 +270,73 @@ string f$mb_substr(const string &str, const int64_t start, const Optional<int64_
   return string((const char*) ret->val, ret->len);
 }
 
+// Returns 1 when `cp` can be used as a substitute code point, 0 otherwise.
+// Zero and negative values are rejected together with out-of-range ones.
+static inline int php_mb_check_code_point(long cp)
+{
+  // Accepted values lie strictly between 0 and 0x110000.
+  const bool in_range = cp > 0 && cp < 0x110000;
+
+  // 0xD800..0xDFFF is excluded from the accepted range.
+  const bool in_excluded_block = cp >= 0xd800 && cp <= 0xdfff;
+
+  return (in_range && !in_excluded_block) ? 1 : 0;
+}
+
+//static uint8_t is_numeric_string_ex(const char *str, size_t length, zend_long *lval,
+//                                                       double *dval)
+//{
+//  if (*str > '9') {
+//    return 0;
+//  }
+//  return _is_numeric_string_ex(str, length, lval, dval, allow_errors, oflow_info, trailing_data);
+//}
+//
+//long convert_to_long(string *op)
+//{
+//  long lval;
+//  double dval;
+//  if (0 == is_numeric_string(op->c_str(), op->size(), &lval, &dval)) {
+//    return 0;
+//  } else {
+//    return (long)lval;
+//  }
+//}
+
+// Gets or sets what converters emit for characters they cannot convert.
+// Falsy argument: returns "none", "long", "entity" or the substitute code point.
+// Otherwise: a keyword selects that mode, anything else is read as a code point;
+// returns true on success, false (after a warning) for an unusable code point.
+mixed f$mb_substitute_character(const mixed &substitute_character){
+  if (!substitute_character) {
+    switch (current_filter_illegal_mode) {
+      case MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE: return string("none", 4);
+      case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG: return string("long", 4);
+      case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY: return string("entity", 6);
+      default: return current_filter_illegal_substchar;
+    }
+  }
+  if (substitute_character.is_string()) {
+    // Match whole keywords only: bounding the compare by the argument's length accepts prefixes such as "n".
+    const string keyword = substitute_character.to_string();
+    const auto matches = [&keyword](const char *name, size_t len) {
+      return keyword.size() == len && strncasecmp(name, keyword.c_str(), len) == 0;
+    };
+    if (matches("none", 4)) { current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; return true; }
+    if (matches("long", 4)) { current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG; return true; }
+    if (matches("entity", 6)) { current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY; return true; }
+  }
+
+  const int64_t code_point = substitute_character.to_int();
+  if (php_mb_check_code_point(code_point)) {
+    current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
+    current_filter_illegal_substchar = static_cast<int>(code_point);
+    return true;
+  }
+  php_warning("Unknown character");
+  return false;
+}
+
 int64_t f$mb_substr_count(const string &haystack, const string &needle, const Optional<string> &encoding){
 
   size_t n;
@@ -714,6 +794,596 @@ Optional<string> f$mb_strrichr(const string &haystack, const string &needle, con
   return false;
 }
 
+// Trims `str` through libmbfl's mbfl_strimwidth, starting at `start` and limited
+// to `width`; `trim_marker` is handed to libmbfl as the marker string. A negative
+// `start` or `width` is first resolved against the width of the whole string.
+string f$mb_strimwidth(const string &str, const int64_t start, const int64_t width, const string &trim_marker, const Optional<string> &encoding){
+  const mbfl_encoding *enc = mb_get_encoding(encoding);
+  if (!enc) {
+    php_critical_error ("encoding \"%s\" isn't supported in mb_strimwidth", encoding.val().c_str());
+  }
+
+  mbfl_string _string, result, _trim_marker, *ret;
+  int64_t from = start;
+  int64_t effective_width = width;
+  int64_t swidth = 0;
+
+  mbfl_string_init(&_string);
+  _string.no_encoding = enc->no_encoding;
+  _string.len = str.size();
+  _string.val = (unsigned char*) str.c_str();
+
+  mbfl_string_init(&_trim_marker);
+  _trim_marker.no_encoding = enc->no_encoding;
+  _trim_marker.len = 0;
+  _trim_marker.val = NULL;
+
+  if ((from < 0) || (width < 0)) {
+    swidth = mbfl_strwidth(&_string);
+  }
+  if (from < 0) {
+    from += swidth;
+  }
+  if (from < 0 || (size_t)from > str.size()) {
+    php_critical_error ("Start position is out of range");
+  }
+
+  // `width` is const, so the resolved value lives in its own variable; it is
+  // this value, not the raw negative argument, that must reach libmbfl.
+  if (effective_width < 0) {
+    effective_width = swidth + effective_width - from;
+  }
+  if (effective_width < 0) {
+    php_critical_error ("Width is out of range");
+  }
+
+  if (trim_marker.size() > 0) {
+    _trim_marker.len = trim_marker.size();
+    _trim_marker.val = (unsigned char*) trim_marker.c_str();
+  }
+
+  ret = mbfl_strimwidth(&_string, &_trim_marker, &result, from, effective_width);
+  if (ret == NULL) {
+    php_critical_error ("Internal error");
+  }
+
+  return string((const char*) ret->val, ret->len);
+}
+
+// Converts `str` from the given encoding to that same encoding, so the text goes
+// through convert_encoding() and its substitute-character settings.
+Optional<string> f$mb_scrub(const string &str, const Optional<string> &encoding){
+  const mbfl_encoding *enc = mb_get_encoding(encoding);
+  if (!enc) {
+    return false;
+  }
+
+  // Pass the resolved encoding's name; `encoding` itself may be unset (the default).
+  mbfl_string *ret = convert_encoding(str.c_str(), enc->name, enc->name);
+  if (ret == NULL) {
+    return false;
+  }
+
+  return string((const char*)ret->val, ret->len);
+}
+
+/*            REGEXPS           */
+
+/*
+ * Encoding name resolver: one entry ties a group of aliases to an Oniguruma encoding.
+ */
+typedef struct _kphp_mb_regex_enc_name_map_t {
+  const char *names;  // aliases packed back to back, each NUL-terminated (e.g. "UTF-8\0UTF8\0")
+  OnigEncoding code;  // encoding those aliases stand for
+} kphp_mb_regex_enc_name_map_t;
+
+static const kphp_mb_regex_enc_name_map_t enc_name_map[] = { // alias groups; an entry exists only if its ONIG_ENCODING_* macro is defined
+#ifdef ONIG_ENCODING_EUC_JP
+  {
+    "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
+    ONIG_ENCODING_EUC_JP
+  },
+#endif
+#ifdef ONIG_ENCODING_UTF8
+  {
+    "UTF-8\0UTF8\0",
+    ONIG_ENCODING_UTF8
+  },
+#endif
+#ifdef ONIG_ENCODING_UTF16_BE
+  {
+    "UTF-16\0UTF-16BE\0",
+    ONIG_ENCODING_UTF16_BE
+  },
+#endif
+#ifdef ONIG_ENCODING_UTF16_LE
+  {
+    "UTF-16LE\0",
+    ONIG_ENCODING_UTF16_LE
+  },
+#endif
+#ifdef ONIG_ENCODING_UTF32_BE
+  {
+    "UCS-4\0UTF-32\0UTF-32BE\0",
+    ONIG_ENCODING_UTF32_BE
+  },
+#endif
+#ifdef ONIG_ENCODING_UTF32_LE
+  {
+    "UCS-4LE\0UTF-32LE\0",
+    ONIG_ENCODING_UTF32_LE
+  },
+#endif
+#ifdef ONIG_ENCODING_SJIS
+  {
+    "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
+    ONIG_ENCODING_SJIS
+  },
+#endif
+#ifdef ONIG_ENCODING_BIG5
+  {
+    "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
+    ONIG_ENCODING_BIG5
+  },
+#endif
+#ifdef ONIG_ENCODING_EUC_CN
+  {
+    "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
+    ONIG_ENCODING_EUC_CN
+  },
+#endif
+#ifdef ONIG_ENCODING_EUC_TW
+  {
+    "EUC-TW\0EUCTW\0EUC_TW\0",
+    ONIG_ENCODING_EUC_TW
+  },
+#endif
+#ifdef ONIG_ENCODING_EUC_KR
+  {
+    "EUC-KR\0EUCKR\0EUC_KR\0",
+    ONIG_ENCODING_EUC_KR
+  },
+#endif
+#if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
+  {
+    "KOI8\0KOI-8\0",
+    ONIG_ENCODING_KOI8
+  },
+#endif
+#ifdef ONIG_ENCODING_KOI8_R
+  {
+    "KOI8R\0KOI8-R\0KOI-8R\0",
+    ONIG_ENCODING_KOI8_R
+  },
+#endif
+#ifdef ONIG_ENCODING_ISO_8859_1
+  {
+    "ISO-8859-1\0ISO8859-1\0",
+    ONIG_ENCODING_ISO_8859_1
+  },
+#endif
+#ifdef ONIG_ENCODING_ISO_8859_2
+  {
+    "ISO-8859-2\0ISO8859-2\0",
+    ONIG_ENCODING_ISO_8859_2
+  },
+#endif
+#ifdef ONIG_ENCODING_ISO_8859_3
+  {
+    "ISO-8859-3\0ISO8859-3\0",
+    ONIG_ENCODING_ISO_8859_3
+  },
+#endif
+#ifdef ONIG_ENCODING_ISO_8859_4
+  {
+    "ISO-8859-4\0ISO8859-4\0",
+    ONIG_ENCODING_ISO_8859_4
+  },
+#endif
+#ifdef ONIG_ENCODING_ISO_8859_5
+  {
+    "ISO-8859-5\0ISO8859-5\0",
+    ONIG_ENCODING_ISO_8859_5
+  },
+#endif
+#ifdef ONIG_ENCODING_ISO_8859_6
+  {
+    "ISO-8859-6\0ISO8859-6\0",
+    ONIG_ENCODING_ISO_8859_6
+  },
+#endif
+#ifdef ONIG_ENCODING_ISO_8859_7
+  {
+    "ISO-8859-7\0ISO8859-7\0",
+    ONIG_ENCODING_ISO_8859_7
+  },
+#endif
+#ifdef ONIG_ENCODING_ISO_8859_8
+  {
+    "ISO-8859-8\0ISO8859-8\0",
+    ONIG_ENCODING_ISO_8859_8
+  },
+#endif
+#ifdef ONIG_ENCODING_ISO_8859_9
+  {
+    "ISO-8859-9\0ISO8859-9\0",
+    ONIG_ENCODING_ISO_8859_9
+  },
+#endif
+#ifdef ONIG_ENCODING_ISO_8859_10
+  {
+    "ISO-8859-10\0ISO8859-10\0",
+    ONIG_ENCODING_ISO_8859_10
+  },
+#endif
+#ifdef ONIG_ENCODING_ISO_8859_11
+  {
+    "ISO-8859-11\0ISO8859-11\0",
+    ONIG_ENCODING_ISO_8859_11
+  },
+#endif
+#ifdef ONIG_ENCODING_ISO_8859_13
+  {
+    "ISO-8859-13\0ISO8859-13\0",
+    ONIG_ENCODING_ISO_8859_13
+  },
+#endif
+#ifdef ONIG_ENCODING_ISO_8859_14
+  {
+    "ISO-8859-14\0ISO8859-14\0",
+    ONIG_ENCODING_ISO_8859_14
+  },
+#endif
+#ifdef ONIG_ENCODING_ISO_8859_15
+  {
+    "ISO-8859-15\0ISO8859-15\0",
+    ONIG_ENCODING_ISO_8859_15
+  },
+#endif
+#ifdef ONIG_ENCODING_ISO_8859_16
+  {
+    "ISO-8859-16\0ISO8859-16\0",
+    ONIG_ENCODING_ISO_8859_16
+  },
+#endif
+#ifdef ONIG_ENCODING_ASCII
+  {
+    "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
+    ONIG_ENCODING_ASCII
+  },
+#endif
+  { NULL, ONIG_ENCODING_UNDEF } // sentinel: names == NULL marks the end of the table
+};
+
+OnigEncoding default_mbctype = ONIG_ENCODING_UTF8;
+OnigEncoding current_mbctype = ONIG_ENCODING_UTF8; // encoding kphp_mbregex_compile_pattern() compiles patterns with
+mbfl_no_encoding current_mbctype_mbfl_encoding = mbfl_no_encoding_utf8;
+
+OnigOptionType regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE; // used when a call supplies no options
+OnigSyntaxType *regex_default_syntax = ONIG_SYNTAX_RUBY; // used when a call supplies no options
+
+long regex_stack_limit = 100000; // match stack limit f$mb_ereg_match sets on its match param
+long regex_retry_limit = 1000000; // retry limit f$mb_ereg_match sets on its match param
+
+//static OnigEncoding _kphp_mb_regex_name2mbctype(const char *pname)
+//{
+//  const char *p;
+//  const kphp_mb_regex_enc_name_map_t *mapping;
+//
+//  if (pname == NULL || !*pname) {
+//    return ONIG_ENCODING_UNDEF;
+//  }
+//
+//  for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
+//    for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
+//      if (strcasecmp(p, pname) == 0) {
+//        return mapping->code;
+//      }
+//    }
+//  }
+//
+//  return ONIG_ENCODING_UNDEF;
+//}
+//
+//
+//static const char *_kphp_mb_regex_mbctype2name(OnigEncoding mbctype)
+//{
+//  const kphp_mb_regex_enc_name_map_t *mapping;
+//
+//  for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
+//    if (mapping->code == mbctype) {
+//      return mapping->names;
+//    }
+//  }
+//
+//  return NULL;
+//}
+
+//mixed f$mb_regex_encoding(const Optional<string> &encoding){
+//  if (!encoding.has_value()) {
+//    const char *retval = _kphp_mb_regex_mbctype2name(current_mbctype);
+//    if (retval != NULL){
+//      return string(retval);
+//    }
+//    return NULL;
+//  } else {
+//    OnigEncoding mbctype = _kphp_mb_regex_name2mbctype(val(encoding).c_str());
+//    if (mbctype == ONIG_ENCODING_UNDEF) {
+//      php_critical_error ("must be a valid encoding, \"%s\" given", encoding.val().c_str());
+//    }
+//    current_mbctype = mbctype;
+//    current_mbctype_mbfl_encoding = mb_get_encoding(encoding)->no_encoding;
+//    return true;
+//  }
+//}
+
+// Translates an mbregex option string into Oniguruma option bits and a syntax.
+// Option letters are OR-ed into *option (when non-null); syntax letters replace
+// *syntax, which starts as Ruby. Returns false at the first unknown letter.
+static bool _kphp_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option, OnigSyntaxType **syntax)
+{
+  *syntax = ONIG_SYNTAX_RUBY;
+  if (parg == NULL) {
+    return true;
+  }
+  OnigOptionType parsed = 0;
+  for (size_t i = 0; i < narg; ++i) {
+    switch (parg[i]) {
+      case 'i':
+        parsed |= ONIG_OPTION_IGNORECASE;
+        break;
+      case 'x':
+        parsed |= ONIG_OPTION_EXTEND;
+        break;
+      case 'm':
+        parsed |= ONIG_OPTION_MULTILINE;
+        break;
+      case 's':
+        parsed |= ONIG_OPTION_SINGLELINE;
+        break;
+      case 'p':
+        parsed |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
+        break;
+      case 'l':
+        parsed |= ONIG_OPTION_FIND_LONGEST;
+        break;
+      case 'n':
+        parsed |= ONIG_OPTION_FIND_NOT_EMPTY;
+        break;
+      case 'j':
+        *syntax = ONIG_SYNTAX_JAVA;
+        break;
+      case 'u':
+        *syntax = ONIG_SYNTAX_GNU_REGEX;
+        break;
+      case 'g':
+        *syntax = ONIG_SYNTAX_GREP;
+        break;
+      case 'c':
+        *syntax = ONIG_SYNTAX_EMACS;
+        break;
+      case 'r':
+        *syntax = ONIG_SYNTAX_RUBY;
+        break;
+      case 'z':
+        *syntax = ONIG_SYNTAX_PERL;
+        break;
+      case 'b':
+        *syntax = ONIG_SYNTAX_POSIX_BASIC;
+        break;
+      case 'd':
+        *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
+        break;
+      default:
+        return false;
+    }
+  }
+  if (option != NULL) {
+    *option |= parsed;
+  }
+  return true;
+}
+
+// Replaces the default regex options and syntax. The values being replaced are
+// written through prev_options / prev_syntax when those pointers are non-null.
+static void _kphp_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax)
+{
+  // Report the outgoing defaults before they are overwritten.
+  if (prev_syntax) *prev_syntax = regex_default_syntax;
+  if (prev_options) *prev_options = regex_default_options;
+
+  regex_default_syntax = syntax;
+  regex_default_options = options;
+}
+
+// Renders `option` and `syntax` as an mbregex option string in `str`.
+// At most `len` bytes are stored, terminating NUL included. Returns 0 when
+// everything fit, otherwise the number of bytes the full string requires.
+static size_t _kphp_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
+{
+  size_t room = len;
+  size_t needed = 0;
+  char *out = str;
+
+  if (option & ONIG_OPTION_IGNORECASE) {
+    if (room > 0) {
+      --room;
+      *out++ = 'i';
+    }
+    ++needed;
+  }
+
+  if (option & ONIG_OPTION_EXTEND) {
+    if (room > 0) {
+      --room;
+      *out++ = 'x';
+    }
+    ++needed;
+  }
+
+  // 'p' is emitted in place of "ms" when both bits are set.
+  const OnigOptionType multi_and_single = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
+  if ((option & multi_and_single) == multi_and_single) {
+    if (room > 0) {
+      --room;
+      *out++ = 'p';
+    }
+    ++needed;
+  } else {
+    if (option & ONIG_OPTION_MULTILINE) {
+      if (room > 0) {
+        --room;
+        *out++ = 'm';
+      }
+      ++needed;
+    }
+
+    if (option & ONIG_OPTION_SINGLELINE) {
+      if (room > 0) {
+        --room;
+        *out++ = 's';
+      }
+      ++needed;
+    }
+  }
+
+  if (option & ONIG_OPTION_FIND_LONGEST) {
+    if (room > 0) {
+      --room;
+      *out++ = 'l';
+    }
+    ++needed;
+  }
+
+  if (option & ONIG_OPTION_FIND_NOT_EMPTY) {
+    if (room > 0) {
+      --room;
+      *out++ = 'n';
+    }
+    ++needed;
+  }
+
+  // At most one letter names the syntax; an unrecognised syntax adds nothing.
+  char syntax_letter = 0;
+  if (syntax == ONIG_SYNTAX_JAVA) {
+    syntax_letter = 'j';
+  } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
+    syntax_letter = 'u';
+  } else if (syntax == ONIG_SYNTAX_GREP) {
+    syntax_letter = 'g';
+  } else if (syntax == ONIG_SYNTAX_EMACS) {
+    syntax_letter = 'c';
+  } else if (syntax == ONIG_SYNTAX_RUBY) {
+    syntax_letter = 'r';
+  } else if (syntax == ONIG_SYNTAX_PERL) {
+    syntax_letter = 'z';
+  } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
+    syntax_letter = 'b';
+  } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
+    syntax_letter = 'd';
+  }
+  if (syntax_letter != 0) {
+    if (room > 0) {
+      --room;
+      *out++ = syntax_letter;
+    }
+    ++needed;
+  }
+
+  // The terminator counts towards the required size even when it does not fit.
+  if (room > 0) {
+    --room;
+    *out++ = '\0';
+  }
+  ++needed;
+  return (len < needed) ? needed : 0;
+}
+
+// Sets the default mbregex options and returns the option string that was in
+// effect before the call; with no argument, returns the current option string.
+string f$mb_regex_set_options(const Optional<string> &options){
+  OnigOptionType opt = regex_default_options;
+  OnigSyntaxType *syntax = regex_default_syntax;
+
+  if (options.has_value()) {
+    OnigOptionType new_opt = 0;
+    OnigSyntaxType *new_syntax = NULL;
+    if (!_kphp_mb_regex_init_options(val(options).c_str(), val(options).size(), &new_opt, &new_syntax)) {
+      php_critical_error("Wrong regex options.");
+    }
+    // opt/syntax receive the values being replaced, which is what gets reported.
+    _kphp_mb_regex_set_options(new_opt, new_syntax, &opt, &syntax);
+  }
+
+  char buf[16];
+  _kphp_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
+  // Return only the characters written: sizeof(buf) would also copy the
+  // terminator and the uninitialized bytes that follow it.
+  return string(buf, strlen(buf));
+}
+
+// Compiles `pattern` (patlen bytes) for current_mbctype with the given options and
+// syntax. On failure raises a warning with Oniguruma's message and returns NULL.
+static regex_t *kphp_mbregex_compile_pattern(const char *pattern, size_t patlen, OnigOptionType options, OnigSyntaxType *syntax)
+{
+  // TODO: hashing
+  regex_t *compiled = NULL;
+  OnigErrorInfo error_info;
+  OnigUChar *const first = (OnigUChar *)pattern;
+  const int status = onig_new(&compiled, first, first + patlen, options, current_mbctype, syntax, &error_info);
+  if (status == ONIG_NORMAL) {
+    return compiled;
+  }
+  OnigUChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
+  onig_error_code_to_str(message, status, &error_info);
+  php_warning("mbregex compile err: %s", message);
+  return NULL;
+}
+
+// Asks Oniguruma whether `pattern` matches `str` at the start of `str`.
+// `options`, when given, is used for this call instead of the defaults.
+// Returns false when `str` fails the DEFAULT_ENCODING check, when the pattern
+// does not compile, or when the match call reports a negative result.
+bool f$mb_ereg_match(const string &pattern, const string &str, const Optional<string> &options){
+  OnigSyntaxType *syntax = regex_default_syntax;
+  OnigOptionType option = 0;
+
+  if (options.has_value()) {
+    if (!_kphp_mb_regex_init_options(val(options).c_str(), val(options).size(), &option, &syntax)) {
+      php_critical_error("Wrong regex options.");
+    }
+  } else {
+    option |= regex_default_options;
+  }
+
+  if (!f$mb_check_encoding(str, DEFAULT_ENCODING)) {
+    return false;
+  }
+
+  regex_t *re = kphp_mbregex_compile_pattern(pattern.c_str(), pattern.size(), option, syntax);
+  if (re == NULL) {
+    return false;
+  }
+
+  OnigMatchParam *mp = onig_new_match_param();
+  onig_initialize_match_param(mp);
+  if (regex_stack_limit > 0 && regex_stack_limit < UINT_MAX) {
+    onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int) regex_stack_limit);
+  }
+  if (regex_retry_limit > 0 && regex_retry_limit < UINT_MAX) {
+    onig_set_retry_limit_in_match_of_match_param(mp, (unsigned int) regex_retry_limit);
+  }
+
+  const OnigUChar *subject = (const OnigUChar *) str.c_str();
+  const int err = onig_match_with_param(re, subject, subject + str.size(), subject, NULL, 0, mp);
+  // Both objects are created per call and nothing caches them, so release
+  // them here; otherwise every call leaks a compiled pattern.
+  onig_free_match_param(mp);
+  onig_free(re);
+
+  return err >= 0;
+}
+
 #else
 
 
diff --git a/runtime/mbstring/mbstring.h b/runtime/mbstring/mbstring.h
index b3c153de9e..f90283be9b 100644
--- a/runtime/mbstring/mbstring.h
+++ b/runtime/mbstring/mbstring.h
@@ -21,6 +21,8 @@ int64_t f$mb_strlen(const string &str, const Optional<string> &encoding=false);
 
 string f$mb_substr(const string &str, const int64_t start, const Optional<int64_t> &length=0, const Optional<string> &encoding=false);
 
+mixed f$mb_substitute_character(const mixed &substitute_character);
+
 int64_t f$mb_substr_count(const string &haystack, const string &needle, const Optional<string> &encoding=false);
 
 string f$mb_strtoupper(const string &str, const Optional<string> &encoding=false);
@@ -45,6 +47,16 @@ Optional<string> f$mb_strrchr(const string &haystack, const string &needle, cons
 
 Optional<string> f$mb_strrichr(const string &haystack, const string &needle, const bool before_needle, const Optional<string> &encoding=false);
 
+string f$mb_strimwidth(const string &str, const int64_t start, const int64_t width, const string &trim_marker, const Optional<string> &encoding=false);
+
+Optional<string> f$mb_scrub(const string &str, const Optional<string> &encoding=false);
+
+//mixed f$mb_regex_encoding(const Optional<string> &encoding=false);
+
+string f$mb_regex_set_options(const Optional<string> &options=false);
+
+bool f$mb_ereg_match(const string &pattern, const string &str, const Optional<string> &options=false);
+
 #else
 
 #include <climits>
diff --git a/runtime/runtime.cmake b/runtime/runtime.cmake
index a24173eb8a..a5d07756a9 100644
--- a/runtime/runtime.cmake
+++ b/runtime/runtime.cmake
@@ -151,6 +151,7 @@ target_include_directories(kphp_runtime PUBLIC ${BASE_DIR} /opt/curl7600/include
 add_dependencies(kphp_runtime kphp-timelib)
 if (MBFL)
     add_dependencies(kphp_runtime libmbfl)
+    add_dependencies(kphp_runtime onig)
 endif()
 
 
@@ -173,6 +174,7 @@ endif()
 
 if (MBFL)
     list(APPEND RUNTIME_LINK_TEST_LIBS libmbfl)
+    list(APPEND RUNTIME_LINK_TEST_LIBS onig)
 endif()
 
 file(GLOB_RECURSE KPHP_RUNTIME_ALL_HEADERS
diff --git a/tests/cpp/runtime/mbstring-test.cpp b/tests/cpp/runtime/mbstring-test.cpp
index ec7506da61..4c10fe9442 100644
--- a/tests/cpp/runtime/mbstring-test.cpp
+++ b/tests/cpp/runtime/mbstring-test.cpp
@@ -92,4 +92,17 @@ TEST(mbstring_test, test_mb_strrichr) {
   ASSERT_STREQ(f$mb_strrichr(string("This is a test string"), string("test"), false, string("UTF-8")).val().c_str(), "test string");
 }
 
+TEST(mbstring_test, test_mb_strimwidth) { // expected text: 17 characters of input + the 3-character marker = width 20
+  ASSERT_STREQ(f$mb_strimwidth(string("This is a very long string that needs to be trimmed"), 0, 20, string("...")).c_str(), "This is a very lo...");
+}
+
+TEST(mbstring_test, test_mb_regex_set_options) { // each call returns the options it replaces
+  ASSERT_STREQ(f$mb_regex_set_options(string("xpu")).c_str(), "pr");
+  ASSERT_STREQ(f$mb_regex_set_options(string("pr")).c_str(), "xpu"); // also puts the "pr" defaults back for later tests
+}
+
+TEST(mbstring_test, test_mb_ereg_match) { // no options argument: the current default regex options and syntax apply
+  ASSERT_TRUE(f$mb_ereg_match(string("^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"), string("dachman@gmail.com")));
+}
+
 #endif
\ No newline at end of file
diff --git a/tests/phpt/mbstring/001_mb_strlen.php b/tests/phpt/mbstring/001_mb_strlen.php
index 2c395957d4..55c9d2b761 100644
--- a/tests/phpt/mbstring/001_mb_strlen.php
+++ b/tests/phpt/mbstring/001_mb_strlen.php
@@ -55,7 +55,7 @@ function test_mb_strlen_long_string() {
 test_mb_strlen_basic_ascii();
 test_mb_strlen_basic_utf_8();
 test_mb_strlen_empty_string();
-test_mb_strlen_null_encoding(); // doesn't put null through for some reason
+test_mb_strlen_null_encoding();
 test_mb_strlen_utf_16_encoding();
 test_mb_strlen_html_entities();
 test_mb_strlen_whitespaces();
diff --git a/tests/phpt/mbstring/015_mb_strimwidth.php b/tests/phpt/mbstring/015_mb_strimwidth.php
new file mode 100644
index 0000000000..2559eaa84e
--- /dev/null
+++ b/tests/phpt/mbstring/015_mb_strimwidth.php
@@ -0,0 +1,58 @@
+@ok
+<?php
+
+// Test case 1: Basic test with ASCII string
+function test_mb_strimwidth_basic_ascii() {
+  var_dump(mb_strimwidth("Hello World", 0, 5, "..."));
+}
+
+// Test case 2: Basic test with multibyte characters (UTF-8 Turkish text)
+function test_mb_strimwidth_basic_utf_8() {
+  var_dump(mb_strimwidth("İnanç Esasları", 0, 5, "...", "UTF-8"));
+}
+
+// Test case 3: Testing with empty string
+function test_mb_strimwidth_empty_string() {
+  var_dump(mb_strimwidth("", 0, 5, "...", "UTF-8"));
+}
+
+// Test case 4: Testing with specific encoding (UTF-16); NOTE(review): the literal is stored as UTF-8 here - confirm this is intended
+function test_mb_strimwidth_utf_16_encoding() {
+  var_dump(mb_strimwidth("안녕하세요", 0, 5, "...", "UTF-16"));
+}
+
+// Test case 5: Testing with trim marker
+function test_mb_strimwidth_with_trim_marker() {
+  var_dump(mb_strimwidth("This is a long string", 0, 10, "..."));
+}
+
+// Test case 6: Testing with negative width (deprecated)
+function test_mb_strimwidth_negative_width() {
+  var_dump(mb_strimwidth("This is a long string", 0, -5, "..."));
+}
+
+// Test case 7: Testing with negative start position
+function test_mb_strimwidth_negative_start() {
+  var_dump(mb_strimwidth("This is a long string", -10, 5, "..."));
+}
+
+// Test case 8: Testing with start position beyond string length
+function test_mb_strimwidth_start_beyond_length() {
+  var_dump(mb_strimwidth("This is a long string", 100, 5, "..."));
+}
+
+// Test case 9: Testing with width greater than string length
+function test_mb_strimwidth_width_greater_than_length() {
+  var_dump(mb_strimwidth("Hello", 0, 10, "..."));
+}
+
+
+test_mb_strimwidth_basic_ascii();
+test_mb_strimwidth_basic_utf_8();
+test_mb_strimwidth_empty_string();
+test_mb_strimwidth_utf_16_encoding();
+test_mb_strimwidth_with_trim_marker();
+test_mb_strimwidth_negative_width();
+test_mb_strimwidth_negative_start();
+test_mb_strimwidth_start_beyond_length();
+test_mb_strimwidth_width_greater_than_length();

From 86453a0e1179ef33c5e088f05d1fbfb9c420db49 Mon Sep 17 00:00:00 2001
From: catnyan02 <catnyan02@gmail.com>
Date: Thu, 18 Jul 2024 16:08:59 +0300
Subject: [PATCH 26/27] Fix linking problem

---
 cmake/external-libraries.cmake | 1 +
 compiler/compiler-settings.cpp | 3 +--
 runtime/mbstring/mbstring.cpp  | 5 ++---
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/cmake/external-libraries.cmake b/cmake/external-libraries.cmake
index 169fb14ae4..da79e2cbdf 100644
--- a/cmake/external-libraries.cmake
+++ b/cmake/external-libraries.cmake
@@ -30,6 +30,7 @@ if(MBFL)
         )
         FetchContent_MakeAvailable(onig)
         include_directories(${onig_SOURCE_DIR}/src)
+        add_definitions(-DONIG_LIB_DIR="${onig_BINARY_DIR}")
         message(STATUS "---------------------")
     endif()
 endif()
diff --git a/compiler/compiler-settings.cpp b/compiler/compiler-settings.cpp
index e18ea72368..274ea633a5 100644
--- a/compiler/compiler-settings.cpp
+++ b/compiler/compiler-settings.cpp
@@ -343,8 +343,7 @@ void CompilerSettings::init() {
 #endif
 
 #ifdef ONIG_LIB_DIR
-  external_static_libs.emplace_back("onig");
-  ld_flags.value_ += " -lonig";
+  ld_flags.value_ += " -L " ONIG_LIB_DIR " -lonig";
 #endif
 
 #ifdef KPHP_H3_LIB_DIR
diff --git a/runtime/mbstring/mbstring.cpp b/runtime/mbstring/mbstring.cpp
index fe62de73bd..411d361b63 100644
--- a/runtime/mbstring/mbstring.cpp
+++ b/runtime/mbstring/mbstring.cpp
@@ -55,11 +55,10 @@ static bool is_detect_incorrect_encoding_names_warning{false};
 #ifdef MBFL
 extern "C" {
 	#include <kphp/libmbfl/mbfl/mbfilter.h>
-#include <kphp/libmbfl/mbfl/mbfilter_wchar.h>
+  #include <kphp/libmbfl/mbfl/mbfilter_wchar.h>
+  #include <oniguruma.h>
 }
 
-#include <oniguruma.h>
-
 #define KPHP_UNICODE_CASE_UPPER        0
 #define KPHP_UNICODE_CASE_LOWER        1
 #define KPHP_UNICODE_CASE_TITLE        2

From cf7f205e03e9591c714cb1dd5a9ecfe8f8663830 Mon Sep 17 00:00:00 2001
From: catnyan02 <catnyan02@gmail.com>
Date: Tue, 23 Jul 2024 16:04:35 +0300
Subject: [PATCH 27/27] Add php tests

---
 .../mbstring/016_mb_regex_set_options.php     | 67 +++++++++++++++++++
 tests/phpt/mbstring/017_mb_ereg_match.php     | 58 ++++++++++++++++
 2 files changed, 125 insertions(+)
 create mode 100644 tests/phpt/mbstring/016_mb_regex_set_options.php
 create mode 100644 tests/phpt/mbstring/017_mb_ereg_match.php

diff --git a/tests/phpt/mbstring/016_mb_regex_set_options.php b/tests/phpt/mbstring/016_mb_regex_set_options.php
new file mode 100644
index 0000000000..cd1de1c144
--- /dev/null
+++ b/tests/phpt/mbstring/016_mb_regex_set_options.php
@@ -0,0 +1,67 @@
+@ok
+<?php
+
+// Test case 1: Set options, then read them back with a no-argument call
+function test_mb_regex_set_options_basic() {
+    $original_options = mb_regex_set_options();
+    var_dump(mb_regex_set_options("is"));
+    var_dump(mb_regex_set_options());
+    mb_regex_set_options($original_options);
+}
+
+// Test case 2: Set multiple options
+function test_mb_regex_set_options_multiple() {
+    $original_options = mb_regex_set_options();
+    var_dump(mb_regex_set_options("ixm"));
+    var_dump(mb_regex_set_options());
+    mb_regex_set_options($original_options);
+}
+
+// Test case 3: Set a syntax option ("j" selects Java syntax)
+function test_mb_regex_set_options_mode() {
+    $original_options = mb_regex_set_options();
+    var_dump(mb_regex_set_options("j"));
+    var_dump(mb_regex_set_options());
+    mb_regex_set_options($original_options);
+}
+
+// Test case 4: Set the "z" option (an accepted letter: it selects Perl syntax)
+function test_mb_regex_set_options_invalid() {
+    $original_options = mb_regex_set_options();
+    var_dump(mb_regex_set_options("z"));
+    var_dump(mb_regex_set_options());
+    mb_regex_set_options($original_options);
+}
+
+// Test case 5: Set empty string
+function test_mb_regex_set_options_empty_string() {
+    $original_options = mb_regex_set_options();
+    var_dump(mb_regex_set_options(""));
+    var_dump(mb_regex_set_options());
+    mb_regex_set_options($original_options);
+}
+
+// Test case 6: Set null
+function test_mb_regex_set_options_null() {
+    $original_options = mb_regex_set_options();
+    var_dump(mb_regex_set_options(null));
+    var_dump(mb_regex_set_options());
+    mb_regex_set_options($original_options);
+}
+
+// Test case 7: Set deprecated 'e' option (for PHP versions < 7.1.0); _kphp_mb_regex_init_options has no case for 'e'
+function test_mb_regex_set_options_deprecated_e() {
+    $original_options = mb_regex_set_options();
+    var_dump(mb_regex_set_options("e"));
+    var_dump(mb_regex_set_options());
+    mb_regex_set_options($original_options);
+}
+
+// Run the tests
+test_mb_regex_set_options_basic();
+test_mb_regex_set_options_multiple();
+test_mb_regex_set_options_mode();
+test_mb_regex_set_options_invalid();
+test_mb_regex_set_options_empty_string();
+test_mb_regex_set_options_null();
+test_mb_regex_set_options_deprecated_e();
diff --git a/tests/phpt/mbstring/017_mb_ereg_match.php b/tests/phpt/mbstring/017_mb_ereg_match.php
new file mode 100644
index 0000000000..c49424f1f3
--- /dev/null
+++ b/tests/phpt/mbstring/017_mb_ereg_match.php
@@ -0,0 +1,58 @@
+@ok
+<?php
+
+// Test case 1: Basic match at the beginning of the string
+function test_mb_ereg_match_basic() {
+    var_dump(mb_ereg_match("abc", "abcdef"));
+}
+
+// Test case 2: Match fails when pattern is not at the beginning
+function test_mb_ereg_match_not_beginning() {
+    var_dump(mb_ereg_match("bcd", "abcdef"));
+}
+
+// Test case 3: Case-sensitive match
+function test_mb_ereg_match_case_sensitive() {
+    var_dump(mb_ereg_match("ABC", "abcdef"));
+}
+
+// Test case 4: Case-insensitive match using the "i" option
+function test_mb_ereg_match_case_insensitive() {
+    var_dump(mb_ereg_match("ABC", "abcdef", "i"));
+}
+
+// Test case 5: Match with multibyte characters, passing an explicit empty option string
+function test_mb_ereg_match_multibyte() {
+    var_dump(mb_ereg_match("こん", "こんにちは", ""));
+}
+
+// Test case 6: Match with regex special characters
+function test_mb_ereg_match_special_chars() {
+    var_dump(mb_ereg_match("a.c", "abc"));
+}
+
+// Test case 7: Match with empty pattern
+function test_mb_ereg_match_empty_pattern() {
+    var_dump(mb_ereg_match("", "abcdef"));
+}
+
+// Test case 8: Match with empty string
+function test_mb_ereg_match_empty_string() {
+    var_dump(mb_ereg_match("abc", ""));
+}
+
+// Test case 9: Match with the "x" (extended) option
+function test_mb_ereg_match_extended_mode() {
+    var_dump(mb_ereg_match("a b c", "abc", "x"));
+}
+
+// Run the tests
+test_mb_ereg_match_basic();
+test_mb_ereg_match_not_beginning();
+test_mb_ereg_match_case_sensitive();
+test_mb_ereg_match_case_insensitive();
+test_mb_ereg_match_multibyte();
+test_mb_ereg_match_special_chars();
+test_mb_ereg_match_empty_pattern();
+test_mb_ereg_match_empty_string();
+test_mb_ereg_match_extended_mode();