8000 Pgvector generator by andreibondarev · Pull Request #363 · patterns-ai-core/langchainrb · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Pgvector generator #363

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ GEM
erubi (~> 1.4)
rails-dom-testing (~> 2.0)
rails-html-sanitizer (~> 1.1, >= 1.2.0)
activemodel (7.0.8)
activesupport (= 7.0.8)
activerecord (7.0.8)
activemodel (= 7.0.8)
activesupport (= 7.0.8)
activesupport (7.0.8)
concurrent-ruby (~> 1.0, >= 1.0.2)
i18n (>= 1.6, < 2)
Expand Down Expand Up @@ -164,6 +169,8 @@ GEM
multi_json (1.15.0)
multi_xml (0.6.0)
multipart-post (2.3.0)
neighbor (0.3.1)
activerecord (>= 6.1)
nokogiri (1.14.3)
mini_portile2 (~> 2.8.0)
racc (~> 1.4)
Expand Down Expand Up @@ -342,6 +349,7 @@ DEPENDENCIES
langchainrb!
llama_cpp (~> 0.3.7)
milvus (~> 0.9.2)
neighbor (~> 0.3.0)
nokogiri (~> 1.13)
open-weather-ruby-client (~> 0.4.0)
pdf-reader (~> 1.4)
Expand Down
1 change: 1 addition & 0 deletions langchain.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Gem::Specification.new do |spec|
spec.add_development_dependency "hnswlib", "~> 0.8.1"
spec.add_development_dependency "hugging-face", "~> 0.3.4"
spec.add_development_dependency "milvus", "~> 0.9.2"
spec.add_development_dependency "neighbor", "~> 0.3.0"
spec.add_development_dependency "llama_cpp", "~> 0.3.7"
spec.add_development_dependency "nokogiri", "~> 1.13"
spec.add_development_dependency "open-weather-ruby-client", "~> 0.4.0"
Expand Down
75 changes: 75 additions & 0 deletions lib/generators/langchain/pgvector_generator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
require "rails/generators/active_record"

module Langchain
  module Generators
    #
    # Rails generator that wires Pgvector-backed vector search into an
    # existing ActiveRecord model. It writes two migrations (enable the
    # "vector" extension, add the vector column), an initializer that
    # configures Langchain, and injects the `vectorsearch` macro plus an
    # `after_save` hook into the model class.
    #
    # Usage:
    #     rails g langchain:pgvector --model=Product --llm=openai
    #
    class PgvectorGenerator < Rails::Generators::Base
      desc "This generator adds Pgvector vectorsearch integration to your ActiveRecord model"

      include ::ActiveRecord::Generators::Migration
      source_root File.join(__dir__, "templates")

      class_option :model, type: :string, required: true, desc: "ActiveRecord Model to add vectorsearch to", aliases: "-m"
      class_option :llm, type: :string, required: true, desc: "LLM provider that will be used to generate embeddings and completions"

      # Maps the value accepted by `--llm` to the fully-qualified LLM class name.
      LLMS = {
        "cohere" => "Langchain::LLM::Cohere",
        "google_palm" => "Langchain::LLM::GooglePalm",
        "hugging_face" => "Langchain::LLM::HuggingFace",
        "llama_cpp" => "Langchain::LLM::LlamaCpp",
        "ollama" => "Langchain::LLM::Ollama",
        "openai" => "Langchain::LLM::OpenAI",
        "replicate" => "Langchain::LLM::Replicate"
      }.freeze

      # Public methods of a generator run as steps, in definition order.
      # Validating first means an unsupported `--llm` aborts before any file
      # has been written.
      #
      # @raise [Thor::Error] when `--llm` is not one of the LLMS keys
      def validate_llm
        return if LLMS.key?(llm)

        raise Thor::Error, "Unsupported --llm value #{llm.inspect}. Expected one of: #{LLMS.keys.join(", ")}"
      end

      # Writes the migration that enables the extension and the migration that
      # adds the vector column to the model's table.
      def copy_migration
        migration_template "enable_vector_extension_template.rb", "db/migrate/enable_vector_extension.rb", migration_version: migration_version
        migration_template "add_vector_column_template.rb", "db/migrate/add_vector_column_to_#{table_name}.rb", migration_version: migration_version
      end

      # Renders config/initializers/langchain.rb from the initializer template.
      def create_initializer_file
        template "initializer.rb", "config/initializers/langchain.rb"
      end

      # Injects the vectorsearch macro and the after_save hook at the top of
      # the model class body.
      def add_to_model
        # `underscore` (not `downcase`) so that e.g. BlogPost resolves to
        # app/models/blog_post.rb and Admin::User to app/models/admin/user.rb.
        inject_into_class "app/models/#{model_name.underscore}.rb", model_name do
          "  vectorsearch\n\n  after_save :upsert_to_vectorsearch\n\n"
        end
      end

      private

      # Kept private so it is a helper for the steps/templates above rather
      # than being executed as a generator step of its own.
      #
      # @return [String] Migration version suffix, e.g. "[7.0]"
      def migration_version
        "[#{::ActiveRecord::VERSION::MAJOR}.#{::ActiveRecord::VERSION::MINOR}]"
      end

      # @return [String] Name of the model
      def model_name
        options["model"]
      end

      # Conventional table name derived from the model name. `tableize`
      # handles multi-word names (BlogPost -> blog_posts); the "/" produced
      # for namespaced models is replaced so the value is usable both as a
      # table name and inside the migration file name.
      #
      # @return [String] Table name of the model
      def table_name
        model_name.tableize.tr("/", "_")
      end

      # @return [String] LLM provider to use
      def llm
        options["llm"]
      end

      # @return [Class] The Langchain::LLM::* class selected by `--llm`
      # @raise [Thor::Error] when `--llm` is not one of the LLMS keys
      def llm_class
        class_name = LLMS.fetch(llm) do
          raise Thor::Error, "Unsupported --llm value #{llm.inspect}. Expected one of: #{LLMS.keys.join(", ")}"
        end

        Object.const_get(class_name)
      end

      # @return [Integer] Dimension of the vector to be used
      def vector_dimension
        llm_class.default_dimension
      end
    end
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Adds an `embedding` column of type :vector to the model's table.
# The `limit:` value is read from the configured vectorsearch LLM's
# `default_dimension` when the migration runs, so Langchain must already be
# configured (presumably via config/initializers/langchain.rb - confirm).
class <%= migration_class_name %> < ActiveRecord::Migration<%= migration_version %>
def change
add_column :<%= table_name %>, :embedding, :vector,
limit: Langchain
.config
.vectorsearch
.llm
.default_dimension
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Enables the "vector" database extension, which the :vector column type
# used for embeddings presumably depends on (pgvector - confirm).
class <%= migration_class_name %> < ActiveRecord::Migration<%= migration_version %>
def change
enable_extension "vector"
end
end
7 changes: 7 additions & 0 deletions lib/generators/langchain/templates/initializer.rb.tt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# frozen_string_literal: true

# Global Langchain configuration used by the `vectorsearch` model macro.
# The API key is read from an environment variable named after the LLM
# provider chosen when the generator was run.
Langchain.configure do |config|
  config.vectorsearch = Langchain::Vectorsearch::Pgvector.new(
    llm: <%= llm_class %>.new(api_key: ENV["<%= llm.upcase %>_API_KEY"])
  )
end
20 changes: 18 additions & 2 deletions lib/langchain.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
require "zeitwerk"
loader = Zeitwerk::Loader.for_gem
loader.ignore("#{__dir__}/langchainrb.rb")
loader.ignore("#{__dir__}/generators")
loader.inflector.inflect(
"ai21" => "AI21",
"ai21_response" => "AI21Response",
Expand Down Expand Up @@ -71,14 +72,29 @@ class << self
# @return [ContextualLogger]
attr_reader :logger

# @return [Pathname]
attr_reader :root

# @param logger [Logger]
# @return [ContextualLogger]
def logger=(logger)
@logger = ContextualLogger.new(logger)
end

# @return [Pathname]
attr_reader :root
# Configures global settings for Langchain
# Langchain.configure do |config|
# config.vectorsearch = Langchain::Vectorsearch::Pgvector.new(
# llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
# )
# end
def configure
yield(config)
end

# @return [Config] The global configuration object
def config
@_config ||= Config.new
end
end

self.logger ||= ::Logger.new($stdout, level: :warn)
Expand Down
22 changes: 16 additions & 6 deletions lib/langchain/active_record/hooks.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,21 @@ def upsert_to_vectorsearch
#
# @return [String] the text representation of the model
def as_vector
to_json
to_json(except: :embedding)
end

module ClassMethods
# Set the vector search provider
#
# @param provider [Object] The `Langchain::Vectorsearch::*` instance
def vectorsearch(provider:)
class_variable_set(:@@provider, provider)
def vectorsearch
# Pgvector-specific configuration
if Langchain.config.vectorsearch.is_a?(Langchain::Vectorsearch::Pgvector)
has_neighbors(:embedding)
end

Langchain.config.vectorsearch.model = self
class_variable_set(:@@provider, Langchain.config.vectorsearch)
end

# Search for similar texts
Expand All @@ -88,9 +94,13 @@ def similarity_search(query, k: 1)
k: k
)

# We use "__id" when Weaviate is the provider
ids = records.map { |record| record.dig("id") || record.dig("__id") }
where(id: ids)
if records.is_a?(::ActiveRecord::Relation)
records
else
# We use "__id" when Weaviate is the provider
ids = records.map { |record| record.dig("id") || record.dig("__id") }
where(id: ids)
end
end

# Ask a question and return the answer
Expand Down
20 changes: 20 additions & 0 deletions lib/langchain/config.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# frozen_string_literal: true

module Langchain
  # Holds the global settings for the Langchain.rb gem inside Rails apps.
  # It is meant to be populated from `config/initializers/langchain.rb`:
  #
  #     Langchain.configure do |config|
  #       config.vectorsearch = Langchain::Vectorsearch::Pgvector.new(
  #         llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
  #       )
  #     end
  class Config
    # The configured vectorsearch provider; an empty Hash until one is assigned.
    attr_reader :vectorsearch
    attr_writer :vectorsearch

    # Sets up the defaults; add defaults for future settings here.
    def initialize
      self.vectorsearch = {}
    end
  end
end
100 changes: 28 additions & 72 deletions lib/langchain/vectorsearch/pgvector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,105 +6,63 @@ class Pgvector < Base
# The PostgreSQL vector search adapter
#
# Gem requirements:
# gem "sequel", "~> 5.68.0"
# gem "pgvector", "~> 0.2"
#
# Usage:
# pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, namespace: nil)
# pgvector = Langchain::Vectorsearch::Pgvector.new(llm:, model_name:)
#

# The operators supported by the PostgreSQL vector search adapter
OPERATORS = {
"cosine_distance" => "cosine",
"euclidean_distance" => "euclidean"
}
DEFAULT_OPERATOR = "cosine_distance"
OPERATORS = [
"cosine",
"euclidean",
"inner_product"
]
DEFAULT_OPERATOR = "cosine"

attr_reader :db, :operator, :table_name, :namespace_column, :namespace, :documents_table
attr_reader :db, :operator, :llm
attr_accessor :model

# @param url [String] The URL of the PostgreSQL database
# @param index_name [String] The name of the table to use for the index
# @param llm [Object] The LLM client to use
# @param namespace [String] The namespace to use for the index when inserting/querying
def initialize(url:, index_name:, llm:, namespace: nil)
depends_on "sequel"
def initialize(llm:)
depends_on "pgvector"
depends_on "neighbor"

@db = Sequel.connect(url)

@table_name = index_name

@namespace_column = "namespace"
@namespace = namespace
@operator = OPERATORS[DEFAULT_OPERATOR]
@operator = DEFAULT_OPERATOR

super(llm: llm)
end

def documents_model
Class.new(Sequel::Model(table_name.to_sym)) do
plugin :pgvector, :vectors
end
end

# Upsert a list of texts to the index
# @param texts [Array<String>] The texts to add to the index
# @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
# @return [PG::Result] The response from the database including the ids of
# the added or updated texts.
def upsert_texts(texts:, ids:)
data = texts.zip(ids).flat_map do |(text, id)|
{id: id, content: text, vectors: llm.embed(text: text).embedding.to_s, namespace: namespace}
end
# @db[table_name.to_sym].multi_insert(data, return: :primary_key)
@db[table_name.to_sym]
.insert_conflict(
target: :id,
update: {content: Sequel[:excluded][:content], vectors: Sequel[:excluded][:vectors]}
)
.multi_insert(data, return: :primary_key)
end

# Add a list of texts to the index
# @param texts [Array<String>] The texts to add to the index
# @param ids [Array<String>] The ids to add to the index, in the same order as the texts
# @return [Array<Integer>] The ids of the added texts.
def add_texts(texts:, ids: nil)
if ids.nil? || ids.empty?
data = texts.map do |text|
{content: text, vectors: llm.embed(text: text).embedding.to_s, namespace: namespace}
end

@db[table_name.to_sym].multi_insert(data, return: :primary_key)
else
upsert_texts(texts: texts, ids: ids)
def add_texts(texts:, ids:)
embeddings = texts.map do |text|
llm.embed(text: text).embedding
end

model.find_each.with_index do |record, i|
record.update_column(:embedding, embeddings[i])
end
end

# Update a list of ids and corresponding texts to the index
# @param texts [Array<String>] The texts to add to the index
# @param ids [Array<String>] The ids to add to the index, in the same order as the texts
# @return [Array<Integer>] The ids of the updated texts.
def update_texts(texts:, ids:)
upsert_texts(texts: texts, ids: ids)
add_texts(texts: texts, ids: ids)
end

# Create default schema
# Invoke a rake task that will create an initializer (`config/initializers/langchain.rb`) file
# and db/migrate/* files
def create_default_schema
db.run "CREATE EXTENSION IF NOT EXISTS vector"
namespace_column = @namespace_column
vector_dimension = llm.default_dimension
db.create_table? table_name.to_sym do
primary_key :id
text :content
column :vectors, "vector(#{vector_dimension})"
text namespace_column.to_sym, default: nil
end
Rake::Task["pgvector"].invoke
end

# Destroy default schema
def destroy_default_schema
db.drop_table? table_name.to_sym
# Tell the user to rollback the migration
end

# Search for similar texts in the index
Expand All @@ -126,11 +84,9 @@ def similarity_search(query:, k: 4)
# @param k [Integer] The number of top results to return
# @return [Array<Hash>] The results of the search
def similarity_search_by_vector(embedding:, k: 4)
db.transaction do # BEGIN
documents_model
.nearest_neighbors(:vectors, embedding, distance: operator).limit(k)
.where(namespace_column.to_sym => namespace)
end
model
.nearest_neighbors(:embedding, embedding, distance: operator)
.limit(k)
end

# Ask a question and return the answer
Expand All @@ -142,7 +98,7 @@ def ask(question:, k: 4, &block)
search_results = similarity_search(query: question, k: k)

context = search_results.map do |result|
result.content.to_s
result.as_vector
end
context = context.join("\n---\n")

Expand Down
Loading
0