partitech / doctrine-pgvector
PostgreSQL vector type for Doctrine
Installs: 8 432
Dependents: 0
Suggesters: 0
Security: 0
Stars: 8
Watchers: 2
Forks: 4
Open Issues: 0
Requires
- php: ^8.0
- doctrine/dbal: ^3.6
- doctrine/orm: ^2.19|^3.0
Requires (Dev)
- phpunit/phpunit: ^5.2
README
Description
PGVector type for Doctrine
Installation
composer require partitech/doctrine-pgvector
Doctrine configuration
doctrine: dbal: types: vector: Partitech\DoctrinePgVector\Type\VectorType orm: dql: string_functions: distance: Partitech\DoctrinePgVector\Query\Distance inner_product: Partitech\DoctrinePgVector\Query\InnerProduct cosine_similarity: Partitech\DoctrinePgVector\Query\CosineSimilarity
Usage
You can now use the vector type in your entities:
use Doctrine\ORM\Mapping as ORM; /** * @ORM\Entity() */ class YourEntity { #[ORM\Column(type: 'vector', length: 1024, nullable: true)] private $vectors; }
If you generate the entity with the Symfony console command make:entity, manually add the length parameter to the column attribute of the vector field. The length is the dimension of your embedding model.
For example, OpenAI models use these dimensions:
text-embedding-3-small : 1536
text-embedding-3-large : 3072 (customizable)
Mistral AI
Mistral-embed : 1024
Additionally, you should manually add an HNSW index to your vector column. Be aware that HNSW indexes support at most 2,000 dimensions.
L2 distance
CREATE INDEX ON items USING hnsw (embedding vector_l2_ops);
Inner product
CREATE INDEX ON items USING hnsw (embedding vector_ip_ops);
Cosine distance
CREATE INDEX ON items USING hnsw (embedding vector_cosine_ops);
Basic usage:
distance
To get
SELECT * FROM embeddings WHERE vectors <-> '[3,1,2]' < 5
use
$floatArray = array_map(function() { return mt_rand(0, 1000000) / 1000000; }, array_fill(0, 1024, null)); $query = $this->entityManager->createQuery( "SELECT i FROM App\Entity\Embeddings i ORDER BY distance(i.vectors, :vector) ASC" ); $query->setParameter('vector', $floatArray, 'vector'); $results = $query->setMaxResults(5)->getResult(); dump($results);
$qb = $this->entityManager->createQueryBuilder(); $qb->select('e') ->from('App:Embeddings', 'e') ->orderBy('distance(e.vectors, :vector)') ->setParameter('vector', $floatArray, 'vector') ->setMaxResults(5) ; $result = $qb->getQuery()->getResult(); dump($result);
Inner product
To get
SELECT (vectors <#> '[3,1,2]') * -1, * FROM embeddings
use
$floatArray = array_map(function() { return mt_rand(0, 1000000) / 1000000; }, array_fill(0, 1024, null)); $query = $this->entityManager->createQuery( "SELECT inner_product(e.vectors, :vector) , e FROM App\Entity\Embeddings e" ); $query->setParameter('vector', $floatArray, 'vector'); $results = $query->setMaxResults(5)->getResult(); dump($results);
$qb = $this->entityManager->createQueryBuilder(); $qb->select('e') ->addSelect('inner_product(e.vectors, :vector)') ->from('App:Embeddings', 'e') ->setParameter('vector', $floatArray, 'vector') ->setMaxResults(5) ; $result = $qb->getQuery()->getResult(); dump($result);
Cosine similarity
To get
SELECT 1 - (vectors <=> '[3,1,2]'), * FROM embeddings
use
$floatArray = array_map(function() { return mt_rand(0, 1000000) / 1000000; }, array_fill(0, 1024, null)); $query = $this->entityManager->createQuery( "SELECT cosine_similarity(e.vectors, :vector) , e FROM App\Entity\Embeddings e" ); $query->setParameter('vector', $floatArray, 'vector'); $results = $query->setMaxResults(5)->getResult(); dump($results);
$qb = $this->entityManager->createQueryBuilder(); $qb->select('e') ->addSelect('cosine_similarity(e.vectors, :vector)') ->from('App:Embeddings', 'e') ->setParameter('vector', $floatArray, 'vector') ->setMaxResults(5) ; $result = $qb->getQuery()->getResult(); dump($result);