Neo4j Graph Database

Neo4j graph database queries, algorithms, and vector search capabilities.


Docker Setup

 1version: '3.8'
 2
 3services:
 4  neo4j:
 5    image: neo4j:5
 6    container_name: neo4j
 7    environment:
 8      NEO4J_AUTH: neo4j/password
 9      NEO4J_PLUGINS: '["apoc", "graph-data-science"]'
10      NEO4J_dbms_security_procedures_unrestricted: apoc.*,gds.*
11    volumes:
12      - neo4j-data:/data
13      - neo4j-logs:/logs
14    ports:
15      - "7474:7474"  # HTTP
16      - "7687:7687"  # Bolt
17    healthcheck:
18      test: ["CMD-SHELL", "cypher-shell -u neo4j -p password 'RETURN 1'"]
19      interval: 10s
20      timeout: 5s
21      retries: 5
22    restart: unless-stopped
23
24volumes:
25  neo4j-data:
26  neo4j-logs:
# Start Neo4j in the background
# (with Docker Compose v2 the equivalent command is `docker compose up -d`)
docker-compose up -d

# Access browser interface
# http://localhost:7474
# Username: neo4j
# Password: password

Cypher Basics

Create Nodes

 1// Create single node
 2CREATE (p:Person {name: 'Alice', age: 30})
 3
 4// Create multiple nodes
 5CREATE 
 6  (p1:Person {name: 'Bob', age: 35}),
 7  (p2:Person {name: 'Charlie', age: 28})
 8
 9// Create with RETURN
10CREATE (p:Person {name: 'David', age: 40})
11RETURN p

Create Relationships

// NOTE: CREATE always makes new nodes, so the first and third statements
// each add another 'Alice' node. Use MERGE instead if the node may already
// exist.

// Create nodes and relationship
CREATE (a:Person {name: 'Alice'})-[:KNOWS]->(b:Person {name: 'Bob'});

// Create relationship between existing nodes
MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Charlie'})
CREATE (a)-[:KNOWS {since: 2020}]->(b);

// Create with properties
CREATE (a:Person {name: 'Alice'})-[:WORKS_AT {role: 'Engineer', since: 2019}]->(c:Company {name: 'TechCorp'});

Match (Query)

 1// Match all nodes
 2MATCH (n) RETURN n
 3
 4// Match by label
 5MATCH (p:Person) RETURN p
 6
 7// Match by property
 8MATCH (p:Person {name: 'Alice'}) RETURN p
 9
10// Match with WHERE
11MATCH (p:Person)
12WHERE p.age > 30
13RETURN p
14
15// Match relationships
16MATCH (p:Person)-[:KNOWS]->(friend)
17RETURN p.name, friend.name
18
19// Match with relationship properties
20MATCH (p:Person)-[r:WORKS_AT]->(c:Company)
21WHERE r.since > 2018
22RETURN p.name, c.name, r.role
23
24// Match patterns
25MATCH (p:Person)-[:KNOWS]->(friend)-[:KNOWS]->(fof)
26WHERE p.name = 'Alice'
27RETURN fof.name AS friend_of_friend

Update

 1// Update property
 2MATCH (p:Person {name: 'Alice'})
 3SET p.age = 31
 4RETURN p
 5
 6// Add property
 7MATCH (p:Person {name: 'Alice'})
 8SET p.email = 'alice@example.com'
 9
10// Remove property
11MATCH (p:Person {name: 'Alice'})
12REMOVE p.email
13
14// Add label
15MATCH (p:Person {name: 'Alice'})
16SET p:Developer
17
18// Update multiple properties
19MATCH (p:Person {name: 'Alice'})
20SET p += {age: 32, city: 'New York'}

Delete

 1// Delete node (must delete relationships first)
 2MATCH (p:Person {name: 'Alice'})
 3DELETE p
 4
 5// Delete node and relationships
 6MATCH (p:Person {name: 'Alice'})
 7DETACH DELETE p
 8
 9// Delete relationship
10MATCH (p:Person)-[r:KNOWS]->(friend)
11WHERE p.name = 'Alice'
12DELETE r
13
14// Delete all
15MATCH (n)
16DETACH DELETE n

Advanced Queries

Aggregation

 1// Count
 2MATCH (p:Person)
 3RETURN count(p)
 4
 5// Group by
 6MATCH (p:Person)-[:WORKS_AT]->(c:Company)
 7RETURN c.name, count(p) AS employee_count
 8
 9// Sum, avg, min, max
10MATCH (p:Person)
11RETURN 
12  avg(p.age) AS avg_age,
13  min(p.age) AS min_age,
14  max(p.age) AS max_age

Path Finding

 1// Shortest path
 2MATCH path = shortestPath(
 3  (a:Person {name: 'Alice'})-[*]-(b:Person {name: 'David'})
 4)
 5RETURN path
 6
 7// All shortest paths
 8MATCH path = allShortestPaths(
 9  (a:Person {name: 'Alice'})-[*]-(b:Person {name: 'David'})
10)
11RETURN path
12
13// Path with max length
14MATCH path = (a:Person {name: 'Alice'})-[*..3]-(b:Person)
15RETURN path

Collections

 1// Collect results
 2MATCH (p:Person)-[:KNOWS]->(friend)
 3RETURN p.name, collect(friend.name) AS friends
 4
 5// Unwind (expand list)
 6UNWIND [1, 2, 3] AS number
 7RETURN number
 8
 9// List comprehension
10MATCH (p:Person)
11RETURN [x IN collect(p.age) WHERE x > 30] AS ages_over_30

Graph Algorithms (GDS Library)

PageRank

 1// Create graph projection
 2CALL gds.graph.project(
 3  'myGraph',
 4  'Person',
 5  'KNOWS'
 6)
 7
 8// Run PageRank
 9CALL gds.pageRank.stream('myGraph')
10YIELD nodeId, score
11RETURN gds.util.asNode(nodeId).name AS name, score
12ORDER BY score DESC
13
14// Write results back
15CALL gds.pageRank.write('myGraph', {
16  writeProperty: 'pagerank'
17})

Community Detection (Louvain)

 1// Detect communities
 2CALL gds.louvain.stream('myGraph')
 3YIELD nodeId, communityId
 4RETURN gds.util.asNode(nodeId).name AS name, communityId
 5ORDER BY communityId
 6
 7// Write communities
 8CALL gds.louvain.write('myGraph', {
 9  writeProperty: 'community'
10})

Centrality

 1// Betweenness Centrality
 2CALL gds.betweenness.stream('myGraph')
 3YIELD nodeId, score
 4RETURN gds.util.asNode(nodeId).name AS name, score
 5ORDER BY score DESC
 6
 7// Degree Centrality
 8CALL gds.degree.stream('myGraph')
 9YIELD nodeId, score
10RETURN gds.util.asNode(nodeId).name AS name, score
11ORDER BY score DESC
12
13// Closeness Centrality
14CALL gds.closeness.stream('myGraph')
15YIELD nodeId, score
16RETURN gds.util.asNode(nodeId).name AS name, score
17ORDER BY score DESC

Similarity

// Node Similarity over the 'myGraph' projection created with
// gds.graph.project; yields one row per compared pair of nodes
CALL gds.nodeSimilarity.stream('myGraph')
YIELD node1, node2, similarity
RETURN
  gds.util.asNode(node1).name AS person1,
  gds.util.asNode(node2).name AS person2,
  similarity
ORDER BY similarity DESC;

Setup Vector Index

 1// Create vector index
 2CREATE VECTOR INDEX person_embeddings IF NOT EXISTS
 3FOR (p:Person)
 4ON p.embedding
 5OPTIONS {indexConfig: {
 6  `vector.dimensions`: 1536,
 7  `vector.similarity_function`: 'cosine'
 8}}
 9
10// Add embeddings to nodes
11MATCH (p:Person {name: 'Alice'})
12SET p.embedding = [0.1, 0.2, 0.3, ...]  // 1536 dimensions
 1// Find similar nodes
 2MATCH (p:Person)
 3WHERE p.embedding IS NOT NULL
 4WITH p, 
 5  gds.similarity.cosine(p.embedding, $queryEmbedding) AS similarity
 6WHERE similarity > 0.8
 7RETURN p.name, similarity
 8ORDER BY similarity DESC
 9LIMIT 10
10
11// Using vector index (Neo4j 5.11+)
12CALL db.index.vector.queryNodes(
13  'person_embeddings',
14  10,
15  [0.1, 0.2, 0.3, ...]  // Query vector
16)
17YIELD node, score
18RETURN node.name, score

Hybrid Search (Vector + Filters)

 1// Vector search with filters
 2CALL db.index.vector.queryNodes(
 3  'person_embeddings',
 4  100,
 5  $queryEmbedding
 6)
 7YIELD node, score
 8WHERE node.age > 25 AND node.city = 'New York'
 9RETURN node.name, node.age, score
10ORDER BY score DESC
11LIMIT 10

Python Integration

 1from neo4j import GraphDatabase
 2import numpy as np
 3
 4class Neo4jConnection:
 5    def __init__(self, uri, user, password):
 6        self.driver = GraphDatabase.driver(uri, auth=(user, password))
 7    
 8    def close(self):
 9        self.driver.close()
10    
11    def query(self, query, parameters=None):
12        with self.driver.session() as session:
13            result = session.run(query, parameters)
14            return [record.data() for record in result]
15    
16    def create_person(self, name, age):
17        query = """
18        CREATE (p:Person {name: $name, age: $age})
19        RETURN p
20        """
21        return self.query(query, {'name': name, 'age': age})
22    
23    def create_relationship(self, person1, person2, rel_type):
24        query = f"""
25        MATCH (a:Person {{name: $person1}})
26        MATCH (b:Person {{name: $person2}})
27        CREATE (a)-[r:{rel_type}]->(b)
28        RETURN r
29        """
30        return self.query(query, {'person1': person1, 'person2': person2})
31    
32    def find_friends(self, name):
33        query = """
34        MATCH (p:Person {name: $name})-[:KNOWS]->(friend)
35        RETURN friend.name AS name, friend.age AS age
36        """
37        return self.query(query, {'name': name})
38    
39    def add_embedding(self, name, embedding):
40        """Add vector embedding to person"""
41        query = """
42        MATCH (p:Person {name: $name})
43        SET p.embedding = $embedding
44        RETURN p
45        """
46        return self.query(query, {'name': name, 'embedding': embedding})
47    
48    def vector_search(self, query_embedding, limit=10):
49        """Search by vector similarity"""
50        query = """
51        MATCH (p:Person)
52        WHERE p.embedding IS NOT NULL
53        WITH p, 
54          reduce(dot = 0.0, i IN range(0, size(p.embedding)-1) | 
55            dot + p.embedding[i] * $query[i]
56          ) AS similarity
57        RETURN p.name, similarity
58        ORDER BY similarity DESC
59        LIMIT $limit
60        """
61        return self.query(query, {'query': query_embedding, 'limit': limit})
62
# Usage
conn = Neo4jConnection('bolt://localhost:7687', 'neo4j', 'password')
try:
    # Create nodes
    conn.create_person('Alice', 30)
    conn.create_person('Bob', 35)

    # Create relationship
    conn.create_relationship('Alice', 'Bob', 'KNOWS')

    # Query
    friends = conn.find_friends('Alice')
    print(friends)

    # Add embeddings (from OpenAI or similar); random values stand in here
    embedding = np.random.rand(1536).tolist()
    conn.add_embedding('Alice', embedding)

    # Vector search
    results = conn.vector_search(embedding)
    print(results)
finally:
    # Always release the driver, even if one of the calls above raised.
    conn.close()

Indexes

 1// Create index
 2CREATE INDEX person_name IF NOT EXISTS FOR (p:Person) ON (p.name)
 3
 4// Composite index
 5CREATE INDEX person_name_age IF NOT EXISTS FOR (p:Person) ON (p.name, p.age)
 6
 7// Full-text index
 8CREATE FULLTEXT INDEX person_search IF NOT EXISTS
 9FOR (p:Person) ON EACH [p.name, p.bio]
10
11// Use full-text search
12CALL db.index.fulltext.queryNodes('person_search', 'alice developer')
13YIELD node, score
14RETURN node.name, score
15
16// List indexes
17SHOW INDEXES
18
19// Drop index
20DROP INDEX person_name

Performance

 1// Explain query
 2EXPLAIN
 3MATCH (p:Person)-[:KNOWS]->(friend)
 4WHERE p.name = 'Alice'
 5RETURN friend.name
 6
 7// Profile query (with execution stats)
 8PROFILE
 9MATCH (p:Person)-[:KNOWS]->(friend)
10WHERE p.name = 'Alice'
11RETURN friend.name
12
13// Show query plan
14CALL dbms.listQueries()

Related Snippets