Python Dataclasses
Python dataclasses for clean, boilerplate-free data structures.
Basic Usage
1from dataclasses import dataclass
2
3@dataclass
4class Point:
5 x: float
6 y: float
7
8# Automatically generates __init__, __repr__, __eq__
9p1 = Point(1.0, 2.0)
10p2 = Point(1.0, 2.0)
11
12print(p1) # Point(x=1.0, y=2.0)
13print(p1 == p2) # True
Default Values
1from dataclasses import dataclass, field
2
3@dataclass
4class User:
5 name: str
6 age: int = 0
7 email: str = "unknown@example.com"
8 active: bool = True
9
10user1 = User("Alice")
11print(user1)
12# User(name='Alice', age=0, email='unknown@example.com', active=True)
13
14user2 = User("Bob", 30, "bob@example.com")
15print(user2)
16# User(name='Bob', age=30, email='bob@example.com', active=True)
Mutable Default Values
1from dataclasses import dataclass, field
2from typing import List
3
4# ❌ WRONG - Don't use mutable defaults directly
5# @dataclass
6# class Team:
7# members: List[str] = [] # ValueError at class definition: mutable default is not allowed, use default_factory
8
9# ✅ CORRECT - Use field with default_factory
10@dataclass
11class Team:
12 name: str
13 members: List[str] = field(default_factory=list)
14
15team1 = Team("Alpha")
16team2 = Team("Beta")
17
18team1.members.append("Alice")
19print(team1.members) # ['Alice']
20print(team2.members) # [] (separate list!)
Field Options
1from dataclasses import dataclass, field
2
3@dataclass
4class Product:
5 name: str
6 price: float
7
8 # Exclude from __init__
9 id: int = field(init=False)
10
11 # Exclude from __repr__
12 secret_key: str = field(repr=False, default="secret")
13
14 # Exclude from comparison
15 created_at: str = field(compare=False, default="2024-01-01")
16
17 # Exclude from hash
18 description: str = field(hash=False, default="")
19
20 def __post_init__(self):
21 # Set id after initialization
22 self.id = hash(self.name)
23
24product = Product("Widget", 9.99)
25print(product)
26# Product(name='Widget', price=9.99, id=-123456789, created_at='2024-01-01', description='') (the id value varies between runs: str hashes are randomized per process)
Post-Init Processing
1from dataclasses import dataclass, field
2
3@dataclass
4class Rectangle:
5 width: float
6 height: float
7 area: float = field(init=False)
8
9 def __post_init__(self):
10 self.area = self.width * self.height
11
12rect = Rectangle(10, 5)
13print(rect.area) # 50 (int arguments are not converted to float)
Frozen (Immutable) Dataclasses
1from dataclasses import dataclass
2
3@dataclass(frozen=True)
4class Point:
5 x: float
6 y: float
7
8p = Point(1.0, 2.0)
9# p.x = 3.0 # FrozenInstanceError!
10
11# Frozen dataclasses are hashable
12points = {Point(0, 0), Point(1, 1), Point(0, 0)}
13print(len(points)) # 2 (duplicates removed)
Inheritance
1from dataclasses import dataclass
2
3@dataclass
4class Person:
5 name: str
6 age: int
7
8@dataclass
9class Employee(Person):
10 employee_id: int
11 department: str
12
13emp = Employee("Alice", 30, 12345, "Engineering")
14print(emp)
15# Employee(name='Alice', age=30, employee_id=12345, department='Engineering')
Ordering
1from dataclasses import dataclass, field
2
3@dataclass(order=True)
4class Student:
5 name: str = field(compare=False)
6 grade: float
7
8students = [
9 Student("Charlie", 85.5),
10 Student("Alice", 92.0),
11 Student("Bob", 88.0),
12]
13
14students.sort()
15for s in students:
16 print(s)
17# Student(name='Charlie', grade=85.5)
18# Student(name='Bob', grade=88.0)
19# Student(name='Alice', grade=92.0)
Custom Sort Key
1from dataclasses import dataclass, field
2from typing import Any
3
4@dataclass(order=True)
5class Task:
6 priority: int = field(compare=True)
7 name: str = field(compare=False)
8 description: str = field(compare=False)
9
10tasks = [
11 Task(3, "Low priority", "Can wait"),
12 Task(1, "Critical", "Do now!"),
13 Task(2, "Medium", "Soon"),
14]
15
16tasks.sort()
17for t in tasks:
18 print(f"{t.priority}: {t.name}")
19# 1: Critical
20# 2: Medium
21# 3: Low priority
Conversion Methods
1from dataclasses import dataclass, asdict, astuple
2
3@dataclass
4class User:
5 name: str
6 age: int
7 email: str
8
9user = User("Alice", 30, "alice@example.com")
10
11# Convert to dict
12print(asdict(user))
13# {'name': 'Alice', 'age': 30, 'email': 'alice@example.com'}
14
15# Convert to tuple
16print(astuple(user))
17# ('Alice', 30, 'alice@example.com')
Nested Dataclasses
1from dataclasses import dataclass, asdict
2
3@dataclass
4class Address:
5 street: str
6 city: str
7 zip_code: str
8
9@dataclass
10class Person:
11 name: str
12 age: int
13 address: Address
14
15person = Person(
16 "Alice",
17 30,
18 Address("123 Main St", "Springfield", "12345")
19)
20
21print(person)
22# Person(name='Alice', age=30, address=Address(street='123 Main St', city='Springfield', zip_code='12345'))
23
24# Convert nested to dict
25print(asdict(person))
26# {'name': 'Alice', 'age': 30, 'address': {'street': '123 Main St', 'city': 'Springfield', 'zip_code': '12345'}}
Slots for Memory Efficiency
1from dataclasses import dataclass
2
3@dataclass(slots=True) # Python 3.10+
4class Point:
5 x: float
6 y: float
7
8# Uses __slots__ internally for memory efficiency
9# Faster attribute access, lower memory usage
10# Cannot add new attributes dynamically
KW-Only Fields
1from dataclasses import dataclass
2
3@dataclass(kw_only=True) # Python 3.10+
4class User:
5 name: str
6 age: int
7 email: str
8
9# Must use keyword arguments
10user = User(name="Alice", age=30, email="alice@example.com")
11# user = User("Alice", 30, "alice@example.com") # TypeError!
Match-Case Support
1from dataclasses import dataclass
2
3@dataclass
4class Point:
5 x: float
6 y: float
7
8def describe_point(p: Point):
9 match p:
10 case Point(0, 0):
11 return "Origin"
12 case Point(x, 0):
13 return f"On X-axis at {x}"
14 case Point(0, y):
15 return f"On Y-axis at {y}"
16 case Point(x, y):
17 return f"At ({x}, {y})"
18
19print(describe_point(Point(0, 0))) # Origin
20print(describe_point(Point(5, 0))) # On X-axis at 5
21print(describe_point(Point(3, 4))) # At (3, 4)
Validation
1from dataclasses import dataclass
2
3@dataclass
4class User:
5 name: str
6 age: int
7 email: str
8
9 def __post_init__(self):
10 if self.age < 0:
11 raise ValueError("Age cannot be negative")
12 if "@" not in self.email:
13 raise ValueError("Invalid email")
14 if not self.name:
15 raise ValueError("Name cannot be empty")
16
17try:
18 user = User("", -5, "invalid")
19except ValueError as e:
20 print(e) # Age cannot be negative (age is the first check to fail)
Factory Pattern
1from dataclasses import dataclass, field
2from typing import ClassVar
3from datetime import datetime
4
5@dataclass
6class LogEntry:
7 message: str
8 timestamp: datetime = field(default_factory=datetime.now)
9 level: str = "INFO"
10
11 # Class variable (not an instance field)
12 log_count: ClassVar[int] = 0
13
14 def __post_init__(self):
15 LogEntry.log_count += 1
16
17log1 = LogEntry("First log")
18log2 = LogEntry("Second log")
19
20print(f"Total logs: {LogEntry.log_count}") # 2
21print(log1.timestamp < log2.timestamp) # Usually True (can be False if both entries get the same clock reading)
JSON Serialization
1from dataclasses import dataclass, asdict
2import json
3
4@dataclass
5class User:
6 name: str
7 age: int
8 email: str
9
10user = User("Alice", 30, "alice@example.com")
11
12# To JSON
13json_str = json.dumps(asdict(user))
14print(json_str)
15# {"name": "Alice", "age": 30, "email": "alice@example.com"}
16
17# From JSON
18data = json.loads(json_str)
19user2 = User(**data)
20print(user2)
21# User(name='Alice', age=30, email='alice@example.com')
Complex Example: API Response
1from dataclasses import dataclass, field
2from typing import List, Optional
3from datetime import datetime
4
5@dataclass
6class Author:
7 id: int
8 name: str
9 email: str
10
11@dataclass
12class Comment:
13 id: int
14 text: str
15 author: Author
16 created_at: datetime = field(default_factory=datetime.now)
17
18@dataclass
19class Post:
20 id: int
21 title: str
22 content: str
23 author: Author
24 comments: List[Comment] = field(default_factory=list)
25 tags: List[str] = field(default_factory=list)
26 published: bool = False
27 views: int = 0
28 created_at: datetime = field(default_factory=datetime.now)
29
30 def add_comment(self, text: str, author: Author):
31 comment = Comment(
32 id=len(self.comments) + 1,
33 text=text,
34 author=author
35 )
36 self.comments.append(comment)
37
38 def publish(self):
39 self.published = True
40
41# Usage
42author = Author(1, "Alice", "alice@example.com")
43post = Post(1, "Python Tips", "Here are some tips...", author)
44post.tags = ["python", "programming"]
45post.publish()
46
47commenter = Author(2, "Bob", "bob@example.com")
48post.add_comment("Great post!", commenter)
49
50print(f"Post: {post.title}")
51print(f"Published: {post.published}")
52print(f"Comments: {len(post.comments)}")
Comparison with Regular Classes
1# Without dataclass (verbose)
2class PointOld:
3 def __init__(self, x: float, y: float):
4 self.x = x
5 self.y = y
6
7 def __repr__(self):
8 return f"PointOld(x={self.x}, y={self.y})"
9
10 def __eq__(self, other):
11 if not isinstance(other, PointOld):
12 return NotImplemented
13 return self.x == other.x and self.y == other.y
14
15# With dataclass (concise; requires `from dataclasses import dataclass`)
16@dataclass
17class Point:
18 x: float
19 y: float
20
21# Both work the same, but dataclass is much shorter!
Related Snippets
- Click CLI Framework
  Building CLI applications with Click in Python
- FastAPI with OpenAPI
  FastAPI with automatic OpenAPI documentation using Pydantic models and …
- Flask Essentials
  Flask web framework essentials for building web applications and APIs. …
- Function Timing Decorator
  Decorator to measure function execution time
- LangChain Chatbot with Tools
  Simple stdin chatbot using LangChain with tool calling (OpenRouter). …
- Pandas DataFrames Essential Patterns
  Essential patterns for working with Pandas DataFrames: creation, manipulation, …
- Pydantic Data Validation
  Pydantic - Data validation using Python type hints. Installation: pip install …
- Python Metaclasses
  Python metaclasses with visual explanations using Mermaid diagrams. What are …
- Python Virtual Environments
  Managing Python virtual environments and dependencies
- Random Forests in Depth
  Comprehensive guide to Random Forests: theory, implementation, tuning, and …
- Scikit-learn Common Patterns
  Common patterns and workflows for scikit-learn: preprocessing, model training, …