Python Dataclasses

Python dataclasses for clean, boilerplate-free data structures.


Basic Usage

 1from dataclasses import dataclass
 2
 3@dataclass
 4class Point:
 5    x: float
 6    y: float
 7
 8# Automatically generates __init__, __repr__, __eq__
 9p1 = Point(1.0, 2.0)
10p2 = Point(1.0, 2.0)
11
12print(p1)           # Point(x=1.0, y=2.0)
13print(p1 == p2)     # True

Default Values

 1from dataclasses import dataclass, field
 2
 3@dataclass
 4class User:
 5    name: str
 6    age: int = 0
 7    email: str = "unknown@example.com"
 8    active: bool = True
 9
10user1 = User("Alice")
11print(user1)
12# User(name='Alice', age=0, email='unknown@example.com', active=True)
13
14user2 = User("Bob", 30, "bob@example.com")
15print(user2)
16# User(name='Bob', age=30, email='bob@example.com', active=True)

Mutable Default Values

 1from dataclasses import dataclass, field
 2from typing import List
 3
 4# ❌ WRONG - Don't use mutable defaults directly
 5# @dataclass
 6# class Team:
 7#     members: List[str] = []  # ValueError at class creation: mutable default not allowed, use default_factory
 8
 9# ✅ CORRECT - Use field with default_factory
10@dataclass
11class Team:
12    name: str
13    members: List[str] = field(default_factory=list)
14
15team1 = Team("Alpha")
16team2 = Team("Beta")
17
18team1.members.append("Alice")
19print(team1.members)  # ['Alice']
20print(team2.members)  # []  (separate list!)

Field Options

 1from dataclasses import dataclass, field
 2
 3@dataclass
 4class Product:
 5    name: str
 6    price: float
 7    
 8    # Exclude from __init__
 9    id: int = field(init=False)
10    
11    # Exclude from __repr__
12    secret_key: str = field(repr=False, default="secret")
13    
14    # Exclude from comparison
15    created_at: str = field(compare=False, default="2024-01-01")
16    
17    # Exclude from hash (only has an effect when __hash__ is generated, e.g. frozen=True or unsafe_hash=True)
18    description: str = field(hash=False, default="")
19    
20    def __post_init__(self):
21        # Set id after initialization
22        self.id = hash(self.name)
23
24product = Product("Widget", 9.99)
25print(product)
26# Product(name='Widget', price=9.99, id=-123456789, created_at='2024-01-01', description='')  (id varies per run: str hashes are salted)

Post-Init Processing

 1from dataclasses import dataclass, field
 2
 3@dataclass
 4class Rectangle:
 5    width: float
 6    height: float
 7    area: float = field(init=False)
 8    
 9    def __post_init__(self):
10        self.area = self.width * self.height
11
12rect = Rectangle(10, 5)
13print(rect.area)  # 50

Frozen (Immutable) Dataclasses

 1from dataclasses import dataclass
 2
 3@dataclass(frozen=True)
 4class Point:
 5    x: float
 6    y: float
 7
 8p = Point(1.0, 2.0)
 9# p.x = 3.0  # FrozenInstanceError!
10
11# Frozen dataclasses are hashable
12points = {Point(0, 0), Point(1, 1), Point(0, 0)}
13print(len(points))  # 2 (duplicates removed)

Inheritance

 1from dataclasses import dataclass
 2
 3@dataclass
 4class Person:
 5    name: str
 6    age: int
 7
 8@dataclass
 9class Employee(Person):
10    employee_id: int
11    department: str
12
13emp = Employee("Alice", 30, 12345, "Engineering")
14print(emp)
15# Employee(name='Alice', age=30, employee_id=12345, department='Engineering')

Ordering

 1from dataclasses import dataclass, field
 2
 3@dataclass(order=True)
 4class Student:
 5    name: str = field(compare=False)
 6    grade: float
 7
 8students = [
 9    Student("Charlie", 85.5),
10    Student("Alice", 92.0),
11    Student("Bob", 88.0),
12]
13
14students.sort()
15for s in students:
16    print(s)
17# Student(name='Charlie', grade=85.5)
18# Student(name='Bob', grade=88.0)
19# Student(name='Alice', grade=92.0)

Custom Sort Key

 1from dataclasses import dataclass, field
 2from typing import Any
 3
 4@dataclass(order=True)
 5class Task:
 6    priority: int = field(compare=True)
 7    name: str = field(compare=False)
 8    description: str = field(compare=False)
 9
10tasks = [
11    Task(3, "Low priority", "Can wait"),
12    Task(1, "Critical", "Do now!"),
13    Task(2, "Medium", "Soon"),
14]
15
16tasks.sort()
17for t in tasks:
18    print(f"{t.priority}: {t.name}")
19# 1: Critical
20# 2: Medium
21# 3: Low priority

Conversion Methods

 1from dataclasses import dataclass, asdict, astuple
 2
 3@dataclass
 4class User:
 5    name: str
 6    age: int
 7    email: str
 8
 9user = User("Alice", 30, "alice@example.com")
10
11# Convert to dict
12print(asdict(user))
13# {'name': 'Alice', 'age': 30, 'email': 'alice@example.com'}
14
15# Convert to tuple
16print(astuple(user))
17# ('Alice', 30, 'alice@example.com')

Nested Dataclasses

 1from dataclasses import dataclass, asdict
 2
 3@dataclass
 4class Address:
 5    street: str
 6    city: str
 7    zip_code: str
 8
 9@dataclass
10class Person:
11    name: str
12    age: int
13    address: Address
14
15person = Person(
16    "Alice",
17    30,
18    Address("123 Main St", "Springfield", "12345")
19)
20
21print(person)
22# Person(name='Alice', age=30, address=Address(street='123 Main St', city='Springfield', zip_code='12345'))
23
24# Convert nested to dict
25print(asdict(person))
26# {'name': 'Alice', 'age': 30, 'address': {'street': '123 Main St', 'city': 'Springfield', 'zip_code': '12345'}}

Slots for Memory Efficiency

 1from dataclasses import dataclass
 2
 3@dataclass(slots=True)  # Python 3.10+
 4class Point:
 5    x: float
 6    y: float
 7
 8# Uses __slots__ internally for memory efficiency
 9# Faster attribute access, lower memory usage
10# Cannot add new attributes dynamically

KW-Only Fields

 1from dataclasses import dataclass
 2
 3@dataclass(kw_only=True)  # Python 3.10+
 4class User:
 5    name: str
 6    age: int
 7    email: str
 8
 9# Must use keyword arguments
10user = User(name="Alice", age=30, email="alice@example.com")
11# user = User("Alice", 30, "alice@example.com")  # TypeError!

Match-Case Support

 1from dataclasses import dataclass
 2
 3@dataclass
 4class Point:
 5    x: float
 6    y: float
 7
 8def describe_point(p: Point):
 9    match p:
10        case Point(0, 0):
11            return "Origin"
12        case Point(x, 0):
13            return f"On X-axis at {x}"
14        case Point(0, y):
15            return f"On Y-axis at {y}"
16        case Point(x, y):
17            return f"At ({x}, {y})"
18
19print(describe_point(Point(0, 0)))    # Origin
20print(describe_point(Point(5, 0)))    # On X-axis at 5
21print(describe_point(Point(3, 4)))    # At (3, 4)

Validation

 1from dataclasses import dataclass
 2
 3@dataclass
 4class User:
 5    name: str
 6    age: int
 7    email: str
 8    
 9    def __post_init__(self):
10        if self.age < 0:
11            raise ValueError("Age cannot be negative")
12        if "@" not in self.email:
13            raise ValueError("Invalid email")
14        if not self.name:
15            raise ValueError("Name cannot be empty")
16
17try:
18    user = User("", -5, "invalid")
19except ValueError as e:
20    print(e)  # Age cannot be negative (the age check runs first)

Factory Pattern

 1from dataclasses import dataclass, field
 2from typing import ClassVar
 3from datetime import datetime
 4
 5@dataclass
 6class LogEntry:
 7    message: str
 8    timestamp: datetime = field(default_factory=datetime.now)
 9    level: str = "INFO"
10    
11    # Class variable (not an instance field)
12    log_count: ClassVar[int] = 0
13    
14    def __post_init__(self):
15        LogEntry.log_count += 1
16
17log1 = LogEntry("First log")
18log2 = LogEntry("Second log")
19
20print(f"Total logs: {LogEntry.log_count}")  # 2
21print(log1.timestamp < log2.timestamp)  # True, provided the clock advanced between the two calls

JSON Serialization

 1from dataclasses import dataclass, asdict
 2import json
 3
 4@dataclass
 5class User:
 6    name: str
 7    age: int
 8    email: str
 9
10user = User("Alice", 30, "alice@example.com")
11
12# To JSON
13json_str = json.dumps(asdict(user))
14print(json_str)
15# {"name": "Alice", "age": 30, "email": "alice@example.com"}
16
17# From JSON
18data = json.loads(json_str)
19user2 = User(**data)
20print(user2)
21# User(name='Alice', age=30, email='alice@example.com')

Complex Example: API Response

 1from dataclasses import dataclass, field
 2from typing import List, Optional
 3from datetime import datetime
 4
 5@dataclass
 6class Author:
 7    id: int
 8    name: str
 9    email: str
10
11@dataclass
12class Comment:
13    id: int
14    text: str
15    author: Author
16    created_at: datetime = field(default_factory=datetime.now)
17
18@dataclass
19class Post:
20    id: int
21    title: str
22    content: str
23    author: Author
24    comments: List[Comment] = field(default_factory=list)
25    tags: List[str] = field(default_factory=list)
26    published: bool = False
27    views: int = 0
28    created_at: datetime = field(default_factory=datetime.now)
29    
30    def add_comment(self, text: str, author: Author):
31        comment = Comment(
32            id=len(self.comments) + 1,
33            text=text,
34            author=author
35        )
36        self.comments.append(comment)
37    
38    def publish(self):
39        self.published = True
40
41# Usage
42author = Author(1, "Alice", "alice@example.com")
43post = Post(1, "Python Tips", "Here are some tips...", author)
44post.tags = ["python", "programming"]
45post.publish()
46
47commenter = Author(2, "Bob", "bob@example.com")
48post.add_comment("Great post!", commenter)
49
50print(f"Post: {post.title}")
51print(f"Published: {post.published}")
52print(f"Comments: {len(post.comments)}")

Comparison with Regular Classes

 1# Without dataclass (verbose)
 2class PointOld:
 3    def __init__(self, x: float, y: float):
 4        self.x = x
 5        self.y = y
 6    
 7    def __repr__(self):
 8        return f"PointOld(x={self.x}, y={self.y})"
 9    
10    def __eq__(self, other):
11        if not isinstance(other, PointOld):
12            return NotImplemented
13        return self.x == other.x and self.y == other.y
14
15# With dataclass (concise; needs "from dataclasses import dataclass")
16@dataclass
17class Point:
18    x: float
19    y: float
20
21# Both work the same, but dataclass is much shorter!

Related Snippets