Skip to content

Commit 77ee70c

Browse files
committed
refactor(statistics): introduce a statistics base model based on SQL VIEW
This is still WIP, but I think this is the right way forward. TODO: * Further refactor the views, as the customer,project,task viewsets should return all the selected objects, regardless of reported sum. I think we will need to base those viewsets off of their respective models and join/prefetch the report statistic onto it * Finalize the rest of the refactoring to make it work again Optionally (and I hope it's not necessary): Build a statistic (database) view for all statistics endpoints. This would allow us to optimize the whole thing much more (and improve performance AND readability) at the cost of quite some more boilerplate code
1 parent 0a75577 commit 77ee70c

File tree

7 files changed

+143
-56
lines changed

7 files changed

+143
-56
lines changed

backend/timed/reports/filters.py

+4
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
from timed.tracking.models import Report
2020

2121

22+
class ReportStatisticFilterset(FilterSet):
23+
pass
24+
25+
2226
class StatisticFiltersetBase:
2327
def filter_has_reviewer(
2428
self, queryset: QuerySet[Report], _name: str, value: int
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
from django.db import migrations, models
2+
3+
CREATE_VIEW_SQL = """
4+
CREATE VIEW "reports_reportstatistic" AS (
5+
SELECT
6+
task_id as id, -- avoid jumping through hoops for Django
7+
task_id,
8+
user_id,
9+
10+
projects_project.id as project_id,
11+
projects_project.customer_id as customer_id,
12+
13+
sum(duration) as duration,
14+
15+
date,
16+
to_char(date, 'YYYY') as year,
17+
to_char(date, 'YYYY-MM') as month
18+
19+
FROM tracking_report
20+
JOIN projects_task ON projects_task.id = tracking_report.task_id
21+
JOIN projects_project ON projects_project.id = projects_task.project_id
22+
23+
GROUP BY
24+
task_id,
25+
user_id,
26+
date,
27+
projects_task.id,
28+
projects_project.id,
29+
projects_project.customer_id
30+
)
31+
"""
32+
33+
DROP_VIEW_SQL = """
34+
DROP VIEW "reports_reportstatistic"
35+
"""
36+
37+
38+
class Migration(migrations.Migration):
39+
40+
dependencies = [
41+
('tracking', '0001_initial')
42+
]
43+
44+
operations = [
45+
migrations.RunSQL(CREATE_VIEW_SQL, DROP_VIEW_SQL),
46+
migrations.CreateModel(
47+
name='reportstatistic',
48+
fields=[
49+
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
50+
('duration', models.DurationField()),
51+
('month', models.CharField(max_length=7)),
52+
('year', models.CharField(max_length=4)),
53+
],
54+
options={
55+
'managed': False,
56+
},
57+
),
58+
]

backend/timed/reports/migrations/__init__.py

Whitespace-only changes.

backend/timed/reports/models.py

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from django.db import models
2+
3+
from timed.employment.models import User
4+
from timed.projects.models import Customer, Project, Task
5+
6+
7+
class ReportStatistic(models.Model):
8+
# Implement a basic STAR SCHEMA view: Basic aggregate is done in the VIEW,
9+
# everything else of interest can be JOINed directly
10+
11+
task = models.OneToOneField(
12+
Task, on_delete=models.DO_NOTHING, null=False, related_name="+"
13+
)
14+
user = models.ForeignKey(User, on_delete=models.DO_NOTHING, null=False)
15+
project = models.ForeignKey(Project, on_delete=models.DO_NOTHING, null=False)
16+
customer = models.ForeignKey(Customer, on_delete=models.DO_NOTHING, null=False)
17+
duration = models.DurationField()
18+
19+
month = models.CharField(max_length=7)
20+
year = models.CharField(max_length=4)
21+
date = models.DateField()
22+
23+
class Meta:
24+
managed = False
25+
26+
def __str__(self):
27+
return f"ReportStat({self.task} @ {self.date}, {self.user})"

backend/timed/reports/serializers.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,15 @@
2222

2323

2424
class YearStatisticSerializer(TotalTimeRootMetaMixin, Serializer):
25-
duration = DurationField()
25+
duration = DurationField(read_only=True)
2626
year = IntegerField()
2727

2828
class Meta:
2929
resource_name = "year-statistics"
3030

3131

3232
class MonthStatisticSerializer(TotalTimeRootMetaMixin, Serializer):
33-
duration = DurationField()
33+
duration = DurationField(read_only=True)
3434
year = IntegerField()
3535
month = IntegerField()
3636

@@ -39,16 +39,16 @@ class Meta:
3939

4040

4141
class CustomerStatisticSerializer(TotalTimeRootMetaMixin, Serializer):
42-
duration = DurationField()
43-
name = CharField(read_only=True)
42+
duration = DurationField(read_only=True)
43+
name = CharField(read_only=True, source="customer__name")
4444

4545
class Meta:
4646
resource_name = "customer-statistics"
4747

4848

4949
class ProjectStatisticSerializer(TotalTimeRootMetaMixin, Serializer):
50-
duration = DurationField()
51-
name = CharField()
50+
duration = DurationField(read_only=True)
51+
name = CharField(source="project__name")
5252
amount_offered = DecimalField(max_digits=None, decimal_places=2)
5353
amount_offered_currency = CharField()
5454
amount_invoiced = DecimalField(max_digits=None, decimal_places=2)

backend/timed/reports/tests/test_customer_statistic.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,13 @@
1010

1111

1212
@pytest.mark.parametrize(
13-
("is_employed", "is_customer_assignee", "is_customer", "expected", "status_code"),
13+
(
14+
"is_employed",
15+
"is_customer_assignee",
16+
"is_customer",
17+
"num_queries",
18+
"status_code",
19+
),
1420
[
1521
(False, True, False, 1, status.HTTP_403_FORBIDDEN),
1622
(False, True, True, 1, status.HTTP_403_FORBIDDEN),
@@ -24,7 +30,7 @@ def test_customer_statistic_list(
2430
is_employed,
2531
is_customer_assignee,
2632
is_customer,
27-
expected,
33+
num_queries,
2834
status_code,
2935
django_assert_num_queries,
3036
setup_customer_and_employment_status,
@@ -49,7 +55,7 @@ def test_customer_statistic_list(
4955
report2 = ReportFactory.create(duration=timedelta(hours=4))
5056

5157
url = reverse("customer-statistic-list")
52-
with django_assert_num_queries(expected):
58+
with django_assert_num_queries(num_queries):
5359
result = auth_client.get(url, data={"ordering": "duration"})
5460
assert result.status_code == status_code
5561

backend/timed/reports/views.py

+39-47
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from timed.permissions import IsAuthenticated, IsInternal, IsSuperUser
2222
from timed.projects.models import Customer, Project, Task
2323
from timed.reports import serializers
24+
from timed.reports.models import ReportStatistic
2425
from timed.tracking.filters import ReportFilterSet
2526
from timed.tracking.models import Report
2627
from timed.tracking.views import ReportViewSet
@@ -35,7 +36,7 @@
3536
from timed.employment.models import User
3637

3738

38-
class BaseStatisticQuerysetMixin:
39+
class BaseStatisticQuerysetMixin(AggregateQuerysetMixin):
3940
"""Base statistics queryset mixin.
4041
4142
Build and filter the statistics queryset according to the following
@@ -51,7 +52,7 @@ class BaseStatisticQuerysetMixin:
5152
5253
For this to work, each viewset defines two properties:
5354
54-
* The `qs_fields` define which fields are to be selected
55+
* The `select_fields` is a list of fields to select (and implicitly GROUP BY)
5556
* The `pk_field` is an expression that will be used as a primary key in the
5657
REST sense (not really related to the database primary key, but serves as
5758
a row identifier)
@@ -61,30 +62,29 @@ class BaseStatisticQuerysetMixin:
6162
"""
6263

6364
def get_queryset(self):
64-
return (
65-
Report.objects.all()
66-
.select_related("user", "task", "task__project", "task__project__customer")
67-
.annotate(year=ExtractYear("date"))
68-
.annotate(month=ExtractYear("date") * 100 + ExtractMonth("date"))
65+
# TODO we're doing select_related() here, which makes a JOIN inside
66+
# the VIEW superfluous. Refactor this to drop it in the VIEW and join
67+
# normally here - should be slightly faster. But "first make it correct"
68+
return ReportStatistic.objects.all().select_related(
69+
"user", "task", "project", "customer"
6970
)
7071

7172
def filter_queryset(self, queryset):
7273
queryset = super().filter_queryset(queryset)
73-
if isinstance(self.qs_fields, dict):
74-
# qs fields need to be aliased
75-
queryset = queryset.annotate(**self.qs_fields)
7674

77-
queryset = queryset.values(*list(self.qs_fields))
75+
# need to name it `total_duration` as `duration` is already
76+
# taken on the `Report` model
77+
queryset = queryset.values(*self.select_fields)
7878
queryset = queryset.annotate(duration=Sum("duration"))
7979
queryset = queryset.annotate(pk=F(self.pk_field))
80-
return queryset
80+
return queryset # noqa: RET504
8181

8282

8383
class YearStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
8484
"""Year statistics calculates total reported time per year."""
8585

8686
serializer_class = serializers.YearStatisticSerializer
87-
filterset_class = ReportFilterSet
87+
filterset_class = filters.ReportStatisticFilterset
8888
ordering_fields = (
8989
"year",
9090
"duration",
@@ -97,15 +97,15 @@ class YearStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
9797
),
9898
)
9999

100-
qs_fields = ("year", "duration")
101100
pk_field = "year"
101+
select_fields = ("year",)
102102

103103

104104
class MonthStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
105105
"""Month statistics calculates total reported time per month."""
106106

107107
serializer_class = serializers.MonthStatisticSerializer
108-
filterset_class = ReportFilterSet
108+
filterset_class = filters.ReportStatisticFilterset
109109
ordering_fields = (
110110
"year",
111111
"month",
@@ -122,20 +122,18 @@ class MonthStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
122122
),
123123
)
124124

125-
qs_fields = ("year", "month", "duration")
126125
pk_field = "month"
126+
select_fields = ("year", "month")
127127

128128

129129
class CustomerStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
130130
"""Customer statistics calculates total reported time per customer."""
131131

132132
serializer_class = serializers.CustomerStatisticSerializer
133-
filterset_class = ReportFilterSet
133+
filterset_class = filters.ReportStatisticFilterset
134134
ordering_fields = (
135-
"task__project__customer__name",
135+
"customer__name",
136136
"duration",
137-
"estimated_time",
138-
"remaining_effort",
139137
)
140138

141139
ordering = ("name",)
@@ -145,22 +143,17 @@ class CustomerStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet)
145143
(IsInternal | IsSuperUser) & IsAuthenticated
146144
),
147145
)
148-
qs_fields = { # noqa: RUF012
149-
"year": F("year"),
150-
"month": F("month"),
151-
"name": F("task__project__customer__name"),
152-
"customer_id": F("task__project__customer_id"),
153-
}
154146
pk_field = "customer_id"
147+
select_fields = ("customer__name",)
155148

156149

157-
class ProjectStatisticViewSet(AggregateQuerysetMixin, ReadOnlyModelViewSet):
150+
class ProjectStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
158151
"""Project statistics calculates total reported time per project."""
159152

160153
serializer_class = serializers.ProjectStatisticSerializer
161-
filterset_class = ReportFilterSet
154+
filterset_class = filters.ReportStatisticFilterset
162155
ordering_fields = (
163-
"task__project__name",
156+
"project__name",
164157
"duration",
165158
"estimated_time",
166159
"remaining_effort",
@@ -173,20 +166,20 @@ class ProjectStatisticViewSet(AggregateQuerysetMixin, ReadOnlyModelViewSet):
173166
),
174167
)
175168

176-
qs_fields = { # noqa: RUF012
177-
"year": F("year"),
178-
"month": F("month"),
179-
"name": F("task__project__name"),
180-
"project_id": F("task__project_id"),
181-
}
182169
pk_field = "project_id"
170+
select_fields = (
171+
"customer__name",
172+
"project__name",
173+
"task__estimated_time",
174+
"task__remaining_effort",
175+
)
183176

184177

185178
class TaskStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
186179
"""Task statistics calculates total reported time per task."""
187180

188181
serializer_class = serializers.TaskStatisticSerializer
189-
filterset_class = ReportFilterSet
182+
filterset_class = filters.ReportStatisticFilterset
190183
ordering_fields = (
191184
"task__name",
192185
"duration",
@@ -195,25 +188,23 @@ class TaskStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
195188
)
196189
ordering = ("task__name",)
197190
permission_classes = (
198-
(
199-
# internal employees or super users may read all customer statistics
200-
(IsInternal | IsSuperUser) & IsAuthenticated
201-
),
191+
# internal employees or super users may read all customer statistics
192+
((IsInternal | IsSuperUser) & IsAuthenticated),
202193
)
203194

204-
qs_fields = { # noqa: RUF012
205-
"year": F("year"),
206-
"month": F("month"),
207-
"name": F("task__name"),
208-
}
209195
pk_field = "task_id"
196+
select_fields = (
197+
"task__name",
198+
"task__estimated_time",
199+
"task__remaining_effort",
200+
)
210201

211202

212203
class UserStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
213204
"""User calculates total reported time per user."""
214205

215206
serializer_class = serializers.UserStatisticSerializer
216-
filterset_class = ReportFilterSet
207+
filterset_class = filters.ReportStatisticFilterset
217208
ordering_fields = (
218209
"user__username",
219210
"duration",
@@ -227,6 +218,7 @@ class UserStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
227218
)
228219

229220
pk_field = "user"
221+
select_fields = ("user__username",)
230222

231223

232224
class WorkReportViewSet(GenericViewSet):
@@ -236,7 +228,7 @@ class WorkReportViewSet(GenericViewSet):
236228
in several projects work reports will be returned as zip.
237229
"""
238230

239-
filterset_class = ReportFilterSet
231+
filterset_class = filters.ReportStatisticFilterset
240232
ordering = ReportViewSet.ordering
241233
ordering_fields = ReportViewSet.ordering_fields
242234

0 commit comments

Comments
 (0)