Skip to content

Commit afc2156

Browse files
committed
Use django-qsstats for dashboard computations.
This is like 10000000000 times faster, to say the least. The only thing that's lacking is a really fast page content calculation, but testing with Santa Cruz (~6000 pages) on my laptop, the whole thing only takes ~20s to generate, so I'd say it's good for now.
1 parent d9fce82 commit afc2156

File tree

3 files changed

+81
-97
lines changed

3 files changed

+81
-97
lines changed

sapling/dashboard/views.py

Lines changed: 79 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from django.utils.translation import ugettext as _
77

88
import pyflot
9+
import qsstats
910

1011
from pages.models import Page, PageFile
1112
from maps.models import MapData
@@ -50,12 +51,11 @@ def get_context_data(self, **kwargs):
5051
return {'generated': False}
5152

5253
cache.set('dashboard_generating', True)
53-
now = datetime.now()
5454
context = {
55-
'num_items_over_time': items_over_time(now),
56-
'num_edits_over_time': edits_over_time(now),
57-
'page_content_over_time': page_content_over_time(now),
58-
'users_registered_over_time': users_registered_over_time(now),
55+
'num_items_over_time': items_over_time(),
56+
'num_edits_over_time': edits_over_time(),
57+
'page_content_over_time': page_content_over_time(),
58+
'users_registered_over_time': users_registered_over_time(),
5959
}
6060
context.update(self.get_nums())
6161

@@ -66,49 +66,66 @@ def get_context_data(self, **kwargs):
6666
return context
6767

6868

69-
def items_over_time(now):
70-
def _total_count_as_of(d, M, total_count):
71-
# Figure out which instances of M were added, deleted on this day.
72-
num_added = len(M.versions.filter(
73-
version_info__date__gte=d, version_info__date__lt=next_d,
74-
version_info__type__in=ADDED_TYPES))
75-
num_deleted = len(M.versions.filter(
76-
version_info__date__gte=d, version_info__date__lt=next_d,
77-
version_info__type__in=DELETED_TYPES))
69+
def _summed_series(series):
70+
"""
71+
Take a time series, ``series``, and turn it into a summed-by-term series.
72+
"""
73+
sum = 0
74+
l = []
75+
for (d, num) in series:
76+
sum += num
77+
l.append((d, sum))
78+
return l
7879

79-
total_count += num_added
80-
total_count -= num_deleted
81-
return total_count
8280

81+
def _sum_from_add_del(added_series, deleted_series):
82+
"""
83+
Sum the provided, aligned time series (added, deleted).
84+
"""
85+
sum = 0
86+
l = []
87+
for ((d, added), (_, deleted)) in zip(added_series, deleted_series):
88+
sum += (added - deleted)
89+
l.append((d, sum))
90+
return l
91+
92+
93+
def items_over_time():
8394
oldest_page = Page.versions.all().order_by(
8495
'history_date')[0].version_info.date
85-
8696
graph = pyflot.Flot()
87-
page_total_count, map_total_count, file_total_count, \
88-
redirect_total_count = 0, 0, 0, 0
89-
num_pages_over_time, num_maps_over_time, num_files_over_time, \
90-
num_redirects_over_time = [], [], [], []
91-
# Start at the oldest page's date and then iterate, day-by-day, until
92-
# current day, day by day.
93-
d = datetime(oldest_page.year, oldest_page.month, oldest_page.day - 1)
94-
while (now.year, now.month, now.day) != (d.year, d.month, d.day):
95-
next_d = d + timedelta(days=1)
96-
97-
page_total_count = _total_count_as_of(d, Page, page_total_count)
98-
num_pages_over_time.append((d, page_total_count))
99-
100-
map_total_count = _total_count_as_of(d, MapData, map_total_count)
101-
num_maps_over_time.append((d, map_total_count))
10297

103-
file_total_count = _total_count_as_of(d, PageFile,
104-
file_total_count)
105-
num_files_over_time.append((d, file_total_count))
106-
107-
redirect_total_count = _total_count_as_of(d, Redirect,
108-
redirect_total_count)
109-
num_redirects_over_time.append((d, redirect_total_count))
110-
111-
d = next_d
98+
pages_added = qsstats.QuerySetStats(
99+
Page.versions.filter(version_info__type__in=ADDED_TYPES),
100+
'history_date').time_series(oldest_page)
101+
pages_deleted = qsstats.QuerySetStats(
102+
Page.versions.filter(version_info__type__in=DELETED_TYPES),
103+
'history_date').time_series(oldest_page)
104+
num_pages_over_time = _sum_from_add_del(pages_added, pages_deleted)
105+
106+
maps_added = qsstats.QuerySetStats(
107+
MapData.versions.filter(version_info__type__in=ADDED_TYPES),
108+
'history_date').time_series(oldest_page)
109+
maps_deleted = qsstats.QuerySetStats(
110+
MapData.versions.filter(version_info__type__in=DELETED_TYPES),
111+
'history_date').time_series(oldest_page)
112+
num_maps_over_time = _sum_from_add_del(maps_added, maps_deleted)
113+
114+
files_added = qsstats.QuerySetStats(
115+
PageFile.versions.filter(version_info__type__in=ADDED_TYPES),
116+
'history_date').time_series(oldest_page)
117+
files_deleted = qsstats.QuerySetStats(
118+
PageFile.versions.filter(version_info__type__in=DELETED_TYPES),
119+
'history_date').time_series(oldest_page)
120+
num_files_over_time = _sum_from_add_del(files_added, files_deleted)
121+
122+
redir_added = qsstats.QuerySetStats(
123+
Redirect.versions.filter(version_info__type__in=ADDED_TYPES),
124+
'history_date').time_series(oldest_page)
125+
redir_deleted = qsstats.QuerySetStats(
126+
Redirect.versions.filter(version_info__type__in=DELETED_TYPES),
127+
'history_date').time_series(oldest_page)
128+
num_redirects_over_time = _sum_from_add_del(redir_added, redir_deleted)
112129

113130
graph.add_time_series(num_pages_over_time, label=_("pages"))
114131
graph.add_time_series(num_maps_over_time, label=_("maps"))
@@ -119,56 +136,37 @@ def _total_count_as_of(d, M, total_count):
119136
return [graph.prepare_series(s) for s in graph._series]
120137

121138

122-
def edits_over_time(now):
139+
def edits_over_time():
123140
oldest_page = Page.versions.all().order_by(
124141
'history_date')[0].version_info.date
125-
126142
graph = pyflot.Flot()
127-
page_edits = []
128-
map_edits = []
129-
file_edits = []
130-
redirect_edits = []
131-
d = datetime(oldest_page.year, oldest_page.month, oldest_page.day)
132-
while (now.year, now.month, now.day) != (d.year, d.month, d.day):
133-
next_d = d + timedelta(days=1)
134-
135-
# Page edits
136-
page_edits_this_day = len(Page.versions.filter(
137-
version_info__date__gte=d, version_info__date__lt=next_d))
138-
page_edits.append((d, page_edits_this_day))
139-
140-
# Map edits
141-
map_edits_this_day = len(MapData.versions.filter(
142-
version_info__date__gte=d, version_info__date__lt=next_d))
143-
map_edits.append((d, map_edits_this_day))
144143

145-
# File edits
146-
file_edits_this_day = len(PageFile.versions.filter(
147-
version_info__date__gte=d, version_info__date__lt=next_d))
148-
file_edits.append((d, file_edits_this_day))
144+
qss = qsstats.QuerySetStats(Page.versions.all(), 'history_date')
145+
graph.add_time_series(qss.time_series(oldest_page), label=_("pages"))
149146

150-
# Redirect edits
151-
redirect_edits_this_day = len(Redirect.versions.filter(
152-
version_info__date__gte=d, version_info__date__lt=next_d))
153-
redirect_edits.append((d, redirect_edits_this_day))
147+
qss = qsstats.QuerySetStats(MapData.versions.all(), 'history_date')
148+
graph.add_time_series(qss.time_series(oldest_page), label=_("maps"))
154149

155-
d = next_d
150+
qss = qsstats.QuerySetStats(PageFile.versions.all(), 'history_date')
151+
graph.add_time_series(qss.time_series(oldest_page), label=_("files"))
156152

157-
graph.add_time_series(page_edits, label=_("pages"))
158-
graph.add_time_series(map_edits, label=_("maps"))
159-
graph.add_time_series(file_edits, label=_("files"))
160-
graph.add_time_series(redirect_edits, label=_("redirects"))
153+
qss = qsstats.QuerySetStats(Redirect.versions.all(), 'history_date')
154+
graph.add_time_series(qss.time_series(oldest_page), label=_("redirects"))
161155

162156
return [graph.prepare_series(s) for s in graph._series]
163157

164158

165-
def page_content_over_time(now):
159+
def page_content_over_time():
166160
oldest_page = Page.versions.all().order_by(
167161
'history_date')[0].version_info.date
168162

163+
# TODO: There's probably a much faster way to do this. But it's
164+
# fast enough for now.
165+
169166
graph = pyflot.Flot()
170167
page_dict = {}
171168
page_contents = []
169+
now = datetime.now()
172170

173171
d = datetime(oldest_page.year, oldest_page.month, oldest_page.day)
174172
while (now.year, now.month, now.day) != (d.year, d.month, d.day):
@@ -178,14 +176,11 @@ def page_content_over_time(now):
178176
version_info__date__gte=d, version_info__date__lt=next_d)
179177
# Group the edits by slug and annotate with the max history date
180178
# for the associated page.
181-
slugs_with_date = page_edits_this_day.values('slug').annotate(
182-
Max('history_date')).order_by()
179+
slugs_with_date = page_edits_this_day.values(
180+
'slug', 'content').annotate(Max('history_date'))
183181

184182
for item in slugs_with_date:
185-
p = Page(slug=item['slug'])
186-
# Grab the historical version at this date.
187-
p_h = p.versions.as_of(date=item['history_date__max'])
188-
page_dict[item['slug']] = len(p_h.content)
183+
page_dict[item['slug']] = len(item['content'])
189184

190185
total_content_today = 0
191186
for slug, length in page_dict.iteritems():
@@ -199,24 +194,11 @@ def page_content_over_time(now):
199194
return [graph.prepare_series(s) for s in graph._series]
200195

201196

202-
def users_registered_over_time(now):
197+
def users_registered_over_time():
203198
oldest_user = User.objects.order_by('date_joined')[0].date_joined
204-
205199
graph = pyflot.Flot()
206-
users_registered = []
207-
num_users = 0
208-
d = datetime(oldest_user.year, oldest_user.month, oldest_user.day)
209-
while (now.year, now.month, now.day) != (d.year, d.month, d.day):
210-
next_d = d + timedelta(days=1)
211-
212-
users_this_day = len(User.objects.filter(
213-
date_joined__gte=d, date_joined__lt=next_d))
214-
215-
num_users += users_this_day
216-
217-
users_registered.append((d, num_users))
218-
d = next_d
219200

220-
graph.add_time_series(users_registered)
201+
qss = qsstats.QuerySetStats(User.objects.all(), 'date_joined')
202+
graph.add_time_series(_summed_series(qss.time_series(oldest_user)))
221203

222204
return [graph.prepare_series(s) for s in graph._series]

sapling/etc/install_config/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,6 @@ South==0.7.4
1010
python-flot-utils==0.2.1
1111
django-staticfiles==1.2.1
1212
django-registration==0.8.0
13+
django-qsstats-magic==0.7
1314
-e git+git://github.com/philipn/olwidget.git@72de64cd25ffdc3644bc92d14668c145533d6d45#egg=django-olwidget
1415
-e git+git://github.com/mivanov/django-honeypot.git#egg=django-honeypot

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ def gen_data_files(*dirs):
129129
'django-registration==0.8.0',
130130
'django-olwidget==0.46-custom1',
131131
'django-honeypot==0.3.0-custom',
132+
'django-qsstats-magic==0.7',
132133
# Actually optional:
133134
'Sphinx==1.1.3',
134135
]

0 commit comments

Comments
 (0)