Wiki source code of Active Installs 2
Last modified by Vincent Massol on 2024/02/26 17:54
Show last authors
author | version | line-number | content |
---|---|---|---|
1 | This is the second version of [[Design.ActiveInstalls]]. Its aim is to support the following use cases which couldn't all be fulfilled by the first version: | ||
2 | |||
3 | * Active installs as of now | ||
4 | * Total installs (ie per unique id) | ||
5 | * Active installs over time (graph) | ||
6 | * How frequently users upgrade their version of XWiki | ||
7 | * Most used extensions | ||
8 | * Average time an instance is used (by range, < 1 day, 2-7 days, 7-30 days, 30-365 days, > 365 days) | ||
9 | * Drop rate or rather sticking rate = active installs / total installs * 100 | ||
10 | * Graph over time of number of extensions used | ||
11 | * To be decided: | ||
12 | ** Ability to graph countries where XWiki instances are located | ||
13 | ** Java version used | ||
14 | ** Database engine used | ||
15 | ** OS used | ||
16 | |||
17 | Table of contents: | ||
18 | |||
19 | {{toc/}} | ||
20 | |||
21 | = New Format = | ||
22 | |||
23 | Old format: | ||
24 | |||
25 | {{code}} | ||
26 | curl -XPUT "http://localhost:9200/installs/install/<UUID>" -d' | ||
27 | { | ||
28 | "formatVersion" : "1.0", | ||
29 | "date" : "<last ping date>", | ||
30 | "distributionVersion" : "<version>", | ||
31 | "distributionId" : "<distribution id, eg org.xwiki.enterprise:xwiki-enterprise-web>", | ||
32 | "extensions" : [ | ||
33 | { | ||
34 | "id" : "<extension id>", | ||
35 | "version" : "<extension version>" | ||
36 | }, | ||
37 | ... | ||
38 | ] | ||
39 | }' | ||
40 | {{/code}} | ||
41 | |||
42 | New format: | ||
43 | |||
44 | {{code}} | ||
45 | curl -XPUT "http://localhost:9200/installs/install/<unique id generated by ES>" -d' | ||
46 | { | ||
47 | "formatVersion" : "2.0", | ||
48 | "instanceId": "<unique instance id>", | ||
49 | "distributionVersion" : "<version>", | ||
50 | "distributionId" : "<distribution id, eg org.xwiki.enterprise:xwiki-enterprise-web>", | ||
51 | "extensions" : [ | ||
52 | { | ||
53 | "id" : "<extension id>", | ||
54 | "version" : "<extension version>" | ||
55 | }, | ||
56 | ... | ||
57 | ] | ||
58 | }' | ||
59 | {{/code}} | ||
60 | |||
61 | This means we would store all pings sent by XWiki client instances and not just the last ping. This will give us the ability to graph the history. | ||
62 | |||
63 | Note that the date would be handled by ES's ##_timestamp## field, which has the advantage of being the date on the ES server and not the date on the XWiki client instance, which can be wrongly set. | ||
64 | |||
65 | == Alternate Format == | ||
66 | |||
67 | {{code}} | ||
68 | curl -XPUT "http://localhost:9200/installs/install/<UUID>" -d' | ||
69 | { | ||
70 | "formatVersion" : "1.0", | ||
71 | "pings" : [ | ||
72 | { | ||
73 | "date" : "<ping date>", | ||
74 | "distributionVersion" : "<version>", | ||
75 | "distributionId" : "<distribution id, eg org.xwiki.enterprise:xwiki-enterprise-web>", | ||
76 | "extensions" : [ | ||
77 | { | ||
78 | "id" : "<extension id>", | ||
79 | "version" : "<extension version>" | ||
80 | }, | ||
81 | ... | ||
82 | ] | ||
83 | }, | ||
84 | ... | ||
85 | ] | ||
86 | }' | ||
87 | {{/code}} | ||
88 | |||
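89 | However, this format seems more complex to query. For example, graphing the active installs over time seems more complex than with the other format, since it means aggregating on the dates nested inside each document's ##pings## array instead of on one document per ping. | ||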
90 | |||
91 | = Implementation = | ||
92 | |||
93 | * Active installs as of now((( | ||
94 | {{code}} | ||
95 | curl -XGET "http://localhost:9200/installs/install/_search?search_type=count&pretty=1" -d' | ||
96 | { | ||
97 | "aggs": { | ||
98 | "last_day" : { | ||
99 | "filter" : { | ||
100 | "range" : { | ||
101 | "_timestamp" : { | ||
102 | "gt" : "now-1d" | ||
103 | } | ||
104 | } | ||
105 | }, | ||
106 | "aggs" : { | ||
107 | "instanceId_count" : { | ||
108 | "cardinality" : { | ||
109 | "field" : "instanceId" | ||
110 | } | ||
111 | } | ||
112 | } | ||
113 | } | ||
114 | } | ||
115 | }' | ||
116 | {{/code}} | ||
117 | |||
118 | Implementation for alternate format: | ||
119 | |||
120 | {{code}} | ||
121 | curl -XGET "http://localhost:9200/installs2/install/_search?search_type=count&pretty=1" -d' | ||
122 | { | ||
123 | "aggs": { | ||
124 | "last_day" : { | ||
125 | "filter" : { | ||
126 | "range" : { | ||
127 | "date" : { | ||
128 | "gt" : "now-1d" | ||
129 | } | ||
130 | } | ||
131 | } | ||
132 | } | ||
133 | } | ||
134 | }' | ||
135 | {{/code}} | ||
136 | ))) | ||
137 | * Total installs (ie per unique id)((( | ||
138 | {{code}} | ||
139 | curl -XGET "http://localhost:9200/installs/install/_search?search_type=count&pretty=1" -d' | ||
140 | { | ||
141 | "aggs": { | ||
142 | "instanceId_count" : { | ||
143 | "cardinality" : { | ||
144 | "field" : "instanceId" | ||
145 | } | ||
146 | } | ||
147 | } | ||
148 | }' | ||
149 | {{/code}} | ||
150 | |||
151 | Implementation for alternate format: | ||
152 | |||
153 | {{code}} | ||
154 | curl -XGET "http://localhost:9200/installs2/install/_count?pretty=1" -d' | ||
155 | { | ||
156 | "query": { | ||
157 | "match_all": {} | ||
158 | } | ||
159 | }' | ||
160 | {{/code}} | ||
161 | ))) | ||
162 | * Active installs over time (graph)((( | ||
163 | {{code}} | ||
164 | curl -XGET "http://localhost:9200/installs/install/_search?search_type=count&pretty=1" -d' | ||
165 | { | ||
166 | "aggs": { | ||
167 | "activeinstalls_over_time" : { | ||
168 | "date_histogram" : { | ||
169 | "field" : "_timestamp", | ||
170 | "interval" : "day" | ||
171 | }, | ||
172 | "aggs" : { | ||
173 | "instanceId_count" : { | ||
174 | "cardinality" : { | ||
175 | "field" : "instanceId" | ||
176 | } | ||
177 | } | ||
178 | } | ||
179 | } | ||
180 | } | ||
181 | }' | ||
182 | {{/code}} | ||
183 | |||
184 | Each returned bucket will contain the active installs for the period (1 day in this example). For example: | ||
185 | |||
186 | {{code}} | ||
187 | ... | ||
188 | "aggregations" : { | ||
189 | "activeinstalls_over_time" : { | ||
190 | "buckets" : [ { | ||
191 | "key_as_string" : "2014-03-20T00:00:00.000Z", | ||
192 | "key" : 1395273600000, | ||
193 | "doc_count" : 1, | ||
194 | "instanceId_count" : { | ||
195 | "value" : 1 | ||
196 | } | ||
197 | }, { | ||
198 | "key_as_string" : "2014-04-04T00:00:00.000Z", | ||
199 | "key" : 1396569600000, | ||
200 | "doc_count" : 2, | ||
201 | "instanceId_count" : { | ||
202 | "value" : 1 | ||
203 | } | ||
204 | } ] | ||
205 | } | ||
206 | ... | ||
207 | {{/code}} | ||
208 | |||
209 | {{warning}} | ||
210 | Kibana3 doesn't support graphing ##aggregations##. This is planned for Kibana4 around end of year. Thus it means doing our own Dashboard in XWiki, which isn't a bad idea in any case and shouldn't be hard to achieve. | ||
211 | {{/warning}} | ||
212 | |||
213 | Implementation for alternate format: | ||
214 | |||
215 | {{code}} | ||
216 | curl -XGET "http://localhost:9200/installs2/install/_search?search_type=count&pretty=1" -d' | ||
217 | { | ||
218 | "aggs": { | ||
219 | "activeinstalls_over_time" : { | ||
220 | "date_histogram" : { | ||
221 | "field" : "date", | ||
222 | "interval" : "day" | ||
223 | } | ||
224 | } | ||
225 | } | ||
226 | }' | ||
227 | {{/code}} | ||
228 | |||
229 | Just using facets (ie compatible with Kibana3): | ||
230 | |||
231 | {{code}} | ||
232 | curl -XGET "http://localhost:9200/installs2/install/_search?search_type=count&pretty=1" -d' | ||
233 | { | ||
234 | "query" : { | ||
235 | "match_all" : {} | ||
236 | }, | ||
237 | "facets" : { | ||
238 | "histo1" : { | ||
239 | "histogram" : { | ||
240 | "field" : "date", | ||
241 | "time_interval" : "1d" | ||
242 | } | ||
243 | } | ||
244 | } | ||
245 | }' | ||
246 | {{/code}} | ||
247 | |||
248 | Note that this last solution produces too many results: if an instance does several pings during the same day they're counted several times which isn't correct. | ||
249 | ))) | ||
250 | * How frequently users upgrade their version of XWiki | ||
251 | * Most used extensions | ||
252 | * Average time an instance is used (by range, < 1 day, 2-7 days, 7-30 days, 30-365 days, > 365 days) | ||
253 | * Drop rate or rather sticking rate = active installs / total installs * 100 | ||
254 | * Graph over time of number of extensions used | ||
255 | * (to be decided if we want this one now or not) Ability to graph countries where XWiki instances are located: | ||
256 | ** Use http://ipinfodb.com/ip_location_api_json.php and to be nice with their server, store the country or the lat/long in our DB in the xwikiid table (InstanceId class) | ||
257 | ** This means registering a global XWiki key against http://ipinfodb.com/ with the risk of it being abused, not very nice... | ||
258 | ** If we store long/lat then we can tell ES that it's a [[Geopoint>>http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/mapping-geo-point-type.html]], which allows making queries based on location. | ||
259 | |||
260 | It's also worth considering using the Apache logs for xwiki.org to handle the following use cases: | ||
261 | |||
262 | * Listing what extensions are the most asked for | ||
263 | * Listing the extensions used by a given instance. Note that for this use case, the apache logs are not perfect since they won't contain the unique XWiki instance id and thus the same IP address can hide several XWiki instances installed at the same company. | ||
264 | * Geo-locating XWiki users on a map (this is done by default by tools that analyze Apache logs, such as Piwik or AWStats). | ||
265 | |||
266 | = Data = | ||
267 | |||
268 | A script to create an index and fill it with sample data in an ES instance. | ||
269 | |||
270 | {{code}} | ||
271 | curl -XDELETE "http://localhost:9200/installs" | ||
272 | |||
273 | curl -XPUT "http://localhost:9200/installs" | ||
274 | |||
275 | curl -XPUT "http://localhost:9200/installs/install/_mapping" -d' | ||
276 | { | ||
277 | "install" : { | ||
278 | "_timestamp" : { | ||
279 | "enabled" : true, | ||
280 | "store" : true | ||
281 | }, | ||
282 | "properties" : { | ||
283 | "formatVersion" : { "type" : "string", "index" : "not_analyzed" }, | ||
284 | "instanceId" : { "type" : "string", "index" : "not_analyzed" }, | ||
285 | "distributionId" : { "type" : "string", "index" : "not_analyzed" }, | ||
286 | "distributionVersion" : { "type" : "string", "index" : "not_analyzed" } | ||
287 | } | ||
288 | } | ||
289 | }' | ||
290 | |||
291 | curl -XPOST "http://localhost:9200/installs/install?timestamp=2014-02-20" -d' | ||
292 | { | ||
293 | "formatVersion" : "2.0", | ||
294 | "instanceId" : "abc", | ||
295 | "distributionId" : "org.xwiki.enterprise:xwiki-enterprise-web", | ||
296 | "distributionVersion" : "6.0-milestone-1" | ||
297 | }' | ||
298 | |||
299 | curl -XPOST "http://localhost:9200/installs/install?timestamp=2014-03-20" -d' | ||
300 | { | ||
301 | "formatVersion" : "2.0", | ||
302 | "instanceId" : "abc", | ||
303 | "distributionId" : "org.xwiki.enterprise:xwiki-enterprise-web", | ||
304 | "distributionVersion" : "6.0-milestone-2" | ||
305 | }' | ||
306 | |||
307 | curl -XPOST "http://localhost:9200/installs/install" -d' | ||
308 | { | ||
309 | "formatVersion" : "2.0", | ||
310 | "instanceId" : "def", | ||
311 | "distributionId" : "org.xwiki.enterprise:xwiki-enterprise-web", | ||
312 | "distributionVersion" : "5.4.3" | ||
313 | }' | ||
314 | |||
315 | curl -XPOST "http://localhost:9200/installs/install" -d' | ||
316 | { | ||
317 | "formatVersion" : "2.0", | ||
318 | "instanceId" : "def", | ||
319 | "distributionId" : "org.xwiki.enterprise:xwiki-enterprise-web", | ||
320 | "distributionVersion" : "5.4.3" | ||
321 | }' | ||
322 | |||
323 | curl -XGET "http://localhost:9200/installs/install/_search?pretty=1&fields=_source,_timestamp" -d' | ||
324 | { | ||
325 | "query": { | ||
326 | "match_all": {} | ||
327 | } | ||
328 | }' | ||
329 | {{/code}} | ||
330 | |||
331 | == Data for alternate format == | ||
332 | |||
333 | {{code}} | ||
334 | curl -XDELETE "http://localhost:9200/installs2" | ||
335 | |||
336 | curl -XPUT "http://localhost:9200/installs2" | ||
337 | |||
338 | curl -XPUT "http://localhost:9200/installs2/install/_mapping" -d' | ||
339 | { | ||
340 | "install" : { | ||
341 | "properties" : { | ||
342 | "formatVersion" : { "type" : "string", "index" : "not_analyzed" }, | ||
343 | "pings" : { | ||
344 | "properties" : { | ||
345 | "date" : { "type" : "date" }, | ||
346 | "distributionId" : { "type" : "string", "index" : "not_analyzed" }, | ||
347 | "distributionVersion" : { "type" : "string", "index" : "not_analyzed" } | ||
348 | } | ||
349 | } | ||
350 | } | ||
351 | } | ||
352 | }' | ||
353 | |||
354 | curl -XPOST "http://localhost:9200/installs2/install/abc" -d' | ||
355 | { | ||
356 | "formatVersion" : "2.0", | ||
357 | "pings" : [ | ||
358 | { | ||
359 | "date" : "2014-02-20", | ||
360 | "distributionId" : "org.xwiki.enterprise:xwiki-enterprise-web", | ||
361 | "distributionVersion" : "6.0" | ||
362 | }, | ||
363 | { | ||
364 | "date" : "2014-03-20", | ||
365 | "distributionId" : "org.xwiki.enterprise:xwiki-enterprise-web", | ||
366 | "distributionVersion" : "6.1" | ||
367 | }, | ||
368 | { | ||
369 | "date" : "2014-04-14", | ||
370 | "distributionId" : "org.xwiki.enterprise:xwiki-enterprise-web", | ||
371 | "distributionVersion" : "6.1" | ||
372 | } | ||
373 | ] | ||
374 | }' | ||
375 | |||
376 | curl -XPOST "http://localhost:9200/installs2/install/def" -d' | ||
377 | { | ||
378 | "formatVersion" : "2.0", | ||
379 | "pings" : [ | ||
380 | { | ||
381 | "date" : "2014-04-14T00:00:00.000Z", | ||
382 | "distributionId" : "org.xwiki.enterprise:xwiki-enterprise-web", | ||
383 | "distributionVersion" : "5.4.3" | ||
384 | }, | ||
385 | { | ||
386 | "date" : "2014-04-14T00:05:00.000Z", | ||
387 | "distributionId" : "org.xwiki.enterprise:xwiki-enterprise-web", | ||
388 | "distributionVersion" : "5.4.3" | ||
389 | } | ||
390 | ] | ||
391 | }' | ||
392 | {{/code}} | ||
393 | |||
394 | = Backward Compatibility = | ||
395 | |||
396 | * Existing queries will need to ensure that they filter on the ##formatVersion## so that they can handle the format change. | ||
397 | * Introducing this new format means that the # of active installs for format 1.0 is going to decrease over time until it reaches 0 (when all instances will have migrated to the new version using the new format - e.g. 6.0 or 6.1) | ||
398 | * If we wish to continue showing the full figure, we'll need to sum the active installs figure from "format 1.0" with the figure from "format 2.0". This can be done transparently in the Active Install module's code. However the graph over time will only work with "format 2.0" data obviously. |