drm/ttm: Use Linux kobjects
[dragonfly.git] / sys / kern / vfs_quota.c
CommitLineData
e788edda 1/*
d1647ee9 2 * Copyright (c) 2011,2012 François Tigeot <ftigeot@wolpond.org>
e788edda 3 * All rights reserved.
95bf5f78 4 *
e788edda
FT
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
95bf5f78 8 *
e788edda
FT
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 * 3. Neither the name of The DragonFly Project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific, prior written permission.
95bf5f78 18 *
e788edda
FT
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include <sys/sysctl.h>
34#include <sys/mount.h>
35#include <sys/systm.h>
36#include <sys/nlookup.h>
37#include <sys/vnode.h>
38#include <sys/stat.h>
39#include <sys/vfs_quota.h>
95bf5f78
FT
40#include <sys/spinlock.h>
41#include <sys/spinlock2.h>
42
b4d6d8bb
FT
43#include <sys/sysproto.h>
44#include <libprop/proplib.h>
45#include <libprop/prop_dictionary.h>
95bf5f78 46
b4d6d8bb 47/* in-memory accounting, red-black tree based */
95bf5f78
FT
48/* FIXME: code duplication caused by uid_t / gid_t differences */
49RB_PROTOTYPE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp);
50RB_PROTOTYPE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp);
51
52static int
53rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b);
54static int
55rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b);
56
57RB_GENERATE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp);
58RB_GENERATE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp);
59
60struct ac_unode* unode_insert(struct mount*, uid_t);
61struct ac_gnode* gnode_insert(struct mount*, gid_t);
62
63static int
64rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b)
65{
66 if (a->left_bits < b->left_bits)
67 return(-1);
68 else if (a->left_bits > b->left_bits)
69 return(1);
70 return(0);
71}
72
73static int
74rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b)
75{
76 if (a->left_bits < b->left_bits)
77 return(-1);
78 else if (a->left_bits > b->left_bits)
79 return(1);
80 return(0);
81}
82
83struct ac_unode*
84unode_insert(struct mount *mp, uid_t uid)
85{
86 struct ac_unode *unp, *res;
87
3c716a91 88 unp = kmalloc(sizeof(struct ac_unode), M_MOUNT, M_ZERO | M_WAITOK);
95bf5f78
FT
89
90 unp->left_bits = (uid >> ACCT_CHUNK_BITS);
91 res = RB_INSERT(ac_utree, &mp->mnt_acct.ac_uroot, unp);
ed20d0e3 92 KASSERT(res == NULL, ("unode_insert(): RB_INSERT didn't return NULL"));
95bf5f78
FT
93
94 return unp;
95}
96
97struct ac_gnode*
98gnode_insert(struct mount *mp, gid_t gid)
99{
100 struct ac_gnode *gnp, *res;
101
3c716a91 102 gnp = kmalloc(sizeof(struct ac_gnode), M_MOUNT, M_ZERO | M_WAITOK);
95bf5f78
FT
103
104 gnp->left_bits = (gid >> ACCT_CHUNK_BITS);
105 res = RB_INSERT(ac_gtree, &mp->mnt_acct.ac_groot, gnp);
ed20d0e3 106 KASSERT(res == NULL, ("gnode_insert(): RB_INSERT didn't return NULL"));
95bf5f78
FT
107
108 return gnp;
109}
e788edda 110
59a9a88c
FT
111int vfs_quota_enabled = 0;
112TUNABLE_INT("vfs.quota_enabled", &vfs_quota_enabled);
113SYSCTL_INT(_vfs, OID_AUTO, quota_enabled, CTLFLAG_RD,
114 &vfs_quota_enabled, 0, "Enable VFS quota");
f8fe3d75 115
59a9a88c 116/* initializes per mount-point data structures */
e788edda 117void
18b2f5e8
SW
118vq_init(struct mount *mp)
119{
95bf5f78 120
59a9a88c 121 if (!vfs_quota_enabled)
f8fe3d75
FT
122 return;
123
95bf5f78
FT
124 /* initialize the rb trees */
125 RB_INIT(&mp->mnt_acct.ac_uroot);
126 RB_INIT(&mp->mnt_acct.ac_groot);
ba87a4ab 127 spin_init(&mp->mnt_acct.ac_spin, "vqinit");
95bf5f78
FT
128
129 mp->mnt_acct.ac_bytes = 0;
130
a3dce641 131 /* enable data collection */
95bf5f78 132 mp->mnt_op->vfs_account = vfs_stdaccount;
9b272500
FT
133 /* mark this filesystem quota enabled */
134 mp->mnt_flag |= MNT_QUOTA;
3c716a91
SW
135 if (bootverbose)
136 kprintf("vfs accounting enabled for %s\n",
137 mp->mnt_stat.f_mntonname);
e788edda
FT
138}
139
140
/*
 * Counterpart of vq_init().  Currently a no-op: the accounting trees of
 * mp are left untouched.
 */
void
vq_done(struct mount *mp)
{
	/* TODO: tear down the rb trees here */
}
146
147void
148vfs_stdaccount(struct mount *mp, uid_t uid, gid_t gid, int64_t delta)
149{
150 struct ac_unode ufind, *unp;
151 struct ac_gnode gfind, *gnp;
152
153 /* find or create address of chunk */
154 ufind.left_bits = (uid >> ACCT_CHUNK_BITS);
155 gfind.left_bits = (gid >> ACCT_CHUNK_BITS);
156
157 spin_lock(&mp->mnt_acct.ac_spin);
158
159 mp->mnt_acct.ac_bytes += delta;
160
161 if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL)
162 unp = unode_insert(mp, uid);
163 if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL)
164 gnp = gnode_insert(mp, gid);
165
166 /* update existing chunk */
3e8ec7e9
FT
167 unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].space += delta;
168 gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].space += delta;
95bf5f78
FT
169
170 spin_unlock(&mp->mnt_acct.ac_spin);
e788edda 171}
b4d6d8bb
FT
172
173static void
18b2f5e8
SW
174cmd_get_usage_all(struct mount *mp, prop_array_t dict_out)
175{
b4d6d8bb
FT
176 struct ac_unode *unp;
177 struct ac_gnode *gnp;
178 int i;
179 prop_dictionary_t item;
180
181 item = prop_dictionary_create();
182 (void) prop_dictionary_set_uint64(item, "space used", mp->mnt_acct.ac_bytes);
8d91721d 183 (void) prop_dictionary_set_uint64(item, "limit", mp->mnt_acct.ac_limit);
b4d6d8bb
FT
184 prop_array_add_and_rel(dict_out, item);
185
186 RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) {
187 for (i=0; i<ACCT_CHUNK_NIDS; i++) {
3e8ec7e9 188 if (unp->uid_chunk[i].space != 0) {
b4d6d8bb
FT
189 item = prop_dictionary_create();
190 (void) prop_dictionary_set_uint32(item, "uid",
191 (unp->left_bits << ACCT_CHUNK_BITS) + i);
192 (void) prop_dictionary_set_uint64(item, "space used",
3e8ec7e9 193 unp->uid_chunk[i].space);
8d91721d
FT
194 (void) prop_dictionary_set_uint64(item, "limit",
195 unp->uid_chunk[i].limit);
b4d6d8bb
FT
196 prop_array_add_and_rel(dict_out, item);
197 }
198 }
199 }
200
201 RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) {
202 for (i=0; i<ACCT_CHUNK_NIDS; i++) {
3e8ec7e9 203 if (gnp->gid_chunk[i].space != 0) {
b4d6d8bb
FT
204 item = prop_dictionary_create();
205 (void) prop_dictionary_set_uint32(item, "gid",
206 (gnp->left_bits << ACCT_CHUNK_BITS) + i);
207 (void) prop_dictionary_set_uint64(item, "space used",
3e8ec7e9 208 gnp->gid_chunk[i].space);
8d91721d
FT
209 (void) prop_dictionary_set_uint64(item, "limit",
210 gnp->gid_chunk[i].limit);
b4d6d8bb
FT
211 prop_array_add_and_rel(dict_out, item);
212 }
213 }
214 }
215}
216
88c2e66c
FT
217static int
218cmd_set_usage_all(struct mount *mp, prop_array_t args)
219{
220 struct ac_unode ufind, *unp;
221 struct ac_gnode gfind, *gnp;
222 prop_dictionary_t item;
223 prop_object_iterator_t iter;
224 uint32_t id;
225 uint64_t space;
226
227 spin_lock(&mp->mnt_acct.ac_spin);
228 /* 0. zero all statistics */
229 /* we don't bother to free up memory, most of it would probably be
230 * re-allocated immediately anyway. just bzeroing the existing nodes
231 * is fine */
232 mp->mnt_acct.ac_bytes = 0;
233 RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) {
234 bzero(&unp->uid_chunk, sizeof(unp->uid_chunk));
235 }
236 RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) {
237 bzero(&gnp->gid_chunk, sizeof(gnp->gid_chunk));
238 }
239
240 /* args contains an array of dict */
241 iter = prop_array_iterator(args);
242 if (iter == NULL) {
243 kprintf("cmd_set_usage_all(): failed to create iterator\n");
d9656636 244 spin_unlock(&mp->mnt_acct.ac_spin);
88c2e66c
FT
245 return 1;
246 }
247 while ((item = prop_object_iterator_next(iter)) != NULL) {
248 prop_dictionary_get_uint64(item, "space used", &space);
249 if (prop_dictionary_get_uint32(item, "uid", &id)) {
250 ufind.left_bits = (id >> ACCT_CHUNK_BITS);
251 unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind);
252 if (unp == NULL)
253 unp = unode_insert(mp, id);
3e8ec7e9 254 unp->uid_chunk[(id & ACCT_CHUNK_MASK)].space = space;
88c2e66c
FT
255 } else if (prop_dictionary_get_uint32(item, "gid", &id)) {
256 gfind.left_bits = (id >> ACCT_CHUNK_BITS);
257 gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind);
258 if (gnp == NULL)
259 gnp = gnode_insert(mp, id);
3e8ec7e9 260 gnp->gid_chunk[(id & ACCT_CHUNK_MASK)].space = space;
88c2e66c
FT
261 } else {
262 mp->mnt_acct.ac_bytes = space;
263 }
264 }
265 prop_object_iterator_release(iter);
266
267 spin_unlock(&mp->mnt_acct.ac_spin);
268 return 0;
269}
270
c115b876
FT
271static int
272cmd_set_limit(struct mount *mp, prop_dictionary_t args)
273{
274 uint64_t limit;
275
276 prop_dictionary_get_uint64(args, "limit", &limit);
277
278 spin_lock(&mp->mnt_acct.ac_spin);
279 mp->mnt_acct.ac_limit = limit;
280 spin_unlock(&mp->mnt_acct.ac_spin);
281
282 return 0;
283}
284
c6e8a1bd
FT
285static int
286cmd_set_limit_uid(struct mount *mp, prop_dictionary_t args)
287{
288 uint64_t limit;
289 uid_t uid;
290 struct ac_unode ufind, *unp;
291
292 prop_dictionary_get_uint32(args, "uid", &uid);
293 prop_dictionary_get_uint64(args, "limit", &limit);
294
295 ufind.left_bits = (uid >> ACCT_CHUNK_BITS);
296
297 spin_lock(&mp->mnt_acct.ac_spin);
298 if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL)
299 unp = unode_insert(mp, uid);
300 unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].limit = limit;
301 spin_unlock(&mp->mnt_acct.ac_spin);
302
303 return 0;
304}
305
3663c79b
FT
306static int
307cmd_set_limit_gid(struct mount *mp, prop_dictionary_t args)
308{
309 uint64_t limit;
310 gid_t gid;
311 struct ac_gnode gfind, *gnp;
312
313 prop_dictionary_get_uint32(args, "gid", &gid);
314 prop_dictionary_get_uint64(args, "limit", &limit);
315
316 gfind.left_bits = (gid >> ACCT_CHUNK_BITS);
317
318 spin_lock(&mp->mnt_acct.ac_spin);
319 if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL)
320 gnp = gnode_insert(mp, gid);
321 gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].limit = limit;
322 spin_unlock(&mp->mnt_acct.ac_spin);
323
324 return 0;
325}
326
b4d6d8bb
FT
327int
328sys_vquotactl(struct vquotactl_args *vqa)
329/* const char *path, struct plistref *pref */
330{
fda4c5f3 331 struct nchandle nch;
b4d6d8bb
FT
332 const char *path;
333 struct plistref pref;
88c2e66c
FT
334 prop_dictionary_t dict;
335 prop_object_t args;
b4d6d8bb 336 char *cmd;
b4d6d8bb 337 prop_array_t pa_out;
b4d6d8bb 338 struct nlookupdata nd;
b4d6d8bb
FT
339 int error;
340
59a9a88c 341 if (!vfs_quota_enabled)
88614311 342 return EOPNOTSUPP;
b4d6d8bb
FT
343 path = vqa->path;
344 error = copyin(vqa->pref, &pref, sizeof(pref));
345 error = prop_dictionary_copyin(&pref, &dict);
fda4c5f3 346 if (error)
b4d6d8bb
FT
347 return(error);
348
349 /* we have a path, get its mount point */
350 error = nlookup_init(&nd, path, UIO_USERSPACE, 0);
fda4c5f3 351 if (error)
b4d6d8bb
FT
352 return (error);
353 error = nlookup(&nd);
fda4c5f3 354 if (error)
b4d6d8bb 355 return (error);
fda4c5f3
MD
356 nch = nd.nl_nch;
357 cache_zero(&nd.nl_nch);
b4d6d8bb
FT
358 nlookup_done(&nd);
359
360 /* get the command */
361 if (prop_dictionary_get_cstring(dict, "command", &cmd) == 0) {
362 kprintf("sys_vquotactl(): couldn't get command\n");
fda4c5f3 363 cache_put(&nch);
b4d6d8bb
FT
364 return EINVAL;
365 }
366 args = prop_dictionary_get(dict, "arguments");
367 if (args == NULL) {
368 kprintf("couldn't get arguments\n");
fda4c5f3 369 cache_put(&nch);
b4d6d8bb
FT
370 return EINVAL;
371 }
372
373 pa_out = prop_array_create();
fda4c5f3
MD
374 if (pa_out == NULL) {
375 cache_put(&nch);
b4d6d8bb 376 return ENOMEM;
fda4c5f3 377 }
b4d6d8bb
FT
378
379 if (strcmp(cmd, "get usage all") == 0) {
fda4c5f3 380 cmd_get_usage_all(nch.mount, pa_out);
b4d6d8bb
FT
381 goto done;
382 }
88c2e66c 383 if (strcmp(cmd, "set usage all") == 0) {
fda4c5f3 384 error = cmd_set_usage_all(nch.mount, args);
88c2e66c
FT
385 goto done;
386 }
c115b876 387 if (strcmp(cmd, "set limit") == 0) {
fda4c5f3 388 error = cmd_set_limit(nch.mount, args);
c115b876
FT
389 goto done;
390 }
c6e8a1bd 391 if (strcmp(cmd, "set limit uid") == 0) {
fda4c5f3 392 error = cmd_set_limit_uid(nch.mount, args);
c6e8a1bd
FT
393 goto done;
394 }
3663c79b 395 if (strcmp(cmd, "set limit gid") == 0) {
fda4c5f3 396 error = cmd_set_limit_gid(nch.mount, args);
3663c79b
FT
397 goto done;
398 }
fda4c5f3 399 cache_put(&nch);
b4d6d8bb
FT
400 return EINVAL;
401
402done:
403 /* kernel to userland */
404 dict = prop_dictionary_create();
88c2e66c 405 error = prop_dictionary_set(dict, "returned data", pa_out);
b4d6d8bb
FT
406
407 error = prop_dictionary_copyout(&pref, dict);
408 error = copyout(&pref, vqa->pref, sizeof(pref));
fda4c5f3 409 cache_put(&nch);
b4d6d8bb
FT
410
411 return error;
412}
d1647ee9 413
e5e8b92a 414/*
d1647ee9
FT
415 * Returns a valid mount point for accounting purposes
416 * We cannot simply use vp->v_mount if the vnode belongs
417 * to a PFS mount point
418 */
419struct mount*
420vq_vptomp(struct vnode *vp)
421{
422 /* XXX: vp->v_pfsmp may point to a freed structure
423 * we use mountlist_exists() to check if it is valid
424 * before using it */
425 if ((vp->v_pfsmp != NULL) && (mountlist_exists(vp->v_pfsmp))) {
426 /* This is a PFS, use a copy of the real mp */
427 return vp->v_pfsmp;
428 } else {
429 /* Not a PFS or a PFS beeing unmounted */
430 return vp->v_mount;
431 }
432}
e5e8b92a
FT
433
434int
435vq_write_ok(struct mount *mp, uid_t uid, gid_t gid, uint64_t delta)
436{
437 int rv = 1;
8f168b5e
FT
438 struct ac_unode ufind, *unp;
439 struct ac_gnode gfind, *gnp;
440 uint64_t space, limit;
e5e8b92a
FT
441
442 spin_lock(&mp->mnt_acct.ac_spin);
443
444 if (mp->mnt_acct.ac_limit == 0)
8f168b5e
FT
445 goto check_uid;
446 if ((mp->mnt_acct.ac_bytes + delta) > mp->mnt_acct.ac_limit) {
447 rv = 0;
e5e8b92a 448 goto done;
8f168b5e
FT
449 }
450
451check_uid:
452 ufind.left_bits = (uid >> ACCT_CHUNK_BITS);
453 if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL) {
454 space = 0;
455 limit = 0;
456 } else {
457 space = unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].space;
458 limit = unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].limit;
459 }
460 if (limit == 0)
461 goto check_gid;
462 if ((space + delta) > limit) {
e5e8b92a 463 rv = 0;
8f168b5e
FT
464 goto done;
465 }
466
467check_gid:
468 gfind.left_bits = (gid >> ACCT_CHUNK_BITS);
469 if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL) {
470 space = 0;
471 limit = 0;
472 } else {
473 space = gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].space;
474 limit = gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].limit;
475 }
476 if (limit == 0)
477 goto done;
478 if ((space + delta) > limit)
479 rv = 0;
480
e5e8b92a
FT
481done:
482 spin_unlock(&mp->mnt_acct.ac_spin);
483 return rv;
484}