drm/i915: Update to Linux 4.4
sys/dev/drm/i915/intel_pm.c
/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */

#include "i915_drv.h"
#include "intel_drv.h"
#include <linux/module.h>
#include <machine/clock.h>

/**
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, using down to 0V while at this stage.  This
 * stage is entered automatically when the GPU is idle when RC6 support is
 * enabled, and as soon as a new workload arises the GPU wakes up
 * automatically as well.
 *
 * There are different RC6 modes available in Intel GPUs, which differ in
 * the latency required to enter and leave RC6 and in the voltage consumed
 * by the GPU in the different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
 * RC6pp is the deepest RC6. Their support by hardware varies according to
 * the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
 * the one which brings the most power savings; deeper states save more
 * power, but require a higher latency to switch to and wake up from.
 */
#define INTEL_RC6_ENABLE                        (1<<0)
#define INTEL_RC6p_ENABLE                       (1<<1)
#define INTEL_RC6pp_ENABLE                      (1<<2)
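
/*
 * For example, a platform on which plain RC6 and deep RC6 are usable but
 * RC6pp is not would be described by the mask
 * (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE). This is an illustration of the
 * flag encoding only, not a recommendation for any particular hardware.
 */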

static void bxt_init_clock_gating(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        /* WaDisableSDEUnitClockGating:bxt */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

        /*
         * FIXME:
         * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
         */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
}

static void i915_pineview_get_mem_freq(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 tmp;

        tmp = I915_READ(CLKCFG);

        switch (tmp & CLKCFG_FSB_MASK) {
        case CLKCFG_FSB_533:
                dev_priv->fsb_freq = 533; /* 133*4 */
                break;
        case CLKCFG_FSB_800:
                dev_priv->fsb_freq = 800; /* 200*4 */
                break;
        case CLKCFG_FSB_667:
                dev_priv->fsb_freq = 667; /* 167*4 */
                break;
        case CLKCFG_FSB_400:
                dev_priv->fsb_freq = 400; /* 100*4 */
                break;
        }

        switch (tmp & CLKCFG_MEM_MASK) {
        case CLKCFG_MEM_533:
                dev_priv->mem_freq = 533;
                break;
        case CLKCFG_MEM_667:
                dev_priv->mem_freq = 667;
                break;
        case CLKCFG_MEM_800:
                dev_priv->mem_freq = 800;
                break;
        }

        /* detect pineview DDR3 setting */
        tmp = I915_READ(CSHRDDR3CTL);
        dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
}

static void i915_ironlake_get_mem_freq(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        u16 ddrpll, csipll;

        ddrpll = I915_READ16(DDRMPLL1);
        csipll = I915_READ16(CSIPLL0);

        switch (ddrpll & 0xff) {
        case 0xc:
                dev_priv->mem_freq = 800;
                break;
        case 0x10:
                dev_priv->mem_freq = 1066;
                break;
        case 0x14:
                dev_priv->mem_freq = 1333;
                break;
        case 0x18:
                dev_priv->mem_freq = 1600;
                break;
        default:
                DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
                                 ddrpll & 0xff);
                dev_priv->mem_freq = 0;
                break;
        }

        dev_priv->ips.r_t = dev_priv->mem_freq;

        switch (csipll & 0x3ff) {
        case 0x00c:
                dev_priv->fsb_freq = 3200;
                break;
        case 0x00e:
                dev_priv->fsb_freq = 3733;
                break;
        case 0x010:
                dev_priv->fsb_freq = 4266;
                break;
        case 0x012:
                dev_priv->fsb_freq = 4800;
                break;
        case 0x014:
                dev_priv->fsb_freq = 5333;
                break;
        case 0x016:
                dev_priv->fsb_freq = 5866;
                break;
        case 0x018:
                dev_priv->fsb_freq = 6400;
                break;
        default:
                DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
                                 csipll & 0x3ff);
                dev_priv->fsb_freq = 0;
                break;
        }

        if (dev_priv->fsb_freq == 3200) {
                dev_priv->ips.c_m = 0;
        } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
                dev_priv->ips.c_m = 1;
        } else {
                dev_priv->ips.c_m = 2;
        }
}

static const struct cxsr_latency cxsr_latency_table[] = {
        {1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
        {1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
        {1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
        {1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
        {1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */

        {1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
        {1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
        {1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
        {1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
        {1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */

        {1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
        {1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
        {1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
        {1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
        {1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */

        {0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
        {0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
        {0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
        {0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
        {0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */

        {0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
        {0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
        {0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
        {0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
        {0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */

        {0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
        {0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
        {0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
        {0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
        {0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
};
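
/*
 * Each entry above lists, in order: is_desktop, is_ddr3, fsb_freq,
 * mem_freq, and then the four CxSR latencies (in ns) consumed by
 * pineview_update_wm(): display_sr, cursor_sr, display_hpll_disable and
 * cursor_hpll_disable. The field order is inferred from how the entries
 * are matched and used below; the struct itself is defined in a header.
 */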

static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,
                                                         int is_ddr3,
                                                         int fsb,
                                                         int mem)
{
        const struct cxsr_latency *latency;
        int i;

        if (fsb == 0 || mem == 0)
                return NULL;

        for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
                latency = &cxsr_latency_table[i];
                if (is_desktop == latency->is_desktop &&
                    is_ddr3 == latency->is_ddr3 &&
                    fsb == latency->fsb_freq && mem == latency->mem_freq)
                        return latency;
        }

        DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

        return NULL;
}

static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
{
        u32 val;

        mutex_lock(&dev_priv->rps.hw_lock);

        val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
        if (enable)
                val &= ~FORCE_DDR_HIGH_FREQ;
        else
                val |= FORCE_DDR_HIGH_FREQ;
        val &= ~FORCE_DDR_LOW_FREQ;
        val |= FORCE_DDR_FREQ_REQ_ACK;
        vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);

        if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
                      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
                DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");

        mutex_unlock(&dev_priv->rps.hw_lock);
}

static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
{
        u32 val;

        mutex_lock(&dev_priv->rps.hw_lock);

        val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
        if (enable)
                val |= DSP_MAXFIFO_PM5_ENABLE;
        else
                val &= ~DSP_MAXFIFO_PM5_ENABLE;
        vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);

        mutex_unlock(&dev_priv->rps.hw_lock);
}

#define FW_WM(value, plane) \
        (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
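
/*
 * e.g. FW_WM(wm, SR) expands to ((wm) << DSPFW_SR_SHIFT) & DSPFW_SR_MASK,
 * placing the watermark value into the SR field of a DSPFW register.
 */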

void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
        struct drm_device *dev = dev_priv->dev;
        u32 val;

        if (IS_VALLEYVIEW(dev)) {
                I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
                POSTING_READ(FW_BLC_SELF_VLV);
                dev_priv->wm.vlv.cxsr = enable;
        } else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
                I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
                POSTING_READ(FW_BLC_SELF);
        } else if (IS_PINEVIEW(dev)) {
                val = I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN;
                val |= enable ? PINEVIEW_SELF_REFRESH_EN : 0;
                I915_WRITE(DSPFW3, val);
                POSTING_READ(DSPFW3);
        } else if (IS_I945G(dev) || IS_I945GM(dev)) {
                val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
                               _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
                I915_WRITE(FW_BLC_SELF, val);
                POSTING_READ(FW_BLC_SELF);
        } else if (IS_I915GM(dev)) {
                val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
                               _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
                I915_WRITE(INSTPM, val);
                POSTING_READ(INSTPM);
        } else {
                return;
        }

        DRM_DEBUG_KMS("memory self-refresh is %s\n",
                      enable ? "enabled" : "disabled");
}


/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value.  It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;
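
/*
 * Rough sanity check (illustrative numbers, not from any spec): with a
 * 100 MHz pixel clock and 4 bytes per pixel, 5000 ns of latency covers
 * (100000 / 1000) * 4 * 5000 / 1000 = 2000 bytes of scanout data, which
 * is the quantity intel_calculate_wm() below converts into FIFO
 * cachelines.
 */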

#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
        ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
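
/*
 * Assembles a 9-bit FIFO start offset: eight low bits from one DSPARB
 * register and a ninth, high bit from DSPARB2/DSPARB3. For instance,
 * VLV_FIFO_START(dsparb, dsparb2, 8, 4) takes bits 15:8 of DSPARB and
 * bit 4 of DSPARB2.
 */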

static int vlv_get_fifo_size(struct drm_device *dev,
                             enum i915_pipe pipe, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        int sprite0_start, sprite1_start, size;

        switch (pipe) {
                uint32_t dsparb, dsparb2, dsparb3;
        case PIPE_A:
                dsparb = I915_READ(DSPARB);
                dsparb2 = I915_READ(DSPARB2);
                sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
                sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
                break;
        case PIPE_B:
                dsparb = I915_READ(DSPARB);
                dsparb2 = I915_READ(DSPARB2);
                sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
                sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
                break;
        case PIPE_C:
                dsparb2 = I915_READ(DSPARB2);
                dsparb3 = I915_READ(DSPARB3);
                sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
                sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
                break;
        default:
                return 0;
        }

        switch (plane) {
        case 0:
                size = sprite0_start;
                break;
        case 1:
                size = sprite1_start - sprite0_start;
                break;
        case 2:
                size = 512 - 1 - sprite1_start;
                break;
        default:
                return 0;
        }

        DRM_DEBUG_KMS("Pipe %c %s %c FIFO size: %d\n",
                      pipe_name(pipe), plane == 0 ? "primary" : "sprite",
                      plane == 0 ? plane_name(pipe) : sprite_name(pipe, plane - 1),
                      size);

        return size;
}

static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        if (plane)
                size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A", size);

        return size;
}

static int i830_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x1ff;
        if (plane)
                size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
        size >>= 1; /* Convert to cachelines */

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A", size);

        return size;
}

static int i845_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        size >>= 2; /* Convert to cachelines */

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A",
                      size);

        return size;
}

/* Pineview has different values for various configs */
static const struct intel_watermark_params pineview_display_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_display_hplloff_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_wm_info = {
        .fifo_size = G4X_FIFO_SIZE,
        .max_wm = G4X_MAX_WM,
        .default_wm = G4X_MAX_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = I965_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params valleyview_wm_info = {
        .fifo_size = VALLEYVIEW_FIFO_SIZE,
        .max_wm = VALLEYVIEW_MAX_WM,
        .default_wm = VALLEYVIEW_MAX_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params valleyview_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = VALLEYVIEW_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i965_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = I965_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i945_wm_info = {
        .fifo_size = I945_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i915_wm_info = {
        .fifo_size = I915_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_a_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_bc_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM/2,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i845_wm_info = {
        .fifo_size = I830_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
/**
 * intel_calculate_wm - calculate watermark level
 * @clock_in_khz: pixel clock
 * @wm: chip FIFO params
 * @fifo_size: size of the FIFO available to this plane
 * @pixel_size: display pixel size
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again).  Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size.  When it reaches the watermark level, it'll start
 * fetching FIFO-line-sized chunks from memory until the FIFO fills past
 * the watermark point.  If the FIFO drains completely, a FIFO underrun
 * will occur, and a display engine hang could result.
 */
static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
                                        const struct intel_watermark_params *wm,
                                        int fifo_size,
                                        int pixel_size,
                                        unsigned long latency_ns)
{
        long entries_required, wm_size;

        /*
         * Note: we need to make sure we don't overflow for various clock &
         * latency values.
         * Clocks go from a few thousand to several hundred thousand.
         * Latency is usually a few thousand.
         */
        entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
                1000;
        entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);

        DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);

        wm_size = fifo_size - (entries_required + wm->guard_size);

        DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);

        /* Don't promote wm_size to unsigned... */
        if (wm_size > (long)wm->max_wm)
                wm_size = wm->max_wm;
        if (wm_size <= 0)
                wm_size = wm->default_wm;

        /*
         * Bspec seems to indicate that the value shouldn't be lower than
         * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
         * Let's go for 8, which is the burst size, since certain platforms
         * already use a hardcoded 8 (which is what the spec says should be
         * done).
         */
        if (wm_size <= 8)
                wm_size = 8;

        return wm_size;
}
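
/*
 * Worked example (illustrative values, not from Bspec): clock_in_khz =
 * 100000, pixel_size = 4 and latency_ns = 5000 give entries_required =
 * (100000 / 1000) * 4 * 5000 / 1000 = 2000 bytes, i.e.
 * DIV_ROUND_UP(2000, 64) = 32 cachelines on a 64-byte cacheline platform;
 * with fifo_size = 96 and guard_size = 2 the watermark would then be
 * 96 - (32 + 2) = 62.
 */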

static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
{
        struct drm_crtc *crtc, *enabled = NULL;

        for_each_crtc(dev, crtc) {
                if (intel_crtc_active(crtc)) {
                        if (enabled)
                                return NULL;
                        enabled = crtc;
                }
        }

        return enabled;
}

static void pineview_update_wm(struct drm_crtc *unused_crtc)
{
        struct drm_device *dev = unused_crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
        const struct cxsr_latency *latency;
        u32 reg;
        unsigned long wm;

        latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3,
                                         dev_priv->fsb_freq, dev_priv->mem_freq);
        if (!latency) {
                DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
                intel_set_memory_cxsr(dev_priv, false);
                return;
        }

        crtc = single_enabled_crtc(dev);
        if (crtc) {
                const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
                int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
                int clock = adjusted_mode->crtc_clock;

                /* Display SR */
                wm = intel_calculate_wm(clock, &pineview_display_wm,
                                        pineview_display_wm.fifo_size,
                                        pixel_size, latency->display_sr);
                reg = I915_READ(DSPFW1);
                reg &= ~DSPFW_SR_MASK;
                reg |= FW_WM(wm, SR);
                I915_WRITE(DSPFW1, reg);
                DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);

                /* cursor SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_wm,
                                        pineview_display_wm.fifo_size,
                                        pixel_size, latency->cursor_sr);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_CURSOR_SR_MASK;
                reg |= FW_WM(wm, CURSOR_SR);
                I915_WRITE(DSPFW3, reg);

                /* Display HPLL off SR */
                wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
                                        pineview_display_hplloff_wm.fifo_size,
                                        pixel_size, latency->display_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_SR_MASK;
                reg |= FW_WM(wm, HPLL_SR);
                I915_WRITE(DSPFW3, reg);

                /* cursor HPLL off SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
                                        pineview_display_hplloff_wm.fifo_size,
                                        pixel_size, latency->cursor_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_CURSOR_MASK;
                reg |= FW_WM(wm, HPLL_CURSOR);
                I915_WRITE(DSPFW3, reg);
                DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);

                intel_set_memory_cxsr(dev_priv, true);
        } else {
                intel_set_memory_cxsr(dev_priv, false);
        }
}

static bool g4x_compute_wm0(struct drm_device *dev,
                            int plane,
                            const struct intel_watermark_params *display,
                            int display_latency_ns,
                            const struct intel_watermark_params *cursor,
                            int cursor_latency_ns,
                            int *plane_wm,
                            int *cursor_wm)
{
        struct drm_crtc *crtc;
        const struct drm_display_mode *adjusted_mode;
        int htotal, hdisplay, clock, pixel_size;
        int line_time_us, line_count;
        int entries, tlb_miss;

        crtc = intel_get_crtc_for_plane(dev, plane);
        if (!intel_crtc_active(crtc)) {
                *cursor_wm = cursor->guard_size;
                *plane_wm = display->guard_size;
                return false;
        }

        adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
        clock = adjusted_mode->crtc_clock;
        htotal = adjusted_mode->crtc_htotal;
        hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
        pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;

        /* Use the small buffer method to calculate plane watermark */
        entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
        tlb_miss = display->fifo_size * display->cacheline_size - hdisplay * 8;
        if (tlb_miss > 0)
                entries += tlb_miss;
        entries = DIV_ROUND_UP(entries, display->cacheline_size);
        *plane_wm = entries + display->guard_size;
        if (*plane_wm > (int)display->max_wm)
                *plane_wm = display->max_wm;

        /* Use the large buffer method to calculate cursor watermark */
        line_time_us = max(htotal * 1000 / clock, 1);
        line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
        entries = line_count * crtc->cursor->state->crtc_w * pixel_size;
        tlb_miss = cursor->fifo_size * cursor->cacheline_size - hdisplay * 8;
        if (tlb_miss > 0)
                entries += tlb_miss;
        entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
        *cursor_wm = entries + cursor->guard_size;
        if (*cursor_wm > (int)cursor->max_wm)
                *cursor_wm = (int)cursor->max_wm;

        return true;
}
/*
 * Check the wm result.
 *
 * If any calculated watermark value is larger than the maximum value that
 * can be programmed into the associated watermark register, that watermark
 * must be disabled.
 */
static bool g4x_check_srwm(struct drm_device *dev,
                           int display_wm, int cursor_wm,
                           const struct intel_watermark_params *display,
                           const struct intel_watermark_params *cursor)
{
        DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
                      display_wm, cursor_wm);

        if (display_wm > display->max_wm) {
                DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n",
                              display_wm, display->max_wm);
                return false;
        }

        if (cursor_wm > cursor->max_wm) {
                DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n",
                              cursor_wm, cursor->max_wm);
                return false;
        }

        if (!(display_wm || cursor_wm)) {
                DRM_DEBUG_KMS("SR latency is 0, disabling\n");
                return false;
        }

        return true;
}

static bool g4x_compute_srwm(struct drm_device *dev,
                             int plane,
                             int latency_ns,
                             const struct intel_watermark_params *display,
                             const struct intel_watermark_params *cursor,
                             int *display_wm, int *cursor_wm)
{
        struct drm_crtc *crtc;
        const struct drm_display_mode *adjusted_mode;
        int hdisplay, htotal, pixel_size, clock;
        unsigned long line_time_us;
        int line_count, line_size;
        int small, large;
        int entries;

        if (!latency_ns) {
                *display_wm = *cursor_wm = 0;
                return false;
        }

        crtc = intel_get_crtc_for_plane(dev, plane);
        adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
        clock = adjusted_mode->crtc_clock;
        htotal = adjusted_mode->crtc_htotal;
        hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
        pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;

        line_time_us = max(htotal * 1000 / clock, 1);
        line_count = (latency_ns / line_time_us + 1000) / 1000;
        line_size = hdisplay * pixel_size;

        /* Use the minimum of the small and large buffer method for primary */
        small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
        large = line_count * line_size;

        entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
        *display_wm = entries + display->guard_size;

        /* calculate the self-refresh watermark for display cursor */
        entries = line_count * pixel_size * crtc->cursor->state->crtc_w;
        entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
        *cursor_wm = entries + cursor->guard_size;

        return g4x_check_srwm(dev,
                              *display_wm, *cursor_wm,
                              display, cursor);
}

#define FW_WM_VLV(value, plane) \
        (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)

static void vlv_write_wm_values(struct intel_crtc *crtc,
                                const struct vlv_wm_values *wm)
{
        struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
        enum i915_pipe pipe = crtc->pipe;

        I915_WRITE(VLV_DDL(pipe),
                   (wm->ddl[pipe].cursor << DDL_CURSOR_SHIFT) |
                   (wm->ddl[pipe].sprite[1] << DDL_SPRITE_SHIFT(1)) |
                   (wm->ddl[pipe].sprite[0] << DDL_SPRITE_SHIFT(0)) |
                   (wm->ddl[pipe].primary << DDL_PLANE_SHIFT));

        I915_WRITE(DSPFW1,
                   FW_WM(wm->sr.plane, SR) |
                   FW_WM(wm->pipe[PIPE_B].cursor, CURSORB) |
                   FW_WM_VLV(wm->pipe[PIPE_B].primary, PLANEB) |
                   FW_WM_VLV(wm->pipe[PIPE_A].primary, PLANEA));
        I915_WRITE(DSPFW2,
                   FW_WM_VLV(wm->pipe[PIPE_A].sprite[1], SPRITEB) |
                   FW_WM(wm->pipe[PIPE_A].cursor, CURSORA) |
                   FW_WM_VLV(wm->pipe[PIPE_A].sprite[0], SPRITEA));
        I915_WRITE(DSPFW3,
                   FW_WM(wm->sr.cursor, CURSOR_SR));

        if (IS_CHERRYVIEW(dev_priv)) {
                I915_WRITE(DSPFW7_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
                I915_WRITE(DSPFW8_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_C].sprite[1], SPRITEF) |
                           FW_WM_VLV(wm->pipe[PIPE_C].sprite[0], SPRITEE));
                I915_WRITE(DSPFW9_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_C].primary, PLANEC) |
                           FW_WM(wm->pipe[PIPE_C].cursor, CURSORC));
                I915_WRITE(DSPHOWM,
                           FW_WM(wm->sr.plane >> 9, SR_HI) |
                           FW_WM(wm->pipe[PIPE_C].sprite[1] >> 8, SPRITEF_HI) |
                           FW_WM(wm->pipe[PIPE_C].sprite[0] >> 8, SPRITEE_HI) |
                           FW_WM(wm->pipe[PIPE_C].primary >> 8, PLANEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
                           FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
        } else {
                I915_WRITE(DSPFW7,
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
                I915_WRITE(DSPHOWM,
                           FW_WM(wm->sr.plane >> 9, SR_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
                           FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
        }

        /* zero (unused) WM1 watermarks */
        I915_WRITE(DSPFW4, 0);
        I915_WRITE(DSPFW5, 0);
        I915_WRITE(DSPFW6, 0);
        I915_WRITE(DSPHOWM1, 0);

        POSTING_READ(DSPFW1);
}

#undef FW_WM_VLV

enum vlv_wm_level {
        VLV_WM_LEVEL_PM2,
        VLV_WM_LEVEL_PM5,
        VLV_WM_LEVEL_DDR_DVFS,
};

/* latency must be in 0.1us units. */
static unsigned int vlv_wm_method2(unsigned int pixel_rate,
                                   unsigned int pipe_htotal,
                                   unsigned int horiz_pixels,
                                   unsigned int bytes_per_pixel,
                                   unsigned int latency)
{
        unsigned int ret;

        ret = (latency * pixel_rate) / (pipe_htotal * 10000);
        ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
        ret = DIV_ROUND_UP(ret, 64);

        return ret;
}
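
/*
 * Illustrative example: pixel_rate = 200000 (kHz), pipe_htotal = 2200,
 * horiz_pixels = 1920, bytes_per_pixel = 4 and latency = 30 (i.e. 3 us)
 * give (30 * 200000) / (2200 * 10000) = 0 whole lines of latency, so
 * (0 + 1) * 1920 * 4 = 7680 bytes, or DIV_ROUND_UP(7680, 64) = 120
 * 64-byte FIFO entries.
 */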

static void vlv_setup_wm_latency(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        /* all latencies in usec */
        dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;

        dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;

        if (IS_CHERRYVIEW(dev_priv)) {
                dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
                dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;

                dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
        }
}

static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
                                     struct intel_crtc *crtc,
                                     const struct intel_plane_state *state,
                                     int level)
{
        struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
        int clock, htotal, pixel_size, width, wm;

        if (dev_priv->wm.pri_latency[level] == 0)
                return USHRT_MAX;

        if (!state->visible)
                return 0;

        pixel_size = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
        clock = crtc->config->base.adjusted_mode.crtc_clock;
        htotal = crtc->config->base.adjusted_mode.crtc_htotal;
        width = crtc->config->pipe_src_w;
        if (WARN_ON(htotal == 0))
                htotal = 1;

        if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
                /*
                 * FIXME the formula gives values that are
                 * too big for the cursor FIFO, and hence we
                 * would never be able to use cursors. For
                 * now just hardcode the watermark.
                 */
                wm = 63;
        } else {
                wm = vlv_wm_method2(clock, htotal, width, pixel_size,
                                    dev_priv->wm.pri_latency[level] * 10);
        }

        return min_t(int, wm, USHRT_MAX);
}

static void vlv_compute_fifo(struct intel_crtc *crtc)
{
        struct drm_device *dev = crtc->base.dev;
        struct vlv_wm_state *wm_state = &crtc->wm_state;
        struct intel_plane *plane;
        unsigned int total_rate = 0;
        const int fifo_size = 512 - 1;
        int fifo_extra, fifo_left = fifo_size;

        for_each_intel_plane_on_crtc(dev, crtc, plane) {
                struct intel_plane_state *state =
                        to_intel_plane_state(plane->base.state);

                if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
                        continue;

                if (state->visible) {
                        wm_state->num_active_planes++;
                        total_rate += drm_format_plane_cpp(state->base.fb->pixel_format, 0);
                }
        }

        for_each_intel_plane_on_crtc(dev, crtc, plane) {
                struct intel_plane_state *state =
                        to_intel_plane_state(plane->base.state);
                unsigned int rate;

                if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
                        plane->wm.fifo_size = 63;
                        continue;
                }

                if (!state->visible) {
                        plane->wm.fifo_size = 0;
                        continue;
                }

                rate = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
                plane->wm.fifo_size = fifo_size * rate / total_rate;
                fifo_left -= plane->wm.fifo_size;
        }

        fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1);

        /* spread the remainder evenly */
        for_each_intel_plane_on_crtc(dev, crtc, plane) {
                int plane_extra;

                if (fifo_left == 0)
                        break;

                if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
                        continue;

                /* give it all to the first plane if none are active */
                if (plane->wm.fifo_size == 0 &&
                    wm_state->num_active_planes)
                        continue;

                plane_extra = min(fifo_extra, fifo_left);
                plane->wm.fifo_size += plane_extra;
                fifo_left -= plane_extra;
        }

        WARN_ON(fifo_left != 0);
}
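
/*
 * Example split (hypothetical numbers): two visible non-cursor planes
 * with 4 and 2 bytes per pixel share the 511-entry FIFO as
 * 511 * 4 / 6 = 340 and 511 * 2 / 6 = 170 entries; the single leftover
 * entry is then handed to the first plane by the remainder loop above,
 * giving a 341 / 170 split.
 */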
1030
1031 static void vlv_invert_wms(struct intel_crtc *crtc)
1032 {
1033         struct vlv_wm_state *wm_state = &crtc->wm_state;
1034         int level;
1035
1036         for (level = 0; level < wm_state->num_levels; level++) {
1037                 struct drm_device *dev = crtc->base.dev;
1038                 const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
1039                 struct intel_plane *plane;
1040
1041                 wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
1042                 wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;
1043
1044                 for_each_intel_plane_on_crtc(dev, crtc, plane) {
1045                         switch (plane->base.type) {
1046                                 int sprite;
1047                         case DRM_PLANE_TYPE_CURSOR:
1048                                 wm_state->wm[level].cursor = plane->wm.fifo_size -
1049                                         wm_state->wm[level].cursor;
1050                                 break;
1051                         case DRM_PLANE_TYPE_PRIMARY:
1052                                 wm_state->wm[level].primary = plane->wm.fifo_size -
1053                                         wm_state->wm[level].primary;
1054                                 break;
1055                         case DRM_PLANE_TYPE_OVERLAY:
1056                                 sprite = plane->plane;
1057                                 wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
1058                                         wm_state->wm[level].sprite[sprite];
1059                                 break;
1060                         }
1061                 }
1062         }
1063 }
1064
1065 static void vlv_compute_wm(struct intel_crtc *crtc)
1066 {
1067         struct drm_device *dev = crtc->base.dev;
1068         struct vlv_wm_state *wm_state = &crtc->wm_state;
1069         struct intel_plane *plane;
1070         int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
1071         int level;
1072
1073         memset(wm_state, 0, sizeof(*wm_state));
1074
1075         wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed;
1076         wm_state->num_levels = to_i915(dev)->wm.max_level + 1;
1077
1078         wm_state->num_active_planes = 0;
1079
1080         vlv_compute_fifo(crtc);
1081
1082         if (wm_state->num_active_planes != 1)
1083                 wm_state->cxsr = false;
1084
1085         if (wm_state->cxsr) {
1086                 for (level = 0; level < wm_state->num_levels; level++) {
1087                         wm_state->sr[level].plane = sr_fifo_size;
1088                         wm_state->sr[level].cursor = 63;
1089                 }
1090         }
1091
1092         for_each_intel_plane_on_crtc(dev, crtc, plane) {
1093                 struct intel_plane_state *state =
1094                         to_intel_plane_state(plane->base.state);
1095
1096                 if (!state->visible)
1097                         continue;
1098
1099                 /* normal watermarks */
1100                 for (level = 0; level < wm_state->num_levels; level++) {
1101                         int wm = vlv_compute_wm_level(plane, crtc, state, level);
1102                         int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;
1103
1104                         /* hack */
1105                         if (WARN_ON(level == 0 && wm > max_wm))
1106                                 wm = max_wm;
1107
1108                         if (wm > plane->wm.fifo_size)
1109                                 break;
1110
1111                         switch (plane->base.type) {
1112                                 int sprite;
1113                         case DRM_PLANE_TYPE_CURSOR:
1114                                 wm_state->wm[level].cursor = wm;
1115                                 break;
1116                         case DRM_PLANE_TYPE_PRIMARY:
1117                                 wm_state->wm[level].primary = wm;
1118                                 break;
1119                         case DRM_PLANE_TYPE_OVERLAY:
1120                                 sprite = plane->plane;
1121                                 wm_state->wm[level].sprite[sprite] = wm;
1122                                 break;
1123                         }
1124                 }
1125
1126                 wm_state->num_levels = level;
1127
1128                 if (!wm_state->cxsr)
1129                         continue;
1130
1131                 /* maxfifo watermarks */
1132                 switch (plane->base.type) {
1133                         int sprite, level;
1134                 case DRM_PLANE_TYPE_CURSOR:
1135                         for (level = 0; level < wm_state->num_levels; level++)
1136                                 wm_state->sr[level].cursor =
1137                                         wm_state->wm[level].cursor;
1138                         break;
1139                 case DRM_PLANE_TYPE_PRIMARY:
1140                         for (level = 0; level < wm_state->num_levels; level++)
1141                                 wm_state->sr[level].plane =
1142                                         min(wm_state->sr[level].plane,
1143                                             wm_state->wm[level].primary);
1144                         break;
1145                 case DRM_PLANE_TYPE_OVERLAY:
1146                         sprite = plane->plane;
1147                         for (level = 0; level < wm_state->num_levels; level++)
1148                                 wm_state->sr[level].plane =
1149                                         min(wm_state->sr[level].plane,
1150                                             wm_state->wm[level].sprite[sprite]);
1151                         break;
1152                 }
1153         }
1154
1155         /* clear any (partially) filled invalid levels */
1156         for (level = wm_state->num_levels; level < to_i915(dev)->wm.max_level + 1; level++) {
1157                 memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
1158                 memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
1159         }
1160
1161         vlv_invert_wms(crtc);
1162 }
1163
1164 #define VLV_FIFO(plane, value) \
1165         (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
1166
1167 static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc)
1168 {
1169         struct drm_device *dev = crtc->base.dev;
1170         struct drm_i915_private *dev_priv = to_i915(dev);
1171         struct intel_plane *plane;
1172         int sprite0_start = 0, sprite1_start = 0, fifo_size = 0;
1173
1174         for_each_intel_plane_on_crtc(dev, crtc, plane) {
1175                 if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
1176                         WARN_ON(plane->wm.fifo_size != 63);
1177                         continue;
1178                 }
1179
1180                 if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
1181                         sprite0_start = plane->wm.fifo_size;
1182                 else if (plane->plane == 0)
1183                         sprite1_start = sprite0_start + plane->wm.fifo_size;
1184                 else
1185                         fifo_size = sprite1_start + plane->wm.fifo_size;
1186         }
1187
1188         WARN_ON(fifo_size != 512 - 1);
1189
1190         DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n",
1191                       pipe_name(crtc->pipe), sprite0_start,
1192                       sprite1_start, fifo_size);
1193
1194         switch (crtc->pipe) {
1195                 uint32_t dsparb, dsparb2, dsparb3;
1196         case PIPE_A:
1197                 dsparb = I915_READ(DSPARB);
1198                 dsparb2 = I915_READ(DSPARB2);
1199
1200                 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1201                             VLV_FIFO(SPRITEB, 0xff));
1202                 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1203                            VLV_FIFO(SPRITEB, sprite1_start));
1204
1205                 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1206                              VLV_FIFO(SPRITEB_HI, 0x1));
1207                 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1208                            VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1209
1210                 I915_WRITE(DSPARB, dsparb);
1211                 I915_WRITE(DSPARB2, dsparb2);
1212                 break;
1213         case PIPE_B:
1214                 dsparb = I915_READ(DSPARB);
1215                 dsparb2 = I915_READ(DSPARB2);
1216
1217                 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
1218                             VLV_FIFO(SPRITED, 0xff));
1219                 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
1220                            VLV_FIFO(SPRITED, sprite1_start));
1221
1222                 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
1223                              VLV_FIFO(SPRITED_HI, 0xff));
1224                 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
1225                            VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
1226
1227                 I915_WRITE(DSPARB, dsparb);
1228                 I915_WRITE(DSPARB2, dsparb2);
1229                 break;
1230         case PIPE_C:
1231                 dsparb3 = I915_READ(DSPARB3);
1232                 dsparb2 = I915_READ(DSPARB2);
1233
1234                 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
1235                              VLV_FIFO(SPRITEF, 0xff));
1236                 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
1237                             VLV_FIFO(SPRITEF, sprite1_start));
1238
1239                 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
1240                              VLV_FIFO(SPRITEF_HI, 0xff));
1241                 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
1242                            VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
1243
1244                 I915_WRITE(DSPARB3, dsparb3);
1245                 I915_WRITE(DSPARB2, dsparb2);
1246                 break;
1247         default:
1248                 break;
1249         }
1250 }
1251
1252 #undef VLV_FIFO
1253
1254 static void vlv_merge_wm(struct drm_device *dev,
1255                          struct vlv_wm_values *wm)
1256 {
1257         struct intel_crtc *crtc;
1258         int num_active_crtcs = 0;
1259
1260         wm->level = to_i915(dev)->wm.max_level;
1261         wm->cxsr = true;
1262
1263         for_each_intel_crtc(dev, crtc) {
1264                 const struct vlv_wm_state *wm_state = &crtc->wm_state;
1265
1266                 if (!crtc->active)
1267                         continue;
1268
1269                 if (!wm_state->cxsr)
1270                         wm->cxsr = false;
1271
1272                 num_active_crtcs++;
1273                 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
1274         }
1275
1276         if (num_active_crtcs != 1)
1277                 wm->cxsr = false;
1278
1279         if (num_active_crtcs > 1)
1280                 wm->level = VLV_WM_LEVEL_PM2;
1281
1282         for_each_intel_crtc(dev, crtc) {
1283                 struct vlv_wm_state *wm_state = &crtc->wm_state;
1284                 enum i915_pipe pipe = crtc->pipe;
1285
1286                 if (!crtc->active)
1287                         continue;
1288
1289                 wm->pipe[pipe] = wm_state->wm[wm->level];
1290                 if (wm->cxsr)
1291                         wm->sr = wm_state->sr[wm->level];
1292
1293                 wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
1294                 wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
1295                 wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
1296                 wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;
1297         }
1298 }
1299
1300 static void vlv_update_wm(struct drm_crtc *crtc)
1301 {
1302         struct drm_device *dev = crtc->dev;
1303         struct drm_i915_private *dev_priv = dev->dev_private;
1304         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1305         enum i915_pipe pipe = intel_crtc->pipe;
1306         struct vlv_wm_values wm = {};
1307
1308         vlv_compute_wm(intel_crtc);
1309         vlv_merge_wm(dev, &wm);
1310
1311         if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0) {
1312                 /* FIXME should be part of crtc atomic commit */
1313                 vlv_pipe_set_fifo_size(intel_crtc);
1314                 return;
1315         }
1316
1317         if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
1318             dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
1319                 chv_set_memory_dvfs(dev_priv, false);
1320
1321         if (wm.level < VLV_WM_LEVEL_PM5 &&
1322             dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
1323                 chv_set_memory_pm5(dev_priv, false);
1324
1325         if (!wm.cxsr && dev_priv->wm.vlv.cxsr)
1326                 intel_set_memory_cxsr(dev_priv, false);
1327
1328         /* FIXME should be part of crtc atomic commit */
1329         vlv_pipe_set_fifo_size(intel_crtc);
1330
1331         vlv_write_wm_values(intel_crtc, &wm);
1332
1333         DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
1334                       "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
1335                       pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
1336                       wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
1337                       wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);
1338
1339         if (wm.cxsr && !dev_priv->wm.vlv.cxsr)
1340                 intel_set_memory_cxsr(dev_priv, true);
1341
1342         if (wm.level >= VLV_WM_LEVEL_PM5 &&
1343             dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
1344                 chv_set_memory_pm5(dev_priv, true);
1345
1346         if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
1347             dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
1348                 chv_set_memory_dvfs(dev_priv, true);
1349
1350         dev_priv->wm.vlv = wm;
1351 }
1352
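/* a pipe mask with exactly one bit set means a single plane is enabled */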
1353 #define single_plane_enabled(mask) is_power_of_2(mask)
1354
1355 static void g4x_update_wm(struct drm_crtc *crtc)
1356 {
1357         struct drm_device *dev = crtc->dev;
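        /* self-refresh has much higher latency */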
1358         static const int sr_latency_ns = 12000;
1359         struct drm_i915_private *dev_priv = dev->dev_private;
1360         int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
1361         int plane_sr, cursor_sr;
1362         unsigned int enabled = 0;
1363         bool cxsr_enabled;
1364
1365         if (g4x_compute_wm0(dev, PIPE_A,
1366                             &g4x_wm_info, pessimal_latency_ns,
1367                             &g4x_cursor_wm_info, pessimal_latency_ns,
1368                             &planea_wm, &cursora_wm))
1369                 enabled |= 1 << PIPE_A;
1370
1371         if (g4x_compute_wm0(dev, PIPE_B,
1372                             &g4x_wm_info, pessimal_latency_ns,
1373                             &g4x_cursor_wm_info, pessimal_latency_ns,
1374                             &planeb_wm, &cursorb_wm))
1375                 enabled |= 1 << PIPE_B;
1376
1377         if (single_plane_enabled(enabled) &&
1378             g4x_compute_srwm(dev, ffs(enabled) - 1,
1379                              sr_latency_ns,
1380                              &g4x_wm_info,
1381                              &g4x_cursor_wm_info,
1382                              &plane_sr, &cursor_sr)) {
1383                 cxsr_enabled = true;
1384         } else {
1385                 cxsr_enabled = false;
1386                 intel_set_memory_cxsr(dev_priv, false);
1387                 plane_sr = cursor_sr = 0;
1388         }
1389
1390         DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
1391                       "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
1392                       planea_wm, cursora_wm,
1393                       planeb_wm, cursorb_wm,
1394                       plane_sr, cursor_sr);
1395
1396         I915_WRITE(DSPFW1,
1397                    FW_WM(plane_sr, SR) |
1398                    FW_WM(cursorb_wm, CURSORB) |
1399                    FW_WM(planeb_wm, PLANEB) |
1400                    FW_WM(planea_wm, PLANEA));
1401         I915_WRITE(DSPFW2,
1402                    (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
1403                    FW_WM(cursora_wm, CURSORA));
1404         /* HPLL off in SR has some issues on G4x... disable it */
1405         I915_WRITE(DSPFW3,
1406                    (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
1407                    FW_WM(cursor_sr, CURSOR_SR));
1408
1409         if (cxsr_enabled)
1410                 intel_set_memory_cxsr(dev_priv, true);
1411 }
1412
1413 static void i965_update_wm(struct drm_crtc *unused_crtc)
1414 {
1415         struct drm_device *dev = unused_crtc->dev;
1416         struct drm_i915_private *dev_priv = dev->dev_private;
1417         struct drm_crtc *crtc;
1418         int srwm = 1;
1419         int cursor_sr = 16;
1420         bool cxsr_enabled;
1421
1422         /* Calculate SR entries for single-plane configs */
1423         crtc = single_enabled_crtc(dev);
1424         if (crtc) {
1425                 /* self-refresh has much higher latency */
1426                 static const int sr_latency_ns = 12000;
1427                 const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1428                 int clock = adjusted_mode->crtc_clock;
1429                 int htotal = adjusted_mode->crtc_htotal;
1430                 int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
1431                 int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
1432                 unsigned long line_time_us;
1433                 int entries;
1434
1435                 line_time_us = max(htotal * 1000 / clock, 1);
1436
1437                 /* Use ns/us then divide to preserve precision */
1438                 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1439                         pixel_size * hdisplay;
1440                 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
1441                 srwm = I965_FIFO_SIZE - entries;
1442                 if (srwm < 0)
1443                         srwm = 1;
1444                 srwm &= 0x1ff;
1445                 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
1446                               entries, srwm);
1447
1448                 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1449                         pixel_size * crtc->cursor->state->crtc_w;
1450                 entries = DIV_ROUND_UP(entries,
1451                                           i965_cursor_wm_info.cacheline_size);
1452                 cursor_sr = i965_cursor_wm_info.fifo_size -
1453                         (entries + i965_cursor_wm_info.guard_size);
1454
1455                 if (cursor_sr > i965_cursor_wm_info.max_wm)
1456                         cursor_sr = i965_cursor_wm_info.max_wm;
1457
1458                 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
1459                               "cursor %d\n", srwm, cursor_sr);
1460
1461                 cxsr_enabled = true;
1462         } else {
1463                 cxsr_enabled = false;
1464                 /* Turn off self refresh if both pipes are enabled */
1465                 intel_set_memory_cxsr(dev_priv, false);
1466         }
1467
1468         DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
1469                       srwm);
1470
1471         /* 965 has limitations... */
1472         I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
1473                    FW_WM(8, CURSORB) |
1474                    FW_WM(8, PLANEB) |
1475                    FW_WM(8, PLANEA));
1476         I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
1477                    FW_WM(8, PLANEC_OLD));
1478         /* update cursor SR watermark */
1479         I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
1480
1481         if (cxsr_enabled)
1482                 intel_set_memory_cxsr(dev_priv, true);
1483 }
1484
1485 #undef FW_WM
1486
1487 static void i9xx_update_wm(struct drm_crtc *unused_crtc)
1488 {
1489         struct drm_device *dev = unused_crtc->dev;
1490         struct drm_i915_private *dev_priv = dev->dev_private;
1491         const struct intel_watermark_params *wm_info;
1492         uint32_t fwater_lo;
1493         uint32_t fwater_hi;
1494         int cwm, srwm = 1;
1495         int fifo_size;
1496         int planea_wm, planeb_wm;
1497         struct drm_crtc *crtc, *enabled = NULL;
1498
1499         if (IS_I945GM(dev))
1500                 wm_info = &i945_wm_info;
1501         else if (!IS_GEN2(dev))
1502                 wm_info = &i915_wm_info;
1503         else
1504                 wm_info = &i830_a_wm_info;
1505
1506         fifo_size = dev_priv->display.get_fifo_size(dev, 0);
1507         crtc = intel_get_crtc_for_plane(dev, 0);
1508         if (intel_crtc_active(crtc)) {
1509                 const struct drm_display_mode *adjusted_mode;
1510                 int cpp = crtc->primary->state->fb->bits_per_pixel / 8;
1511                 if (IS_GEN2(dev))
1512                         cpp = 4;
1513
1514                 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1515                 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1516                                                wm_info, fifo_size, cpp,
1517                                                pessimal_latency_ns);
1518                 enabled = crtc;
1519         } else {
1520                 planea_wm = fifo_size - wm_info->guard_size;
1521                 if (planea_wm > (long)wm_info->max_wm)
1522                         planea_wm = wm_info->max_wm;
1523         }
1524
1525         if (IS_GEN2(dev))
1526                 wm_info = &i830_bc_wm_info;
1527
1528         fifo_size = dev_priv->display.get_fifo_size(dev, 1);
1529         crtc = intel_get_crtc_for_plane(dev, 1);
1530         if (intel_crtc_active(crtc)) {
1531                 const struct drm_display_mode *adjusted_mode;
1532                 int cpp = crtc->primary->state->fb->bits_per_pixel / 8;
1533                 if (IS_GEN2(dev))
1534                         cpp = 4;
1535
1536                 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1537                 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1538                                                wm_info, fifo_size, cpp,
1539                                                pessimal_latency_ns);
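                /* self-refresh needs a single enabled pipe; a second one voids it */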
1540                 if (enabled == NULL)
1541                         enabled = crtc;
1542                 else
1543                         enabled = NULL;
1544         } else {
1545                 planeb_wm = fifo_size - wm_info->guard_size;
1546                 if (planeb_wm > (long)wm_info->max_wm)
1547                         planeb_wm = wm_info->max_wm;
1548         }
1549
1550         DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
1551
1552         if (IS_I915GM(dev) && enabled) {
1553                 struct drm_i915_gem_object *obj;
1554
1555                 obj = intel_fb_obj(enabled->primary->state->fb);
1556
1557                 /* self-refresh seems busted with untiled */
1558                 if (obj->tiling_mode == I915_TILING_NONE)
1559                         enabled = NULL;
1560         }
1561
1562         /*
1563          * Overlay gets an aggressive default since video jitter is bad.
1564          */
1565         cwm = 2;
1566
1567         /* Play safe and disable self-refresh before adjusting watermarks. */
1568         intel_set_memory_cxsr(dev_priv, false);
1569
1570         /* Calculate SR entries for single-plane configs */
1571         if (HAS_FW_BLC(dev) && enabled) {
1572                 /* self-refresh has much higher latency */
1573                 static const int sr_latency_ns = 6000;
1574                 const struct drm_display_mode *adjusted_mode = &to_intel_crtc(enabled)->config->base.adjusted_mode;
1575                 int clock = adjusted_mode->crtc_clock;
1576                 int htotal = adjusted_mode->crtc_htotal;
1577                 int hdisplay = to_intel_crtc(enabled)->config->pipe_src_w;
1578                 int pixel_size = enabled->primary->state->fb->bits_per_pixel / 8;
1579                 unsigned long line_time_us;
1580                 int entries;
1581
1582                 line_time_us = max(htotal * 1000 / clock, 1);
1583
1584                 /* Use ns/us then divide to preserve precision */
1585                 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1586                         pixel_size * hdisplay;
1587                 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
1588                 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
1589                 srwm = wm_info->fifo_size - entries;
1590                 if (srwm < 0)
1591                         srwm = 1;
1592
1593                 if (IS_I945G(dev) || IS_I945GM(dev))
1594                         I915_WRITE(FW_BLC_SELF,
1595                                    FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
1596                 else if (IS_I915GM(dev))
1597                         I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
1598         }
1599
1600         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
1601                       planea_wm, planeb_wm, cwm, srwm);
1602
1603         fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
1604         fwater_hi = (cwm & 0x1f);
1605
1606         /* Set request length to 8 cachelines per fetch */
1607         fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
1608         fwater_hi = fwater_hi | (1 << 8);
1609
1610         I915_WRITE(FW_BLC, fwater_lo);
1611         I915_WRITE(FW_BLC2, fwater_hi);
1612
1613         if (enabled)
1614                 intel_set_memory_cxsr(dev_priv, true);
1615 }
1616
1617 static void i845_update_wm(struct drm_crtc *unused_crtc)
1618 {
1619         struct drm_device *dev = unused_crtc->dev;
1620         struct drm_i915_private *dev_priv = dev->dev_private;
1621         struct drm_crtc *crtc;
1622         const struct drm_display_mode *adjusted_mode;
1623         uint32_t fwater_lo;
1624         int planea_wm;
1625
1626         crtc = single_enabled_crtc(dev);
1627         if (crtc == NULL)
1628                 return;
1629
1630         adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1631         planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1632                                        &i845_wm_info,
1633                                        dev_priv->display.get_fifo_size(dev, 0),
1634                                        4, pessimal_latency_ns);
1635         fwater_lo = I915_READ(FW_BLC) & ~0xfff;
1636         fwater_lo |= (3<<8) | planea_wm;
1637
1638         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
1639
1640         I915_WRITE(FW_BLC, fwater_lo);
1641 }
1642
1643 uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
1644 {
1645         uint32_t pixel_rate;
1646
1647         pixel_rate = pipe_config->base.adjusted_mode.crtc_clock;
1648
1649         /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
1650          * adjust the pixel_rate here. */
1651
1652         if (pipe_config->pch_pfit.enabled) {
1653                 uint64_t pipe_w, pipe_h, pfit_w, pfit_h;
1654                 uint32_t pfit_size = pipe_config->pch_pfit.size;
1655
1656                 pipe_w = pipe_config->pipe_src_w;
1657                 pipe_h = pipe_config->pipe_src_h;
1658
1659                 pfit_w = (pfit_size >> 16) & 0xFFFF;
1660                 pfit_h = pfit_size & 0xFFFF;
1661                 if (pipe_w < pfit_w)
1662                         pipe_w = pfit_w;
1663                 if (pipe_h < pfit_h)
1664                         pipe_h = pfit_h;
1665
1666                 pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h,
1667                                      pfit_w * pfit_h);
1668         }
1669
1670         return pixel_rate;
1671 }
1672
1673 /* latency must be in 0.1us units. */
1674 static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
1675                                uint32_t latency)
1676 {
1677         uint64_t ret;
1678
1679         if (WARN(latency == 0, "Latency value missing\n"))
1680                 return UINT_MAX;
1681
1682         ret = (uint64_t) pixel_rate * bytes_per_pixel * latency;
1683         ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;
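        /*
         * Illustrative numbers (not from any spec): a 148500 kHz pixel
         * clock, 4 bytes per pixel and a 12us latency (120 in 0.1us
         * units) give 148500 * 4 * 120 / (64 * 10000) = 111.375,
         * rounded up to 112, plus 2 -> 114 64-byte FIFO entries.
         */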
1684
1685         return ret;
1686 }
1687
1688 /* latency must be in 0.1us units. */
1689 static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
1690                                uint32_t horiz_pixels, uint8_t bytes_per_pixel,
1691                                uint32_t latency)
1692 {
1693         uint32_t ret;
1694
1695         if (WARN(latency == 0, "Latency value missing\n"))
1696                 return UINT_MAX;
1697
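        /*
         * latency * pixel_rate / 10000 is the number of pixels fetched
         * during the latency window (0.1us * kHz = 1e-4 pixels); dividing
         * by htotal turns that into whole display lines, each of which
         * costs horiz_pixels * bytes_per_pixel bytes of FIFO, counted in
         * 64-byte units.
         */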
1698         ret = (latency * pixel_rate) / (pipe_htotal * 10000);
1699         ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
1700         ret = DIV_ROUND_UP(ret, 64) + 2;
1701         return ret;
1702 }
1703
1704 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
1705                            uint8_t bytes_per_pixel)
1706 {
1707         return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
1708 }
1709
1710 struct skl_pipe_wm_parameters {
1711         bool active;
1712         uint32_t pipe_htotal;
1713         uint32_t pixel_rate; /* in kHz */
1714         struct intel_plane_wm_parameters plane[I915_MAX_PLANES];
1715 };
1716
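/* per-level upper bounds each watermark value may take, as limited by
 * FIFO space and register width */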
1717 struct ilk_wm_maximums {
1718         uint16_t pri;
1719         uint16_t spr;
1720         uint16_t cur;
1721         uint16_t fbc;
1722 };
1723
1724 /* used in computing the new watermark state */
1725 struct intel_wm_config {
1726         unsigned int num_pipes_active;
1727         bool sprites_enabled;
1728         bool sprites_scaled;
1729 };
1730
1731 /*
1732  * For both WM_PIPE and WM_LP.
1733  * mem_value must be in 0.1us units.
1734  */
1735 static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
1736                                    const struct intel_plane_state *pstate,
1737                                    uint32_t mem_value,
1738                                    bool is_lp)
1739 {
1740         int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
1741         uint32_t method1, method2;
1742
1743         if (!cstate->base.active || !pstate->visible)
1744                 return 0;
1745
1746         method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), bpp, mem_value);
1747
1748         if (!is_lp)
1749                 return method1;
1750
1751         method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
1752                                  cstate->base.adjusted_mode.crtc_htotal,
1753                                  drm_rect_width(&pstate->dst),
1754                                  bpp,
1755                                  mem_value);
1756
1757         return min(method1, method2);
1758 }
1759
1760 /*
1761  * For both WM_PIPE and WM_LP.
1762  * mem_value must be in 0.1us units.
1763  */
1764 static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
1765                                    const struct intel_plane_state *pstate,
1766                                    uint32_t mem_value)
1767 {
1768         int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
1769         uint32_t method1, method2;
1770
1771         if (!cstate->base.active || !pstate->visible)
1772                 return 0;
1773
1774         method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), bpp, mem_value);
1775         method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
1776                                  cstate->base.adjusted_mode.crtc_htotal,
1777                                  drm_rect_width(&pstate->dst),
1778                                  bpp,
1779                                  mem_value);
1780         return min(method1, method2);
1781 }
1782
1783 /*
1784  * For both WM_PIPE and WM_LP.
1785  * mem_value must be in 0.1us units.
1786  */
1787 static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
1788                                    const struct intel_plane_state *pstate,
1789                                    uint32_t mem_value)
1790 {
1791         int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
1792
1793         if (!cstate->base.active || !pstate->visible)
1794                 return 0;
1795
1796         return ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
1797                               cstate->base.adjusted_mode.crtc_htotal,
1798                               drm_rect_width(&pstate->dst),
1799                               bpp,
1800                               mem_value);
1801 }
1802
1803 /* Only for WM_LP. */
1804 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
1805                                    const struct intel_plane_state *pstate,
1806                                    uint32_t pri_val)
1807 {
1808         int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
1809
1810         if (!cstate->base.active || !pstate->visible)
1811                 return 0;
1812
1813         return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->dst), bpp);
1814 }
1815
1816 static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
1817 {
1818         if (INTEL_INFO(dev)->gen >= 8)
1819                 return 3072;
1820         else if (INTEL_INFO(dev)->gen >= 7)
1821                 return 768;
1822         else
1823                 return 512;
1824 }
1825
1826 static unsigned int ilk_plane_wm_reg_max(const struct drm_device *dev,
1827                                          int level, bool is_sprite)
1828 {
1829         if (INTEL_INFO(dev)->gen >= 8)
1830                 /* BDW primary/sprite plane watermarks */
1831                 return level == 0 ? 255 : 2047;
1832         else if (INTEL_INFO(dev)->gen >= 7)
1833                 /* IVB/HSW primary/sprite plane watermarks */
1834                 return level == 0 ? 127 : 1023;
1835         else if (!is_sprite)
1836                 /* ILK/SNB primary plane watermarks */
1837                 return level == 0 ? 127 : 511;
1838         else
1839                 /* ILK/SNB sprite plane watermarks */
1840                 return level == 0 ? 63 : 255;
1841 }
1842
1843 static unsigned int ilk_cursor_wm_reg_max(const struct drm_device *dev,
1844                                           int level)
1845 {
1846         if (INTEL_INFO(dev)->gen >= 7)
1847                 return level == 0 ? 63 : 255;
1848         else
1849                 return level == 0 ? 31 : 63;
1850 }
1851
1852 static unsigned int ilk_fbc_wm_reg_max(const struct drm_device *dev)
1853 {
1854         if (INTEL_INFO(dev)->gen >= 8)
1855                 return 31;
1856         else
1857                 return 15;
1858 }
1859
1860 /* Calculate the maximum primary/sprite plane watermark */
1861 static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
1862                                      int level,
1863                                      const struct intel_wm_config *config,
1864                                      enum intel_ddb_partitioning ddb_partitioning,
1865                                      bool is_sprite)
1866 {
1867         unsigned int fifo_size = ilk_display_fifo_size(dev);
1868
1869         /* if sprites aren't enabled, sprites get nothing */
1870         if (is_sprite && !config->sprites_enabled)
1871                 return 0;
1872
1873         /* HSW allows LP1+ watermarks even with multiple pipes */
1874         if (level == 0 || config->num_pipes_active > 1) {
1875                 fifo_size /= INTEL_INFO(dev)->num_pipes;
1876
1877                 /*
1878                  * For some reason the non self refresh
1879                  * FIFO size is only half of the self
1880                  * refresh FIFO size on ILK/SNB.
1881                  */
1882                 if (INTEL_INFO(dev)->gen <= 6)
1883                         fifo_size /= 2;
1884         }
1885
1886         if (config->sprites_enabled) {
1887                 /* level 0 is always calculated with 1:1 split */
1888                 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
1889                         if (is_sprite)
1890                                 fifo_size *= 5;
1891                         fifo_size /= 6;
1892                 } else {
1893                         fifo_size /= 2;
1894                 }
1895         }
1896
1897         /* clamp to max that the registers can hold */
1898         return min(fifo_size, ilk_plane_wm_reg_max(dev, level, is_sprite));
1899 }
1900
1901 /* Calculate the maximum cursor plane watermark */
1902 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
1903                                       int level,
1904                                       const struct intel_wm_config *config)
1905 {
1906         /* HSW LP1+ watermarks w/ multiple pipes */
1907         if (level > 0 && config->num_pipes_active > 1)
1908                 return 64;
1909
1910         /* otherwise just report max that registers can hold */
1911         return ilk_cursor_wm_reg_max(dev, level);
1912 }
1913
1914 static void ilk_compute_wm_maximums(const struct drm_device *dev,
1915                                     int level,
1916                                     const struct intel_wm_config *config,
1917                                     enum intel_ddb_partitioning ddb_partitioning,
1918                                     struct ilk_wm_maximums *max)
1919 {
1920         max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
1921         max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
1922         max->cur = ilk_cursor_wm_max(dev, level, config);
1923         max->fbc = ilk_fbc_wm_reg_max(dev);
1924 }
1925
1926 static void ilk_compute_wm_reg_maximums(struct drm_device *dev,
1927                                         int level,
1928                                         struct ilk_wm_maximums *max)
1929 {
1930         max->pri = ilk_plane_wm_reg_max(dev, level, false);
1931         max->spr = ilk_plane_wm_reg_max(dev, level, true);
1932         max->cur = ilk_cursor_wm_reg_max(dev, level);
1933         max->fbc = ilk_fbc_wm_reg_max(dev);
1934 }
1935
1936 static bool ilk_validate_wm_level(int level,
1937                                   const struct ilk_wm_maximums *max,
1938                                   struct intel_wm_level *result)
1939 {
1940         bool ret;
1941
1942         /* already determined to be invalid? */
1943         if (!result->enable)
1944                 return false;
1945
1946         result->enable = result->pri_val <= max->pri &&
1947                          result->spr_val <= max->spr &&
1948                          result->cur_val <= max->cur;
1949
1950         ret = result->enable;
1951
1952         /*
1953          * HACK until we can pre-compute everything,
1954          * and thus fail gracefully if LP0 watermarks
1955          * are exceeded...
1956          */
1957         if (level == 0 && !result->enable) {
1958                 if (result->pri_val > max->pri)
1959                         DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
1960                                       level, result->pri_val, max->pri);
1961                 if (result->spr_val > max->spr)
1962                         DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
1963                                       level, result->spr_val, max->spr);
1964                 if (result->cur_val > max->cur)
1965                         DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
1966                                       level, result->cur_val, max->cur);
1967
1968                 result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
1969                 result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
1970                 result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
1971                 result->enable = true;
1972         }
1973
1974         return ret;
1975 }
1976
1977 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
1978                                  const struct intel_crtc *intel_crtc,
1979                                  int level,
1980                                  struct intel_crtc_state *cstate,
1981                                  struct intel_wm_level *result)
1982 {
1983         struct intel_plane *intel_plane;
1984         uint16_t pri_latency = dev_priv->wm.pri_latency[level];
1985         uint16_t spr_latency = dev_priv->wm.spr_latency[level];
1986         uint16_t cur_latency = dev_priv->wm.cur_latency[level];
1987
1988         /* WM1+ latency values stored in 0.5us units */
1989         if (level > 0) {
1990                 pri_latency *= 5;
1991                 spr_latency *= 5;
1992                 cur_latency *= 5;
1993         }
1994
1995         for_each_intel_plane_on_crtc(dev_priv->dev, intel_crtc, intel_plane) {
1996                 struct intel_plane_state *pstate =
1997                         to_intel_plane_state(intel_plane->base.state);
1998
1999                 switch (intel_plane->base.type) {
2000                 case DRM_PLANE_TYPE_PRIMARY:
2001                         result->pri_val = ilk_compute_pri_wm(cstate, pstate,
2002                                                              pri_latency,
2003                                                              level);
2004                         result->fbc_val = ilk_compute_fbc_wm(cstate, pstate,
2005                                                              result->pri_val);
2006                         break;
2007                 case DRM_PLANE_TYPE_OVERLAY:
2008                         result->spr_val = ilk_compute_spr_wm(cstate, pstate,
2009                                                              spr_latency);
2010                         break;
2011                 case DRM_PLANE_TYPE_CURSOR:
2012                         result->cur_val = ilk_compute_cur_wm(cstate, pstate,
2013                                                              cur_latency);
2014                         break;
2015                 }
2016         }
2017
2018         result->enable = true;
2019 }
2020
2021 static uint32_t
2022 hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
2023 {
2024         struct drm_i915_private *dev_priv = dev->dev_private;
2025         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2026         const struct drm_display_mode *adjusted_mode = &intel_crtc->config->base.adjusted_mode;
2027         u32 linetime, ips_linetime;
2028
2029         if (!intel_crtc->active)
2030                 return 0;
2031
2032         /* The WMs are computed based on how long it takes to fill a single
2033          * row at the given clock rate, multiplied by 8.
2034          */
2035         linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2036                                      adjusted_mode->crtc_clock);
2037         ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2038                                          dev_priv->cdclk_freq);
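        /*
         * Illustrative numbers (not from any spec): htotal = 2200 and a
         * 148500 kHz clock give 2200 * 1000 * 8 / 148500 ~= 119, i.e. the
         * ~14.8us row time expressed in 1/8 us units.
         */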
2039
2040         return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2041                PIPE_WM_LINETIME_TIME(linetime);
2042 }
2043
2044 static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8])
2045 {
2046         struct drm_i915_private *dev_priv = dev->dev_private;
2047
2048         if (IS_GEN9(dev)) {
2049                 uint32_t val;
2050                 int ret, i;
2051                 int level, max_level = ilk_wm_max_level(dev);
2052
2053                 /* read the first set of memory latencies[0:3] */
2054                 val = 0; /* data0 to be programmed to 0 for first set */
2055                 mutex_lock(&dev_priv->rps.hw_lock);
2056                 ret = sandybridge_pcode_read(dev_priv,
2057                                              GEN9_PCODE_READ_MEM_LATENCY,
2058                                              &val);
2059                 mutex_unlock(&dev_priv->rps.hw_lock);
2060
2061                 if (ret) {
2062                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2063                         return;
2064                 }
2065
2066                 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2067                 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2068                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2069                 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2070                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2071                 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2072                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2073
2074                 /* read the second set of memory latencies[4:7] */
2075                 val = 1; /* data0 to be programmed to 1 for second set */
2076                 mutex_lock(&dev_priv->rps.hw_lock);
2077                 ret = sandybridge_pcode_read(dev_priv,
2078                                              GEN9_PCODE_READ_MEM_LATENCY,
2079                                              &val);
2080                 mutex_unlock(&dev_priv->rps.hw_lock);
2081                 if (ret) {
2082                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2083                         return;
2084                 }
2085
2086                 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2087                 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2088                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2089                 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2090                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2091                 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2092                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2093
2094                 /*
2095                  * WaWmMemoryReadLatency:skl
2096                  *
2097                  * punit doesn't take into account the read latency so we need
2098                  * to add 2us to the various latency levels we retrieve from
2099                  * the punit.
2100                  *   - W0 is a bit special in that it's the only level that
2101                  *   can't be disabled if we want to have display working, so
2102                  *   we always add 2us there.
2103                  *   - For levels >=1, punit returns 0us latency when they are
2104                  *   disabled, so we respect that and don't add 2us then
2105                  *
2106                  * Additionally, if a level n (n > 1) has a 0us latency, all
2107                  * levels m (m >= n) need to be disabled. We make sure to
2108                  * sanitize the values out of the punit to satisfy this
2109                  * requirement.
2110                  */
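                /*
                 * Illustrative example: raw values {2, 4, 8, 0, 16, ...}
                 * become {4, 6, 10, 0, 0, 0, 0, 0} after sanitizing.
                 */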
2111                 wm[0] += 2;
2112                 for (level = 1; level <= max_level; level++) {
2113                         if (wm[level] != 0) {
2114                                 wm[level] += 2;
2115                         } else {
2116                                 for (i = level + 1; i <= max_level; i++)
2117                                         wm[i] = 0;
2118                                 break;
2119                         }
2120                 }
2121         } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2122                 uint64_t sskpd = I915_READ64(MCH_SSKPD);
2123
2124                 wm[0] = (sskpd >> 56) & 0xFF;
2125                 if (wm[0] == 0)
2126                         wm[0] = sskpd & 0xF;
2127                 wm[1] = (sskpd >> 4) & 0xFF;
2128                 wm[2] = (sskpd >> 12) & 0xFF;
2129                 wm[3] = (sskpd >> 20) & 0x1FF;
2130                 wm[4] = (sskpd >> 32) & 0x1FF;
2131         } else if (INTEL_INFO(dev)->gen >= 6) {
2132                 uint32_t sskpd = I915_READ(MCH_SSKPD);
2133
2134                 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2135                 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2136                 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2137                 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2138         } else if (INTEL_INFO(dev)->gen >= 5) {
2139                 uint32_t mltr = I915_READ(MLTR_ILK);
2140
2141                 /* ILK primary LP0 latency is 700 ns */
2142                 wm[0] = 7;
2143                 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2144                 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2145         }
2146 }
2147
2148 static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5])
2149 {
2150         /* ILK sprite LP0 latency is 1300 ns */
2151         if (INTEL_INFO(dev)->gen == 5)
2152                 wm[0] = 13;
2153 }
2154
2155 static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
2156 {
2157         /* ILK cursor LP0 latency is 1300 ns */
2158         if (INTEL_INFO(dev)->gen == 5)
2159                 wm[0] = 13;
2160
2161         /* WaDoubleCursorLP3Latency:ivb */
2162         if (IS_IVYBRIDGE(dev))
2163                 wm[3] *= 2;
2164 }
2165
2166 int ilk_wm_max_level(const struct drm_device *dev)
2167 {
2168         /* how many WM levels are we expecting */
2169         if (INTEL_INFO(dev)->gen >= 9)
2170                 return 7;
2171         else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2172                 return 4;
2173         else if (INTEL_INFO(dev)->gen >= 6)
2174                 return 3;
2175         else
2176                 return 2;
2177 }
2178
2179 static void intel_print_wm_latency(struct drm_device *dev,
2180                                    const char *name,
2181                                    const uint16_t wm[8])
2182 {
2183         int level, max_level = ilk_wm_max_level(dev);
2184
2185         for (level = 0; level <= max_level; level++) {
2186                 unsigned int latency = wm[level];
2187
2188                 if (latency == 0) {
2189                         DRM_ERROR("%s WM%d latency not provided\n",
2190                                   name, level);
2191                         continue;
2192                 }
2193
2194                 /*
2195                  * - latencies are in us on gen9.
2196                  * - before then, WM1+ latency values are in 0.5us units
2197                  */
2198                 if (IS_GEN9(dev))
2199                         latency *= 10;
2200                 else if (level > 0)
2201                         latency *= 5;
2202
2203                 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2204                               name, level, wm[level],
2205                               latency / 10, latency % 10);
2206         }
2207 }
2208
2209 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2210                                     uint16_t wm[5], uint16_t min)
2211 {
2212         int level, max_level = ilk_wm_max_level(dev_priv->dev);
2213
2214         if (wm[0] >= min)
2215                 return false;
2216
2217         wm[0] = max(wm[0], min);
2218         for (level = 1; level <= max_level; level++)
2219                 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5));
2220
2221         return true;
2222 }
2223
2224 static void snb_wm_latency_quirk(struct drm_device *dev)
2225 {
2226         struct drm_i915_private *dev_priv = dev->dev_private;
2227         bool changed;
2228
2229         /*
2230          * The BIOS provided WM memory latency values are often
2231          * inadequate for high resolution displays. Adjust them.
2232          */
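        /*
         * 12 is 1.2us for WM0 (0.1us units); ilk_increase_wm_latency()
         * then raises WM1+ to at least DIV_ROUND_UP(12, 5) = 3, i.e.
         * 1.5us in their 0.5us units.
         */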
2233         changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
2234                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
2235                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
2236
2237         if (!changed)
2238                 return;
2239
2240         DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
2241         intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
2242         intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
2243         intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
2244 }
2245
2246 static void ilk_setup_wm_latency(struct drm_device *dev)
2247 {
2248         struct drm_i915_private *dev_priv = dev->dev_private;
2249
2250         intel_read_wm_latency(dev, dev_priv->wm.pri_latency);
2251
2252         memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
2253                sizeof(dev_priv->wm.pri_latency));
2254         memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
2255                sizeof(dev_priv->wm.pri_latency));
2256
2257         intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency);
2258         intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency);
2259
2260         intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
2261         intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
2262         intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
2263
2264         if (IS_GEN6(dev))
2265                 snb_wm_latency_quirk(dev);
2266 }
2267
2268 static void skl_setup_wm_latency(struct drm_device *dev)
2269 {
2270         struct drm_i915_private *dev_priv = dev->dev_private;
2271
2272         intel_read_wm_latency(dev, dev_priv->wm.skl_latency);
2273         intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency);
2274 }
2275
2276 static void ilk_compute_wm_config(struct drm_device *dev,
2277                                   struct intel_wm_config *config)
2278 {
2279         struct intel_crtc *intel_crtc;
2280
2281         /* Compute the currently _active_ config */
2282         for_each_intel_crtc(dev, intel_crtc) {
2283                 const struct intel_pipe_wm *wm = &intel_crtc->wm.active;
2284
2285                 if (!wm->pipe_enabled)
2286                         continue;
2287
2288                 config->sprites_enabled |= wm->sprites_enabled;
2289                 config->sprites_scaled |= wm->sprites_scaled;
2290                 config->num_pipes_active++;
2291         }
2292 }
2293
2294 /* Compute new watermarks for the pipe */
2295 static bool intel_compute_pipe_wm(struct intel_crtc_state *cstate,
2296                                   struct intel_pipe_wm *pipe_wm)
2297 {
2298         struct drm_crtc *crtc = cstate->base.crtc;
2299         struct drm_device *dev = crtc->dev;
2300         const struct drm_i915_private *dev_priv = dev->dev_private;
2301         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2302         struct intel_plane *intel_plane;
2303         struct intel_plane_state *sprstate = NULL;
2304         int level, max_level = ilk_wm_max_level(dev);
2305         /* LP0 watermark maximums depend on this pipe alone */
2306         struct intel_wm_config config = {
2307                 .num_pipes_active = 1,
2308         };
2309         struct ilk_wm_maximums max;
2310
2311         for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
2312                 if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY) {
2313                         sprstate = to_intel_plane_state(intel_plane->base.state);
2314                         break;
2315                 }
2316         }
2317
2318         config.sprites_enabled = sprstate->visible;
2319         config.sprites_scaled = sprstate->visible &&
2320                 (drm_rect_width(&sprstate->dst) != drm_rect_width(&sprstate->src) >> 16 ||
2321                 drm_rect_height(&sprstate->dst) != drm_rect_height(&sprstate->src) >> 16);
2322
2323         pipe_wm->pipe_enabled = cstate->base.active;
2324         pipe_wm->sprites_enabled = sprstate->visible;
2325         pipe_wm->sprites_scaled = config.sprites_scaled;
2326
2327         /* ILK/SNB: LP2+ watermarks only w/o sprites */
2328         if (INTEL_INFO(dev)->gen <= 6 && sprstate->visible)
2329                 max_level = 1;
2330
2331         /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
2332         if (config.sprites_scaled)
2333                 max_level = 0;
2334
2335         ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, &pipe_wm->wm[0]);
2336
2337         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2338                 pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc);
2339
2340         /* LP0 watermarks always use 1/2 DDB partitioning */
2341         ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
2342
2343         /* At least LP0 must be valid */
2344         if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0]))
2345                 return false;
2346
2347         ilk_compute_wm_reg_maximums(dev, 1, &max);
2348
2349         for (level = 1; level <= max_level; level++) {
2350                 struct intel_wm_level wm = {};
2351
2352                 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate, &wm);
2353
2354                 /*
2355                  * Disable any watermark level that exceeds the
2356                  * register maximums since such watermarks are
2357                  * always invalid.
2358                  */
2359                 if (!ilk_validate_wm_level(level, &max, &wm))
2360                         break;
2361
2362                 pipe_wm->wm[level] = wm;
2363         }
2364
2365         return true;
2366 }
2367
2368 /*
2369  * Merge the watermarks from all active pipes for a specific level.
2370  */
2371 static void ilk_merge_wm_level(struct drm_device *dev,
2372                                int level,
2373                                struct intel_wm_level *ret_wm)
2374 {
2375         struct intel_crtc *intel_crtc;
2376
2377         ret_wm->enable = true;
2378
2379         for_each_intel_crtc(dev, intel_crtc) {
2380                 const struct intel_pipe_wm *active = &intel_crtc->wm.active;
2381                 const struct intel_wm_level *wm = &active->wm[level];
2382
2383                 if (!active->pipe_enabled)
2384                         continue;
2385
2386                 /*
2387                  * The watermark values may have been used in the past,
2388                  * so we must maintain them in the registers for some
2389                  * time even if the level is now disabled.
2390                  */
2391                 if (!wm->enable)
2392                         ret_wm->enable = false;
2393
2394                 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
2395                 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
2396                 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
2397                 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
2398         }
2399 }
2400
2401 /*
2402  * Merge all low power watermarks for all active pipes.
2403  */
2404 static void ilk_wm_merge(struct drm_device *dev,
2405                          const struct intel_wm_config *config,
2406                          const struct ilk_wm_maximums *max,
2407                          struct intel_pipe_wm *merged)
2408 {
2409         struct drm_i915_private *dev_priv = dev->dev_private;
2410         int level, max_level = ilk_wm_max_level(dev);
2411         int last_enabled_level = max_level;
2412
2413         /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
2414         if ((INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) &&
2415             config->num_pipes_active > 1)
2416                 return;
2417
2418         /* ILK: FBC WM must be disabled always */
2419         merged->fbc_wm_enabled = INTEL_INFO(dev)->gen >= 6;
2420
2421         /* merge each WM1+ level */
2422         for (level = 1; level <= max_level; level++) {
2423                 struct intel_wm_level *wm = &merged->wm[level];
2424
2425                 ilk_merge_wm_level(dev, level, wm);
2426
2427                 if (level > last_enabled_level)
2428                         wm->enable = false;
2429                 else if (!ilk_validate_wm_level(level, max, wm))
2430                         /* make sure all following levels get disabled */
2431                         last_enabled_level = level - 1;
2432
2433                 /*
2434                  * The spec says it is preferred to disable
2435                  * FBC WMs instead of disabling a WM level.
2436                  */
2437                 if (wm->fbc_val > max->fbc) {
2438                         if (wm->enable)
2439                                 merged->fbc_wm_enabled = false;
2440                         wm->fbc_val = 0;
2441                 }
2442         }
2443
2444         /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
2445         /*
2446          * FIXME this is racy. FBC might get enabled later.
2447          * What we should check here is whether FBC can be
2448          * enabled sometime later.
2449          */
2450         if (IS_GEN5(dev) && !merged->fbc_wm_enabled &&
2451             intel_fbc_enabled(dev_priv)) {
2452                 for (level = 2; level <= max_level; level++) {
2453                         struct intel_wm_level *wm = &merged->wm[level];
2454
2455                         wm->enable = false;
2456                 }
2457         }
2458 }
2459
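/*
 * Map an LP watermark register (1-3) to a WM level: when the extra
 * HSW/BDW level 4 is in use, LP2/LP3 carry levels 3/4 instead of 2/3.
 */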
2460 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
2461 {
2462         /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
2463         return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
2464 }
2465
2466 /* The value we need to program into the WM_LPx latency field */
2467 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
2468 {
2469         struct drm_i915_private *dev_priv = dev->dev_private;
2470
2471         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2472                 return 2 * level;
2473         else
2474                 return dev_priv->wm.pri_latency[level];
2475 }
2476
2477 static void ilk_compute_wm_results(struct drm_device *dev,
2478                                    const struct intel_pipe_wm *merged,
2479                                    enum intel_ddb_partitioning partitioning,
2480                                    struct ilk_wm_values *results)
2481 {
2482         struct intel_crtc *intel_crtc;
2483         int level, wm_lp;
2484
2485         results->enable_fbc_wm = merged->fbc_wm_enabled;
2486         results->partitioning = partitioning;
2487
2488         /* LP1+ register values */
2489         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2490                 const struct intel_wm_level *r;
2491
2492                 level = ilk_wm_lp_to_level(wm_lp, merged);
2493
2494                 r = &merged->wm[level];
2495
2496                 /*
2497                  * Maintain the watermark values even if the level is
2498                  * disabled. Doing otherwise could cause underruns.
2499                  */
2500                 results->wm_lp[wm_lp - 1] =
2501                         (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
2502                         (r->pri_val << WM1_LP_SR_SHIFT) |
2503                         r->cur_val;
2504
2505                 if (r->enable)
2506                         results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
2507
2508                 if (INTEL_INFO(dev)->gen >= 8)
2509                         results->wm_lp[wm_lp - 1] |=
2510                                 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
2511                 else
2512                         results->wm_lp[wm_lp - 1] |=
2513                                 r->fbc_val << WM1_LP_FBC_SHIFT;
2514
2515                 /*
2516                  * Always set WM1S_LP_EN when spr_val != 0, even if the
2517                  * level is disabled. Doing otherwise could cause underruns.
2518                  */
2519                 if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) {
2520                         WARN_ON(wm_lp != 1);
2521                         results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
2522                 } else
2523                         results->wm_lp_spr[wm_lp - 1] = r->spr_val;
2524         }
2525
2526         /* LP0 register values */
2527         for_each_intel_crtc(dev, intel_crtc) {
2528                 enum i915_pipe pipe = intel_crtc->pipe;
2529                 const struct intel_wm_level *r =
2530                         &intel_crtc->wm.active.wm[0];
2531
2532                 if (WARN_ON(!r->enable))
2533                         continue;
2534
2535                 results->wm_linetime[pipe] = intel_crtc->wm.active.linetime;
2536
2537                 results->wm_pipe[pipe] =
2538                         (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
2539                         (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
2540                         r->cur_val;
2541         }
2542 }
2543
2544 /* Find the result with the highest level enabled; if both peak at the same
2545  * level, prefer the one with FBC watermarks enabled, falling back to r1. */
2546 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
2547                                                   struct intel_pipe_wm *r1,
2548                                                   struct intel_pipe_wm *r2)
2549 {
2550         int level, max_level = ilk_wm_max_level(dev);
2551         int level1 = 0, level2 = 0;
2552
2553         for (level = 1; level <= max_level; level++) {
2554                 if (r1->wm[level].enable)
2555                         level1 = level;
2556                 if (r2->wm[level].enable)
2557                         level2 = level;
2558         }
2559
2560         if (level1 == level2) {
2561                 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
2562                         return r2;
2563                 else
2564                         return r1;
2565         } else if (level1 > level2) {
2566                 return r1;
2567         } else {
2568                 return r2;
2569         }
2570 }
2571
2572 /* dirty bits used to track which watermarks need changes */
2573 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
2574 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
2575 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
2576 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
2577 #define WM_DIRTY_FBC (1 << 24)
2578 #define WM_DIRTY_DDB (1 << 25)
2579
2580 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
2581                                          const struct ilk_wm_values *old,
2582                                          const struct ilk_wm_values *new)
2583 {
2584         unsigned int dirty = 0;
2585         enum i915_pipe pipe;
2586         int wm_lp;
2587
2588         for_each_pipe(dev_priv, pipe) {
2589                 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
2590                         dirty |= WM_DIRTY_LINETIME(pipe);
2591                         /* Must disable LP1+ watermarks too */
2592                         dirty |= WM_DIRTY_LP_ALL;
2593                 }
2594
2595                 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
2596                         dirty |= WM_DIRTY_PIPE(pipe);
2597                         /* Must disable LP1+ watermarks too */
2598                         dirty |= WM_DIRTY_LP_ALL;
2599                 }
2600         }
2601
2602         if (old->enable_fbc_wm != new->enable_fbc_wm) {
2603                 dirty |= WM_DIRTY_FBC;
2604                 /* Must disable LP1+ watermarks too */
2605                 dirty |= WM_DIRTY_LP_ALL;
2606         }
2607
2608         if (old->partitioning != new->partitioning) {
2609                 dirty |= WM_DIRTY_DDB;
2610                 /* Must disable LP1+ watermarks too */
2611                 dirty |= WM_DIRTY_LP_ALL;
2612         }
2613
2614         /* LP1+ watermarks already deemed dirty, no need to continue */
2615         if (dirty & WM_DIRTY_LP_ALL)
2616                 return dirty;
2617
2618         /* Find the lowest numbered LP1+ watermark in need of an update... */
2619         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2620                 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
2621                     old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
2622                         break;
2623         }
2624
2625         /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
2626         for (; wm_lp <= 3; wm_lp++)
2627                 dirty |= WM_DIRTY_LP(wm_lp);
2628
2629         return dirty;
2630 }
2631
2632 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
2633                                unsigned int dirty)
2634 {
2635         struct ilk_wm_values *previous = &dev_priv->wm.hw;
2636         bool changed = false;
2637
2638         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
2639                 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
2640                 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
2641                 changed = true;
2642         }
2643         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
2644                 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
2645                 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
2646                 changed = true;
2647         }
2648         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
2649                 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
2650                 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
2651                 changed = true;
2652         }
2653
2654         /*
2655          * Don't touch WM1S_LP_EN here.
2656          * Doing so could cause underruns.
2657          */
2658
2659         return changed;
2660 }
2661
2662 /*
2663  * The spec says we shouldn't write when we don't need to, because every
2664  * write causes WMs to be re-evaluated, expending some power.
2665  */
2666 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
2667                                 struct ilk_wm_values *results)
2668 {
2669         struct drm_device *dev = dev_priv->dev;
2670         struct ilk_wm_values *previous = &dev_priv->wm.hw;
2671         unsigned int dirty;
2672         uint32_t val;
2673
2674         dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
2675         if (!dirty)
2676                 return;
2677
2678         _ilk_disable_lp_wm(dev_priv, dirty);
2679
2680         if (dirty & WM_DIRTY_PIPE(PIPE_A))
2681                 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
2682         if (dirty & WM_DIRTY_PIPE(PIPE_B))
2683                 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
2684         if (dirty & WM_DIRTY_PIPE(PIPE_C))
2685                 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
2686
2687         if (dirty & WM_DIRTY_LINETIME(PIPE_A))
2688                 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
2689         if (dirty & WM_DIRTY_LINETIME(PIPE_B))
2690                 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
2691         if (dirty & WM_DIRTY_LINETIME(PIPE_C))
2692                 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
2693
2694         if (dirty & WM_DIRTY_DDB) {
2695                 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2696                         val = I915_READ(WM_MISC);
2697                         if (results->partitioning == INTEL_DDB_PART_1_2)
2698                                 val &= ~WM_MISC_DATA_PARTITION_5_6;
2699                         else
2700                                 val |= WM_MISC_DATA_PARTITION_5_6;
2701                         I915_WRITE(WM_MISC, val);
2702                 } else {
2703                         val = I915_READ(DISP_ARB_CTL2);
2704                         if (results->partitioning == INTEL_DDB_PART_1_2)
2705                                 val &= ~DISP_DATA_PARTITION_5_6;
2706                         else
2707                                 val |= DISP_DATA_PARTITION_5_6;
2708                         I915_WRITE(DISP_ARB_CTL2, val);
2709                 }
2710         }
2711
2712         if (dirty & WM_DIRTY_FBC) {
2713                 val = I915_READ(DISP_ARB_CTL);
2714                 if (results->enable_fbc_wm)
2715                         val &= ~DISP_FBC_WM_DIS;
2716                 else
2717                         val |= DISP_FBC_WM_DIS;
2718                 I915_WRITE(DISP_ARB_CTL, val);
2719         }
2720
2721         if (dirty & WM_DIRTY_LP(1) &&
2722             previous->wm_lp_spr[0] != results->wm_lp_spr[0])
2723                 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
2724
2725         if (INTEL_INFO(dev)->gen >= 7) {
2726                 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
2727                         I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
2728                 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
2729                         I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
2730         }
2731
2732         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
2733                 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
2734         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
2735                 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
2736         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
2737                 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
2738
2739         dev_priv->wm.hw = *results;
2740 }
2741
2742 static bool ilk_disable_lp_wm(struct drm_device *dev)
2743 {
2744         struct drm_i915_private *dev_priv = dev->dev_private;
2745
2746         return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
2747 }
2748
2749 /*
2750  * On gen9, we need to allocate Display Data Buffer (DDB) portions to the
2751  * different active planes.
2752  */
2753
2754 #define SKL_DDB_SIZE            896     /* in blocks */
2755 #define BXT_DDB_SIZE            512
2756
2757 static void
2758 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
2759                                    struct drm_crtc *for_crtc,
2760                                    const struct intel_wm_config *config,
2761                                    const struct skl_pipe_wm_parameters *params,
2762                                    struct skl_ddb_entry *alloc /* out */)
2763 {
2764         struct drm_crtc *crtc;
2765         unsigned int pipe_size, ddb_size;
2766         int nth_active_pipe;
2767
2768         if (!params->active) {
2769                 alloc->start = 0;
2770                 alloc->end = 0;
2771                 return;
2772         }
2773
2774         if (IS_BROXTON(dev))
2775                 ddb_size = BXT_DDB_SIZE;
2776         else
2777                 ddb_size = SKL_DDB_SIZE;
2778
2779         ddb_size -= 4; /* 4 blocks for bypass path allocation */
2780
2781         nth_active_pipe = 0;
2782         for_each_crtc(dev, crtc) {
2783                 if (!to_intel_crtc(crtc)->active)
2784                         continue;
2785
2786                 if (crtc == for_crtc)
2787                         break;
2788
2789                 nth_active_pipe++;
2790         }
2791
2792         pipe_size = ddb_size / config->num_pipes_active;
2793         alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active;
2794         alloc->end = alloc->start + pipe_size;
2795 }
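
/*
 * A worked example of the even split above, assuming a SKL part with
 * two active pipes: ddb_size = 896 - 4 = 892 blocks and pipe_size =
 * 892 / 2 = 446, so the first active pipe gets [0, 446) and the second
 * [446, 892).
 */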
2796
2797 static unsigned int skl_cursor_allocation(const struct intel_wm_config *config)
2798 {
2799         if (config->num_pipes_active == 1)
2800                 return 32;
2801
2802         return 8;
2803 }
2804
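/*
 * PLANE_BUF_CFG/CUR_BUF_CFG pack a DDB entry as the start block in the
 * low 10 bits and an inclusive end block in bits 25:16; the +1 below
 * converts to the exclusive end the driver uses internally
 * (skl_ddb_entry_write performs the inverse conversion).
 */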
2805 static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
2806 {
2807         entry->start = reg & 0x3ff;
2808         entry->end = (reg >> 16) & 0x3ff;
2809         if (entry->end)
2810                 entry->end += 1;
2811 }
2812
2813 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
2814                           struct skl_ddb_allocation *ddb /* out */)
2815 {
2816         enum i915_pipe pipe;
2817         int plane;
2818         u32 val;
2819
2820         memset(ddb, 0, sizeof(*ddb));
2821
2822         for_each_pipe(dev_priv, pipe) {
2823                 if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe)))
2824                         continue;
2825
2826                 for_each_plane(dev_priv, pipe, plane) {
2827                         val = I915_READ(PLANE_BUF_CFG(pipe, plane));
2828                         skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane],
2829                                                    val);
2830                 }
2831
2832                 val = I915_READ(CUR_BUF_CFG(pipe));
2833                 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][PLANE_CURSOR],
2834                                            val);
2835         }
2836 }
2837
2838 static unsigned int
2839 skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y)
2840 {
2842         /* for planar format */
2843         if (p->y_bytes_per_pixel) {
2844                 if (y)  /* y-plane data rate */
2845                         return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel;
2846                 else    /* uv-plane data rate */
2847                         return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel;
2848         }
2849
2850         /* for packed formats */
2851         return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
2852 }
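
/*
 * For example, a 1920x1080 NV12 plane (y_bytes_per_pixel = 1,
 * bytes_per_pixel = 2) has a y-plane rate of 1920 * 1080 * 1 and a
 * uv-plane rate of 960 * 540 * 2, i.e. the chroma plane needs half the
 * bandwidth of the luma plane.
 */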
2853
2854 /*
2855  * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
2856  * an 8192x4096@32bpp framebuffer:
2857  *   3 * 4096 * 8192  * 4 < 2^32
2858  */
2859 static unsigned int
2860 skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
2861                                  const struct skl_pipe_wm_parameters *params)
2862 {
2863         unsigned int total_data_rate = 0;
2864         int plane;
2865
2866         for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
2867                 const struct intel_plane_wm_parameters *p;
2868
2869                 p = &params->plane[plane];
2870                 if (!p->enabled)
2871                         continue;
2872
2873                 total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */
2874                 if (p->y_bytes_per_pixel) {
2875                         total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */
2876                 }
2877         }
2878
2879         return total_data_rate;
2880 }
2881
2882 static void
2883 skl_allocate_pipe_ddb(struct drm_crtc *crtc,
2884                       const struct intel_wm_config *config,
2885                       const struct skl_pipe_wm_parameters *params,
2886                       struct skl_ddb_allocation *ddb /* out */)
2887 {
2888         struct drm_device *dev = crtc->dev;
2889         struct drm_i915_private *dev_priv = dev->dev_private;
2890         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2891         enum i915_pipe pipe = intel_crtc->pipe;
2892         struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
2893         uint16_t alloc_size, start, cursor_blocks;
2894         uint16_t minimum[I915_MAX_PLANES];
2895         uint16_t y_minimum[I915_MAX_PLANES];
2896         unsigned int total_data_rate;
2897         int plane;
2898
2899         skl_ddb_get_pipe_allocation_limits(dev, crtc, config, params, alloc);
2900         alloc_size = skl_ddb_entry_size(alloc);
2901         if (alloc_size == 0) {
2902                 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
2903                 memset(&ddb->plane[pipe][PLANE_CURSOR], 0,
2904                        sizeof(ddb->plane[pipe][PLANE_CURSOR]));
2905                 return;
2906         }
2907
2908         cursor_blocks = skl_cursor_allocation(config);
2909         ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - cursor_blocks;
2910         ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
2911
2912         alloc_size -= cursor_blocks;
2913         alloc->end -= cursor_blocks;
2914
2915         /* 1. Allocate the minimum required blocks for each active plane */
2916         for_each_plane(dev_priv, pipe, plane) {
2917                 const struct intel_plane_wm_parameters *p;
2918
2919                 p = &params->plane[plane];
2920                 if (!p->enabled)
2921                         continue;
2922
2923                 minimum[plane] = 8;
2924                 alloc_size -= minimum[plane];
2925                 y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0;
2926                 alloc_size -= y_minimum[plane];
2927         }
2928
2929         /*
2930          * 2. Distribute the remaining space in proportion to the amount of
2931          * data each plane needs to fetch from memory.
2932          *
2933          * FIXME: we may not allocate every single block here.
2934          */
2935         total_data_rate = skl_get_total_relative_data_rate(intel_crtc, params);
2936
2937         start = alloc->start;
2938         for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
2939                 const struct intel_plane_wm_parameters *p;
2940                 unsigned int data_rate, y_data_rate;
2941                 uint16_t plane_blocks, y_plane_blocks = 0;
2942
2943                 p = &params->plane[plane];
2944                 if (!p->enabled)
2945                         continue;
2946
2947                 data_rate = skl_plane_relative_data_rate(p, 0);
2948
2949                 /*
2950                  * Allocation for packed formats, or the uv-plane part of a planar
2951                  * format: promote the expression to 64 bits to avoid overflow;
2952                  * the result is below the available space since data_rate / total_data_rate < 1.
2953                  */
2954                 plane_blocks = minimum[plane];
2955                 plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
2956                                         total_data_rate);
2957
2958                 ddb->plane[pipe][plane].start = start;
2959                 ddb->plane[pipe][plane].end = start + plane_blocks;
2960
2961                 start += plane_blocks;
2962
2963                 /*
2964                  * Allocation for the y-plane part of a planar format:
2965                  */
2966                 if (p->y_bytes_per_pixel) {
2967                         y_data_rate = skl_plane_relative_data_rate(p, 1);
2968                         y_plane_blocks = y_minimum[plane];
2969                         y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
2970                                                 total_data_rate);
2971
2972                         ddb->y_plane[pipe][plane].start = start;
2973                         ddb->y_plane[pipe][plane].end = start + y_plane_blocks;
2974
2975                         start += y_plane_blocks;
2976                 }
2977         }
2978 }
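
/*
 * A worked example of the two steps above, assuming a single enabled
 * packed-format plane on a pipe that was allotted [0, 446) with two
 * pipes active: the cursor takes 8 blocks at the top ([438, 446)), the
 * plane's minimum is 8, and the remaining 430 blocks also go to that
 * plane (its data_rate equals total_data_rate), leaving plane 0 with
 * [0, 438).
 */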
2981
2982 static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config)
2983 {
2984         /* TODO: Take into account the scalers once we support them */
2985         return config->base.adjusted_mode.crtc_clock;
2986 }
2987
2988 /*
2989  * The max latency should be 257 (the punit can encode at most 255, plus
2990  * 2us for the read latency) and bytes_per_pixel should always be <= 8, so that
2991  * should allow pixel_rate up to ~2 GHz which seems sufficient since max
2992  * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
2993  */
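/*
 * Unit check: latency is in us and pixel_rate in kHz, so
 * latency * pixel_rate / 1000 is the number of pixels fetched during
 * the latency, and * bytes_per_pixel / 512 converts that to DDB blocks.
 * E.g. latency = 4, pixel_rate = 148500, bytes_per_pixel = 4 gives
 * DIV_ROUND_UP(4 * 148500 * 4 / 512, 1000) = 5 blocks.
 */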
2994 static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
2995                                uint32_t latency)
2996 {
2997         uint32_t wm_intermediate_val, ret;
2998
2999         if (latency == 0)
3000                 return UINT_MAX;
3001
3002         wm_intermediate_val = latency * pixel_rate * bytes_per_pixel / 512;
3003         ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
3004
3005         return ret;
3006 }
3007
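/*
 * Method 2 works in line-times instead: latency * pixel_rate /
 * (pipe_htotal * 1000) is the number of scanlines spanned by the
 * latency (rounded up), multiplied by the DDB blocks needed per line.
 * For Y-tiled surfaces the *4 then /4 below computes the block count
 * over a four-line tile row first, presumably to amortize the round-up
 * to 512-byte blocks across the tile's four lines.
 */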
3008 static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
3009                                uint32_t horiz_pixels, uint8_t bytes_per_pixel,
3010                                uint64_t tiling, uint32_t latency)
3011 {
3012         uint32_t ret;
3013         uint32_t plane_bytes_per_line, plane_blocks_per_line;
3014         uint32_t wm_intermediate_val;
3015
3016         if (latency == 0)
3017                 return UINT_MAX;
3018
3019         plane_bytes_per_line = horiz_pixels * bytes_per_pixel;
3020
3021         if (tiling == I915_FORMAT_MOD_Y_TILED ||
3022             tiling == I915_FORMAT_MOD_Yf_TILED) {
3023                 plane_bytes_per_line *= 4;
3024                 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3025                 plane_blocks_per_line /= 4;
3026         } else {
3027                 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3028         }
3029
3030         wm_intermediate_val = latency * pixel_rate;
3031         ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) *
3032                                 plane_blocks_per_line;
3033
3034         return ret;
3035 }
3036
3037 static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb,
3038                                        const struct intel_crtc *intel_crtc)
3039 {
3040         struct drm_device *dev = intel_crtc->base.dev;
3041         struct drm_i915_private *dev_priv = dev->dev_private;
3042         const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
3043         enum i915_pipe pipe = intel_crtc->pipe;
3044
3045         if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe],
3046                    sizeof(new_ddb->plane[pipe])))
3047                 return true;
3048
3049         if (memcmp(&new_ddb->plane[pipe][PLANE_CURSOR], &cur_ddb->plane[pipe][PLANE_CURSOR],
3050                     sizeof(new_ddb->plane[pipe][PLANE_CURSOR])))
3051                 return true;
3052
3053         return false;
3054 }
3055
3056 static void skl_compute_wm_global_parameters(struct drm_device *dev,
3057                                              struct intel_wm_config *config)
3058 {
3059         struct drm_crtc *crtc;
3060         struct drm_plane *plane;
3061
3062         list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
3063                 config->num_pipes_active += to_intel_crtc(crtc)->active;
3064
3065         /* FIXME: I don't think we need those two global parameters on SKL */
3066         list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
3067                 struct intel_plane *intel_plane = to_intel_plane(plane);
3068
3069                 config->sprites_enabled |= intel_plane->wm.enabled;
3070                 config->sprites_scaled |= intel_plane->wm.scaled;
3071         }
3072 }
3073
3074 static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
3075                                            struct skl_pipe_wm_parameters *p)
3076 {
3077         struct drm_device *dev = crtc->dev;
3078         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3079         enum i915_pipe pipe = intel_crtc->pipe;
3080         struct drm_plane *plane;
3081         struct drm_framebuffer *fb;
3082         int i = 1; /* index 0 is the primary plane; sprite planes start at 1 */
3083
3084         p->active = intel_crtc->active;
3085         if (p->active) {
3086                 p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;
3087                 p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);
3088
3089                 fb = crtc->primary->state->fb;
3090                 /* For planar: Bpp is for uv plane, y_Bpp is for y plane */
3091                 if (fb) {
3092                         p->plane[0].enabled = true;
3093                         p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
3094                                 drm_format_plane_cpp(fb->pixel_format, 1) :
3095                                 drm_format_plane_cpp(fb->pixel_format, 0);
3096                         p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
3097                                 drm_format_plane_cpp(fb->pixel_format, 0) : 0;
3098                         p->plane[0].tiling = fb->modifier[0];
3099                 } else {
3100                         p->plane[0].enabled = false;
3101                         p->plane[0].bytes_per_pixel = 0;
3102                         p->plane[0].y_bytes_per_pixel = 0;
3103                         p->plane[0].tiling = DRM_FORMAT_MOD_NONE;
3104                 }
3105                 p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;
3106                 p->plane[0].vert_pixels = intel_crtc->config->pipe_src_h;
3107                 p->plane[0].rotation = crtc->primary->state->rotation;
3108
3109                 fb = crtc->cursor->state->fb;
3110                 p->plane[PLANE_CURSOR].y_bytes_per_pixel = 0;
3111                 if (fb) {
3112                         p->plane[PLANE_CURSOR].enabled = true;
3113                         p->plane[PLANE_CURSOR].bytes_per_pixel = fb->bits_per_pixel / 8;
3114                         p->plane[PLANE_CURSOR].horiz_pixels = crtc->cursor->state->crtc_w;
3115                         p->plane[PLANE_CURSOR].vert_pixels = crtc->cursor->state->crtc_h;
3116                 } else {
3117                         p->plane[PLANE_CURSOR].enabled = false;
3118                         p->plane[PLANE_CURSOR].bytes_per_pixel = 0;
3119                         p->plane[PLANE_CURSOR].horiz_pixels = 64;
3120                         p->plane[PLANE_CURSOR].vert_pixels = 64;
3121                 }
3122         }
3123
3124         list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
3125                 struct intel_plane *intel_plane = to_intel_plane(plane);
3126
3127                 if (intel_plane->pipe == pipe &&
3128                         plane->type == DRM_PLANE_TYPE_OVERLAY)
3129                         p->plane[i++] = intel_plane->wm;
3130         }
3131 }
3132
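/*
 * Compute the watermark for one plane at one latency level. For Y-tiled
 * surfaces the result is method2, bounded below by y_tile_minimum
 * (blocks per line times a rotation-dependent minimum scanline count);
 * for linear/X-tiled it is min(method1, method2) when at least one full
 * line fits in the plane's DDB share, else method1. Returns false when
 * the level cannot be supported with the given allocation.
 */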
3133 static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
3134                                  struct skl_pipe_wm_parameters *p,
3135                                  struct intel_plane_wm_parameters *p_params,
3136                                  uint16_t ddb_allocation,
3137                                  int level,
3138                                  uint16_t *out_blocks, /* out */
3139                                  uint8_t *out_lines /* out */)
3140 {
3141         uint32_t latency = dev_priv->wm.skl_latency[level];
3142         uint32_t method1, method2;
3143         uint32_t plane_bytes_per_line, plane_blocks_per_line;
3144         uint32_t res_blocks, res_lines;
3145         uint32_t selected_result;
3146         uint8_t bytes_per_pixel;
3147
3148         if (latency == 0 || !p->active || !p_params->enabled)
3149                 return false;
3150
3151         bytes_per_pixel = p_params->y_bytes_per_pixel ?
3152                 p_params->y_bytes_per_pixel :
3153                 p_params->bytes_per_pixel;
3154         method1 = skl_wm_method1(p->pixel_rate,
3155                                  bytes_per_pixel,
3156                                  latency);
3157         method2 = skl_wm_method2(p->pixel_rate,
3158                                  p->pipe_htotal,
3159                                  p_params->horiz_pixels,
3160                                  bytes_per_pixel,
3161                                  p_params->tiling,
3162                                  latency);
3163
3164         plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel;
3165         plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3166
3167         if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
3168             p_params->tiling == I915_FORMAT_MOD_Yf_TILED) {
3169                 uint32_t min_scanlines = 4;
3170                 uint32_t y_tile_minimum;
3171                 if (intel_rotation_90_or_270(p_params->rotation)) {
3172                         switch (p_params->bytes_per_pixel) {
3173                         case 1:
3174                                 min_scanlines = 16;
3175                                 break;
3176                         case 2:
3177                                 min_scanlines = 8;
3178                                 break;
3179                         case 8:
3180                                 WARN(1, "Unsupported pixel depth for rotation");
3181                         }
3182                 }
3183                 y_tile_minimum = plane_blocks_per_line * min_scanlines;
3184                 selected_result = max(method2, y_tile_minimum);
3185         } else {
3186                 if ((ddb_allocation / plane_blocks_per_line) >= 1)
3187                         selected_result = min(method1, method2);
3188                 else
3189                         selected_result = method1;
3190         }
3191
3192         res_blocks = selected_result + 1;
3193         res_lines = DIV_ROUND_UP(selected_result, plane_blocks_per_line);
3194
3195         if (level >= 1 && level <= 7) {
3196                 if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
3197                     p_params->tiling == I915_FORMAT_MOD_Yf_TILED)
3198                         res_lines += 4;
3199                 else
3200                         res_blocks++;
3201         }
3202
3203         if (res_blocks >= ddb_allocation || res_lines > 31)
3204                 return false;
3205
3206         *out_blocks = res_blocks;
3207         *out_lines = res_lines;
3208
3209         return true;
3210 }
3211
3212 static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,
3213                                  struct skl_ddb_allocation *ddb,
3214                                  struct skl_pipe_wm_parameters *p,
3215                                  enum i915_pipe pipe,
3216                                  int level,
3217                                  int num_planes,
3218                                  struct skl_wm_level *result)
3219 {
3220         uint16_t ddb_blocks;
3221         int i;
3222
3223         for (i = 0; i < num_planes; i++) {
3224                 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]);
3225
3226                 result->plane_en[i] = skl_compute_plane_wm(dev_priv,
3227                                                 p, &p->plane[i],
3228                                                 ddb_blocks,
3229                                                 level,
3230                                                 &result->plane_res_b[i],
3231                                                 &result->plane_res_l[i]);
3232         }
3233
3234         ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][PLANE_CURSOR]);
3235         result->plane_en[PLANE_CURSOR] = skl_compute_plane_wm(dev_priv, p,
3236                                                  &p->plane[PLANE_CURSOR],
3237                                                  ddb_blocks, level,
3238                                                  &result->plane_res_b[PLANE_CURSOR],
3239                                                  &result->plane_res_l[PLANE_CURSOR]);
3240 }
3241
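/*
 * Line time in 1/8 us units: pixel_rate is in kHz, so
 * 8 * htotal * 1000 / pixel_rate is eight times the line duration in
 * us. E.g. htotal = 2200 at pixel_rate = 148500 gives
 * DIV_ROUND_UP(17600000, 148500) = 119, i.e. a ~14.8 us line time.
 */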
3242 static uint32_t
3243 skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p)
3244 {
3245         if (!to_intel_crtc(crtc)->active)
3246                 return 0;
3247
3248         if (WARN_ON(p->pixel_rate == 0))
3249                 return 0;
3250
3251         return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
3252 }
3253
3254 static void skl_compute_transition_wm(struct drm_crtc *crtc,
3255                                       struct skl_pipe_wm_parameters *params,
3256                                       struct skl_wm_level *trans_wm /* out */)
3257 {
3258         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3259         int i;
3260
3261         if (!params->active)
3262                 return;
3263
3264         /* Until we know more, just disable transition WMs */
3265         for (i = 0; i < intel_num_planes(intel_crtc); i++)
3266                 trans_wm->plane_en[i] = false;
3267         trans_wm->plane_en[PLANE_CURSOR] = false;
3268 }
3269
3270 static void skl_compute_pipe_wm(struct drm_crtc *crtc,
3271                                 struct skl_ddb_allocation *ddb,
3272                                 struct skl_pipe_wm_parameters *params,
3273                                 struct skl_pipe_wm *pipe_wm)
3274 {
3275         struct drm_device *dev = crtc->dev;
3276         const struct drm_i915_private *dev_priv = dev->dev_private;
3277         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3278         int level, max_level = ilk_wm_max_level(dev);
3279
3280         for (level = 0; level <= max_level; level++) {
3281                 skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe,
3282                                      level, intel_num_planes(intel_crtc),
3283                                      &pipe_wm->wm[level]);
3284         }
3285         pipe_wm->linetime = skl_compute_linetime_wm(crtc, params);
3286
3287         skl_compute_transition_wm(crtc, params, &pipe_wm->trans_wm);
3288 }
3289
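/*
 * Translate the computed watermarks into PLANE_WM/CUR_WM register
 * values: each value packs the line count at PLANE_WM_LINES_SHIFT, the
 * block count in the low bits, and PLANE_WM_EN when the level is
 * usable.
 */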
3290 static void skl_compute_wm_results(struct drm_device *dev,
3291                                    struct skl_pipe_wm_parameters *p,
3292                                    struct skl_pipe_wm *p_wm,
3293                                    struct skl_wm_values *r,
3294                                    struct intel_crtc *intel_crtc)
3295 {
3296         int level, max_level = ilk_wm_max_level(dev);
3297         enum i915_pipe pipe = intel_crtc->pipe;
3298         uint32_t temp;
3299         int i;
3300
3301         for (level = 0; level <= max_level; level++) {
3302                 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3303                         temp = 0;
3304
3305                         temp |= p_wm->wm[level].plane_res_l[i] <<
3306                                         PLANE_WM_LINES_SHIFT;
3307                         temp |= p_wm->wm[level].plane_res_b[i];
3308                         if (p_wm->wm[level].plane_en[i])
3309                                 temp |= PLANE_WM_EN;
3310
3311                         r->plane[pipe][i][level] = temp;
3312                 }
3313
3314                 temp = 0;
3315
3316                 temp |= p_wm->wm[level].plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT;
3317                 temp |= p_wm->wm[level].plane_res_b[PLANE_CURSOR];
3318
3319                 if (p_wm->wm[level].plane_en[PLANE_CURSOR])
3320                         temp |= PLANE_WM_EN;
3321
3322                 r->plane[pipe][PLANE_CURSOR][level] = temp;
3324         }
3325
3326         /* transition WMs */
3327         for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3328                 temp = 0;
3329                 temp |= p_wm->trans_wm.plane_res_l[i] << PLANE_WM_LINES_SHIFT;
3330                 temp |= p_wm->trans_wm.plane_res_b[i];
3331                 if (p_wm->trans_wm.plane_en[i])
3332                         temp |= PLANE_WM_EN;
3333
3334                 r->plane_trans[pipe][i] = temp;
3335         }
3336
3337         temp = 0;
3338         temp |= p_wm->trans_wm.plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT;
3339         temp |= p_wm->trans_wm.plane_res_b[PLANE_CURSOR];
3340         if (p_wm->trans_wm.plane_en[PLANE_CURSOR])
3341                 temp |= PLANE_WM_EN;
3342
3343         r->plane_trans[pipe][PLANE_CURSOR] = temp;
3344
3345         r->wm_linetime[pipe] = p_wm->linetime;
3346 }
3347
3348 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, uint32_t reg,
3349                                 const struct skl_ddb_entry *entry)
3350 {
3351         if (entry->end)
3352                 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
3353         else
3354                 I915_WRITE(reg, 0);
3355 }
3356
3357 static void skl_write_wm_values(struct drm_i915_private *dev_priv,
3358                                 const struct skl_wm_values *new)
3359 {
3360         struct drm_device *dev = dev_priv->dev;
3361         struct intel_crtc *crtc;
3362
3363         list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
3364                 int i, level, max_level = ilk_wm_max_level(dev);
3365                 enum i915_pipe pipe = crtc->pipe;
3366
3367                 if (!new->dirty[pipe])
3368                         continue;
3369
3370                 I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]);
3371
3372                 for (level = 0; level <= max_level; level++) {
3373                         for (i = 0; i < intel_num_planes(crtc); i++)
3374                                 I915_WRITE(PLANE_WM(pipe, i, level),
3375                                            new->plane[pipe][i][level]);
3376                         I915_WRITE(CUR_WM(pipe, level),
3377                                    new->plane[pipe][PLANE_CURSOR][level]);
3378                 }
3379                 for (i = 0; i < intel_num_planes(crtc); i++)
3380                         I915_WRITE(PLANE_WM_TRANS(pipe, i),
3381                                    new->plane_trans[pipe][i]);
3382                 I915_WRITE(CUR_WM_TRANS(pipe),
3383                            new->plane_trans[pipe][PLANE_CURSOR]);
3384
3385                 for (i = 0; i < intel_num_planes(crtc); i++) {
3386                         skl_ddb_entry_write(dev_priv,
3387                                             PLANE_BUF_CFG(pipe, i),
3388                                             &new->ddb.plane[pipe][i]);
3389                         skl_ddb_entry_write(dev_priv,
3390                                             PLANE_NV12_BUF_CFG(pipe, i),
3391                                             &new->ddb.y_plane[pipe][i]);
3392                 }
3393
3394                 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
3395                                     &new->ddb.plane[pipe][PLANE_CURSOR]);
3396         }
3397 }
3398
3399 /*
3400  * When setting up a new DDB allocation arrangement, we need to correctly
3401  * sequence the times at which the new allocations for the pipes are taken into
3402  * account or we'll have pipes fetching from space previously allocated to
3403  * another pipe.
3404  *
3405  * Roughly the sequence looks like:
3406  *  1. re-allocate the pipe(s) with the allocation being reduced and not
3407  *     overlapping with a previously lit-up pipe (another way to put it:
3408  *     pipes whose new allocation is strictly included in their old ones).
3409  *  2. re-allocate the other pipes that get their allocation reduced
3410  *  3. allocate the pipes having their allocation increased
3411  *
3412  * Steps 1. and 2. are here to take care of the following case:
3413  * - Initially DDB looks like this:
3414  *     |   B    |   C    |
3415  * - enable pipe A.
3416  * - pipe B has a reduced DDB allocation that overlaps with the old pipe C
3417  *   allocation
3418  *     |  A  |  B  |  C  |
3419  *
3420  * We need to sequence the re-allocation: C, B, A (and not B, C, A).
3421  */
3422
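/*
 * Re-writing PLANE_SURF/CURBASE with their current values arms the
 * planes' double-buffered registers, so the new DDB configuration gets
 * latched on the following vblank (which the callers wait for when the
 * freed space must not be fetched from anymore).
 */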
3423 static void
3424 skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum i915_pipe pipe, int pass)
3425 {
3426         int plane;
3427
3428         DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass);
3429
3430         for_each_plane(dev_priv, pipe, plane) {
3431                 I915_WRITE(PLANE_SURF(pipe, plane),
3432                            I915_READ(PLANE_SURF(pipe, plane)));
3433         }
3434         I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe)));
3435 }
3436
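/*
 * True when the pipe's new allocation is strictly inside its old one
 * (the size changed and both bounds lie within the old entry); this is
 * the "pass 1" case above, which can be flushed without waiting on any
 * other pipe first.
 */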
3437 static bool
3438 skl_ddb_allocation_included(const struct skl_ddb_allocation *old,
3439                             const struct skl_ddb_allocation *new,
3440                             enum i915_pipe pipe)
3441 {
3442         uint16_t old_size, new_size;
3443
3444         old_size = skl_ddb_entry_size(&old->pipe[pipe]);
3445         new_size = skl_ddb_entry_size(&new->pipe[pipe]);
3446
3447         return old_size != new_size &&
3448                new->pipe[pipe].start >= old->pipe[pipe].start &&
3449                new->pipe[pipe].end <= old->pipe[pipe].end;
3450 }
3451
3452 static void skl_flush_wm_values(struct drm_i915_private *dev_priv,
3453                                 struct skl_wm_values *new_values)
3454 {
3455         struct drm_device *dev = dev_priv->dev;
3456         struct skl_ddb_allocation *cur_ddb, *new_ddb;
3457         bool reallocated[I915_MAX_PIPES] = {};
3458         struct intel_crtc *crtc;
3459         enum i915_pipe pipe;
3460
3461         new_ddb = &new_values->ddb;
3462         cur_ddb = &dev_priv->wm.skl_hw.ddb;
3463
3464         /*
3465          * First pass: flush the pipes with the new allocation contained into
3466          * the old space.
3467          *
3468          * We'll wait for the vblank on those pipes to ensure we can safely
3469          * re-allocate the freed space without this pipe fetching from it.
3470          */
3471         for_each_intel_crtc(dev, crtc) {
3472                 if (!crtc->active)
3473                         continue;
3474
3475                 pipe = crtc->pipe;
3476
3477                 if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe))
3478                         continue;
3479
3480                 skl_wm_flush_pipe(dev_priv, pipe, 1);
3481                 intel_wait_for_vblank(dev, pipe);
3482
3483                 reallocated[pipe] = true;
3484         }
3485
3487         /*
3488          * Second pass: flush the pipes that are having their allocation
3489          * reduced, but overlapping with a previous allocation.
3490          *
3491          * Here as well we need to wait for the vblank to make sure the freed
3492          * space is not used anymore.
3493          */
3494         for_each_intel_crtc(dev, crtc) {
3495                 if (!crtc->active)
3496                         continue;
3497
3498                 pipe = crtc->pipe;
3499
3500                 if (reallocated[pipe])
3501                         continue;
3502
3503                 if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) <
3504                     skl_ddb_entry_size(&cur_ddb->pipe[pipe])) {
3505                         skl_wm_flush_pipe(dev_priv, pipe, 2);
3506                         intel_wait_for_vblank(dev, pipe);
3507                         reallocated[pipe] = true;
3508                 }
3509         }
3510
3511         /*
3512          * Third pass: flush the pipes that got more space allocated.
3513          *
3514          * We don't need to actively wait for the update here, next vblank
3515          * will just get more DDB space with the correct WM values.
3516          */
3517         for_each_intel_crtc(dev, crtc) {
3518                 if (!crtc->active)
3519                         continue;
3520
3521                 pipe = crtc->pipe;
3522
3523                 /*
3524                  * At this point, only the pipes that got more space than
3525                  * before are left to re-allocate.
3526                  */
3527                 if (reallocated[pipe])
3528                         continue;
3529
3530                 skl_wm_flush_pipe(dev_priv, pipe, 3);
3531         }
3532 }
3533
3534 static bool skl_update_pipe_wm(struct drm_crtc *crtc,
3535                                struct skl_pipe_wm_parameters *params,
3536                                struct intel_wm_config *config,
3537                                struct skl_ddb_allocation *ddb, /* out */
3538                                struct skl_pipe_wm *pipe_wm /* out */)
3539 {
3540         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3541
3542         skl_compute_wm_pipe_parameters(crtc, params);
3543         skl_allocate_pipe_ddb(crtc, config, params, ddb);
3544         skl_compute_pipe_wm(crtc, ddb, params, pipe_wm);
3545
3546         if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm)))
3547                 return false;
3548
3549         intel_crtc->wm.skl_active = *pipe_wm;
3550
3551         return true;
3552 }
3553
3554 static void skl_update_other_pipe_wm(struct drm_device *dev,
3555                                      struct drm_crtc *crtc,
3556                                      struct intel_wm_config *config,
3557                                      struct skl_wm_values *r)
3558 {
3559         struct intel_crtc *intel_crtc;
3560         struct intel_crtc *this_crtc = to_intel_crtc(crtc);
3561
3562         /*
3563          * If the WM update hasn't changed the allocation for this_crtc (the
3564          * crtc we are currently computing the new WM values for), other
3565          * enabled crtcs will keep the same allocation and we don't need to
3566          * recompute anything for them.
3567          */
3568         if (!skl_ddb_allocation_changed(&r->ddb, this_crtc))
3569                 return;
3570
3571         /*
3572          * Otherwise, because of this_crtc being freshly enabled/disabled, the
3573          * other active pipes need new DDB allocation and WM values.
3574          */
3575         list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
3576                                 base.head) {
3577                 struct skl_pipe_wm_parameters params = {};
3578                 struct skl_pipe_wm pipe_wm = {};
3579                 bool wm_changed;
3580
3581                 if (this_crtc->pipe == intel_crtc->pipe)
3582                         continue;
3583
3584                 if (!intel_crtc->active)
3585                         continue;
3586
3587                 wm_changed = skl_update_pipe_wm(&intel_crtc->base,
3588                                                 &params, config,
3589                                                 &r->ddb, &pipe_wm);
3590
3591                 /*
3592                  * If we end up re-computing the other pipe WM values, it's
3593                  * because it was really needed, so we expect the WM values to
3594                  * be different.
3595                  */
3596                 WARN_ON(!wm_changed);
3597
3598                 skl_compute_wm_results(dev, &params, &pipe_wm, r, intel_crtc);
3599                 r->dirty[intel_crtc->pipe] = true;
3600         }
3601 }
3602
3603 static void skl_clear_wm(struct skl_wm_values *watermarks, enum i915_pipe pipe)
3604 {
3605         watermarks->wm_linetime[pipe] = 0;
3606         memset(watermarks->plane[pipe], 0,
3607                sizeof(uint32_t) * 8 * I915_MAX_PLANES);
3608         memset(watermarks->plane_trans[pipe],
3609                0, sizeof(uint32_t) * I915_MAX_PLANES);
3610         watermarks->plane_trans[pipe][PLANE_CURSOR] = 0;
3611
3612         /* Clear ddb entries for pipe */
3613         memset(&watermarks->ddb.pipe[pipe], 0, sizeof(struct skl_ddb_entry));
3614         memset(&watermarks->ddb.plane[pipe], 0,
3615                sizeof(struct skl_ddb_entry) * I915_MAX_PLANES);
3616         memset(&watermarks->ddb.y_plane[pipe], 0,
3617                sizeof(struct skl_ddb_entry) * I915_MAX_PLANES);
3618         memset(&watermarks->ddb.plane[pipe][PLANE_CURSOR], 0,
3619                sizeof(struct skl_ddb_entry));
3621 }
3622
3623 static void skl_update_wm(struct drm_crtc *crtc)
3624 {
3625         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3626         struct drm_device *dev = crtc->dev;
3627         struct drm_i915_private *dev_priv = dev->dev_private;
3628         struct skl_pipe_wm_parameters params = {};
3629         struct skl_wm_values *results = &dev_priv->wm.skl_results;
3630         struct skl_pipe_wm pipe_wm = {};
3631         struct intel_wm_config config = {};
3632
3634         /* Clear all dirty flags */
3635         memset(results->dirty, 0, sizeof(bool) * I915_MAX_PIPES);
3636
3637         skl_clear_wm(results, intel_crtc->pipe);
3638
3639         skl_compute_wm_global_parameters(dev, &config);
3640
3641         if (!skl_update_pipe_wm(crtc, &params, &config,
3642                                 &results->ddb, &pipe_wm))
3643                 return;
3644
3645         skl_compute_wm_results(dev, &params, &pipe_wm, results, intel_crtc);
3646         results->dirty[intel_crtc->pipe] = true;
3647
3648         skl_update_other_pipe_wm(dev, crtc, &config, results);
3649         skl_write_wm_values(dev_priv, results);
3650         skl_flush_wm_values(dev_priv, results);
3651
3652         /* store the new configuration */
3653         dev_priv->wm.skl_hw = *results;
3654 }
3655
3656 static void
3657 skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
3658                      uint32_t sprite_width, uint32_t sprite_height,
3659                      int pixel_size, bool enabled, bool scaled)
3660 {
3661         struct intel_plane *intel_plane = to_intel_plane(plane);
3662         struct drm_framebuffer *fb = plane->state->fb;
3663
3664         intel_plane->wm.enabled = enabled;
3665         intel_plane->wm.scaled = scaled;
3666         intel_plane->wm.horiz_pixels = sprite_width;
3667         intel_plane->wm.vert_pixels = sprite_height;
3668         intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE;
3669
3670         /* For planar: Bpp is for UV plane, y_Bpp is for Y plane */
3671         intel_plane->wm.bytes_per_pixel =
3672                 (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
3673                 drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size;
3674         intel_plane->wm.y_bytes_per_pixel =
3675                 (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
3676                 drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0;
3677
3678         /*
3679          * Framebuffer can be NULL on plane disable, but it does not
3680          * matter for watermarks if we assume no tiling in that case.
3681          */
3682         if (fb)
3683                 intel_plane->wm.tiling = fb->modifier[0];
3684         intel_plane->wm.rotation = plane->state->rotation;
3685
3686         skl_update_wm(crtc);
3687 }
3688
3689 static void ilk_update_wm(struct drm_crtc *crtc)
3690 {
3691         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3692         struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
3693         struct drm_device *dev = crtc->dev;
3694         struct drm_i915_private *dev_priv = dev->dev_private;
3695         struct ilk_wm_maximums max;
3696         struct ilk_wm_values results = {};
3697         enum intel_ddb_partitioning partitioning;
3698         struct intel_pipe_wm pipe_wm = {};
3699         struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
3700         struct intel_wm_config config = {};
3701
3702         WARN_ON(cstate->base.active != intel_crtc->active);
3703
3704         intel_compute_pipe_wm(cstate, &pipe_wm);
3705
3706         if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm)))
3707                 return;
3708
3709         intel_crtc->wm.active = pipe_wm;
3710
3711         ilk_compute_wm_config(dev, &config);
3712
3713         ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
3714         ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
3715
3716         /* 5/6 split only in single pipe config on IVB+ */
3717         if (INTEL_INFO(dev)->gen >= 7 &&
3718             config.num_pipes_active == 1 && config.sprites_enabled) {
3719                 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
3720                 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
3721
3722                 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
3723         } else {
3724                 best_lp_wm = &lp_wm_1_2;
3725         }
3726
3727         partitioning = (best_lp_wm == &lp_wm_1_2) ?
3728                        INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
3729
3730         ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
3731
3732         ilk_write_wm_values(dev_priv, &results);
3733 }
3734
3735 static void
3736 ilk_update_sprite_wm(struct drm_plane *plane,
3737                      struct drm_crtc *crtc,
3738                      uint32_t sprite_width, uint32_t sprite_height,
3739                      int pixel_size, bool enabled, bool scaled)
3740 {
3741         struct drm_device *dev = plane->dev;
3742         struct intel_plane *intel_plane = to_intel_plane(plane);
3743
3744         /*
3745          * IVB workaround: must disable low power watermarks for at least
3746          * one frame before enabling scaling.  LP watermarks can be re-enabled
3747          * when scaling is disabled.
3748          *
3749          * WaCxSRDisabledForSpriteScaling:ivb
3750          */
3751         if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev))
3752                 intel_wait_for_vblank(dev, intel_plane->pipe);
3753
3754         ilk_update_wm(crtc);
3755 }
3756
3757 static void skl_pipe_wm_active_state(uint32_t val,
3758                                      struct skl_pipe_wm *active,
3759                                      bool is_transwm,
3760                                      bool is_cursor,
3761                                      int i,
3762                                      int level)
3763 {
3764         bool is_enabled = (val & PLANE_WM_EN) != 0;
3765
3766         if (!is_transwm) {
3767                 if (!is_cursor) {
3768                         active->wm[level].plane_en[i] = is_enabled;
3769                         active->wm[level].plane_res_b[i] =
3770                                         val & PLANE_WM_BLOCKS_MASK;
3771                         active->wm[level].plane_res_l[i] =
3772                                         (val >> PLANE_WM_LINES_SHIFT) &
3773                                                 PLANE_WM_LINES_MASK;
3774                 } else {
3775                         active->wm[level].plane_en[PLANE_CURSOR] = is_enabled;
3776                         active->wm[level].plane_res_b[PLANE_CURSOR] =
3777                                         val & PLANE_WM_BLOCKS_MASK;
3778                         active->wm[level].plane_res_l[PLANE_CURSOR] =
3779                                         (val >> PLANE_WM_LINES_SHIFT) &
3780                                                 PLANE_WM_LINES_MASK;
3781                 }
3782         } else {
3783                 if (!is_cursor) {
3784                         active->trans_wm.plane_en[i] = is_enabled;
3785                         active->trans_wm.plane_res_b[i] =
3786                                         val & PLANE_WM_BLOCKS_MASK;
3787                         active->trans_wm.plane_res_l[i] =
3788                                         (val >> PLANE_WM_LINES_SHIFT) &
3789                                                 PLANE_WM_LINES_MASK;
3790                 } else {
3791                         active->trans_wm.plane_en[PLANE_CURSOR] = is_enabled;
3792                         active->trans_wm.plane_res_b[PLANE_CURSOR] =
3793                                         val & PLANE_WM_BLOCKS_MASK;
3794                         active->trans_wm.plane_res_l[PLANE_CURSOR] =
3795                                         (val >> PLANE_WM_LINES_SHIFT) &
3796                                                 PLANE_WM_LINES_MASK;
3797                 }
3798         }
3799 }
3800
3801 static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3802 {
3803         struct drm_device *dev = crtc->dev;
3804         struct drm_i915_private *dev_priv = dev->dev_private;
3805         struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
3806         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3807         struct skl_pipe_wm *active = &intel_crtc->wm.skl_active;
3808         enum i915_pipe pipe = intel_crtc->pipe;
3809         int level, i, max_level;
3810         uint32_t temp;
3811
3812         max_level = ilk_wm_max_level(dev);
3813
3814         hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3815
3816         for (level = 0; level <= max_level; level++) {
3817                 for (i = 0; i < intel_num_planes(intel_crtc); i++)
3818                         hw->plane[pipe][i][level] =
3819                                         I915_READ(PLANE_WM(pipe, i, level));
3820                 hw->plane[pipe][PLANE_CURSOR][level] = I915_READ(CUR_WM(pipe, level));
3821         }
3822
3823         for (i = 0; i < intel_num_planes(intel_crtc); i++)
3824                 hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i));
3825         hw->plane_trans[pipe][PLANE_CURSOR] = I915_READ(CUR_WM_TRANS(pipe));
3826
3827         if (!intel_crtc->active)
3828                 return;
3829
3830         hw->dirty[pipe] = true;
3831
3832         active->linetime = hw->wm_linetime[pipe];
3833
3834         for (level = 0; level <= max_level; level++) {
3835                 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3836                         temp = hw->plane[pipe][i][level];
3837                         skl_pipe_wm_active_state(temp, active, false,
3838                                                 false, i, level);
3839                 }
3840                 temp = hw->plane[pipe][PLANE_CURSOR][level];
3841                 skl_pipe_wm_active_state(temp, active, false, true, i, level);
3842         }
3843
3844         for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3845                 temp = hw->plane_trans[pipe][i];
3846                 skl_pipe_wm_active_state(temp, active, true, false, i, 0);
3847         }
3848
3849         temp = hw->plane_trans[pipe][PLANE_CURSOR];
3850         skl_pipe_wm_active_state(temp, active, true, true, i, 0);
3851 }
3852
3853 void skl_wm_get_hw_state(struct drm_device *dev)
3854 {
3855         struct drm_i915_private *dev_priv = dev->dev_private;
3856         struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
3857         struct drm_crtc *crtc;
3858
3859         skl_ddb_get_hw_state(dev_priv, ddb);
3860         list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
3861                 skl_pipe_wm_get_hw_state(crtc);
3862 }
3863
3864 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3865 {
3866         struct drm_device *dev = crtc->dev;
3867         struct drm_i915_private *dev_priv = dev->dev_private;
3868         struct ilk_wm_values *hw = &dev_priv->wm.hw;
3869         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3870         struct intel_pipe_wm *active = &intel_crtc->wm.active;
3871         enum i915_pipe pipe = intel_crtc->pipe;
3872         static const unsigned int wm0_pipe_reg[] = {
3873                 [PIPE_A] = WM0_PIPEA_ILK,
3874                 [PIPE_B] = WM0_PIPEB_ILK,
3875                 [PIPE_C] = WM0_PIPEC_IVB,
3876         };
3877
3878         hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
3879         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
3880                 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3881
3882         active->pipe_enabled = intel_crtc->active;
3883
3884         if (active->pipe_enabled) {
3885                 u32 tmp = hw->wm_pipe[pipe];
3886
3887                 /*
3888                  * For active pipes LP0 watermark is marked as
3889                  * enabled, and LP1+ watermarks as disabled since
3890                  * we can't really reverse compute them in case
3891                  * multiple pipes are active.
3892                  */
3893                 active->wm[0].enable = true;
3894                 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
3895                 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
3896                 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
3897                 active->linetime = hw->wm_linetime[pipe];
3898         } else {
3899                 int level, max_level = ilk_wm_max_level(dev);
3900
3901                 /*
3902                  * For inactive pipes, all watermark levels
3903                  * should be marked as enabled but zeroed,
3904                  * which is what we'd compute them to.
3905                  */
3906                 for (level = 0; level <= max_level; level++)
3907                         active->wm[level].enable = true;
3908         }
3909 }
3910
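/*
 * Extract a watermark field from a DSPFW register; e.g.
 * _FW_WM(tmp, CURSORB) expands to
 * ((tmp) & DSPFW_CURSORB_MASK) >> DSPFW_CURSORB_SHIFT.
 */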
3911 #define _FW_WM(value, plane) \
3912         (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
3913 #define _FW_WM_VLV(value, plane) \
3914         (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
3915
3916 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
3917                                struct vlv_wm_values *wm)
3918 {
3919         enum i915_pipe pipe;
3920         uint32_t tmp;
3921
3922         for_each_pipe(dev_priv, pipe) {
3923                 tmp = I915_READ(VLV_DDL(pipe));
3924
3925                 wm->ddl[pipe].primary =
3926                         (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3927                 wm->ddl[pipe].cursor =
3928                         (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3929                 wm->ddl[pipe].sprite[0] =
3930                         (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3931                 wm->ddl[pipe].sprite[1] =
3932                         (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3933         }
3934
3935         tmp = I915_READ(DSPFW1);
3936         wm->sr.plane = _FW_WM(tmp, SR);
3937         wm->pipe[PIPE_B].cursor = _FW_WM(tmp, CURSORB);
3938         wm->pipe[PIPE_B].primary = _FW_WM_VLV(tmp, PLANEB);
3939         wm->pipe[PIPE_A].primary = _FW_WM_VLV(tmp, PLANEA);
3940
3941         tmp = I915_READ(DSPFW2);
3942         wm->pipe[PIPE_A].sprite[1] = _FW_WM_VLV(tmp, SPRITEB);
3943         wm->pipe[PIPE_A].cursor = _FW_WM(tmp, CURSORA);
3944         wm->pipe[PIPE_A].sprite[0] = _FW_WM_VLV(tmp, SPRITEA);
3945
3946         tmp = I915_READ(DSPFW3);
3947         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
3948
3949         if (IS_CHERRYVIEW(dev_priv)) {
3950                 tmp = I915_READ(DSPFW7_CHV);
3951                 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED);
3952                 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC);
3953
3954                 tmp = I915_READ(DSPFW8_CHV);
3955                 wm->pipe[PIPE_C].sprite[1] = _FW_WM_VLV(tmp, SPRITEF);
3956                 wm->pipe[PIPE_C].sprite[0] = _FW_WM_VLV(tmp, SPRITEE);
3957
3958                 tmp = I915_READ(DSPFW9_CHV);
3959                 wm->pipe[PIPE_C].primary = _FW_WM_VLV(tmp, PLANEC);
3960                 wm->pipe[PIPE_C].cursor = _FW_WM(tmp, CURSORC);
3961
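                /*
                 * DSPHOWM carries the high-order bit of each watermark value,
                 * extending the 8-bit fields read above (9-bit for the SR
                 * plane value).
                 */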
3962                 tmp = I915_READ(DSPHOWM);
3963                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
3964                 wm->pipe[PIPE_C].sprite[1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
3965                 wm->pipe[PIPE_C].sprite[0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
3966                 wm->pipe[PIPE_C].primary |= _FW_WM(tmp, PLANEC_HI) << 8;
3967                 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8;
3968                 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
3969                 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8;
3970                 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
3971                 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
3972                 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8;
3973         } else {
3974                 tmp = I915_READ(DSPFW7);
3975                 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED);
3976                 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC);
3977
3978                 tmp = I915_READ(DSPHOWM);
3979                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
3980                 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8;
3981                 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
3982                 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8;
3983                 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
3984                 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
3985                 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8;
3986         }
3987 }
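/*
 * Note on the DSPHOWM merges above: the plain DSPFW fields are too
 * narrow to hold the full watermark values, so DSPHOWM carries the
 * extra most-significant bit of each one. Shifting the HI bit by 8
 * (9 for the wider SR field) and OR-ing it in reassembles the full
 * value.
 */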
3988
3989 #undef _FW_WM
3990 #undef _FW_WM_VLV
3991
3992 void vlv_wm_get_hw_state(struct drm_device *dev)
3993 {
3994         struct drm_i915_private *dev_priv = to_i915(dev);
3995         struct vlv_wm_values *wm = &dev_priv->wm.vlv;
3996         struct intel_plane *plane;
3997         enum i915_pipe pipe;
3998         u32 val;
3999
4000         vlv_read_wm_values(dev_priv, wm);
4001
4002         for_each_intel_plane(dev, plane) {
4003                 switch (plane->base.type) {
4004                         int sprite;
4005                 case DRM_PLANE_TYPE_CURSOR:
4006                         plane->wm.fifo_size = 63;
4007                         break;
4008                 case DRM_PLANE_TYPE_PRIMARY:
4009                         plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, 0);
4010                         break;
4011                 case DRM_PLANE_TYPE_OVERLAY:
4012                         sprite = plane->plane;
4013                         plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, sprite + 1);
4014                         break;
4015                 }
4016         }
4017
4018         wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
4019         wm->level = VLV_WM_LEVEL_PM2;
4020
4021         if (IS_CHERRYVIEW(dev_priv)) {
4022                 mutex_lock(&dev_priv->rps.hw_lock);
4023
4024                 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
4025                 if (val & DSP_MAXFIFO_PM5_ENABLE)
4026                         wm->level = VLV_WM_LEVEL_PM5;
4027
4028                 /*
4029                  * If DDR DVFS is disabled in the BIOS, the Punit
4030                  * will never ack the request. If that happens,
4031                  * assume we don't have to enable/disable DDR DVFS
4032                  * dynamically. To test this, just set the REQ_ACK
4033                  * bit to poke the Punit, but don't change the
4034                  * HIGH/LOW bits so that we don't actually change
4035                  * the current state.
4036                  */
4037                 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
4038                 val |= FORCE_DDR_FREQ_REQ_ACK;
4039                 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
4040
4041                 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
4042                               FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
4043                         DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
4044                                       "assuming DDR DVFS is disabled\n");
4045                         dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
4046                 } else {
4047                         val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
4048                         if ((val & FORCE_DDR_HIGH_FREQ) == 0)
4049                                 wm->level = VLV_WM_LEVEL_DDR_DVFS;
4050                 }
4051
4052                 mutex_unlock(&dev_priv->rps.hw_lock);
4053         }
4054
4055         for_each_pipe(dev_priv, pipe)
4056                 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
4057                               pipe_name(pipe), wm->pipe[pipe].primary, wm->pipe[pipe].cursor,
4058                               wm->pipe[pipe].sprite[0], wm->pipe[pipe].sprite[1]);
4059
4060         DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
4061                       wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
4062 }
4063
4064 void ilk_wm_get_hw_state(struct drm_device *dev)
4065 {
4066         struct drm_i915_private *dev_priv = dev->dev_private;
4067         struct ilk_wm_values *hw = &dev_priv->wm.hw;
4068         struct drm_crtc *crtc;
4069
4070         for_each_crtc(dev, crtc)
4071                 ilk_pipe_wm_get_hw_state(crtc);
4072
4073         hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
4074         hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
4075         hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
4076
4077         hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
4078         if (INTEL_INFO(dev)->gen >= 7) {
4079                 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
4080                 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
4081         }
4082
4083         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4084                 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
4085                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
4086         else if (IS_IVYBRIDGE(dev))
4087                 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
4088                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
4089
4090         hw->enable_fbc_wm =
4091                 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
4092 }
4093
4094 /**
4095  * intel_update_watermarks - update FIFO watermark values based on current modes
4096  *
4097  * Calculate watermark values for the various WM regs based on current mode
4098  * and plane configuration.
4099  *
4100  * There are several cases to deal with here:
4101  *   - normal (i.e. non-self-refresh)
4102  *   - self-refresh (SR) mode
4103  *   - lines are large relative to FIFO size (buffer can hold up to 2)
4104  *   - lines are small relative to FIFO size (buffer can hold more than 2
4105  *     lines), so we need to account for TLB latency
4106  *
4107  *   The normal calculation is:
4108  *     watermark = dotclock * bytes per pixel * latency
4109  *   where latency is platform & configuration dependent (we assume pessimal
4110  *   values here).
4111  *
4112  *   The SR calculation is:
4113  *     watermark = (trunc(latency/line time)+1) * surface width *
4114  *       bytes per pixel
4115  *   where
4116  *     line time = htotal / dotclock
4117  *     surface width = hdisplay for normal plane and 64 for cursor
4118  *   and latency is assumed to be high, as above.
4119  *
4120  * The final value programmed to the register should always be rounded up,
4121  * and include an extra 2 entries to account for clock crossings.
4122  *
4123  * We don't use the sprite, so we can ignore that.  And on Crestline we have
4124  * to set the non-SR watermarks to 8.
4125  */
4126 void intel_update_watermarks(struct drm_crtc *crtc)
4127 {
4128         struct drm_i915_private *dev_priv = crtc->dev->dev_private;
4129
4130         if (dev_priv->display.update_wm)
4131                 dev_priv->display.update_wm(crtc);
4132 }
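/*
 * Worked example of the SR formula above, with purely hypothetical
 * numbers: dotclock = 148500 kHz, htotal = 2200, hdisplay = 1920,
 * 4 bytes per pixel, latency = 12 us.
 *   line time = htotal / dotclock = 2200 / 148500 kHz, about 14.8 us
 *   watermark = (trunc(12 / 14.8) + 1) * 1920 * 4 = 1 * 7680 bytes
 * The programmed value would then be rounded up and padded with the
 * two extra entries for clock crossings, as described above.
 */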
4133
4134 void intel_update_sprite_watermarks(struct drm_plane *plane,
4135                                     struct drm_crtc *crtc,
4136                                     uint32_t sprite_width,
4137                                     uint32_t sprite_height,
4138                                     int pixel_size,
4139                                     bool enabled, bool scaled)
4140 {
4141         struct drm_i915_private *dev_priv = plane->dev->dev_private;
4142
4143         if (dev_priv->display.update_sprite_wm)
4144                 dev_priv->display.update_sprite_wm(plane, crtc,
4145                                                    sprite_width, sprite_height,
4146                                                    pixel_size, enabled, scaled);
4147 }
4148
4149 /**
4150  * Lock protecting IPS-related data structures
4151  */
4152 struct lock mchdev_lock;
4153 LOCK_SYSINIT(mchdev, &mchdev_lock, "mchdev", LK_CANRECURSE);
4154
4155 /* Global for IPS driver to get at the current i915 device. Protected by
4156  * mchdev_lock. */
4157 static struct drm_i915_private *i915_mch_dev;
4158
4159 bool ironlake_set_drps(struct drm_device *dev, u8 val)
4160 {
4161         struct drm_i915_private *dev_priv = dev->dev_private;
4162         u16 rgvswctl;
4163
4164         assert_spin_locked(&mchdev_lock);
4165
4166         rgvswctl = I915_READ16(MEMSWCTL);
4167         if (rgvswctl & MEMCTL_CMD_STS) {
4168                 DRM_DEBUG("gpu busy, RCS change rejected\n");
4169                 return false; /* still busy with another command */
4170         }
4171
4172         rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
4173                 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
4174         I915_WRITE16(MEMSWCTL, rgvswctl);
4175         POSTING_READ16(MEMSWCTL);
4176
4177         rgvswctl |= MEMCTL_CMD_STS;
4178         I915_WRITE16(MEMSWCTL, rgvswctl);
4179
4180         return true;
4181 }
4182
4183 static void ironlake_enable_drps(struct drm_device *dev)
4184 {
4185         struct drm_i915_private *dev_priv = dev->dev_private;
4186         u32 rgvmodectl = I915_READ(MEMMODECTL);
4187         u8 fmax, fmin, fstart, vstart;
4188
4189         spin_lock_irq(&mchdev_lock);
4190
4191         /* Enable temp reporting */
4192         I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
4193         I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
4194
4195         /* 100ms RC evaluation intervals */
4196         I915_WRITE(RCUPEI, 100000);
4197         I915_WRITE(RCDNEI, 100000);
4198
4199         /* Set max/min thresholds to 90ms and 80ms respectively */
4200         I915_WRITE(RCBMAXAVG, 90000);
4201         I915_WRITE(RCBMINAVG, 80000);
4202
4203         I915_WRITE(MEMIHYST, 1);
4204
4205         /* Set up min, max, and cur for interrupt handling */
4206         fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
4207         fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
4208         fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
4209                 MEMMODE_FSTART_SHIFT;
4210
4211         vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
4212                 PXVFREQ_PX_SHIFT;
4213
4214         dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
4215         dev_priv->ips.fstart = fstart;
4216
4217         dev_priv->ips.max_delay = fstart;
4218         dev_priv->ips.min_delay = fmin;
4219         dev_priv->ips.cur_delay = fstart;
4220
4221         DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
4222                          fmax, fmin, fstart);
4223
4224         I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
4225
4226         /*
4227          * Interrupts will be enabled in ironlake_irq_postinstall
4228          */
4229
4230         I915_WRITE(VIDSTART, vstart);
4231         POSTING_READ(VIDSTART);
4232
4233         rgvmodectl |= MEMMODE_SWMODE_EN;
4234         I915_WRITE(MEMMODECTL, rgvmodectl);
4235
4236         if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
4237                 DRM_ERROR("stuck trying to change perf mode\n");
4238         mdelay(1);
4239
4240         ironlake_set_drps(dev, fstart);
4241
4242         dev_priv->ips.last_count1 = I915_READ(DMIEC) +
4243                 I915_READ(DDREC) + I915_READ(CSIEC);
4244         dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
4245         dev_priv->ips.last_count2 = I915_READ(GFXEC);
4246         dev_priv->ips.last_time2 = ktime_get_raw_ns();
4247
4248         spin_unlock_irq(&mchdev_lock);
4249 }
4250
4251 static void ironlake_disable_drps(struct drm_device *dev)
4252 {
4253         struct drm_i915_private *dev_priv = dev->dev_private;
4254         u16 rgvswctl;
4255
4256         spin_lock_irq(&mchdev_lock);
4257
4258         rgvswctl = I915_READ16(MEMSWCTL);
4259
4260         /* Ack interrupts, disable EFC interrupt */
4261         I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
4262         I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
4263         I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
4264         I915_WRITE(DEIIR, DE_PCU_EVENT);
4265         I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
4266
4267         /* Go back to the starting frequency */
4268         ironlake_set_drps(dev, dev_priv->ips.fstart);
4269         mdelay(1);
4270         rgvswctl |= MEMCTL_CMD_STS;
4271         I915_WRITE(MEMSWCTL, rgvswctl);
4272         mdelay(1);
4273
4274         spin_unlock_irq(&mchdev_lock);
4275 }
4276
4277 /* There's a funny hw issue where the hw returns all 0 when reading from
4278  * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
4279  * ourselves, instead of doing an rmw cycle (which might result in us clearing
4280  * all limits and leaving the gpu stuck at whatever frequency it is currently at).
4281  */
4282 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
4283 {
4284         u32 limits;
4285
4286         /* Only set the down limit when we've reached the lowest level to avoid
4287          * getting more interrupts, otherwise leave this clear. This prevents a
4288          * race in the hw when coming out of rc6: There's a tiny window where
4289          * the hw runs at the minimal clock before selecting the desired
4290          * frequency; if the down threshold expires in that window we will not
4291          * receive a down interrupt. */
4292         if (IS_GEN9(dev_priv->dev)) {
4293                 limits = (dev_priv->rps.max_freq_softlimit) << 23;
4294                 if (val <= dev_priv->rps.min_freq_softlimit)
4295                         limits |= (dev_priv->rps.min_freq_softlimit) << 14;
4296         } else {
4297                 limits = dev_priv->rps.max_freq_softlimit << 24;
4298                 if (val <= dev_priv->rps.min_freq_softlimit)
4299                         limits |= dev_priv->rps.min_freq_softlimit << 16;
4300         }
4301
4302         return limits;
4303 }
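/*
 * Illustration of the packed layout computed above (hypothetical
 * softlimit values, pre-gen9 field positions): with max = 0x10 and
 * min = 0x04 at the lowest level,
 *   limits = (0x10 << 24) | (0x04 << 16) = 0x10040000
 * Gen9 packs the same information at bit 23 (max) and bit 14 (min).
 */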
4304
4305 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
4306 {
4307         int new_power;
4308         u32 threshold_up = 0, threshold_down = 0; /* in % */
4309         u32 ei_up = 0, ei_down = 0;
4310
4311         new_power = dev_priv->rps.power;
4312         switch (dev_priv->rps.power) {
4313         case LOW_POWER:
4314                 if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq)
4315                         new_power = BETWEEN;
4316                 break;
4317
4318         case BETWEEN:
4319                 if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq)
4320                         new_power = LOW_POWER;
4321                 else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq)
4322                         new_power = HIGH_POWER;
4323                 break;
4324
4325         case HIGH_POWER:
4326                 if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq)
4327                         new_power = BETWEEN;
4328                 break;
4329         }
4330         /* Max/min bins are special */
4331         if (val <= dev_priv->rps.min_freq_softlimit)
4332                 new_power = LOW_POWER;
4333         if (val >= dev_priv->rps.max_freq_softlimit)
4334                 new_power = HIGH_POWER;
4335         if (new_power == dev_priv->rps.power)
4336                 return;
4337
4338         /* Note the units here are not exactly 1us, but 1280ns. */
4339         switch (new_power) {
4340         case LOW_POWER:
4341                 /* Upclock if more than 95% busy over 16ms */
4342                 ei_up = 16000;
4343                 threshold_up = 95;
4344
4345                 /* Downclock if less than 85% busy over 32ms */
4346                 ei_down = 32000;
4347                 threshold_down = 85;
4348                 break;
4349
4350         case BETWEEN:
4351                 /* Upclock if more than 90% busy over 13ms */
4352                 ei_up = 13000;
4353                 threshold_up = 90;
4354
4355                 /* Downclock if less than 75% busy over 32ms */
4356                 ei_down = 32000;
4357                 threshold_down = 75;
4358                 break;
4359
4360         case HIGH_POWER:
4361                 /* Upclock if more than 85% busy over 10ms */
4362                 ei_up = 10000;
4363                 threshold_up = 85;
4364
4365                 /* Downclock if less than 60% busy over 32ms */
4366                 ei_down = 32000;
4367                 threshold_down = 60;
4368                 break;
4369         }
4370
4371         I915_WRITE(GEN6_RP_UP_EI,
4372                 GT_INTERVAL_FROM_US(dev_priv, ei_up));
4373         I915_WRITE(GEN6_RP_UP_THRESHOLD,
4374                 GT_INTERVAL_FROM_US(dev_priv, (ei_up * threshold_up / 100)));
4375
4376         I915_WRITE(GEN6_RP_DOWN_EI,
4377                 GT_INTERVAL_FROM_US(dev_priv, ei_down));
4378         I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
4379                 GT_INTERVAL_FROM_US(dev_priv, (ei_down * threshold_down / 100)));
4380
4381         I915_WRITE(GEN6_RP_CONTROL,
4382                    GEN6_RP_MEDIA_TURBO |
4383                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
4384                    GEN6_RP_MEDIA_IS_GFX |
4385                    GEN6_RP_ENABLE |
4386                    GEN6_RP_UP_BUSY_AVG |
4387                    GEN6_RP_DOWN_IDLE_AVG);
4388
4389         dev_priv->rps.power = new_power;
4390         dev_priv->rps.up_threshold = threshold_up;
4391         dev_priv->rps.down_threshold = threshold_down;
4392         dev_priv->rps.last_adj = 0;
4393 }
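/*
 * Worked example of the threshold programming above, for the
 * LOW_POWER bin: ei_up = 16000 us and threshold_up = 95, so
 * GEN6_RP_UP_THRESHOLD is loaded with the hardware-unit equivalent
 * of 16000 * 95 / 100 = 15200 us, i.e. "more than 95% busy over a
 * 16 ms window", with GT_INTERVAL_FROM_US converting from us into
 * the 1280 ns hardware intervals noted above.
 */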
4394
4395 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
4396 {
4397         u32 mask = 0;
4398
4399         if (val > dev_priv->rps.min_freq_softlimit)
4400                 mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
4401         if (val < dev_priv->rps.max_freq_softlimit)
4402                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
4403
4404         mask &= dev_priv->pm_rps_events;
4405
4406         return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
4407 }
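/*
 * Example: when val is already at min_freq_softlimit, none of the
 * "down" events are added to mask, so they remain set (masked) in
 * the value written to GEN6_PMINTRMSK and the hardware cannot keep
 * firing down-threshold interrupts we could never act on. The
 * mirror image applies at max_freq_softlimit for the "up" events.
 */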
4408
4409 /* gen6_set_rps is called to update the frequency request, but should also be
4410  * called when the range (min_delay and max_delay) is modified so that we can
4411  * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
4412 static void gen6_set_rps(struct drm_device *dev, u8 val)
4413 {
4414         struct drm_i915_private *dev_priv = dev->dev_private;
4415
4416         /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
4417         if (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0))
4418                 return;
4419
4420         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4421         WARN_ON(val > dev_priv->rps.max_freq);
4422         WARN_ON(val < dev_priv->rps.min_freq);
4423
4424         /* min/max delay may still have been modified so be sure to
4425          * write the limits value.
4426          */
4427         if (val != dev_priv->rps.cur_freq) {
4428                 gen6_set_rps_thresholds(dev_priv, val);
4429
4430                 if (IS_GEN9(dev))
4431                         I915_WRITE(GEN6_RPNSWREQ,
4432                                    GEN9_FREQUENCY(val));
4433                 else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4434                         I915_WRITE(GEN6_RPNSWREQ,
4435                                    HSW_FREQUENCY(val));
4436                 else
4437                         I915_WRITE(GEN6_RPNSWREQ,
4438                                    GEN6_FREQUENCY(val) |
4439                                    GEN6_OFFSET(0) |
4440                                    GEN6_AGGRESSIVE_TURBO);
4441         }
4442
4443         /* Make sure we continue to get interrupts
4444          * until we hit the minimum or maximum frequencies.
4445          */
4446         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
4447         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
4448
4449         POSTING_READ(GEN6_RPNSWREQ);
4450
4451         dev_priv->rps.cur_freq = val;
4452         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
4453 }
4454
4455 static void valleyview_set_rps(struct drm_device *dev, u8 val)
4456 {
4457         struct drm_i915_private *dev_priv = dev->dev_private;
4458
4459         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4460         WARN_ON(val > dev_priv->rps.max_freq);
4461         WARN_ON(val < dev_priv->rps.min_freq);
4462
4463         if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
4464                       "Odd GPU freq value\n"))
4465                 val &= ~1;
4466
4467         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
4468
4469         if (val != dev_priv->rps.cur_freq) {
4470                 vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
4471                 if (!IS_CHERRYVIEW(dev_priv))
4472                         gen6_set_rps_thresholds(dev_priv, val);
4473         }
4474
4475         dev_priv->rps.cur_freq = val;
4476         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
4477 }
4478
4479 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
4480  *
4481  * If Gfx is idle, then:
4482  * 1. Forcewake the Media well.
4483  * 2. Request the idle frequency.
4484  * 3. Release forcewake of the Media well.
4485  */
4486 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
4487 {
4488         u32 val = dev_priv->rps.idle_freq;
4489
4490         if (dev_priv->rps.cur_freq <= val)
4491                 return;
4492
4493         /* Wake up the media well, as that takes a lot less
4494          * power than the Render well. */
4495         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
4496         valleyview_set_rps(dev_priv->dev, val);
4497         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
4498 }
4499
4500 void gen6_rps_busy(struct drm_i915_private *dev_priv)
4501 {
4502         mutex_lock(&dev_priv->rps.hw_lock);
4503         if (dev_priv->rps.enabled) {
4504                 if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
4505                         gen6_rps_reset_ei(dev_priv);
4506                 I915_WRITE(GEN6_PMINTRMSK,
4507                            gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
4508         }
4509         mutex_unlock(&dev_priv->rps.hw_lock);
4510 }
4511
4512 void gen6_rps_idle(struct drm_i915_private *dev_priv)
4513 {
4514         struct drm_device *dev = dev_priv->dev;
4515
4516         mutex_lock(&dev_priv->rps.hw_lock);
4517         if (dev_priv->rps.enabled) {
4518                 if (IS_VALLEYVIEW(dev))
4519                         vlv_set_rps_idle(dev_priv);
4520                 else
4521                         gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
4522                 dev_priv->rps.last_adj = 0;
4523                 I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
4524         }
4525         mutex_unlock(&dev_priv->rps.hw_lock);
4526
4527         lockmgr(&dev_priv->rps.client_lock, LK_EXCLUSIVE);
4528         while (!list_empty(&dev_priv->rps.clients))
4529                 list_del_init(dev_priv->rps.clients.next);
4530         lockmgr(&dev_priv->rps.client_lock, LK_RELEASE);
4531 }
4532
4533 void gen6_rps_boost(struct drm_i915_private *dev_priv,
4534                     struct intel_rps_client *rps,
4535                     unsigned long submitted)
4536 {
4537         /* This is intentionally racy! We peek at the state here, then
4538          * validate inside the RPS worker.
4539          */
4540         if (!(dev_priv->mm.busy &&
4541               dev_priv->rps.enabled &&
4542               dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit))
4543                 return;
4544
4545         /* Force an RPS boost (and don't count it against the client) if
4546          * the GPU is severely congested.
4547          */
4548         if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
4549                 rps = NULL;
4550
4551         lockmgr(&dev_priv->rps.client_lock, LK_EXCLUSIVE);
4552         if (rps == NULL || list_empty(&rps->link)) {
4553                 spin_lock_irq(&dev_priv->irq_lock);
4554                 if (dev_priv->rps.interrupts_enabled) {
4555                         dev_priv->rps.client_boost = true;
4556                         queue_work(dev_priv->wq, &dev_priv->rps.work);
4557                 }
4558                 spin_unlock_irq(&dev_priv->irq_lock);
4559
4560                 if (rps != NULL) {
4561                         list_add(&rps->link, &dev_priv->rps.clients);
4562                         rps->boosts++;
4563                 } else
4564                         dev_priv->rps.boosts++;
4565         }
4566         lockmgr(&dev_priv->rps.client_lock, LK_RELEASE);
4567 }
4568
4569 void intel_set_rps(struct drm_device *dev, u8 val)
4570 {
4571         if (IS_VALLEYVIEW(dev))
4572                 valleyview_set_rps(dev, val);
4573         else
4574                 gen6_set_rps(dev, val);
4575 }
4576
4577 static void gen9_disable_rps(struct drm_device *dev)
4578 {
4579         struct drm_i915_private *dev_priv = dev->dev_private;
4580
4581         I915_WRITE(GEN6_RC_CONTROL, 0);
4582         I915_WRITE(GEN9_PG_ENABLE, 0);
4583 }
4584
4585 static void gen6_disable_rps(struct drm_device *dev)
4586 {
4587         struct drm_i915_private *dev_priv = dev->dev_private;
4588
4589         I915_WRITE(GEN6_RC_CONTROL, 0);
4590         I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
4591 }
4592
4593 static void cherryview_disable_rps(struct drm_device *dev)
4594 {
4595         struct drm_i915_private *dev_priv = dev->dev_private;
4596
4597         I915_WRITE(GEN6_RC_CONTROL, 0);
4598 }
4599
4600 static void valleyview_disable_rps(struct drm_device *dev)
4601 {
4602         struct drm_i915_private *dev_priv = dev->dev_private;
4603
4604         /* We do forcewake before disabling RC6;
4605          * this is what the BIOS expects when going into suspend. */
4606         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4607
4608         I915_WRITE(GEN6_RC_CONTROL, 0);
4609
4610         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4611 }
4612
4613 static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
4614 {
4615         if (IS_VALLEYVIEW(dev)) {
4616                 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))
4617                         mode = GEN6_RC_CTL_RC6_ENABLE;
4618                 else
4619                         mode = 0;
4620         }
4621         if (HAS_RC6p(dev))
4622                 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n",
4623                               (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
4624                               (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
4625                               (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
4626
4627         else
4628                 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n",
4629                               (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off");
4630 }
4631
4632 static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
4633 {
4634         /* No RC6 before Ironlake, and the Ironlake code path is gone. */
4635         if (INTEL_INFO(dev)->gen < 6)
4636                 return 0;
4637
4638         /* Respect the kernel parameter if it is set */
4639         if (enable_rc6 >= 0) {
4640                 int mask;
4641
4642                 if (HAS_RC6p(dev))
4643                         mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE |
4644                                INTEL_RC6pp_ENABLE;
4645                 else
4646                         mask = INTEL_RC6_ENABLE;
4647
4648                 if ((enable_rc6 & mask) != enable_rc6)
4649                         DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n",
4650                                       enable_rc6 & mask, enable_rc6, mask);
4651
4652                 return enable_rc6 & mask;
4653         }
4654
4655         if (IS_IVYBRIDGE(dev))
4656                 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
4657
4658         return INTEL_RC6_ENABLE;
4659 }
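/*
 * Example of the sanitizing above (hypothetical module parameter
 * value): with i915.enable_rc6 = 7 (RC6 | RC6p | RC6pp) on hardware
 * without RC6p support, mask = INTEL_RC6_ENABLE, so 7 & 1 = 1 is
 * returned (with the debug message noting the adjustment) and only
 * plain RC6 is enabled.
 */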
4660
4661 int intel_enable_rc6(const struct drm_device *dev)
4662 {
4663         return i915.enable_rc6;
4664 }
4665
4666 static void gen6_init_rps_frequencies(struct drm_device *dev)
4667 {
4668         struct drm_i915_private *dev_priv = dev->dev_private;
4669         uint32_t rp_state_cap;
4670         u32 ddcc_status = 0;
4671         int ret;
4672
4673         /* All of these values are in units of 50MHz */
4674         dev_priv->rps.cur_freq          = 0;
4675         /* static values from HW: RP0 > RP1 > RPn (min_freq) */
4676         if (IS_BROXTON(dev)) {
4677                 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
4678                 dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff;
4679                 dev_priv->rps.rp1_freq = (rp_state_cap >>  8) & 0xff;
4680                 dev_priv->rps.min_freq = (rp_state_cap >>  0) & 0xff;
4681         } else {
4682                 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
4683                 dev_priv->rps.rp0_freq = (rp_state_cap >>  0) & 0xff;
4684                 dev_priv->rps.rp1_freq = (rp_state_cap >>  8) & 0xff;
4685                 dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff;
4686         }
4687
4688         /* hw_max = RP0 until we check for overclocking */
4689         dev_priv->rps.max_freq          = dev_priv->rps.rp0_freq;
4690
4691         dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
4692         if (IS_HASWELL(dev) || IS_BROADWELL(dev) || IS_SKYLAKE(dev)) {
4693                 ret = sandybridge_pcode_read(dev_priv,
4694                                         HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
4695                                         &ddcc_status);
4696                 if (ret == 0)
4697                         dev_priv->rps.efficient_freq =
4698                                 clamp_t(u8,
4699                                         ((ddcc_status >> 8) & 0xff),
4700                                         dev_priv->rps.min_freq,
4701                                         dev_priv->rps.max_freq);
4702         }
4703
4704         if (IS_SKYLAKE(dev)) {
4705                 /* Store the frequency values in 16.66 MHz units, which is
4706                  * the natural hardware unit for SKL. */
4707                 dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER;
4708                 dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER;
4709                 dev_priv->rps.min_freq *= GEN9_FREQ_SCALER;
4710                 dev_priv->rps.max_freq *= GEN9_FREQ_SCALER;
4711                 dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER;
4712         }
4713
4714         dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
4715
4716         /* Preserve min/max settings in case of re-init */
4717         if (dev_priv->rps.max_freq_softlimit == 0)
4718                 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
4719
4720         if (dev_priv->rps.min_freq_softlimit == 0) {
4721                 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4722                         dev_priv->rps.min_freq_softlimit =
4723                                 max_t(int, dev_priv->rps.efficient_freq,
4724                                       intel_freq_opcode(dev_priv, 450));
4725                 else
4726                         dev_priv->rps.min_freq_softlimit =
4727                                 dev_priv->rps.min_freq;
4728         }
4729 }
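/*
 * Example of the RP_STATE_CAP decode above (hypothetical register
 * value, non-BXT layout): rp_state_cap = 0x040b16 gives
 *   rp0 = 0x16 (22), rp1 = 0x0b (11), min = 0x04 (4)
 * i.e. 1100/550/200 MHz in the 50 MHz units used here. On SKL these
 * are then multiplied by GEN9_FREQ_SCALER so that one unit means
 * 16.66 MHz instead.
 */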
4730
4731 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
4732 static void gen9_enable_rps(struct drm_device *dev)
4733 {
4734         struct drm_i915_private *dev_priv = dev->dev_private;
4735
4736         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4737
4738         gen6_init_rps_frequencies(dev);
4739
4740         /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
4741         if (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) {
4742                 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4743                 return;
4744         }
4745
4746         /* Program defaults and thresholds for RPS */
4747         I915_WRITE(GEN6_RC_VIDEO_FREQ,
4748                 GEN9_FREQUENCY(dev_priv->rps.rp1_freq));
4749
4750         /* 1 second timeout */
4751         I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
4752                 GT_INTERVAL_FROM_US(dev_priv, 1000000));
4753
4754         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
4755
4756         /* Lean on the call to gen6_set_rps below to program/set up the
4757          * Up/Down EI & threshold registers, as well as the RP_CONTROL,
4758          * RP_INTERRUPT_LIMITS & RPNSWREQ registers. */
4759         dev_priv->rps.power = HIGH_POWER; /* force a reset */
4760         gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
4761
4762         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4763 }
4764
4765 static void gen9_enable_rc6(struct drm_device *dev)
4766 {
4767         struct drm_i915_private *dev_priv = dev->dev_private;
4768         struct intel_engine_cs *ring;
4769         uint32_t rc6_mask = 0;
4770         int unused;
4771
4772         /* 1a: Software RC state - RC0 */
4773         I915_WRITE(GEN6_RC_STATE, 0);
4774
4775         /* 1b: Get forcewake during program sequence. Although the driver
4776          * hasn't enabled a state yet where we need forcewake, the BIOS may have. */
4777         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4778
4779         /* 2a: Disable RC states. */
4780         I915_WRITE(GEN6_RC_CONTROL, 0);
4781
4782         /* 2b: Program RC6 thresholds. */
4783
4784         /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */
4785         if (IS_SKYLAKE(dev))
4786                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
4787         else
4788                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
4789         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4790         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4791         for_each_ring(ring, dev_priv, unused)
4792                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4793
4794         if (HAS_GUC_UCODE(dev))
4795                 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
4796
4797         I915_WRITE(GEN6_RC_SLEEP, 0);
4798
4799         /* 2c: Program Coarse Power Gating Policies. */
4800         I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25);
4801         I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25);
4802
4803         /* 3a: Enable RC6 */
4804         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4805                 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4806         DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
4807                         "on" : "off");
4808         /* WaRsUseTimeoutMode */
4809         if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_D0) ||
4810             (IS_BROXTON(dev) && INTEL_REVID(dev) <= BXT_REVID_A0)) {
4811                 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */
4812                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4813                            GEN7_RC_CTL_TO_MODE |
4814                            rc6_mask);
4815         } else {
4816                 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
4817                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4818                            GEN6_RC_CTL_EI_MODE(1) |
4819                            rc6_mask);
4820         }
4821
4822         /*
4823          * 3b: Enable Coarse Power Gating only when RC6 is enabled.
4824          * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6.
4825          */
4826         if ((IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) ||
4827             ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) && (INTEL_REVID(dev) <= SKL_REVID_F0)))
4828                 I915_WRITE(GEN9_PG_ENABLE, 0);
4829         else
4830                 I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
4831                                 (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0);
4832
4833         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4834
4835 }
4836
4837 static void gen8_enable_rps(struct drm_device *dev)
4838 {
4839         struct drm_i915_private *dev_priv = dev->dev_private;
4840         struct intel_engine_cs *ring;
4841         uint32_t rc6_mask = 0;
4842         int unused;
4843
4844         /* 1a: Software RC state - RC0 */
4845         I915_WRITE(GEN6_RC_STATE, 0);
4846
4847         /* 1c & 1d: Get forcewake during program sequence. Although the driver
4848          * hasn't enabled a state yet where we need forcewake, the BIOS may have. */
4849         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4850
4851         /* 2a: Disable RC states. */
4852         I915_WRITE(GEN6_RC_CONTROL, 0);
4853
4854         /* Initialize rps frequencies */
4855         gen6_init_rps_frequencies(dev);
4856
4857         /* 2b: Program RC6 thresholds. */
4858         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
4859         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4860         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4861         for_each_ring(ring, dev_priv, unused)
4862                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4863         I915_WRITE(GEN6_RC_SLEEP, 0);
4864         if (IS_BROADWELL(dev))
4865                 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
4866         else
4867                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
4868
4869         /* 3: Enable RC6 */
4870         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4871                 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4872         intel_print_rc6_info(dev, rc6_mask);
4873         if (IS_BROADWELL(dev))
4874                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4875                                 GEN7_RC_CTL_TO_MODE |
4876                                 rc6_mask);
4877         else
4878                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4879                                 GEN6_RC_CTL_EI_MODE(1) |
4880                                 rc6_mask);
4881
4882         /* 4: Program defaults and thresholds for RPS */
4883         I915_WRITE(GEN6_RPNSWREQ,
4884                    HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4885         I915_WRITE(GEN6_RC_VIDEO_FREQ,
4886                    HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4887         /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
4888         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
4889
4890         /* Docs recommend 900MHz, and 300 MHz respectively */
4891         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
4892                    dev_priv->rps.max_freq_softlimit << 24 |
4893                    dev_priv->rps.min_freq_softlimit << 16);
4894
4895         I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
4896         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70% */
4897         I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
4898         I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
4899
4900         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
4901
4902         /* 5: Enable RPS */
4903         I915_WRITE(GEN6_RP_CONTROL,
4904                    GEN6_RP_MEDIA_TURBO |
4905                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
4906                    GEN6_RP_MEDIA_IS_GFX |
4907                    GEN6_RP_ENABLE |
4908                    GEN6_RP_UP_BUSY_AVG |
4909                    GEN6_RP_DOWN_IDLE_AVG);
4910
4911         /* 6: Ring frequency + overclocking (our driver does this later) */
4912
4913         dev_priv->rps.power = HIGH_POWER; /* force a reset */
4914         gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
4915
4916         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4917 }
4918
4919 static void gen6_enable_rps(struct drm_device *dev)
4920 {
4921         struct drm_i915_private *dev_priv = dev->dev_private;
4922         struct intel_engine_cs *ring;
4923         u32 rc6vids, pcu_mbox = 0, rc6_mask = 0;
4924         u32 gtfifodbg;
4925         int rc6_mode;
4926         int i, ret;
4927
4928         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4929
4930         /* Here begins a magic sequence of register writes to enable
4931          * auto-downclocking.
4932          *
4933          * Perhaps there might be some value in exposing these to
4934          * userspace...
4935          */
4936         I915_WRITE(GEN6_RC_STATE, 0);
4937
4938         /* Clear the DBG now so we don't confuse earlier errors */
4939         if ((gtfifodbg = I915_READ(GTFIFODBG))) {
4940                 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
4941                 I915_WRITE(GTFIFODBG, gtfifodbg);
4942         }
4943
4944         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4945
4946         /* Initialize rps frequencies */
4947         gen6_init_rps_frequencies(dev);
4948
4949         /* disable the counters and set deterministic thresholds */
4950         I915_WRITE(GEN6_RC_CONTROL, 0);
4951
4952         I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
4953         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
4954         I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
4955         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
4956         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
4957
4958         for_each_ring(ring, dev_priv, i)
4959                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4960
4961         I915_WRITE(GEN6_RC_SLEEP, 0);
4962         I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
4963         if (IS_IVYBRIDGE(dev))
4964                 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
4965         else
4966                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
4967         I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
4968         I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
4969
4970         /* Check if we are enabling RC6 */
4971         rc6_mode = intel_enable_rc6(dev_priv->dev);
4972         if (rc6_mode & INTEL_RC6_ENABLE)
4973                 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
4974
4975         /* We don't use those on Haswell */
4976         if (!IS_HASWELL(dev)) {
4977                 if (rc6_mode & INTEL_RC6p_ENABLE)
4978                         rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
4979
4980                 if (rc6_mode & INTEL_RC6pp_ENABLE)
4981                         rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
4982         }
4983
4984         intel_print_rc6_info(dev, rc6_mask);
4985
4986         I915_WRITE(GEN6_RC_CONTROL,
4987                    rc6_mask |
4988                    GEN6_RC_CTL_EI_MODE(1) |
4989                    GEN6_RC_CTL_HW_ENABLE);
4990
4991         /* Power down if completely idle for over 50ms */
4992         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
4993         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
4994
4995         ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
4996         if (ret)
4997                 DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
4998
4999         ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
5000         if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
5001                 DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
5002                                  (dev_priv->rps.max_freq_softlimit & 0xff) * 50,
5003                                  (pcu_mbox & 0xff) * 50);
5004                 dev_priv->rps.max_freq = pcu_mbox & 0xff;
5005         }
5006
5007         dev_priv->rps.power = HIGH_POWER; /* force a reset */
5008         gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
5009
5010         rc6vids = 0;
5011         ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
5012         if (IS_GEN6(dev) && ret) {
5013                 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
5014         } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
5015                 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
5016                           GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
5017                 rc6vids &= 0xffff00;
5018                 rc6vids |= GEN6_ENCODE_RC6_VID(450);
5019                 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
5020                 if (ret)
5021                         DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
5022         }
5023
5024         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5025 }
5026
5027 static void __gen6_update_ring_freq(struct drm_device *dev)
5028 {
5029         struct drm_i915_private *dev_priv = dev->dev_private;
5030         int min_freq = 15;
5031         unsigned int gpu_freq;
5032         unsigned int max_ia_freq, min_ring_freq;
5033         unsigned int max_gpu_freq, min_gpu_freq;
5034         int scaling_factor = 180;
5035
5036         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5037
5038 #if 0
5039         policy = cpufreq_cpu_get(0);
5040         if (policy) {
5041                 max_ia_freq = policy->cpuinfo.max_freq;
5042                 cpufreq_cpu_put(policy);
5043         } else {
5044                 /*
5045                  * Default to measured freq if none found, PCU will ensure we
5046                  * don't go over
5047                  */
5048                 max_ia_freq = tsc_khz;
5049         }
5050 #else
5051         max_ia_freq = tsc_frequency / 1000;
5052 #endif
5053
5054         /* Convert from kHz to MHz */
5055         max_ia_freq /= 1000;
5056
5057         min_ring_freq = I915_READ(DCLK) & 0xf;
5058         /* convert DDR frequency from units of 266.6MHz to bandwidth */
5059         min_ring_freq = mult_frac(min_ring_freq, 8, 3);
5060
5061         if (IS_SKYLAKE(dev)) {
5062                 /* Convert GT frequency to 50 MHz units */
5063                 min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER;
5064                 max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER;
5065         } else {
5066                 min_gpu_freq = dev_priv->rps.min_freq;
5067                 max_gpu_freq = dev_priv->rps.max_freq;
5068         }
5069
5070         /*
5071          * For each potential GPU frequency, load a ring frequency we'd like
5072          * to use for memory access.  We do this by specifying the IA frequency
5073          * the PCU should use as a reference to determine the ring frequency.
5074          */
5075         for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
5076                 int diff = max_gpu_freq - gpu_freq;
5077                 unsigned int ia_freq = 0, ring_freq = 0;
5078
5079                 if (IS_SKYLAKE(dev)) {
5080                         /*
5081                          * ring_freq = 2 * GT. ring_freq is in 100MHz units
5082                          * No floor required for ring frequency on SKL.
5083                          */
5084                         ring_freq = gpu_freq;
5085                 } else if (INTEL_INFO(dev)->gen >= 8) {
5086                         /* max(2 * GT, DDR). NB: GT is 50MHz units */
5087                         ring_freq = max(min_ring_freq, gpu_freq);
5088                 } else if (IS_HASWELL(dev)) {
5089                         ring_freq = mult_frac(gpu_freq, 5, 4);
5090                         ring_freq = max(min_ring_freq, ring_freq);
5091                         /* leave ia_freq as the default, chosen by cpufreq */
5092                 } else {
5093                         /* On older processors, there is no separate ring
5094                          * clock domain, so in order to boost the bandwidth
5095                          * of the ring, we need to upclock the CPU (ia_freq).
5096                          *
5097                          * For GPU frequencies less than 750MHz,
5098                          * just use the lowest ring freq.
5099                          */
5100                         if (gpu_freq < min_freq)
5101                                 ia_freq = 800;
5102                         else
5103                                 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
5104                         ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
5105                 }
5106
5107                 sandybridge_pcode_write(dev_priv,
5108                                         GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
5109                                         ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
5110                                         ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
5111                                         gpu_freq);
5112         }
5113 }
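/*
 * Worked example of the table construction above, for the Haswell
 * branch (hypothetical values): gpu_freq = 20 (i.e. 1000 MHz in
 * 50 MHz units) gives ring_freq = mult_frac(20, 5, 4) = 25, then
 * max(min_ring_freq, 25); ia_freq is left at 0, so cpufreq keeps
 * choosing the CPU frequency.
 */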
5114
5115 void gen6_update_ring_freq(struct drm_device *dev)
5116 {
5117         struct drm_i915_private *dev_priv = dev->dev_private;
5118
5119         if (!HAS_CORE_RING_FREQ(dev))
5120                 return;
5121
5122         mutex_lock(&dev_priv->rps.hw_lock);
5123         __gen6_update_ring_freq(dev);
5124         mutex_unlock(&dev_priv->rps.hw_lock);
5125 }
5126
5127 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
5128 {
5129         struct drm_device *dev = dev_priv->dev;
5130         u32 val, rp0;
5131
5132         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
5133
5134         switch (INTEL_INFO(dev)->eu_total) {
5135         case 8:
5136                 /* (2 * 4) config */
5137                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
5138                 break;
5139         case 12:
5140                 /* (2 * 6) config */
5141                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
5142                 break;
5143         case 16:
5144                 /* (2 * 8) config */
5145         default:
5146                 /* Setting (2 * 8) Min RP0 for any other combination */
5147                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
5148                 break;
5149         }
5150
5151         rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
5152
5153         return rp0;
5154 }
5155
5156 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
5157 {
5158         u32 val, rpe;
5159
5160         val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
5161         rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
5162
5163         return rpe;
5164 }
5165
5166 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
5167 {
5168         u32 val, rp1;
5169
5170         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
5171         rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
5172
5173         return rp1;
5174 }
5175
5176 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
5177 {
5178         u32 val, rp1;
5179
5180         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
5181
5182         rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
5183
5184         return rp1;
5185 }
5186
5187 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
5188 {
5189         u32 val, rp0;
5190
5191         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
5192
5193         rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
5194         /* Clamp to max */
5195         rp0 = min_t(u32, rp0, 0xea);
5196
5197         return rp0;
5198 }
5199
5200 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
5201 {
5202         u32 val, rpe;
5203
5204         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
5205         rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
5206         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
5207         rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
5208
5209         return rpe;
5210 }
5211
5212 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
5213 {
5214         return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
5215 }
5216
5217 /* Check that the pctx buffer wasn't moved under us. */
5218 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
5219 {
5220         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
5221
5222         /* DragonFly - if EDID fails vlv_pctx can wind up NULL */
5223         if (WARN_ON(!dev_priv->vlv_pctx))
5224                 return;
5225
5226         WARN_ON(pctx_addr != dev_priv->mm.stolen_base +
5227                              dev_priv->vlv_pctx->stolen->start);
5228 }
5229
5230
5231 /* Check that the pcbr address is not empty. */
5232 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
5233 {
5234         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
5235
5236         WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
5237 }
5238
5239 static void cherryview_setup_pctx(struct drm_device *dev)
5240 {
5241         struct drm_i915_private *dev_priv = dev->dev_private;
5242         unsigned long pctx_paddr, paddr;
5243         struct i915_gtt *gtt = &dev_priv->gtt;
5244         u32 pcbr;
5245         int pctx_size = 32*1024;
5246
5247         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
5248
5249         pcbr = I915_READ(VLV_PCBR);
5250         if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
5251                 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
5252                 paddr = (dev_priv->mm.stolen_base +
5253                          (gtt->stolen_size - pctx_size));
5254
5255                 pctx_paddr = (paddr & (~4095));
5256                 I915_WRITE(VLV_PCBR, pctx_paddr);
5257         }
5258
5259         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
5260 }
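/*
 * Note on the address math above: masking with ~4095 aligns the
 * physical address down to a 4 KiB boundary, e.g. a hypothetical
 * paddr of 0x7ffff800 becomes pctx_paddr = 0x7ffff000 before being
 * written to VLV_PCBR.
 */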
5261
5262 static void valleyview_setup_pctx(struct drm_device *dev)
5263 {
5264         struct drm_i915_private *dev_priv = dev->dev_private;
5265         struct drm_i915_gem_object *pctx;
5266         unsigned long pctx_paddr;
5267         u32 pcbr;
5268         int pctx_size = 24*1024;
5269
5270         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
5271
5272         pcbr = I915_READ(VLV_PCBR);
5273         if (pcbr) {
5274                 /* BIOS set it up already, grab the pre-alloc'd space */
5275                 int pcbr_offset;
5276
5277                 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
5278                 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev,
5279                                                                       pcbr_offset,
5280                                                                       I915_GTT_OFFSET_NONE,
5281                                                                       pctx_size);
5282                 goto out;
5283         }
5284
5285         DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
5286
5287         /*
5288          * From the Gunit register HAS:
5289          * The Gfx driver is expected to program this register and ensure
5290          * proper allocation within Gfx stolen memory.  For example, this
5291          * register should be programmed such that the PCBR range does not
5292          * overlap with other ranges, such as the frame buffer, protected
5293          * memory, or any other relevant ranges.
5294          */
5295         pctx = i915_gem_object_create_stolen(dev, pctx_size);
5296         if (!pctx) {
5297                 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
5298                 return;
5299         }
5300
5301         pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
5302         I915_WRITE(VLV_PCBR, pctx_paddr);
5303
5304 out:
5305         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
5306         dev_priv->vlv_pctx = pctx;
5307 }
5308
5309 static void valleyview_cleanup_pctx(struct drm_device *dev)
5310 {
5311         struct drm_i915_private *dev_priv = dev->dev_private;
5312
5313         if (WARN_ON(!dev_priv->vlv_pctx))
5314                 return;
5315
5316         drm_gem_object_unreference(&dev_priv->vlv_pctx->base);
5317         dev_priv->vlv_pctx = NULL;
5318 }
5319
5320 static void valleyview_init_gt_powersave(struct drm_device *dev)
5321 {
5322         struct drm_i915_private *dev_priv = dev->dev_private;
5323         u32 val;
5324
5325         valleyview_setup_pctx(dev);
5326
5327         mutex_lock(&dev_priv->rps.hw_lock);
5328
5329         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
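        /* Bits 7:6 of PUNIT_REG_GPU_FREQ_STS encode the DDR speed:
         * 0/1 -> 800 MHz, 2 -> 1066 MHz, 3 -> 1333 MHz. */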
5330         switch ((val >> 6) & 3) {
5331         case 0:
5332         case 1:
5333                 dev_priv->mem_freq = 800;
5334                 break;
5335         case 2:
5336                 dev_priv->mem_freq = 1066;
5337                 break;
5338         case 3:
5339                 dev_priv->mem_freq = 1333;
5340                 break;
5341         }
5342         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
5343
5344         dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
5345         dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
5346         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
5347                          intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
5348                          dev_priv->rps.max_freq);
5349
5350         dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv);
5351         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
5352                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5353                          dev_priv->rps.efficient_freq);
5354
5355         dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv);
5356         DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
5357                          intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
5358                          dev_priv->rps.rp1_freq);
5359
5360         dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv);
5361         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
5362                          intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
5363                          dev_priv->rps.min_freq);
5364
5365         dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
5366
5367         /* Preserve min/max settings in case of re-init */
5368         if (dev_priv->rps.max_freq_softlimit == 0)
5369                 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
5370
5371         if (dev_priv->rps.min_freq_softlimit == 0)
5372                 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
5373
5374         mutex_unlock(&dev_priv->rps.hw_lock);
5375 }
5376
5377 static void cherryview_init_gt_powersave(struct drm_device *dev)
5378 {
5379         struct drm_i915_private *dev_priv = dev->dev_private;
5380         u32 val;
5381
5382         cherryview_setup_pctx(dev);
5383
5384         mutex_lock(&dev_priv->rps.hw_lock);
5385
5386         mutex_lock(&dev_priv->sb_lock);
5387         val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
5388         mutex_unlock(&dev_priv->sb_lock);
5389
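        /* Bits 4:2 of CCK_FUSE_REG encode the DDR speed; only the 2000 MHz
         * encoding (3) is special-cased, everything else is treated as
         * 1600 MHz. */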
5390         switch ((val >> 2) & 0x7) {
5391         case 3:
5392                 dev_priv->mem_freq = 2000;
5393                 break;
5394         default:
5395                 dev_priv->mem_freq = 1600;
5396                 break;
5397         }
5398         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
5399
5400         dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv);
5401         dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
5402         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
5403                          intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
5404                          dev_priv->rps.max_freq);
5405
5406         dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv);
5407         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
5408                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5409                          dev_priv->rps.efficient_freq);
5410
5411         dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv);
5412         DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
5413                          intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
5414                          dev_priv->rps.rp1_freq);
5415
5416         /* PUnit validated range is only [RPe, RP0] */
5417         dev_priv->rps.min_freq = dev_priv->rps.efficient_freq;
5418         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
5419                          intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
5420                          dev_priv->rps.min_freq);
5421
5422         WARN_ONCE((dev_priv->rps.max_freq |
5423                    dev_priv->rps.efficient_freq |
5424                    dev_priv->rps.rp1_freq |
5425                    dev_priv->rps.min_freq) & 1,
5426                   "Odd GPU freq values\n");
5427
5428         dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
5429
5430         /* Preserve min/max settings in case of re-init */
5431         if (dev_priv->rps.max_freq_softlimit == 0)
5432                 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
5433
5434         if (dev_priv->rps.min_freq_softlimit == 0)
5435                 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
5436
5437         mutex_unlock(&dev_priv->rps.hw_lock);
5438 }
5439
5440 static void valleyview_cleanup_gt_powersave(struct drm_device *dev)
5441 {
5442         valleyview_cleanup_pctx(dev);
5443 }
5444
5445 static void cherryview_enable_rps(struct drm_device *dev)
5446 {
5447         struct drm_i915_private *dev_priv = dev->dev_private;
5448         struct intel_engine_cs *ring;
5449         u32 gtfifodbg, val, rc6_mode = 0, pcbr;
5450         int i;
5451
5452         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5453
5454         gtfifodbg = I915_READ(GTFIFODBG);
5455         if (gtfifodbg) {
5456                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
5457                                  gtfifodbg);
5458                 I915_WRITE(GTFIFODBG, gtfifodbg);
5459         }
5460
5461         cherryview_check_pctx(dev_priv);
5462
5463         /* 1a & 1b: Get forcewake during program sequence. Although the driver
5464          * hasn't enabled a state yet where we need forcewake, the BIOS may have. */
5465         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5466
5467         /*  Disable RC states. */
5468         I915_WRITE(GEN6_RC_CONTROL, 0);
5469
5470         /* 2a: Program RC6 thresholds.*/
5471         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
5472         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns */
5473         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
5474
5475         for_each_ring(ring, dev_priv, i)
5476                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
5477         I915_WRITE(GEN6_RC_SLEEP, 0);
5478
5479         /* TO threshold set to 500 us (0x186 * 1.28 us) */
5480         I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
5481
5482         /* allows RC6 residency counter to work */
5483         I915_WRITE(VLV_COUNTER_CONTROL,
5484                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
5485                                       VLV_MEDIA_RC6_COUNT_EN |
5486                                       VLV_RENDER_RC6_COUNT_EN));
5487
5488         /* For now we assume the BIOS is allocating and populating the PCBR */
5489         pcbr = I915_READ(VLV_PCBR);
5490
5491         /* 3: Enable RC6 */
5492         if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) &&
5493                                                 (pcbr >> VLV_PCBR_ADDR_SHIFT))
5494                 rc6_mode = GEN7_RC_CTL_TO_MODE;
5495
5496         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
5497
5498         /* 4: Program defaults and thresholds for RPS */
5499         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
5500         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
5501         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
5502         I915_WRITE(GEN6_RP_UP_EI, 66000);
5503         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
5504
5505         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5506
5507         /* 5: Enable RPS */
5508         I915_WRITE(GEN6_RP_CONTROL,
5509                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
5510                    GEN6_RP_MEDIA_IS_GFX |
5511                    GEN6_RP_ENABLE |
5512                    GEN6_RP_UP_BUSY_AVG |
5513                    GEN6_RP_DOWN_IDLE_AVG);
5514
5515         /* Setting Fixed Bias */
5516         val = VLV_OVERRIDE_EN |
5517                   VLV_SOC_TDP_EN |
5518                   CHV_BIAS_CPU_50_SOC_50;
5519         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
5520
5521         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5522
5523         /* RPS code assumes GPLL is used */
5524         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
5525
5526         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
5527         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
5528
5529         dev_priv->rps.cur_freq = (val >> 8) & 0xff;
5530         DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
5531                          intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
5532                          dev_priv->rps.cur_freq);
5533
5534         DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
5535                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5536                          dev_priv->rps.efficient_freq);
5537
5538         valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
5539
5540         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5541 }
5542
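/*
 * The VLV path below follows the same outline as the CHV one above, but it
 * programs the RPS thresholds before the RC6 setup, uses GEN6_RP_MEDIA_TURBO
 * with GEN6_RP_DOWN_IDLE_CONT rather than GEN6_RP_DOWN_IDLE_AVG, and does not
 * depend on a BIOS-provided PCBR since valleyview_setup_pctx() can allocate
 * its own power context from stolen memory.
 */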
5543 static void valleyview_enable_rps(struct drm_device *dev)
5544 {
5545         struct drm_i915_private *dev_priv = dev->dev_private;
5546         struct intel_engine_cs *ring;
5547         u32 gtfifodbg, val, rc6_mode = 0;
5548         int i;
5549
5550         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5551
5552         valleyview_check_pctx(dev_priv);
5553
5554         if ((gtfifodbg = I915_READ(GTFIFODBG))) {
5555                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
5556                                  gtfifodbg);
5557                 I915_WRITE(GTFIFODBG, gtfifodbg);
5558         }
5559
5560         /* If VLV, forcewake all wells, else redirect to the regular path */
5561         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5562
5563         /*  Disable RC states. */
5564         I915_WRITE(GEN6_RC_CONTROL, 0);
5565
5566         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
5567         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
5568         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
5569         I915_WRITE(GEN6_RP_UP_EI, 66000);
5570         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
5571
5572         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5573
5574         I915_WRITE(GEN6_RP_CONTROL,
5575                    GEN6_RP_MEDIA_TURBO |
5576                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
5577                    GEN6_RP_MEDIA_IS_GFX |
5578                    GEN6_RP_ENABLE |
5579                    GEN6_RP_UP_BUSY_AVG |
5580                    GEN6_RP_DOWN_IDLE_CONT);
5581
5582         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
5583         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
5584         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
5585
5586         for_each_ring(ring, dev_priv, i)
5587                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
5588
5589         I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
5590
5591         /* allows RC6 residency counter to work */
5592         I915_WRITE(VLV_COUNTER_CONTROL,
5593                    _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
5594                                       VLV_RENDER_RC0_COUNT_EN |
5595                                       VLV_MEDIA_RC6_COUNT_EN |
5596                                       VLV_RENDER_RC6_COUNT_EN));
5597
5598         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
5599                 rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
5600
5601         intel_print_rc6_info(dev, rc6_mode);
5602
5603         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
5604
5605         /* Setting Fixed Bias */
5606         val = VLV_OVERRIDE_EN |
5607                   VLV_SOC_TDP_EN |
5608                   VLV_BIAS_CPU_125_SOC_875;
5609         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
5610
5611         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5612
5613         /* RPS code assumes GPLL is used */
5614         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
5615
5616         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
5617         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
5618
5619         dev_priv->rps.cur_freq = (val >> 8) & 0xff;
5620         DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
5621                          intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
5622                          dev_priv->rps.cur_freq);
5623
5624         DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
5625                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5626                          dev_priv->rps.efficient_freq);
5627
5628         valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
5629
5630         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5631 }
5632
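/*
 * Decode a PXVFREQ-style field into a frequency. A worked example with a
 * made-up register value (the units appear to be kHz, given the 133333
 * constant, i.e. a 133.333 MHz reference clock): vidfreq = 0x00110001 gives
 * div = 0x11 (17), post = 0 and pre = 1, so
 * freq = 17 * 133333 / ((1 << 0) * 1) = 2266661.
 */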
5633 static unsigned long intel_pxfreq(u32 vidfreq)
5634 {
5635         unsigned long freq;
5636         int div = (vidfreq & 0x3f0000) >> 16;
5637         int post = (vidfreq & 0x3000) >> 12;
5638         int pre = (vidfreq & 0x7);
5639
5640         if (!pre)
5641                 return 0;
5642
5643         freq = ((div * 133333) / ((1<<post) * pre));
5644
5645         return freq;
5646 }
5647
5648 static const struct cparams {
5649         u16 i;
5650         u16 t;
5651         u16 m;
5652         u16 c;
5653 } cparams[] = {
5654         { 1, 1333, 301, 28664 },
5655         { 1, 1066, 294, 24460 },
5656         { 1, 800, 294, 25192 },
5657         { 0, 1333, 276, 27605 },
5658         { 0, 1066, 276, 27605 },
5659         { 0, 800, 231, 23784 },
5660 };
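/*
 * Each cparams entry keys a platform configuration (matched against
 * dev_priv->ips.c_m and ips.r_t below) to the slope m and intercept c of the
 * linear chipset power model used by __i915_chipset_val():
 *
 *	power = (m * counter_delta_per_ms + c) / 10
 *
 * As a made-up example, m = 301, c = 28664 and a delta of 100 counts/ms
 * yield (301 * 100 + 28664) / 10 = 5876, in whatever units the energy
 * counters report.
 */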
5661
5662 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
5663 {
5664         u64 total_count, diff, ret;
5665         u32 count1, count2, count3, m = 0, c = 0;
5666         unsigned long now = jiffies_to_msecs(jiffies), diff1;
5667         int i;
5668
5669         assert_spin_locked(&mchdev_lock);
5670
5671         diff1 = now - dev_priv->ips.last_time1;
5672
5673         /* Prevent division-by-zero if we are asking too fast.
5674          * Also, we don't get interesting results if we are polling
5675          * faster than once in 10ms, so just return the saved value
5676          * in such cases.
5677          */
5678         if (diff1 <= 10)
5679                 return dev_priv->ips.chipset_power;
5680
5681         count1 = I915_READ(DMIEC);
5682         count2 = I915_READ(DDREC);
5683         count3 = I915_READ(CSIEC);
5684
5685         total_count = count1 + count2 + count3;
5686
5687         /* FIXME: handle per-counter overflow */
5688         if (total_count < dev_priv->ips.last_count1) {
5689                 diff = ~0UL - dev_priv->ips.last_count1;
5690                 diff += total_count;
5691         } else {
5692                 diff = total_count - dev_priv->ips.last_count1;
5693         }
5694
5695         for (i = 0; i < ARRAY_SIZE(cparams); i++) {
5696                 if (cparams[i].i == dev_priv->ips.c_m &&
5697                     cparams[i].t == dev_priv->ips.r_t) {
5698                         m = cparams[i].m;
5699                         c = cparams[i].c;
5700                         break;
5701                 }
5702         }
5703
5704         diff = div_u64(diff, diff1);
5705         ret = ((m * diff) + c);
5706         ret = div_u64(ret, 10);
5707
5708         dev_priv->ips.last_count1 = total_count;
5709         dev_priv->ips.last_time1 = now;
5710
5711         dev_priv->ips.chipset_power = ret;
5712
5713         return ret;
5714 }
5715
5716 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
5717 {
5718         struct drm_device *dev = dev_priv->dev;
5719         unsigned long val;
5720
5721         if (INTEL_INFO(dev)->gen != 5)
5722                 return 0;
5723
5724         spin_lock_irq(&mchdev_lock);
5725
5726         val = __i915_chipset_val(dev_priv);
5727
5728         spin_unlock_irq(&mchdev_lock);
5729
5730         return val;
5731 }
5732
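/*
 * i915_mch_val() reads the slope m and intercept b from TSFS and the current
 * thermal reading x from TR1, returning the linear estimate (m * x) / 127 - b.
 * With made-up values m = 64, x = 100 and b = 10 this gives
 * 64 * 100 / 127 - 10 = 40.
 */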
5733 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
5734 {
5735         unsigned long m, x, b;
5736         u32 tsfs;
5737
5738         tsfs = I915_READ(TSFS);
5739
5740         m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
5741         x = I915_READ8(TR1);
5742
5743         b = tsfs & TSFS_INTR_MASK;
5744
5745         return ((m * x) / 127) - b;
5746 }
5747
5748 static int _pxvid_to_vd(u8 pxvid)
5749 {
5750         if (pxvid == 0)
5751                 return 0;
5752
5753         if (pxvid >= 8 && pxvid < 31)
5754                 pxvid = 31;
5755
5756         return (pxvid + 2) * 125;
5757 }
5758
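/*
 * VIDs in the 8..30 range are clamped to 31 by _pxvid_to_vd() and vd then
 * scales linearly as (pxvid + 2) * 125. Mobile parts report the value
 * relative to a 1125 offset, clamped at zero: e.g. pxvid = 0x10 is clamped
 * to 31, giving vd = (31 + 2) * 125 = 4125 and vm = 4125 - 1125 = 3000.
 */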
5759 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
5760 {
5761         struct drm_device *dev = dev_priv->dev;
5762         const int vd = _pxvid_to_vd(pxvid);
5763         const int vm = vd - 1125;
5764
5765         if (INTEL_INFO(dev)->is_mobile)
5766                 return vm > 0 ? vm : 0;
5767
5768         return vd;
5769 }
5770
5771 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
5772 {
5773         u64 now, diff, diffms;
5774         u32 count;
5775
5776         assert_spin_locked(&mchdev_lock);
5777
5778         now = ktime_get_raw_ns();
5779         diffms = now - dev_priv->ips.last_time2;
5780         do_div(diffms, NSEC_PER_MSEC);
5781
5782         /* Don't divide by 0 */
5783         if (!diffms)
5784                 return;
5785
5786         count = I915_READ(GFXEC);
5787
5788         if (count < dev_priv->ips.last_count2) {
5789                 diff = ~0UL - dev_priv->ips.last_count2;
5790                 diff += count;
5791         } else {
5792                 diff = count - dev_priv->ips.last_count2;
5793         }
5794
5795         dev_priv->ips.last_count2 = count;
5796         dev_priv->ips.last_time2 = now;
5797
5798         /* More magic constants... */
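        /* diff becomes a fixed-point counts-per-ms rate: it is scaled by 1181
         * and divided by (elapsed ms * 10), so e.g. 1000 counts over 100 ms
         * give 1000 * 1181 / (100 * 10) = 1181. */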
5799         diff = diff * 1181;
5800         diff = div_u64(diff, diffms * 10);
5801         dev_priv->ips.gfx_power = diff;
5802 }
5803
5804 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
5805 {
5806         struct drm_device *dev = dev_priv->dev;
5807
5808         if (INTEL_INFO(dev)->gen != 5)
5809                 return;
5810
5811         spin_lock_irq(&mchdev_lock);
5812
5813         __i915_update_gfx_val(dev_priv);
5814
5815         spin_unlock_irq(&mchdev_lock);
5816 }
5817
5818 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
5819 {
5820         unsigned long t, corr, state1, corr2, state2;
5821         u32 pxvid, ext_v;
5822
5823         assert_spin_locked(&mchdev_lock);
5824
5825         pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq));
5826         pxvid = (pxvid >> 24) & 0x7f;
5827         ext_v = pvid_to_extvid(dev_priv, pxvid);
5828
5829         state1 = ext_v;
5830
5831         t = i915_mch_val(dev_priv);
5832
5833         /* Revel in the empirically derived constants */
5834
5835         /* Correction factor in 1/100000 units */
5836         if (t > 80)
5837                 corr = ((t * 2349) + 135940);
5838         else if (t >= 50)
5839                 corr = ((t * 964) + 29317);
5840         else /* < 50 */
5841                 corr = ((t * 301) + 1004);
5842
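        /* Illustrative arithmetic with made-up numbers: t = 60 falls in the
         * middle range, so corr = 60 * 964 + 29317 = 87157 (in 1/100000
         * units); that is then scaled by the voltage-derived state1 term and
         * the per-part fuse correction ips.corr before conversion to mW. */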
5843         corr = corr * ((150142 * state1) / 10000 - 78642);
5844         corr /= 100000;
5845         corr2 = (corr * dev_priv->ips.corr);
5846
5847         state2 = (corr2 * state1) / 10000;
5848         state2 /= 100; /* convert to mW */
5849
5850         __i915_update_gfx_val(dev_priv);
5851
5852         return dev_priv->ips.gfx_power + state2;
5853 }
5854
5855 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
5856 {
5857         struct drm_device *dev = dev_priv->dev;
5858         unsigned long val;
5859
5860         if (INTEL_INFO(dev)->gen != 5)
5861                 return 0;
5862
5863         spin_lock_irq(&mchdev_lock);
5864
5865         val = __i915_gfx_val(dev_priv);
5866
5867         spin_unlock_irq(&mchdev_lock);
5868
5869         return val;
5870 }
5871
5872 /**
5873  * i915_read_mch_val - return value for IPS use
5874  *
5875  * Calculate and return a value for the IPS driver to use when deciding whether
5876  * we have thermal and power headroom to increase CPU or GPU power budget.
5877  */
5878 unsigned long i915_read_mch_val(void)
5879 {
5880         struct drm_i915_private *dev_priv;
5881         unsigned long chipset_val, graphics_val, ret = 0;
5882
5883         spin_lock_irq(&mchdev_lock);
5884         if (!i915_mch_dev)
5885                 goto out_unlock;
5886         dev_priv = i915_mch_dev;
5887
5888         chipset_val = __i915_chipset_val(dev_priv);
5889         graphics_val = __i915_gfx_val(dev_priv);
5890
5891         ret = chipset_val + graphics_val;
5892
5893 out_unlock:
5894         spin_unlock_irq(&mchdev_lock);
5895
5896         return ret;
5897 }
5898
5899 /**
5900  * i915_gpu_raise - raise GPU frequency limit
5901  *
5902  * Raise the limit; IPS indicates we have thermal headroom (smaller delay values mean higher frequencies, hence the decrement towards ips.fmax).
5903  */
5904 bool i915_gpu_raise(void)
5905 {
5906         struct drm_i915_private *dev_priv;
5907         bool ret = true;
5908
5909         spin_lock_irq(&mchdev_lock);
5910         if (!i915_mch_dev) {
5911                 ret = false;
5912                 goto out_unlock;
5913         }
5914         dev_priv = i915_mch_dev;
5915
5916         if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
5917                 dev_priv->ips.max_delay--;
5918
5919 out_unlock:
5920         spin_unlock_irq(&mchdev_lock);
5921
5922         return ret;
5923 }
5924
5925 /**
5926  * i915_gpu_lower - lower GPU frequency limit
5927  *
5928  * IPS indicates we're close to a thermal limit, so throttle back the GPU
5929  * frequency maximum by incrementing max_delay towards min_delay (larger delay values mean lower frequencies).
5930  */
5931 bool i915_gpu_lower(void)
5932 {
5933         struct drm_i915_private *dev_priv;
5934         bool ret = true;
5935
5936         spin_lock_irq(&mchdev_lock);
5937         if (!i915_mch_dev) {
5938                 ret = false;
5939                 goto out_unlock;
5940         }
5941         dev_priv = i915_mch_dev;
5942
5943         if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
5944                 dev_priv->ips.max_delay++;
5945
5946 out_unlock:
5947         spin_unlock_irq(&mchdev_lock);
5948
5949         return ret;
5950 }
5951
5952 /**
5953  * i915_gpu_busy - indicate GPU business to IPS
5954  * i915_gpu_busy - indicate GPU busyness to IPS
5955  * Tell the IPS driver whether or not the GPU is busy.
5956  */
5957 bool i915_gpu_busy(void)
5958 {
5959         struct drm_i915_private *dev_priv;
5960         struct intel_engine_cs *ring;
5961         bool ret = false;
5962         int i;
5963
5964         spin_lock_irq(&mchdev_lock);
5965         if (!i915_mch_dev)
5966                 goto out_unlock;
5967         dev_priv = i915_mch_dev;
5968
5969         for_each_ring(ring, dev_priv, i)
5970                 ret |= !list_empty(&ring->request_list);
5971
5972 out_unlock:
5973         spin_unlock_irq(&mchdev_lock);
5974
5975         return ret;
5976 }
5977
5978 /**
5979  * i915_gpu_turbo_disable - disable graphics turbo
5980  *
5981  * Disable graphics turbo by resetting the max frequency and setting the
5982  * current frequency to the default.
5983  */
5984 bool i915_gpu_turbo_disable(void)
5985 {
5986         struct drm_i915_private *dev_priv;
5987         bool ret = true;
5988
5989         spin_lock_irq(&mchdev_lock);
5990         if (!i915_mch_dev) {
5991                 ret = false;
5992                 goto out_unlock;
5993         }
5994         dev_priv = i915_mch_dev;
5995
5996         dev_priv->ips.max_delay = dev_priv->ips.fstart;
5997
5998         if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart))
5999                 ret = false;
6000
6001 out_unlock:
6002         spin_unlock_irq(&mchdev_lock);
6003
6004         return ret;
6005 }
6006
6007 #if 0
6008 /**
6009  * Tells the intel_ips driver that the i915 driver is now loaded, if
6010  * IPS got loaded first.
6011  *
6012  * This awkward dance is so that neither module has to depend on the
6013  * other in order for IPS to do the appropriate communication of
6014  * GPU turbo limits to i915.
6015  */
6016 static void
6017 ips_ping_for_i915_load(void)
6018 {
6019         void (*link)(void);
6020
6021         link = symbol_get(ips_link_to_i915_driver);
6022         if (link) {
6023                 link();
6024                 symbol_put(ips_link_to_i915_driver);
6025         }
6026 }
6027 #endif
6028
6029 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
6030 {
6031         /* We only register the i915 ips part with intel-ips once everything is
6032          * set up, to avoid intel-ips sneaking in and reading bogus values. */
6033         spin_lock_irq(&mchdev_lock);
6034         i915_mch_dev = dev_priv;
6035         spin_unlock_irq(&mchdev_lock);
6036
6037 }
6038
6039 void intel_gpu_ips_teardown(void)
6040 {
6041         spin_lock_irq(&mchdev_lock);
6042         i915_mch_dev = NULL;
6043         spin_unlock_irq(&mchdev_lock);
6044 }
6045
6046 static void intel_init_emon(struct drm_device *dev)
6047 {
6048         struct drm_i915_private *dev_priv = dev->dev_private;
6049         u32 lcfuse;
6050         u8 pxw[16];
6051         int i;
6052
6053         /* Disable PMON while we program the event weights */
6054         I915_WRITE(ECR, 0);
6055         POSTING_READ(ECR);
6056
6057         /* Program energy weights for various events */
6058         I915_WRITE(SDEW, 0x15040d00);
6059         I915_WRITE(CSIEW0, 0x007f0000);
6060         I915_WRITE(CSIEW1, 0x1e220004);
6061         I915_WRITE(CSIEW2, 0x04000004);
6062
6063         for (i = 0; i < 5; i++)
6064                 I915_WRITE(PEW(i), 0);
6065         for (i = 0; i < 3; i++)
6066                 I915_WRITE(DEW(i), 0);
6067
6068         /* Program P-state weights to account for frequency power adjustment */
6069         for (i = 0; i < 16; i++) {
6070                 u32 pxvidfreq = I915_READ(PXVFREQ(i));
6071                 unsigned long freq = intel_pxfreq(pxvidfreq);
6072                 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
6073                         PXVFREQ_PX_SHIFT;
6074                 unsigned long val;
6075
6076                 val = vid * vid;
6077                 val *= (freq / 1000);
6078                 val *= 255;
6079                 val /= (127*127*900);
6080                 if (val > 0xff)
6081                         DRM_ERROR("bad pxval: %ld\n", val);
6082                 pxw[i] = val;
6083         }
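        /* Worked example of the weight computed above, with made-up numbers:
         * vid = 50 and freq = 1000000 give
         * 50 * 50 * (1000000 / 1000) * 255 / (127 * 127 * 900) ~= 43,
         * comfortably inside the u8 range checked for above. */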
6084         /* Render standby states get 0 weight */
6085         pxw[14] = 0;
6086         pxw[15] = 0;
6087
6088         for (i = 0; i < 4; i++) {
6089                 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
6090                         (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
6091                 I915_WRITE(PXW(i), val);
6092         }
6093
6094         /* Adjust magic regs to magic values (more experimental results) */
6095         I915_WRITE(OGW0, 0);
6096         I915_WRITE(OGW1, 0);
6097         I915_WRITE(EG0, 0x00007f00);
6098         I915_WRITE(EG1, 0x0000000e);
6099         I915_WRITE(EG2, 0x000e0000);
6100         I915_WRITE(EG3, 0x68000300);
6101         I915_WRITE(EG4, 0x42000000);
6102         I915_WRITE(EG5, 0x00140031);
6103         I915_WRITE(EG6, 0);
6104         I915_WRITE(EG7, 0);
6105
6106         for (i = 0; i < 8; i++)
6107                 I915_WRITE(PXWL(i), 0);
6108
6109         /* Enable PMON + select events */
6110         I915_WRITE(ECR, 0x80000019);
6111
6112         lcfuse = I915_READ(LCFUSE02);
6113
6114         dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
6115 }
6116
6117 void intel_init_gt_powersave(struct drm_device *dev)
6118 {
6119         i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6);
6120
6121         if (IS_CHERRYVIEW(dev))
6122                 cherryview_init_gt_powersave(dev);
6123         else if (IS_VALLEYVIEW(dev))
6124                 valleyview_init_gt_powersave(dev);
6125 }
6126
6127 void intel_cleanup_gt_powersave(struct drm_device *dev)
6128 {
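        /*
         * CHV reserves its power context directly in stolen memory via PCBR
         * (see cherryview_setup_pctx()), so there is no GEM object to
         * release; only VLV has a cleanup path.
         */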
6129         if (IS_CHERRYVIEW(dev))
6130                 return;
6131         else if (IS_VALLEYVIEW(dev))
6132                 valleyview_cleanup_gt_powersave(dev);
6133 }
6134
6135 static void gen6_suspend_rps(struct drm_device *dev)
6136 {
6137 #if 0
6138         struct drm_i915_private *dev_priv = dev->dev_private;
6139
6140         flush_delayed_work(&dev_priv->rps.delayed_resume_work);
6141 #endif
6142
6143         gen6_disable_rps_interrupts(dev);
6144 }
6145
6146 /**
6147  * intel_suspend_gt_powersave - suspend PM work and helper threads
6148  * @dev: drm device
6149  *
6150  * We don't want to disable RC6 or other features here, we just want
6151  * to make sure any work we've queued has finished and won't bother
6152  * us while we're suspended.
6153  */
6154 void intel_suspend_gt_powersave(struct drm_device *dev)
6155 {
6156         struct drm_i915_private *dev_priv = dev->dev_private;
6157
6158         if (INTEL_INFO(dev)->gen < 6)
6159                 return;
6160
6161         gen6_suspend_rps(dev);
6162
6163         /* Force GPU to min freq during suspend */
6164         gen6_rps_idle(dev_priv);
6165 }
6166
6167 void intel_disable_gt_powersave(struct drm_device *dev)
6168 {
6169         struct drm_i915_private *dev_priv = dev->dev_private;
6170
6171         if (IS_IRONLAKE_M(dev)) {
6172                 ironlake_disable_drps(dev);
6173         } else if (INTEL_INFO(dev)->gen >= 6) {
6174                 intel_suspend_gt_powersave(dev);
6175
6176                 mutex_lock(&dev_priv->rps.hw_lock);
6177                 if (INTEL_INFO(dev)->gen >= 9)
6178                         gen9_disable_rps(dev);
6179                 else if (IS_CHERRYVIEW(dev))
6180                         cherryview_disable_rps(dev);
6181                 else if (IS_VALLEYVIEW(dev))
6182                         valleyview_disable_rps(dev);
6183                 else
6184                         gen6_disable_rps(dev);
6185
6186                 dev_priv->rps.enabled = false;
6187                 mutex_unlock(&dev_priv->rps.hw_lock);
6188         }
6189 }
6190
6191 static void intel_gen6_powersave_work(struct work_struct *work)
6192 {
6193         struct drm_i915_private *dev_priv =
6194                 container_of(work, struct drm_i915_private,
6195                              rps.delayed_resume_work.work);
6196         struct drm_device *dev = dev_priv->dev;
6197
6198         mutex_lock(&dev_priv->rps.hw_lock);
6199
6200         gen6_reset_rps_interrupts(dev);
6201
6202         if (IS_CHERRYVIEW(dev)) {
6203                 cherryview_enable_rps(dev);
6204         } else if (IS_VALLEYVIEW(dev)) {
6205                 valleyview_enable_rps(dev);
6206         } else if (INTEL_INFO(dev)->gen >= 9) {
6207                 gen9_enable_rc6(dev);
6208                 gen9_enable_rps(dev);
6209                 if (IS_SKYLAKE(dev))
6210                         __gen6_update_ring_freq(dev);
6211         } else if (IS_BROADWELL(dev)) {
6212                 gen8_enable_rps(dev);
6213                 __gen6_update_ring_freq(dev);
6214         } else {
6215                 gen6_enable_rps(dev);
6216                 __gen6_update_ring_freq(dev);
6217         }
6218
6219         WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq);
6220         WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq);
6221
6222         WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq);
6223         WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq);
6224
6225         dev_priv->rps.enabled = true;
6226
6227         gen6_enable_rps_interrupts(dev);
6228
6229         mutex_unlock(&dev_priv->rps.hw_lock);
6230
6231         intel_runtime_pm_put(dev_priv);
6232 }
6233
6234 void intel_enable_gt_powersave(struct drm_device *dev)
6235 {
6236         struct drm_i915_private *dev_priv = dev->dev_private;
6237
6238         /* Powersaving is controlled by the host when inside a VM */
6239         if (intel_vgpu_active(dev))
6240                 return;
6241
6242         if (IS_IRONLAKE_M(dev)) {
6243                 mutex_lock(&dev->struct_mutex);
6244                 ironlake_enable_drps(dev);
6245                 intel_init_emon(dev);
6246                 mutex_unlock(&dev->struct_mutex);
6247         } else if (INTEL_INFO(dev)->gen >= 6) {
6248                 /*
6249                  * PCU communication is slow and this doesn't need to be
6250                  * done at any specific time, so do this out of our fast path
6251                  * to make resume and init faster.
6252                  *
6253                  * We depend on the HW RC6 power context save/restore
6254                  * mechanism when entering D3 through runtime PM suspend. So
6255                  * disable RPM until RPS/RC6 is properly setup. We can only
6256                  * get here via the driver load/system resume/runtime resume
6257                  * paths, so the _noresume version is enough (and in case of
6258                  * runtime resume it's necessary).
6259                  */
6260                 if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
6261                                            round_jiffies_up_relative(HZ)))
6262                         intel_runtime_pm_get_noresume(dev_priv);
6263         }
6264 }
6265
6266 void intel_reset_gt_powersave(struct drm_device *dev)
6267 {
6268         struct drm_i915_private *dev_priv = dev->dev_private;
6269
6270         if (INTEL_INFO(dev)->gen < 6)
6271                 return;
6272
6273         gen6_suspend_rps(dev);
6274         dev_priv->rps.enabled = false;
6275 }
6276
6277 static void ibx_init_clock_gating(struct drm_device *dev)
6278 {
6279         struct drm_i915_private *dev_priv = dev->dev_private;
6280
6281         /*
6282          * On Ibex Peak and Cougar Point, we need to disable clock
6283          * gating for the panel power sequencer or it will fail to
6284          * start up when no ports are active.
6285          */
6286         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
6287 }
6288
6289 static void g4x_disable_trickle_feed(struct drm_device *dev)
6290 {
6291         struct drm_i915_private *dev_priv = dev->dev_private;
6292         enum i915_pipe pipe;
6293
6294         for_each_pipe(dev_priv, pipe) {
6295                 I915_WRITE(DSPCNTR(pipe),
6296                            I915_READ(DSPCNTR(pipe)) |
6297                            DISPPLANE_TRICKLE_FEED_DISABLE);
6298
6299                 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
6300                 POSTING_READ(DSPSURF(pipe));
6301         }
6302 }
6303
6304 static void ilk_init_lp_watermarks(struct drm_device *dev)
6305 {
6306         struct drm_i915_private *dev_priv = dev->dev_private;
6307
6308         I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6309         I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6310         I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6311
6312         /*
6313          * Don't touch WM1S_LP_EN here.
6314          * Doing so could cause underruns.
6315          */
6316 }
6317
6318 static void ironlake_init_clock_gating(struct drm_device *dev)
6319 {
6320         struct drm_i915_private *dev_priv = dev->dev_private;
6321         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6322
6323         /*
6324          * Required for FBC
6325          * WaFbcDisableDpfcClockGating:ilk
6326          */
6327         dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
6328                    ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
6329                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
6330
6331         I915_WRITE(PCH_3DCGDIS0,
6332                    MARIUNIT_CLOCK_GATE_DISABLE |
6333                    SVSMUNIT_CLOCK_GATE_DISABLE);
6334         I915_WRITE(PCH_3DCGDIS1,
6335                    VFMUNIT_CLOCK_GATE_DISABLE);
6336
6337         /*
6338          * According to the spec the following bits should be set in
6339          * order to enable memory self-refresh
6340          * The bit 22/21 of 0x42004
6341          * The bit 5 of 0x42020
6342          * The bit 15 of 0x45000
6343          */
6344         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6345                    (I915_READ(ILK_DISPLAY_CHICKEN2) |
6346                     ILK_DPARB_GATE | ILK_VSDPFD_FULL));
6347         dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
6348         I915_WRITE(DISP_ARB_CTL,
6349                    (I915_READ(DISP_ARB_CTL) |
6350                     DISP_FBC_WM_DIS));
6351
6352         ilk_init_lp_watermarks(dev);
6353
6354         /*
6355          * According to the hardware documentation, the following bits
6356          * should be set unconditionally in order to enable FBC.
6357          * The bit 22 of 0x42000
6358          * The bit 22 of 0x42004
6359          * The bit 7,8,9 of 0x42020.
6360          */
6361         if (IS_IRONLAKE_M(dev)) {
6362                 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
6363                 I915_WRITE(ILK_DISPLAY_CHICKEN1,
6364                            I915_READ(ILK_DISPLAY_CHICKEN1) |
6365                            ILK_FBCQ_DIS);
6366                 I915_WRITE(ILK_DISPLAY_CHICKEN2,
6367                            I915_READ(ILK_DISPLAY_CHICKEN2) |
6368                            ILK_DPARB_GATE);
6369         }
6370
6371         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6372
6373         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6374                    I915_READ(ILK_DISPLAY_CHICKEN2) |
6375                    ILK_ELPIN_409_SELECT);
6376         I915_WRITE(_3D_CHICKEN2,
6377                    _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
6378                    _3D_CHICKEN2_WM_READ_PIPELINED);
6379
6380         /* WaDisableRenderCachePipelinedFlush:ilk */
6381         I915_WRITE(CACHE_MODE_0,
6382                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
6383
6384         /* WaDisable_RenderCache_OperationalFlush:ilk */
6385         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6386
6387         g4x_disable_trickle_feed(dev);
6388
6389         ibx_init_clock_gating(dev);
6390 }
6391
6392 static void cpt_init_clock_gating(struct drm_device *dev)
6393 {
6394         struct drm_i915_private *dev_priv = dev->dev_private;
6395         int pipe;
6396         uint32_t val;
6397
6398         /*
6399          * On Ibex Peak and Cougar Point, we need to disable clock
6400          * gating for the panel power sequencer or it will fail to
6401          * start up when no ports are active.
6402          */
6403         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
6404                    PCH_DPLUNIT_CLOCK_GATE_DISABLE |
6405                    PCH_CPUNIT_CLOCK_GATE_DISABLE);
6406         I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
6407                    DPLS_EDP_PPS_FIX_DIS);
6408         /* The below fixes a weird display corruption (a few pixels shifted
6409          * downward) seen only on the LVDS panels of some Ivy Bridge HP laptops.
6410          */
6411         for_each_pipe(dev_priv, pipe) {
6412                 val = I915_READ(TRANS_CHICKEN2(pipe));
6413                 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
6414                 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
6415                 if (dev_priv->vbt.fdi_rx_polarity_inverted)
6416                         val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
6417                 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
6418                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
6419                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
6420                 I915_WRITE(TRANS_CHICKEN2(pipe), val);
6421         }
6422         /* WADP0ClockGatingDisable */
6423         for_each_pipe(dev_priv, pipe) {
6424                 I915_WRITE(TRANS_CHICKEN1(pipe),
6425                            TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
6426         }
6427 }
6428
6429 static void gen6_check_mch_setup(struct drm_device *dev)
6430 {
6431         struct drm_i915_private *dev_priv = dev->dev_private;
6432         uint32_t tmp;
6433
6434         tmp = I915_READ(MCH_SSKPD);
6435         if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
6436                 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x. This can cause underruns.\n",
6437                               tmp);
6438 }
6439
6440 static void gen6_init_clock_gating(struct drm_device *dev)
6441 {
6442         struct drm_i915_private *dev_priv = dev->dev_private;
6443         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6444
6445         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6446
6447         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6448                    I915_READ(ILK_DISPLAY_CHICKEN2) |
6449                    ILK_ELPIN_409_SELECT);
6450
6451         /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
6452         I915_WRITE(_3D_CHICKEN,
6453                    _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
6454
6455         /* WaDisable_RenderCache_OperationalFlush:snb */
6456         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6457
6458         /*
6459          * BSpec recommends 8x4 when MSAA is used,
6460          * however in practice 16x4 seems fastest.
6461          *
6462          * Note that PS/WM thread counts depend on the WIZ hashing
6463          * disable bit, which we don't touch here, but it's good
6464          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6465          */
6466         I915_WRITE(GEN6_GT_MODE,
6467                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6468
6469         ilk_init_lp_watermarks(dev);
6470
6471         I915_WRITE(CACHE_MODE_0,
6472                    _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
6473
6474         I915_WRITE(GEN6_UCGCTL1,
6475                    I915_READ(GEN6_UCGCTL1) |
6476                    GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
6477                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
6478
6479         /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
6480          * gating disable must be set.  Failure to set it results in
6481          * flickering pixels due to Z write ordering failures after
6482          * some amount of runtime in the Mesa "fire" demo, and Unigine
6483          * Sanctuary and Tropics, and apparently anything else with
6484          * alpha test or pixel discard.
6485          *
6486          * According to the spec, bit 11 (RCCUNIT) must also be set,
6487          * but we didn't debug actual testcases to find it out.
6488          *
6489          * WaDisableRCCUnitClockGating:snb
6490          * WaDisableRCPBUnitClockGating:snb
6491          */
6492         I915_WRITE(GEN6_UCGCTL2,
6493                    GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
6494                    GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
6495
6496         /* WaStripsFansDisableFastClipPerformanceFix:snb */
6497         I915_WRITE(_3D_CHICKEN3,
6498                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
6499
6500         /*
6501          * Bspec says:
6502          * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
6503          * 3DSTATE_SF number of SF output attributes is more than 16."
6504          */
6505         I915_WRITE(_3D_CHICKEN3,
6506                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
6507
6508         /*
6509          * According to the spec the following bits should be
6510          * set in order to enable memory self-refresh and fbc:
6511          * The bit21 and bit22 of 0x42000
6512          * The bit21 and bit22 of 0x42004
6513          * The bit5 and bit7 of 0x42020
6514          * The bit14 of 0x70180
6515          * The bit14 of 0x71180
6516          *
6517          * WaFbcAsynchFlipDisableFbcQueue:snb
6518          */
6519         I915_WRITE(ILK_DISPLAY_CHICKEN1,
6520                    I915_READ(ILK_DISPLAY_CHICKEN1) |
6521                    ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
6522         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6523                    I915_READ(ILK_DISPLAY_CHICKEN2) |
6524                    ILK_DPARB_GATE | ILK_VSDPFD_FULL);
6525         I915_WRITE(ILK_DSPCLK_GATE_D,
6526                    I915_READ(ILK_DSPCLK_GATE_D) |
6527                    ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
6528                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
6529
6530         g4x_disable_trickle_feed(dev);
6531
6532         cpt_init_clock_gating(dev);
6533
6534         gen6_check_mch_setup(dev);
6535 }
6536
6537 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
6538 {
6539         uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
6540
6541         /*
6542          * WaVSThreadDispatchOverride:ivb,vlv
6543          *
6544          * This actually overrides the dispatch
6545          * mode for all thread types.
6546          */
6547         reg &= ~GEN7_FF_SCHED_MASK;
6548         reg |= GEN7_FF_TS_SCHED_HW;
6549         reg |= GEN7_FF_VS_SCHED_HW;
6550         reg |= GEN7_FF_DS_SCHED_HW;
6551
6552         I915_WRITE(GEN7_FF_THREAD_MODE, reg);
6553 }
6554
6555 static void lpt_init_clock_gating(struct drm_device *dev)
6556 {
6557         struct drm_i915_private *dev_priv = dev->dev_private;
6558
6559         /*
6560          * TODO: this bit should only be enabled when really needed, then
6561          * disabled when not needed anymore in order to save power.
6562          */
6563         if (HAS_PCH_LPT_LP(dev))
6564                 I915_WRITE(SOUTH_DSPCLK_GATE_D,
6565                            I915_READ(SOUTH_DSPCLK_GATE_D) |
6566                            PCH_LP_PARTITION_LEVEL_DISABLE);
6567
6568         /* WADPOClockGatingDisable:hsw */
6569         I915_WRITE(TRANS_CHICKEN1(PIPE_A),
6570                    I915_READ(TRANS_CHICKEN1(PIPE_A)) |
6571                    TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
6572 }
6573
6574 static void lpt_suspend_hw(struct drm_device *dev)
6575 {
6576         struct drm_i915_private *dev_priv = dev->dev_private;
6577
6578         if (HAS_PCH_LPT_LP(dev)) {
6579                 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
6580
6581                 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
6582                 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
6583         }
6584 }
6585
6586 static void broadwell_init_clock_gating(struct drm_device *dev)
6587 {
6588         struct drm_i915_private *dev_priv = dev->dev_private;
6589         enum i915_pipe pipe;
6590         uint32_t misccpctl;
6591
6592         ilk_init_lp_watermarks(dev);
6593
6594         /* WaSwitchSolVfFArbitrationPriority:bdw */
6595         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6596
6597         /* WaPsrDPAMaskVBlankInSRD:bdw */
6598         I915_WRITE(CHICKEN_PAR1_1,
6599                    I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
6600
6601         /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
6602         for_each_pipe(dev_priv, pipe) {
6603                 I915_WRITE(CHICKEN_PIPESL_1(pipe),
6604                            I915_READ(CHICKEN_PIPESL_1(pipe)) |
6605                            BDW_DPRS_MASK_VBLANK_SRD);
6606         }
6607
6608         /* WaVSRefCountFullforceMissDisable:bdw */
6609         /* WaDSRefCountFullforceMissDisable:bdw */
6610         I915_WRITE(GEN7_FF_THREAD_MODE,
6611                    I915_READ(GEN7_FF_THREAD_MODE) &
6612                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6613
6614         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6615                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6616
6617         /* WaDisableSDEUnitClockGating:bdw */
6618         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6619                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6620
6621         /*
6622          * WaProgramL3SqcReg1Default:bdw
6623          * WaTempDisableDOPClkGating:bdw
6624          */
6625         misccpctl = I915_READ(GEN7_MISCCPCTL);
6626         I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
6627         I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
6628         I915_WRITE(GEN7_MISCCPCTL, misccpctl);
6629
6630         /*
6631          * WaGttCachingOffByDefault:bdw
6632          * GTT cache may not work with big pages, so if those
6633          * are ever enabled GTT cache may need to be disabled.
6634          */
6635         I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
6636
6637         lpt_init_clock_gating(dev);
6638 }
6639
6640 static void haswell_init_clock_gating(struct drm_device *dev)
6641 {
6642         struct drm_i915_private *dev_priv = dev->dev_private;
6643
6644         ilk_init_lp_watermarks(dev);
6645
6646         /* L3 caching of data atomics doesn't work -- disable it. */
6647         I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
6648         I915_WRITE(HSW_ROW_CHICKEN3,
6649                    _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
6650
6651         /* This is required by WaCatErrorRejectionIssue:hsw */
6652         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6653                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6654                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6655
6656         /* WaVSRefCountFullforceMissDisable:hsw */
6657         I915_WRITE(GEN7_FF_THREAD_MODE,
6658                    I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
6659
6660         /* WaDisable_RenderCache_OperationalFlush:hsw */
6661         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6662
6663         /* enable HiZ Raw Stall Optimization */
6664         I915_WRITE(CACHE_MODE_0_GEN7,
6665                    _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6666
6667         /* WaDisable4x2SubspanOptimization:hsw */
6668         I915_WRITE(CACHE_MODE_1,
6669                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6670
6671         /*
6672          * BSpec recommends 8x4 when MSAA is used,
6673          * however in practice 16x4 seems fastest.
6674          *
6675          * Note that PS/WM thread counts depend on the WIZ hashing
6676          * disable bit, which we don't touch here, but it's good
6677          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6678          */
6679         I915_WRITE(GEN7_GT_MODE,
6680                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6681
6682         /* WaSampleCChickenBitEnable:hsw */
6683         I915_WRITE(HALF_SLICE_CHICKEN3,
6684                    _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
6685
6686         /* WaSwitchSolVfFArbitrationPriority:hsw */
6687         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6688
6689         /* WaRsPkgCStateDisplayPMReq:hsw */
6690         I915_WRITE(CHICKEN_PAR1_1,
6691                    I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
6692
6693         lpt_init_clock_gating(dev);
6694 }
6695
6696 static void ivybridge_init_clock_gating(struct drm_device *dev)
6697 {
6698         struct drm_i915_private *dev_priv = dev->dev_private;
6699         uint32_t snpcr;
6700
6701         ilk_init_lp_watermarks(dev);
6702
6703         I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
6704
6705         /* WaDisableEarlyCull:ivb */
6706         I915_WRITE(_3D_CHICKEN3,
6707                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6708
6709         /* WaDisableBackToBackFlipFix:ivb */
6710         I915_WRITE(IVB_CHICKEN3,
6711                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6712                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
6713
6714         /* WaDisablePSDDualDispatchEnable:ivb */
6715         if (IS_IVB_GT1(dev))
6716                 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6717                            _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6718
6719         /* WaDisable_RenderCache_OperationalFlush:ivb */
6720         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6721
6722         /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
6723         I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
6724                    GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
6725
6726         /* WaApplyL3ControlAndL3ChickenMode:ivb */
6727         I915_WRITE(GEN7_L3CNTLREG1,
6728                         GEN7_WA_FOR_GEN7_L3_CONTROL);
6729         I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
6730                    GEN7_WA_L3_CHICKEN_MODE);
6731         if (IS_IVB_GT1(dev))
6732                 I915_WRITE(GEN7_ROW_CHICKEN2,
6733                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6734         else {
6735                 /* must write both registers */
6736                 I915_WRITE(GEN7_ROW_CHICKEN2,
6737                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6738                 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
6739                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6740         }
6741
6742         /* WaForceL3Serialization:ivb */
6743         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6744                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
6745
6746         /*
6747          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
6748          * This implements the WaDisableRCZUnitClockGating:ivb workaround.
6749          */
6750         I915_WRITE(GEN6_UCGCTL2,
6751                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
6752
6753         /* This is required by WaCatErrorRejectionIssue:ivb */
6754         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6755                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6756                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6757
6758         g4x_disable_trickle_feed(dev);
6759
6760         gen7_setup_fixed_func_scheduler(dev_priv);
6761
6762         if (0) { /* causes HiZ corruption on ivb:gt1 */
6763                 /* enable HiZ Raw Stall Optimization */
6764                 I915_WRITE(CACHE_MODE_0_GEN7,
6765                            _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6766         }
6767
6768         /* WaDisable4x2SubspanOptimization:ivb */
6769         I915_WRITE(CACHE_MODE_1,
6770                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6771
6772         /*
6773          * BSpec recommends 8x4 when MSAA is used,
6774          * however in practice 16x4 seems fastest.
6775          *
6776          * Note that PS/WM thread counts depend on the WIZ hashing
6777          * disable bit, which we don't touch here, but it's good
6778          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6779          */
6780         I915_WRITE(GEN7_GT_MODE,
6781                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6782
6783         snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
6784         snpcr &= ~GEN6_MBC_SNPCR_MASK;
6785         snpcr |= GEN6_MBC_SNPCR_MED;
6786         I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
6787
6788         if (!HAS_PCH_NOP(dev))
6789                 cpt_init_clock_gating(dev);
6790
6791         gen6_check_mch_setup(dev);
6792 }
6793
6794 static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv)
6795 {
6796         I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
6797
6798         /*
6799          * Disable trickle feed and enable pnd deadline calculation
6800          */
6801         I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
6802         I915_WRITE(CBR1_VLV, 0);
6803 }
6804
6805 static void valleyview_init_clock_gating(struct drm_device *dev)
6806 {
6807         struct drm_i915_private *dev_priv = dev->dev_private;
6808
6809         vlv_init_display_clock_gating(dev_priv);
6810
6811         /* WaDisableEarlyCull:vlv */
6812         I915_WRITE(_3D_CHICKEN3,
6813                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6814
6815         /* WaDisableBackToBackFlipFix:vlv */
6816         I915_WRITE(IVB_CHICKEN3,
6817                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6818                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
6819
6820         /* WaPsdDispatchEnable:vlv */
6821         /* WaDisablePSDDualDispatchEnable:vlv */
6822         I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6823                    _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
6824                                       GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6825
6826         /* WaDisable_RenderCache_OperationalFlush:vlv */
6827         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6828
6829         /* WaForceL3Serialization:vlv */
6830         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6831                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
6832
6833         /* WaDisableDopClockGating:vlv */
6834         I915_WRITE(GEN7_ROW_CHICKEN2,
6835                    _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6836
6837         /* This is required by WaCatErrorRejectionIssue:vlv */
6838         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6839                    I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6840                    GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6841
6842         gen7_setup_fixed_func_scheduler(dev_priv);
6843
6844         /*
6845          * According to the spec, bit 13 (RCZUNIT) must be set here, as
6846          * on IVB; this implements WaDisableRCZUnitClockGating:vlv.
6847          */
6848         I915_WRITE(GEN6_UCGCTL2,
6849                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
6850
6851         /* WaDisableL3Bank2xClockGate:vlv
6852          * Disabling L3 clock gating - MMIO 940c[25] = 1:
6853          * set bit 25 to disable L3_BANK_2x_CLK_GATING. */
6854         I915_WRITE(GEN7_UCGCTL4,
6855                    I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
6856
6857         /*
6858          * BSpec says this must be set, even though
6859          * WaDisable4x2SubspanOptimization isn't listed for VLV.
6860          */
6861         I915_WRITE(CACHE_MODE_1,
6862                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6863
6864         /*
6865          * BSpec recommends 8x4 when MSAA is used,
6866          * however in practice 16x4 seems fastest.
6867          *
6868          * Note that PS/WM thread counts depend on the WIZ hashing
6869          * disable bit, which we don't touch here, but it's good
6870          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6871          */
6872         I915_WRITE(GEN7_GT_MODE,
6873                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6874
6875         /*
6876          * WaIncreaseL3CreditsForVLVB0:vlv
6877          * This is actually the hardware default.
6878          */
6879         I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
6880
6881         /*
6882          * WaDisableVLVClockGating_VBIIssue:vlv
6883          * Disable clock gating on the GCFG unit to prevent a delay
6884          * in the reporting of vblank events.
6885          */
6886         I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
6887 }
6888
6889 static void cherryview_init_clock_gating(struct drm_device *dev)
6890 {
6891         struct drm_i915_private *dev_priv = dev->dev_private;
6892
6893         vlv_init_display_clock_gating(dev_priv);
6894
6895         /* WaVSRefCountFullforceMissDisable:chv */
6896         /* WaDSRefCountFullforceMissDisable:chv */
6897         I915_WRITE(GEN7_FF_THREAD_MODE,
6898                    I915_READ(GEN7_FF_THREAD_MODE) &
6899                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6900
6901         /* WaDisableSemaphoreAndSyncFlipWait:chv */
6902         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6903                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6904
6905         /* WaDisableCSUnitClockGating:chv */
6906         I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
6907                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
6908
6909         /* WaDisableSDEUnitClockGating:chv */
6910         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6911                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6912
6913         /*
6914          * The GTT cache may not work with big pages, so if those are
6915          * ever enabled, the GTT cache may need to be disabled.
6916          */
6917         I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
6918 }
6919
6920 static void g4x_init_clock_gating(struct drm_device *dev)
6921 {
6922         struct drm_i915_private *dev_priv = dev->dev_private;
6923         uint32_t dspclk_gate;
6924
6925         I915_WRITE(RENCLK_GATE_D1, 0);
6926         I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
6927                    GS_UNIT_CLOCK_GATE_DISABLE |
6928                    CL_UNIT_CLOCK_GATE_DISABLE);
6929         I915_WRITE(RAMCLK_GATE_D, 0);
6930         dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
6931                 OVRUNIT_CLOCK_GATE_DISABLE |
6932                 OVCUNIT_CLOCK_GATE_DISABLE;
6933         if (IS_GM45(dev))
6934                 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
6935         I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
6936
6937         /* WaDisableRenderCachePipelinedFlush */
6938         I915_WRITE(CACHE_MODE_0,
6939                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
6940
6941         /* WaDisable_RenderCache_OperationalFlush:g4x */
6942         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6943
6944         g4x_disable_trickle_feed(dev);
6945 }
6946
6947 static void crestline_init_clock_gating(struct drm_device *dev)
6948 {
6949         struct drm_i915_private *dev_priv = dev->dev_private;
6950
6951         I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
6952         I915_WRITE(RENCLK_GATE_D2, 0);
6953         I915_WRITE(DSPCLK_GATE_D, 0);
6954         I915_WRITE(RAMCLK_GATE_D, 0);
6955         I915_WRITE16(DEUC, 0);
6956         I915_WRITE(MI_ARB_STATE,
6957                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
6958
6959         /* WaDisable_RenderCache_OperationalFlush:gen4 */
6960         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6961 }
6962
6963 static void broadwater_init_clock_gating(struct drm_device *dev)
6964 {
6965         struct drm_i915_private *dev_priv = dev->dev_private;
6966
6967         I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
6968                    I965_RCC_CLOCK_GATE_DISABLE |
6969                    I965_RCPB_CLOCK_GATE_DISABLE |
6970                    I965_ISC_CLOCK_GATE_DISABLE |
6971                    I965_FBC_CLOCK_GATE_DISABLE);
6972         I915_WRITE(RENCLK_GATE_D2, 0);
6973         I915_WRITE(MI_ARB_STATE,
6974                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
6975
6976         /* WaDisable_RenderCache_OperationalFlush:gen4 */
6977         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6978 }
6979
6980 static void gen3_init_clock_gating(struct drm_device *dev)
6981 {
6982         struct drm_i915_private *dev_priv = dev->dev_private;
6983         u32 dstate = I915_READ(D_STATE);
6984
6985         dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
6986                 DSTATE_DOT_CLOCK_GATING;
6987         I915_WRITE(D_STATE, dstate);
6988
6989         if (IS_PINEVIEW(dev))
6990                 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
6991
6992         /* IIR "flip pending" means done if this bit is set */
6993         I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
6994
6995         /* interrupts should cause a wake up from C3 */
6996         I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
6997
6998         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
6999         I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
7000
7001         I915_WRITE(MI_ARB_STATE,
7002                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7003 }
7004
7005 static void i85x_init_clock_gating(struct drm_device *dev)
7006 {
7007         struct drm_i915_private *dev_priv = dev->dev_private;
7008
7009         I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
7010
7011         /* interrupts should cause a wake up from C3 */
7012         I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
7013                    _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
7014
7015         I915_WRITE(MEM_MODE,
7016                    _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
7017 }
7018
7019 static void i830_init_clock_gating(struct drm_device *dev)
7020 {
7021         struct drm_i915_private *dev_priv = dev->dev_private;
7022
7023         I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
7024
7025         I915_WRITE(MEM_MODE,
7026                    _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
7027                    _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
7028 }
7029
7030 void intel_init_clock_gating(struct drm_device *dev)
7031 {
7032         struct drm_i915_private *dev_priv = dev->dev_private;
7033
7034         if (dev_priv->display.init_clock_gating)
7035                 dev_priv->display.init_clock_gating(dev);
7036 }
7037
7038 void intel_suspend_hw(struct drm_device *dev)
7039 {
7040         if (HAS_PCH_LPT(dev))
7041                 lpt_suspend_hw(dev);
7042 }
7043
7044 /* Set up chip-specific power management functions */
7045 void intel_init_pm(struct drm_device *dev)
7046 {
7047         struct drm_i915_private *dev_priv = dev->dev_private;
7048
7049         intel_fbc_init(dev_priv);
7050
7051         /* For CxSR */
7052         if (IS_PINEVIEW(dev))
7053                 i915_pineview_get_mem_freq(dev);
7054         else if (IS_GEN5(dev))
7055                 i915_ironlake_get_mem_freq(dev);
7056
7057         /* For FIFO watermark updates */
7058         if (INTEL_INFO(dev)->gen >= 9) {
7059                 skl_setup_wm_latency(dev);
7060
7061                 if (IS_BROXTON(dev))
7062                         dev_priv->display.init_clock_gating =
7063                                 bxt_init_clock_gating;
7064                 dev_priv->display.update_wm = skl_update_wm;
7065                 dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
7066         } else if (HAS_PCH_SPLIT(dev)) {
7067                 ilk_setup_wm_latency(dev);
7068
7069                 if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] &&
7070                      dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
7071                     (!IS_GEN5(dev) && dev_priv->wm.pri_latency[0] &&
7072                      dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
7073                         dev_priv->display.update_wm = ilk_update_wm;
7074                         dev_priv->display.update_sprite_wm = ilk_update_sprite_wm;
7075                 } else {
7076                         DRM_DEBUG_KMS("Failed to read display plane latency. "
7077                                       "Disabling CxSR\n");
7078                 }
7079
7080                 if (IS_GEN5(dev))
7081                         dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
7082                 else if (IS_GEN6(dev))
7083                         dev_priv->display.init_clock_gating = gen6_init_clock_gating;
7084                 else if (IS_IVYBRIDGE(dev))
7085                         dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
7086                 else if (IS_HASWELL(dev))
7087                         dev_priv->display.init_clock_gating = haswell_init_clock_gating;
7088                 else if (INTEL_INFO(dev)->gen == 8)
7089                         dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
7090         } else if (IS_CHERRYVIEW(dev)) {
7091                 vlv_setup_wm_latency(dev);
7092
7093                 dev_priv->display.update_wm = vlv_update_wm;
7094                 dev_priv->display.init_clock_gating =
7095                         cherryview_init_clock_gating;
7096         } else if (IS_VALLEYVIEW(dev)) {
7097                 vlv_setup_wm_latency(dev);
7098
7099                 dev_priv->display.update_wm = vlv_update_wm;
7100                 dev_priv->display.init_clock_gating =
7101                         valleyview_init_clock_gating;
7102         } else if (IS_PINEVIEW(dev)) {
7103                 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev),
7104                                             dev_priv->is_ddr3,
7105                                             dev_priv->fsb_freq,
7106                                             dev_priv->mem_freq)) {
7107                         DRM_INFO("failed to find known CxSR latency "
7108                                  "(found ddr%s fsb freq %d, mem freq %d), "
7109                                  "disabling CxSR\n",
7110                                  (dev_priv->is_ddr3 == 1) ? "3" : "2",
7111                                  dev_priv->fsb_freq, dev_priv->mem_freq);
7112                         /* Disable CxSR and never update its watermark again */
7113                         intel_set_memory_cxsr(dev_priv, false);
7114                         dev_priv->display.update_wm = NULL;
7115                 } else
7116                         dev_priv->display.update_wm = pineview_update_wm;
7117                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
7118         } else if (IS_G4X(dev)) {
7119                 dev_priv->display.update_wm = g4x_update_wm;
7120                 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
7121         } else if (IS_GEN4(dev)) {
7122                 dev_priv->display.update_wm = i965_update_wm;
7123                 if (IS_CRESTLINE(dev))
7124                         dev_priv->display.init_clock_gating = crestline_init_clock_gating;
7125                 else if (IS_BROADWATER(dev))
7126                         dev_priv->display.init_clock_gating = broadwater_init_clock_gating;
7127         } else if (IS_GEN3(dev)) {
7128                 dev_priv->display.update_wm = i9xx_update_wm;
7129                 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
7130                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
7131         } else if (IS_GEN2(dev)) {
7132                 if (INTEL_INFO(dev)->num_pipes == 1) {
7133                         dev_priv->display.update_wm = i845_update_wm;
7134                         dev_priv->display.get_fifo_size = i845_get_fifo_size;
7135                 } else {
7136                         dev_priv->display.update_wm = i9xx_update_wm;
7137                         dev_priv->display.get_fifo_size = i830_get_fifo_size;
7138                 }
7139
7140                 if (IS_I85X(dev) || IS_I865G(dev))
7141                         dev_priv->display.init_clock_gating = i85x_init_clock_gating;
7142                 else
7143                         dev_priv->display.init_clock_gating = i830_init_clock_gating;
7144         } else {
7145                 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
7146         }
7147 }
7148
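/*
 * GEN6+ pcode mailbox protocol, as implemented below: the caller puts any
 * parameter in GEN6_PCODE_DATA, writes the command to GEN6_PCODE_MAILBOX
 * with GEN6_PCODE_READY set, and polls until the firmware clears
 * GEN6_PCODE_READY; for reads the result is then in GEN6_PCODE_DATA.
 */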
7149 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
7150 {
7151         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
7152
7153         if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
7154                 DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
7155                 return -EAGAIN;
7156         }
7157
7158         I915_WRITE(GEN6_PCODE_DATA, *val);
7159         I915_WRITE(GEN6_PCODE_DATA1, 0);
7160         I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
7161
7162         if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
7163                      500)) {
7164                 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
7165                 return -ETIMEDOUT;
7166         }
7167
7168         *val = I915_READ(GEN6_PCODE_DATA);
7169         I915_WRITE(GEN6_PCODE_DATA, 0);
7170
7171         return 0;
7172 }
7173
7174 int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val)
7175 {
7176         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
7177
7178         if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
7179                 DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
7180                 return -EAGAIN;
7181         }
7182
7183         I915_WRITE(GEN6_PCODE_DATA, val);
7184         I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
7185
7186         if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
7187                      500)) {
7188                 DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
7189                 return -ETIMEDOUT;
7190         }
7191
7192         I915_WRITE(GEN6_PCODE_DATA, 0);
7193
7194         return 0;
7195 }
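/*
 * Usage sketch, mirroring the RC6 voltage handling earlier in this file;
 * both helpers must be called with rps.hw_lock held:
 *
 *	u32 rc6vids = 0;
 *
 *	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
 *				     &rc6vids);
 *	if (ret == 0)
 *		ret = sandybridge_pcode_write(dev_priv,
 *					      GEN6_PCODE_WRITE_RC6VIDS,
 *					      rc6vids);
 */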
7196
7197 static int vlv_gpu_freq_div(unsigned int czclk_freq)
7198 {
7199         switch (czclk_freq) {
7200         case 200:
7201                 return 10;
7202         case 267:
7203                 return 12;
7204         case 320:
7205         case 333:
7206                 return 16;
7207         case 400:
7208                 return 20;
7209         default:
7210                 return -1;
7211         }
7212 }
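/*
 * The divider is chosen so that one frequency opcode step corresponds to
 * roughly czclk/div = 20 MHz (exactly 20 MHz for the 200, 320 and 400 MHz
 * clocks, a little more for 267 and 333 MHz).
 */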
7213
7214 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
7215 {
7216         int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7217
7218         div = vlv_gpu_freq_div(czclk_freq);
7219         if (div < 0)
7220                 return div;
7221
7222         return DIV_ROUND_CLOSEST(czclk_freq * (val + 6 - 0xbd), div);
7223 }
7224
7225 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
7226 {
7227         int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7228
7229         mul = vlv_gpu_freq_div(czclk_freq);
7230         if (mul < 0)
7231                 return mul;
7232
7233         return DIV_ROUND_CLOSEST(mul * val, czclk_freq) + 0xbd - 6;
7234 }
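/*
 * Worked example: with a 400 MHz czclk the divider is 20, so opcode
 * 0xd2 (210) maps to 400 * (210 + 6 - 0xbd) / 20 = 540 MHz, and
 * byt_freq_opcode(dev_priv, 540) recovers 0xd2; the "+ 6 - 0xbd"
 * offset anchors opcode 0xb7 at 0 MHz.
 */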
7235
7236 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
7237 {
7238         int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7239
7240         div = vlv_gpu_freq_div(czclk_freq) / 2;
7241         if (div < 0)
7242                 return div;
7243
7244         return DIV_ROUND_CLOSEST(czclk_freq * val, 2 * div) / 2;
7245 }
7246
7247 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
7248 {
7249         int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7250
7251         mul = vlv_gpu_freq_div(czclk_freq) / 2;
7252         if (mul < 0)
7253                 return mul;
7254
7255         /* CHV needs even values */
7256         return DIV_ROUND_CLOSEST(val * 2 * mul, czclk_freq) * 2;
7257 }
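/*
 * Worked example: with a 320 MHz czclk, mul = 16 / 2 = 8, so
 * chv_freq_opcode(dev_priv, 600) = DIV_ROUND_CLOSEST(600 * 2 * 8, 320) * 2
 * = 60, and chv_gpu_freq(dev_priv, 60) returns 600 MHz; the trailing
 * "* 2" is what keeps CHV opcodes even.
 */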
7258
7259 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
7260 {
7261         if (IS_GEN9(dev_priv->dev))
7262                 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
7263                                          GEN9_FREQ_SCALER);
7264         else if (IS_CHERRYVIEW(dev_priv->dev))
7265                 return chv_gpu_freq(dev_priv, val);
7266         else if (IS_VALLEYVIEW(dev_priv->dev))
7267                 return byt_gpu_freq(dev_priv, val);
7268         else
7269                 return val * GT_FREQUENCY_MULTIPLIER;
7270 }
7271
7272 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
7273 {
7274         if (IS_GEN9(dev_priv->dev))
7275                 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
7276                                          GT_FREQUENCY_MULTIPLIER);
7277         else if (IS_CHERRYVIEW(dev_priv->dev))
7278                 return chv_freq_opcode(dev_priv, val);
7279         else if (IS_VALLEYVIEW(dev_priv->dev))
7280                 return byt_freq_opcode(dev_priv, val);
7281         else
7282                 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
7283 }
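/*
 * On SNB through BDW one ratio step is GT_FREQUENCY_MULTIPLIER (50 MHz),
 * so e.g. intel_gpu_freq(dev_priv, 18) yields 900 MHz; gen9 divides the
 * same unit by GEN9_FREQ_SCALER (3), giving steps of about 16.7 MHz.
 */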
7284
7285 struct request_boost {
7286         struct work_struct work;
7287         struct drm_i915_gem_request *req;
7288 };
7289
7290 static void __intel_rps_boost_work(struct work_struct *work)
7291 {
7292         struct request_boost *boost = container_of(work, struct request_boost, work);
7293         struct drm_i915_gem_request *req = boost->req;
7294
7295         if (!i915_gem_request_completed(req, true))
7296                 gen6_rps_boost(to_i915(req->ring->dev), NULL,
7297                                req->emitted_jiffies);
7298
7299         i915_gem_request_unreference__unlocked(req);
7300         kfree(boost);
7301 }
7302
7303 void intel_queue_rps_boost_for_request(struct drm_device *dev,
7304                                        struct drm_i915_gem_request *req)
7305 {
7306         struct request_boost *boost;
7307
7308         if (req == NULL || INTEL_INFO(dev)->gen < 6)
7309                 return;
7310
7311         if (i915_gem_request_completed(req, true))
7312                 return;
7313
7314         boost = kmalloc(sizeof(*boost), M_DRM, M_NOWAIT);
7315         if (boost == NULL)
7316                 return;
7317
7318         i915_gem_request_reference(req);
7319         boost->req = req;
7320
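        /*
         * The boost itself runs from the driver workqueue so this path
         * never blocks; the reference taken above keeps the request
         * alive until __intel_rps_boost_work() drops it.
         */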
7321         INIT_WORK(&boost->work, __intel_rps_boost_work);
7322         queue_work(to_i915(dev)->wq, &boost->work);
7323 }
7324
7325 void intel_pm_setup(struct drm_device *dev)
7326 {
7327         struct drm_i915_private *dev_priv = dev->dev_private;
7328
7329         lockinit(&dev_priv->rps.hw_lock, "i915 rps.hw_lock", 0, LK_CANRECURSE);
7330         lockinit(&dev_priv->rps.client_lock, "i915rcl", 0, LK_CANRECURSE);
7331
7332         INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
7333                           intel_gen6_powersave_work);
7334         INIT_LIST_HEAD(&dev_priv->rps.clients);
7335         INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
7336         INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
7337
7338         dev_priv->pm.suspended = false;
7339 }