drm/i915: Upgrade to Linux 4.0
[dragonfly.git] sys/dev/drm/i915/intel_pm.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eugeni Dodonov <eugeni.dodonov@intel.com>
25  *
26  */
27
28 #include "i915_drv.h"
29 #include "intel_drv.h"
30 #include <linux/module.h>
31 #include <machine/clock.h>
32
33 /**
34  * RC6 is a special power stage which allows the GPU to enter a very
35  * low-voltage mode when idle, using down to 0V while at this stage.  This
36  * stage is entered automatically when the GPU is idle, provided RC6 support
37  * is enabled; as soon as a new workload arises, the GPU wakes up automatically as well.
38  *
39  * There are different RC6 modes available on Intel GPUs, which differ from
40  * each other in the latency required to enter and leave RC6 and in the
41  * voltage consumed by the GPU in different states.
42  *
43  * The combination of the following flags defines which states the GPU is
44  * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
45  * RC6pp is the deepest RC6. Their support by hardware varies according to the
46  * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
47  * which brings the most power savings; deeper states save more power, but
48  * require higher latency to switch to and wake up.
49  */
50 #define INTEL_RC6_ENABLE                        (1<<0)
51 #define INTEL_RC6p_ENABLE                       (1<<1)
52 #define INTEL_RC6pp_ENABLE                      (1<<2)
53
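/*
 * Illustrative sketch: the flags above are meant to be OR'ed together into a
 * mask describing which RC6 states a given platform policy permits.  For
 * example, a hypothetical policy that allows plain RC6 and deep RC6 but not
 * RC6pp could be expressed as:
 *
 *     int rc6_mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE;
 *
 * Whether the deeper bits are honoured depends on the GPU/BIOS/chipset
 * support noted in the comment above.
 */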
54 static void gen9_init_clock_gating(struct drm_device *dev)
55 {
56         struct drm_i915_private *dev_priv = dev->dev_private;
57
58         /*
59          * WaDisableSDEUnitClockGating:skl
60          * This seems to be a pre-production w/a.
61          */
62         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
63                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
64
65         /*
66          * WaDisableDgMirrorFixInHalfSliceChicken5:skl
67          * This is a pre-production w/a.
68          */
69         I915_WRITE(GEN9_HALF_SLICE_CHICKEN5,
70                    I915_READ(GEN9_HALF_SLICE_CHICKEN5) &
71                    ~GEN9_DG_MIRROR_FIX_ENABLE);
72
73         /* Wa4x4STCOptimizationDisable:skl */
74         I915_WRITE(CACHE_MODE_1,
75                    _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
76 }
77
78 static void i915_pineview_get_mem_freq(struct drm_device *dev)
79 {
80         struct drm_i915_private *dev_priv = dev->dev_private;
81         u32 tmp;
82
83         tmp = I915_READ(CLKCFG);
84
85         switch (tmp & CLKCFG_FSB_MASK) {
86         case CLKCFG_FSB_533:
87                 dev_priv->fsb_freq = 533; /* 133*4 */
88                 break;
89         case CLKCFG_FSB_800:
90                 dev_priv->fsb_freq = 800; /* 200*4 */
91                 break;
92         case CLKCFG_FSB_667:
93                 dev_priv->fsb_freq =  667; /* 167*4 */
94                 break;
95         case CLKCFG_FSB_400:
96                 dev_priv->fsb_freq = 400; /* 100*4 */
97                 break;
98         }
99
100         switch (tmp & CLKCFG_MEM_MASK) {
101         case CLKCFG_MEM_533:
102                 dev_priv->mem_freq = 533;
103                 break;
104         case CLKCFG_MEM_667:
105                 dev_priv->mem_freq = 667;
106                 break;
107         case CLKCFG_MEM_800:
108                 dev_priv->mem_freq = 800;
109                 break;
110         }
111
112         /* detect pineview DDR3 setting */
113         tmp = I915_READ(CSHRDDR3CTL);
114         dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
115 }
116
117 static void i915_ironlake_get_mem_freq(struct drm_device *dev)
118 {
119         struct drm_i915_private *dev_priv = dev->dev_private;
120         u16 ddrpll, csipll;
121
122         ddrpll = I915_READ16(DDRMPLL1);
123         csipll = I915_READ16(CSIPLL0);
124
125         switch (ddrpll & 0xff) {
126         case 0xc:
127                 dev_priv->mem_freq = 800;
128                 break;
129         case 0x10:
130                 dev_priv->mem_freq = 1066;
131                 break;
132         case 0x14:
133                 dev_priv->mem_freq = 1333;
134                 break;
135         case 0x18:
136                 dev_priv->mem_freq = 1600;
137                 break;
138         default:
139                 DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
140                                  ddrpll & 0xff);
141                 dev_priv->mem_freq = 0;
142                 break;
143         }
144
145         dev_priv->ips.r_t = dev_priv->mem_freq;
146
147         switch (csipll & 0x3ff) {
148         case 0x00c:
149                 dev_priv->fsb_freq = 3200;
150                 break;
151         case 0x00e:
152                 dev_priv->fsb_freq = 3733;
153                 break;
154         case 0x010:
155                 dev_priv->fsb_freq = 4266;
156                 break;
157         case 0x012:
158                 dev_priv->fsb_freq = 4800;
159                 break;
160         case 0x014:
161                 dev_priv->fsb_freq = 5333;
162                 break;
163         case 0x016:
164                 dev_priv->fsb_freq = 5866;
165                 break;
166         case 0x018:
167                 dev_priv->fsb_freq = 6400;
168                 break;
169         default:
170                 DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
171                                  csipll & 0x3ff);
172                 dev_priv->fsb_freq = 0;
173                 break;
174         }
175
176         if (dev_priv->fsb_freq == 3200) {
177                 dev_priv->ips.c_m = 0;
178         } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
179                 dev_priv->ips.c_m = 1;
180         } else {
181                 dev_priv->ips.c_m = 2;
182         }
183 }
184
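/*
 * Layout note: each row below is matched positionally against struct
 * cxsr_latency.  The first four columns are the lookup keys used by
 * intel_get_cxsr_latency() -- is_desktop, is_ddr3, fsb_freq and mem_freq --
 * and the remaining four are the latency values (in ns) that
 * pineview_update_wm() consumes as display_sr, cursor_sr,
 * display_hpll_disable and cursor_hpll_disable; see the struct definition
 * for the exact field order.
 */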
185 static const struct cxsr_latency cxsr_latency_table[] = {
186         {1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
187         {1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
188         {1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
189         {1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
190         {1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */
191
192         {1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
193         {1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
194         {1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
195         {1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
196         {1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */
197
198         {1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
199         {1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
200         {1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
201         {1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
202         {1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */
203
204         {0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
205         {0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
206         {0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
207         {0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
208         {0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */
209
210         {0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
211         {0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
212         {0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
213         {0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
214         {0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */
215
216         {0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
217         {0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
218         {0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
219         {0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
220         {0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
221 };
222
223 static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,
224                                                          int is_ddr3,
225                                                          int fsb,
226                                                          int mem)
227 {
228         const struct cxsr_latency *latency;
229         int i;
230
231         if (fsb == 0 || mem == 0)
232                 return NULL;
233
234         for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
235                 latency = &cxsr_latency_table[i];
236                 if (is_desktop == latency->is_desktop &&
237                     is_ddr3 == latency->is_ddr3 &&
238                     fsb == latency->fsb_freq && mem == latency->mem_freq)
239                         return latency;
240         }
241
242         DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
243
244         return NULL;
245 }
246
247 void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
248 {
249         struct drm_device *dev = dev_priv->dev;
250         u32 val;
251
252         if (IS_VALLEYVIEW(dev)) {
253                 I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
254         } else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
255                 I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
256         } else if (IS_PINEVIEW(dev)) {
257                 val = I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN;
258                 val |= enable ? PINEVIEW_SELF_REFRESH_EN : 0;
259                 I915_WRITE(DSPFW3, val);
260         } else if (IS_I945G(dev) || IS_I945GM(dev)) {
261                 val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
262                                _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
263                 I915_WRITE(FW_BLC_SELF, val);
264         } else if (IS_I915GM(dev)) {
265                 val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
266                                _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
267                 I915_WRITE(INSTPM, val);
268         } else {
269                 return;
270         }
271
272         DRM_DEBUG_KMS("memory self-refresh is %s\n",
273                       enable ? "enabled" : "disabled");
274 }
275
276 /*
277  * Latency for FIFO fetches is dependent on several factors:
278  *   - memory configuration (speed, channels)
279  *   - chipset
280  *   - current MCH state
281  * It can be fairly high in some situations, so here we assume a fairly
282  * pessimal value.  It's a tradeoff between extra memory fetches (if we
283  * set this value too high, the FIFO will fetch frequently to stay full)
284  * and power consumption (set it too low to save power and we might see
285  * FIFO underruns and display "flicker").
286  *
287  * A value of 5us seems to be a good balance; safe for very low end
288  * platforms but not overly aggressive on lower latency configs.
289  */
290 static const int pessimal_latency_ns = 5000;
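/*
 * Illustrative arithmetic (hypothetical mode, not a measured value): at
 * 148500 kHz with 4 bytes per pixel, the display consumes about
 * 148.5 * 4 = 594 bytes/us, so during a 5 us stall roughly 2970 bytes are
 * drained from the FIFO.  intel_calculate_wm() below turns this kind of
 * quantity into a watermark level.
 */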
291
292 static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
293 {
294         struct drm_i915_private *dev_priv = dev->dev_private;
295         uint32_t dsparb = I915_READ(DSPARB);
296         int size;
297
298         size = dsparb & 0x7f;
299         if (plane)
300                 size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
301
302         DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
303                       plane ? "B" : "A", size);
304
305         return size;
306 }
307
308 static int i830_get_fifo_size(struct drm_device *dev, int plane)
309 {
310         struct drm_i915_private *dev_priv = dev->dev_private;
311         uint32_t dsparb = I915_READ(DSPARB);
312         int size;
313
314         size = dsparb & 0x1ff;
315         if (plane)
316                 size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
317         size >>= 1; /* Convert to cachelines */
318
319         DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
320                       plane ? "B" : "A", size);
321
322         return size;
323 }
324
325 static int i845_get_fifo_size(struct drm_device *dev, int plane)
326 {
327         struct drm_i915_private *dev_priv = dev->dev_private;
328         uint32_t dsparb = I915_READ(DSPARB);
329         int size;
330
331         size = dsparb & 0x7f;
332         size >>= 2; /* Convert to cachelines */
333
334         DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
335                       plane ? "B" : "A",
336                       size);
337
338         return size;
339 }
340
341 /* Pineview has different values for various configs */
342 static const struct intel_watermark_params pineview_display_wm = {
343         .fifo_size = PINEVIEW_DISPLAY_FIFO,
344         .max_wm = PINEVIEW_MAX_WM,
345         .default_wm = PINEVIEW_DFT_WM,
346         .guard_size = PINEVIEW_GUARD_WM,
347         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
348 };
349 static const struct intel_watermark_params pineview_display_hplloff_wm = {
350         .fifo_size = PINEVIEW_DISPLAY_FIFO,
351         .max_wm = PINEVIEW_MAX_WM,
352         .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
353         .guard_size = PINEVIEW_GUARD_WM,
354         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
355 };
356 static const struct intel_watermark_params pineview_cursor_wm = {
357         .fifo_size = PINEVIEW_CURSOR_FIFO,
358         .max_wm = PINEVIEW_CURSOR_MAX_WM,
359         .default_wm = PINEVIEW_CURSOR_DFT_WM,
360         .guard_size = PINEVIEW_CURSOR_GUARD_WM,
361         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
362 };
363 static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
364         .fifo_size = PINEVIEW_CURSOR_FIFO,
365         .max_wm = PINEVIEW_CURSOR_MAX_WM,
366         .default_wm = PINEVIEW_CURSOR_DFT_WM,
367         .guard_size = PINEVIEW_CURSOR_GUARD_WM,
368         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
369 };
370 static const struct intel_watermark_params g4x_wm_info = {
371         .fifo_size = G4X_FIFO_SIZE,
372         .max_wm = G4X_MAX_WM,
373         .default_wm = G4X_MAX_WM,
374         .guard_size = 2,
375         .cacheline_size = G4X_FIFO_LINE_SIZE,
376 };
377 static const struct intel_watermark_params g4x_cursor_wm_info = {
378         .fifo_size = I965_CURSOR_FIFO,
379         .max_wm = I965_CURSOR_MAX_WM,
380         .default_wm = I965_CURSOR_DFT_WM,
381         .guard_size = 2,
382         .cacheline_size = G4X_FIFO_LINE_SIZE,
383 };
384 static const struct intel_watermark_params valleyview_wm_info = {
385         .fifo_size = VALLEYVIEW_FIFO_SIZE,
386         .max_wm = VALLEYVIEW_MAX_WM,
387         .default_wm = VALLEYVIEW_MAX_WM,
388         .guard_size = 2,
389         .cacheline_size = G4X_FIFO_LINE_SIZE,
390 };
391 static const struct intel_watermark_params valleyview_cursor_wm_info = {
392         .fifo_size = I965_CURSOR_FIFO,
393         .max_wm = VALLEYVIEW_CURSOR_MAX_WM,
394         .default_wm = I965_CURSOR_DFT_WM,
395         .guard_size = 2,
396         .cacheline_size = G4X_FIFO_LINE_SIZE,
397 };
398 static const struct intel_watermark_params i965_cursor_wm_info = {
399         .fifo_size = I965_CURSOR_FIFO,
400         .max_wm = I965_CURSOR_MAX_WM,
401         .default_wm = I965_CURSOR_DFT_WM,
402         .guard_size = 2,
403         .cacheline_size = I915_FIFO_LINE_SIZE,
404 };
405 static const struct intel_watermark_params i945_wm_info = {
406         .fifo_size = I945_FIFO_SIZE,
407         .max_wm = I915_MAX_WM,
408         .default_wm = 1,
409         .guard_size = 2,
410         .cacheline_size = I915_FIFO_LINE_SIZE,
411 };
412 static const struct intel_watermark_params i915_wm_info = {
413         .fifo_size = I915_FIFO_SIZE,
414         .max_wm = I915_MAX_WM,
415         .default_wm = 1,
416         .guard_size = 2,
417         .cacheline_size = I915_FIFO_LINE_SIZE,
418 };
419 static const struct intel_watermark_params i830_a_wm_info = {
420         .fifo_size = I855GM_FIFO_SIZE,
421         .max_wm = I915_MAX_WM,
422         .default_wm = 1,
423         .guard_size = 2,
424         .cacheline_size = I830_FIFO_LINE_SIZE,
425 };
426 static const struct intel_watermark_params i830_bc_wm_info = {
427         .fifo_size = I855GM_FIFO_SIZE,
428         .max_wm = I915_MAX_WM/2,
429         .default_wm = 1,
430         .guard_size = 2,
431         .cacheline_size = I830_FIFO_LINE_SIZE,
432 };
433 static const struct intel_watermark_params i845_wm_info = {
434         .fifo_size = I830_FIFO_SIZE,
435         .max_wm = I915_MAX_WM,
436         .default_wm = 1,
437         .guard_size = 2,
438         .cacheline_size = I830_FIFO_LINE_SIZE,
439 };
440
441 /**
442  * intel_calculate_wm - calculate watermark level
443  * @clock_in_khz: pixel clock
444  * @wm: chip FIFO params
 * @fifo_size: size of the FIFO available to this plane
445  * @pixel_size: display pixel size
446  * @latency_ns: memory latency for the platform
447  *
448  * Calculate the watermark level (the level at which the display plane will
449  * start fetching from memory again).  Each chip has a different display
450  * FIFO size and allocation, so the caller needs to figure that out and pass
451  * in the correct intel_watermark_params structure.
452  *
453  * As the pixel clock runs, the FIFO will be drained at a rate that depends
454  * on the pixel size.  When it reaches the watermark level, it'll start
455  * fetching FIFO-line-sized chunks from memory until the FIFO fills
456  * past the watermark point.  If the FIFO drains completely, a FIFO underrun
457  * will occur, and a display engine hang could result.
458  */
459 static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
460                                         const struct intel_watermark_params *wm,
461                                         int fifo_size,
462                                         int pixel_size,
463                                         unsigned long latency_ns)
464 {
465         long entries_required, wm_size;
466
467         /*
468          * Note: we need to make sure we don't overflow for various clock &
469          * latency values.
470          * Clocks go from a few thousand to several hundred thousand kHz,
471          * and latency is usually a few thousand ns.
472          */
473         entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
474                 1000;
475         entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);
476
477         DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);
478
479         wm_size = fifo_size - (entries_required + wm->guard_size);
480
481         DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);
482
483         /* Don't promote wm_size to unsigned... */
484         if (wm_size > (long)wm->max_wm)
485                 wm_size = wm->max_wm;
486         if (wm_size <= 0)
487                 wm_size = wm->default_wm;
488
489         /*
490          * Bspec seems to indicate that the value shouldn't be lower than
491          * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
492          * Let's go for 8, which is the burst size, since certain platforms
493          * already use a hardcoded 8 (which is what the spec says should be
494          * done).
495          */
496         if (wm_size <= 8)
497                 wm_size = 8;
498
499         return wm_size;
500 }
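/*
 * Worked example (illustrative numbers only): with clock_in_khz = 100000,
 * pixel_size = 4 and latency_ns = 5000, the FIFO drains
 * (100000 / 1000) * 4 * 5000 / 1000 = 2000 bytes during the latency window.
 * Assuming a 64-byte cacheline that rounds up to 32 entries, so a
 * hypothetical fifo_size of 96 with guard_size = 2 would yield
 * wm_size = 96 - (32 + 2) = 62, above the minimum of 8 enforced above.
 */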
501
502 static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
503 {
504         struct drm_crtc *crtc, *enabled = NULL;
505
506         for_each_crtc(dev, crtc) {
507                 if (intel_crtc_active(crtc)) {
508                         if (enabled)
509                                 return NULL;
510                         enabled = crtc;
511                 }
512         }
513
514         return enabled;
515 }
516
517 static void pineview_update_wm(struct drm_crtc *unused_crtc)
518 {
519         struct drm_device *dev = unused_crtc->dev;
520         struct drm_i915_private *dev_priv = dev->dev_private;
521         struct drm_crtc *crtc;
522         const struct cxsr_latency *latency;
523         u32 reg;
524         unsigned long wm;
525
526         latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3,
527                                          dev_priv->fsb_freq, dev_priv->mem_freq);
528         if (!latency) {
529                 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
530                 intel_set_memory_cxsr(dev_priv, false);
531                 return;
532         }
533
534         crtc = single_enabled_crtc(dev);
535         if (crtc) {
536                 const struct drm_display_mode *adjusted_mode;
537                 int pixel_size = crtc->primary->fb->bits_per_pixel / 8;
538                 int clock;
539
540                 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
541                 clock = adjusted_mode->crtc_clock;
542
543                 /* Display SR */
544                 wm = intel_calculate_wm(clock, &pineview_display_wm,
545                                         pineview_display_wm.fifo_size,
546                                         pixel_size, latency->display_sr);
547                 reg = I915_READ(DSPFW1);
548                 reg &= ~DSPFW_SR_MASK;
549                 reg |= wm << DSPFW_SR_SHIFT;
550                 I915_WRITE(DSPFW1, reg);
551                 DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
552
553                 /* cursor SR */
554                 wm = intel_calculate_wm(clock, &pineview_cursor_wm,
555                                         pineview_display_wm.fifo_size,
556                                         pixel_size, latency->cursor_sr);
557                 reg = I915_READ(DSPFW3);
558                 reg &= ~DSPFW_CURSOR_SR_MASK;
559                 reg |= (wm & 0x3f) << DSPFW_CURSOR_SR_SHIFT;
560                 I915_WRITE(DSPFW3, reg);
561
562                 /* Display HPLL off SR */
563                 wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
564                                         pineview_display_hplloff_wm.fifo_size,
565                                         pixel_size, latency->display_hpll_disable);
566                 reg = I915_READ(DSPFW3);
567                 reg &= ~DSPFW_HPLL_SR_MASK;
568                 reg |= wm & DSPFW_HPLL_SR_MASK;
569                 I915_WRITE(DSPFW3, reg);
570
571                 /* cursor HPLL off SR */
572                 wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
573                                         pineview_display_hplloff_wm.fifo_size,
574                                         pixel_size, latency->cursor_hpll_disable);
575                 reg = I915_READ(DSPFW3);
576                 reg &= ~DSPFW_HPLL_CURSOR_MASK;
577                 reg |= (wm & 0x3f) << DSPFW_HPLL_CURSOR_SHIFT;
578                 I915_WRITE(DSPFW3, reg);
579                 DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
580
581                 intel_set_memory_cxsr(dev_priv, true);
582         } else {
583                 intel_set_memory_cxsr(dev_priv, false);
584         }
585 }
586
587 static bool g4x_compute_wm0(struct drm_device *dev,
588                             int plane,
589                             const struct intel_watermark_params *display,
590                             int display_latency_ns,
591                             const struct intel_watermark_params *cursor,
592                             int cursor_latency_ns,
593                             int *plane_wm,
594                             int *cursor_wm)
595 {
596         struct drm_crtc *crtc;
597         const struct drm_display_mode *adjusted_mode;
598         int htotal, hdisplay, clock, pixel_size;
599         int line_time_us, line_count;
600         int entries, tlb_miss;
601
602         crtc = intel_get_crtc_for_plane(dev, plane);
603         if (!intel_crtc_active(crtc)) {
604                 *cursor_wm = cursor->guard_size;
605                 *plane_wm = display->guard_size;
606                 return false;
607         }
608
609         adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
610         clock = adjusted_mode->crtc_clock;
611         htotal = adjusted_mode->crtc_htotal;
612         hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
613         pixel_size = crtc->primary->fb->bits_per_pixel / 8;
614
615         /* Use the small buffer method to calculate plane watermark */
616         entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
617         tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
618         if (tlb_miss > 0)
619                 entries += tlb_miss;
620         entries = DIV_ROUND_UP(entries, display->cacheline_size);
621         *plane_wm = entries + display->guard_size;
622         if (*plane_wm > (int)display->max_wm)
623                 *plane_wm = display->max_wm;
624
625         /* Use the large buffer method to calculate cursor watermark */
626         line_time_us = max(htotal * 1000 / clock, 1);
627         line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
628         entries = line_count * to_intel_crtc(crtc)->cursor_width * pixel_size;
629         tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
630         if (tlb_miss > 0)
631                 entries += tlb_miss;
632         entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
633         *cursor_wm = entries + cursor->guard_size;
634         if (*cursor_wm > (int)cursor->max_wm)
635                 *cursor_wm = (int)cursor->max_wm;
636
637         return true;
638 }
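/*
 * Worked example (hypothetical 1080p timings): with clock = 148500 kHz,
 * pixel_size = 4 and the 5000 ns pessimal latency, the "small buffer" method
 * gives (148500 * 4 / 1000) * 5000 / 1000 = 2970 bytes; assuming a 64-byte
 * cacheline and ignoring the tlb_miss adjustment, that rounds up to 47
 * entries, so *plane_wm becomes 47 + guard_size (before the max_wm clamp).
 * For the cursor, line_time_us = 2200 * 1000 / 148500 = 14,
 * line_count = (5000 / 14 + 1000) / 1000 = 1, and a 64-pixel-wide cursor
 * then needs 1 * 64 * 4 = 256 bytes, i.e. 4 entries plus the guard size.
 */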
639
640 /*
641  * Check the wm result.
642  *
643  * If any calculated watermark value is larger than the maximum value that
644  * can be programmed into the associated watermark register, that watermark
645  * must be disabled.
646  */
647 static bool g4x_check_srwm(struct drm_device *dev,
648                            int display_wm, int cursor_wm,
649                            const struct intel_watermark_params *display,
650                            const struct intel_watermark_params *cursor)
651 {
652         DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
653                       display_wm, cursor_wm);
654
655         if (display_wm > display->max_wm) {
656                 DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n",
657                               display_wm, display->max_wm);
658                 return false;
659         }
660
661         if (cursor_wm > cursor->max_wm) {
662                 DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n",
663                               cursor_wm, cursor->max_wm);
664                 return false;
665         }
666
667         if (!(display_wm || cursor_wm)) {
668                 DRM_DEBUG_KMS("SR latency is 0, disabling\n");
669                 return false;
670         }
671
672         return true;
673 }
674
675 static bool g4x_compute_srwm(struct drm_device *dev,
676                              int plane,
677                              int latency_ns,
678                              const struct intel_watermark_params *display,
679                              const struct intel_watermark_params *cursor,
680                              int *display_wm, int *cursor_wm)
681 {
682         struct drm_crtc *crtc;
683         const struct drm_display_mode *adjusted_mode;
684         int hdisplay, htotal, pixel_size, clock;
685         unsigned long line_time_us;
686         int line_count, line_size;
687         int small, large;
688         int entries;
689
690         if (!latency_ns) {
691                 *display_wm = *cursor_wm = 0;
692                 return false;
693         }
694
695         crtc = intel_get_crtc_for_plane(dev, plane);
696         adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
697         clock = adjusted_mode->crtc_clock;
698         htotal = adjusted_mode->crtc_htotal;
699         hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
700         pixel_size = crtc->primary->fb->bits_per_pixel / 8;
701
702         line_time_us = max(htotal * 1000 / clock, 1);
703         line_count = (latency_ns / line_time_us + 1000) / 1000;
704         line_size = hdisplay * pixel_size;
705
706         /* Use the minimum of the small and large buffer method for primary */
707         small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
708         large = line_count * line_size;
709
710         entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
711         *display_wm = entries + display->guard_size;
712
713         /* calculate the self-refresh watermark for display cursor */
714         entries = line_count * pixel_size * to_intel_crtc(crtc)->cursor_width;
715         entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
716         *cursor_wm = entries + cursor->guard_size;
717
718         return g4x_check_srwm(dev,
719                               *display_wm, *cursor_wm,
720                               display, cursor);
721 }
722
723 static bool vlv_compute_drain_latency(struct drm_crtc *crtc,
724                                       int pixel_size,
725                                       int *prec_mult,
726                                       int *drain_latency)
727 {
728         struct drm_device *dev = crtc->dev;
729         int entries;
730         int clock = to_intel_crtc(crtc)->config->base.adjusted_mode.crtc_clock;
731
732         if (WARN(clock == 0, "Pixel clock is zero!\n"))
733                 return false;
734
735         if (WARN(pixel_size == 0, "Pixel size is zero!\n"))
736                 return false;
737
738         entries = DIV_ROUND_UP(clock, 1000) * pixel_size;
739         if (IS_CHERRYVIEW(dev))
740                 *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_32 :
741                                                DRAIN_LATENCY_PRECISION_16;
742         else
743                 *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_64 :
744                                                DRAIN_LATENCY_PRECISION_32;
745         *drain_latency = (64 * (*prec_mult) * 4) / entries;
746
747         if (*drain_latency > DRAIN_LATENCY_MASK)
748                 *drain_latency = DRAIN_LATENCY_MASK;
749
750         return true;
751 }
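/*
 * Worked example (illustrative, assuming DRAIN_LATENCY_PRECISION_64 is the
 * value 64): for clock = 148500 kHz and pixel_size = 4,
 * entries = DIV_ROUND_UP(148500, 1000) * 4 = 596.  Since 596 > 128 the
 * higher precision multiplier is selected, so on VLV
 * drain_latency = (64 * 64 * 4) / 596 = 27, clamped to DRAIN_LATENCY_MASK
 * if it were ever to exceed it.
 */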
752
753 /*
754  * Update drain latency registers of the memory arbiter
755  *
756  * Valleyview SoC has a new memory arbiter and needs drain latency registers
757  * to be programmed. Each plane has a drain latency multiplier and a drain
758  * latency value.
759  */
760
761 static void vlv_update_drain_latency(struct drm_crtc *crtc)
762 {
763         struct drm_device *dev = crtc->dev;
764         struct drm_i915_private *dev_priv = dev->dev_private;
765         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
766         int pixel_size;
767         int drain_latency;
768         enum i915_pipe pipe = intel_crtc->pipe;
769         int plane_prec, prec_mult, plane_dl;
770         const int high_precision = IS_CHERRYVIEW(dev) ?
771                 DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_64;
772
773         plane_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_PLANE_PRECISION_HIGH |
774                    DRAIN_LATENCY_MASK | DDL_CURSOR_PRECISION_HIGH |
775                    (DRAIN_LATENCY_MASK << DDL_CURSOR_SHIFT));
776
777         if (!intel_crtc_active(crtc)) {
778                 I915_WRITE(VLV_DDL(pipe), plane_dl);
779                 return;
780         }
781
782         /* Primary plane Drain Latency */
783         pixel_size = crtc->primary->fb->bits_per_pixel / 8;     /* BPP */
784         if (vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) {
785                 plane_prec = (prec_mult == high_precision) ?
786                                            DDL_PLANE_PRECISION_HIGH :
787                                            DDL_PLANE_PRECISION_LOW;
788                 plane_dl |= plane_prec | drain_latency;
789         }
790
791         /* Cursor Drain Latency
792          * BPP is always 4 for cursor
793          */
794         pixel_size = 4;
795
796         /* Program cursor DL only if it is enabled */
797         if (intel_crtc->cursor_base &&
798             vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) {
799                 plane_prec = (prec_mult == high_precision) ?
800                                            DDL_CURSOR_PRECISION_HIGH :
801                                            DDL_CURSOR_PRECISION_LOW;
802                 plane_dl |= plane_prec | (drain_latency << DDL_CURSOR_SHIFT);
803         }
804
805         I915_WRITE(VLV_DDL(pipe), plane_dl);
806 }
807
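/*
 * 'enabled' in the update_wm functions below is a bitmask of active pipes;
 * the self-refresh watermarks are only computed when exactly one bit is set,
 * which is what the is_power_of_2() test expresses.
 */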
808 #define single_plane_enabled(mask) is_power_of_2(mask)
809
810 static void valleyview_update_wm(struct drm_crtc *crtc)
811 {
812         struct drm_device *dev = crtc->dev;
813         static const int sr_latency_ns = 12000;
814         struct drm_i915_private *dev_priv = dev->dev_private;
815         int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
816         int plane_sr, cursor_sr;
817         int ignore_plane_sr, ignore_cursor_sr;
818         unsigned int enabled = 0;
819         bool cxsr_enabled;
820
821         vlv_update_drain_latency(crtc);
822
823         if (g4x_compute_wm0(dev, PIPE_A,
824                             &valleyview_wm_info, pessimal_latency_ns,
825                             &valleyview_cursor_wm_info, pessimal_latency_ns,
826                             &planea_wm, &cursora_wm))
827                 enabled |= 1 << PIPE_A;
828
829         if (g4x_compute_wm0(dev, PIPE_B,
830                             &valleyview_wm_info, pessimal_latency_ns,
831                             &valleyview_cursor_wm_info, pessimal_latency_ns,
832                             &planeb_wm, &cursorb_wm))
833                 enabled |= 1 << PIPE_B;
834
835         if (single_plane_enabled(enabled) &&
836             g4x_compute_srwm(dev, ffs(enabled) - 1,
837                              sr_latency_ns,
838                              &valleyview_wm_info,
839                              &valleyview_cursor_wm_info,
840                              &plane_sr, &ignore_cursor_sr) &&
841             g4x_compute_srwm(dev, ffs(enabled) - 1,
842                              2*sr_latency_ns,
843                              &valleyview_wm_info,
844                              &valleyview_cursor_wm_info,
845                              &ignore_plane_sr, &cursor_sr)) {
846                 cxsr_enabled = true;
847         } else {
848                 cxsr_enabled = false;
849                 intel_set_memory_cxsr(dev_priv, false);
850                 plane_sr = cursor_sr = 0;
851         }
852
853         DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
854                       "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
855                       planea_wm, cursora_wm,
856                       planeb_wm, cursorb_wm,
857                       plane_sr, cursor_sr);
858
859         I915_WRITE(DSPFW1,
860                    (plane_sr << DSPFW_SR_SHIFT) |
861                    (cursorb_wm << DSPFW_CURSORB_SHIFT) |
862                    (planeb_wm << DSPFW_PLANEB_SHIFT) |
863                    (planea_wm << DSPFW_PLANEA_SHIFT));
864         I915_WRITE(DSPFW2,
865                    (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
866                    (cursora_wm << DSPFW_CURSORA_SHIFT));
867         I915_WRITE(DSPFW3,
868                    (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
869                    (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
870
871         if (cxsr_enabled)
872                 intel_set_memory_cxsr(dev_priv, true);
873 }
874
875 static void cherryview_update_wm(struct drm_crtc *crtc)
876 {
877         struct drm_device *dev = crtc->dev;
878         static const int sr_latency_ns = 12000;
879         struct drm_i915_private *dev_priv = dev->dev_private;
880         int planea_wm, planeb_wm, planec_wm;
881         int cursora_wm, cursorb_wm, cursorc_wm;
882         int plane_sr, cursor_sr;
883         int ignore_plane_sr, ignore_cursor_sr;
884         unsigned int enabled = 0;
885         bool cxsr_enabled;
886
887         vlv_update_drain_latency(crtc);
888
889         if (g4x_compute_wm0(dev, PIPE_A,
890                             &valleyview_wm_info, pessimal_latency_ns,
891                             &valleyview_cursor_wm_info, pessimal_latency_ns,
892                             &planea_wm, &cursora_wm))
893                 enabled |= 1 << PIPE_A;
894
895         if (g4x_compute_wm0(dev, PIPE_B,
896                             &valleyview_wm_info, pessimal_latency_ns,
897                             &valleyview_cursor_wm_info, pessimal_latency_ns,
898                             &planeb_wm, &cursorb_wm))
899                 enabled |= 1 << PIPE_B;
900
901         if (g4x_compute_wm0(dev, PIPE_C,
902                             &valleyview_wm_info, pessimal_latency_ns,
903                             &valleyview_cursor_wm_info, pessimal_latency_ns,
904                             &planec_wm, &cursorc_wm))
905                 enabled |= 1 << PIPE_C;
906
907         if (single_plane_enabled(enabled) &&
908             g4x_compute_srwm(dev, ffs(enabled) - 1,
909                              sr_latency_ns,
910                              &valleyview_wm_info,
911                              &valleyview_cursor_wm_info,
912                              &plane_sr, &ignore_cursor_sr) &&
913             g4x_compute_srwm(dev, ffs(enabled) - 1,
914                              2*sr_latency_ns,
915                              &valleyview_wm_info,
916                              &valleyview_cursor_wm_info,
917                              &ignore_plane_sr, &cursor_sr)) {
918                 cxsr_enabled = true;
919         } else {
920                 cxsr_enabled = false;
921                 intel_set_memory_cxsr(dev_priv, false);
922                 plane_sr = cursor_sr = 0;
923         }
924
925         DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
926                       "B: plane=%d, cursor=%d, C: plane=%d, cursor=%d, "
927                       "SR: plane=%d, cursor=%d\n",
928                       planea_wm, cursora_wm,
929                       planeb_wm, cursorb_wm,
930                       planec_wm, cursorc_wm,
931                       plane_sr, cursor_sr);
932
933         I915_WRITE(DSPFW1,
934                    (plane_sr << DSPFW_SR_SHIFT) |
935                    (cursorb_wm << DSPFW_CURSORB_SHIFT) |
936                    (planeb_wm << DSPFW_PLANEB_SHIFT) |
937                    (planea_wm << DSPFW_PLANEA_SHIFT));
938         I915_WRITE(DSPFW2,
939                    (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
940                    (cursora_wm << DSPFW_CURSORA_SHIFT));
941         I915_WRITE(DSPFW3,
942                    (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
943                    (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
944         I915_WRITE(DSPFW9_CHV,
945                    (I915_READ(DSPFW9_CHV) & ~(DSPFW_PLANEC_MASK |
946                                               DSPFW_CURSORC_MASK)) |
947                    (planec_wm << DSPFW_PLANEC_SHIFT) |
948                    (cursorc_wm << DSPFW_CURSORC_SHIFT));
949
950         if (cxsr_enabled)
951                 intel_set_memory_cxsr(dev_priv, true);
952 }
953
954 static void valleyview_update_sprite_wm(struct drm_plane *plane,
955                                         struct drm_crtc *crtc,
956                                         uint32_t sprite_width,
957                                         uint32_t sprite_height,
958                                         int pixel_size,
959                                         bool enabled, bool scaled)
960 {
961         struct drm_device *dev = crtc->dev;
962         struct drm_i915_private *dev_priv = dev->dev_private;
963         int pipe = to_intel_plane(plane)->pipe;
964         int sprite = to_intel_plane(plane)->plane;
965         int drain_latency;
966         int plane_prec;
967         int sprite_dl;
968         int prec_mult;
969         const int high_precision = IS_CHERRYVIEW(dev) ?
970                 DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_64;
971
972         sprite_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_SPRITE_PRECISION_HIGH(sprite) |
973                     (DRAIN_LATENCY_MASK << DDL_SPRITE_SHIFT(sprite)));
974
975         if (enabled && vlv_compute_drain_latency(crtc, pixel_size, &prec_mult,
976                                                  &drain_latency)) {
977                 plane_prec = (prec_mult == high_precision) ?
978                                            DDL_SPRITE_PRECISION_HIGH(sprite) :
979                                            DDL_SPRITE_PRECISION_LOW(sprite);
980                 sprite_dl |= plane_prec |
981                              (drain_latency << DDL_SPRITE_SHIFT(sprite));
982         }
983
984         I915_WRITE(VLV_DDL(pipe), sprite_dl);
985 }
986
987 static void g4x_update_wm(struct drm_crtc *crtc)
988 {
989         struct drm_device *dev = crtc->dev;
990         static const int sr_latency_ns = 12000;
991         struct drm_i915_private *dev_priv = dev->dev_private;
992         int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
993         int plane_sr, cursor_sr;
994         unsigned int enabled = 0;
995         bool cxsr_enabled;
996
997         if (g4x_compute_wm0(dev, PIPE_A,
998                             &g4x_wm_info, pessimal_latency_ns,
999                             &g4x_cursor_wm_info, pessimal_latency_ns,
1000                             &planea_wm, &cursora_wm))
1001                 enabled |= 1 << PIPE_A;
1002
1003         if (g4x_compute_wm0(dev, PIPE_B,
1004                             &g4x_wm_info, pessimal_latency_ns,
1005                             &g4x_cursor_wm_info, pessimal_latency_ns,
1006                             &planeb_wm, &cursorb_wm))
1007                 enabled |= 1 << PIPE_B;
1008
1009         if (single_plane_enabled(enabled) &&
1010             g4x_compute_srwm(dev, ffs(enabled) - 1,
1011                              sr_latency_ns,
1012                              &g4x_wm_info,
1013                              &g4x_cursor_wm_info,
1014                              &plane_sr, &cursor_sr)) {
1015                 cxsr_enabled = true;
1016         } else {
1017                 cxsr_enabled = false;
1018                 intel_set_memory_cxsr(dev_priv, false);
1019                 plane_sr = cursor_sr = 0;
1020         }
1021
1022         DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
1023                       "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
1024                       planea_wm, cursora_wm,
1025                       planeb_wm, cursorb_wm,
1026                       plane_sr, cursor_sr);
1027
1028         I915_WRITE(DSPFW1,
1029                    (plane_sr << DSPFW_SR_SHIFT) |
1030                    (cursorb_wm << DSPFW_CURSORB_SHIFT) |
1031                    (planeb_wm << DSPFW_PLANEB_SHIFT) |
1032                    (planea_wm << DSPFW_PLANEA_SHIFT));
1033         I915_WRITE(DSPFW2,
1034                    (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
1035                    (cursora_wm << DSPFW_CURSORA_SHIFT));
1036         /* HPLL off in SR has some issues on G4x... disable it */
1037         I915_WRITE(DSPFW3,
1038                    (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
1039                    (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
1040
1041         if (cxsr_enabled)
1042                 intel_set_memory_cxsr(dev_priv, true);
1043 }
1044
1045 static void i965_update_wm(struct drm_crtc *unused_crtc)
1046 {
1047         struct drm_device *dev = unused_crtc->dev;
1048         struct drm_i915_private *dev_priv = dev->dev_private;
1049         struct drm_crtc *crtc;
1050         int srwm = 1;
1051         int cursor_sr = 16;
1052         bool cxsr_enabled;
1053
1054         /* Calc SR entries for single-plane configs */
1055         crtc = single_enabled_crtc(dev);
1056         if (crtc) {
1057                 /* self-refresh has much higher latency */
1058                 static const int sr_latency_ns = 12000;
1059                 const struct drm_display_mode *adjusted_mode =
1060                         &to_intel_crtc(crtc)->config->base.adjusted_mode;
1061                 int clock = adjusted_mode->crtc_clock;
1062                 int htotal = adjusted_mode->crtc_htotal;
1063                 int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
1064                 int pixel_size = crtc->primary->fb->bits_per_pixel / 8;
1065                 unsigned long line_time_us;
1066                 int entries;
1067
1068                 line_time_us = max(htotal * 1000 / clock, 1);
1069
1070                 /* Use ns/us then divide to preserve precision */
1071                 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1072                         pixel_size * hdisplay;
1073                 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
1074                 srwm = I965_FIFO_SIZE - entries;
1075                 if (srwm < 0)
1076                         srwm = 1;
1077                 srwm &= 0x1ff;
1078                 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
1079                               entries, srwm);
1080
1081                 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1082                         pixel_size * to_intel_crtc(crtc)->cursor_width;
1083                 entries = DIV_ROUND_UP(entries,
1084                                           i965_cursor_wm_info.cacheline_size);
1085                 cursor_sr = i965_cursor_wm_info.fifo_size -
1086                         (entries + i965_cursor_wm_info.guard_size);
1087
1088                 if (cursor_sr > i965_cursor_wm_info.max_wm)
1089                         cursor_sr = i965_cursor_wm_info.max_wm;
1090
1091                 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
1092                               "cursor %d\n", srwm, cursor_sr);
1093
1094                 cxsr_enabled = true;
1095         } else {
1096                 cxsr_enabled = false;
1097                 /* Turn off self refresh if both pipes are enabled */
1098                 intel_set_memory_cxsr(dev_priv, false);
1099         }
1100
1101         DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
1102                       srwm);
1103
1104         /* 965 has limitations... */
1105         I915_WRITE(DSPFW1, (srwm << DSPFW_SR_SHIFT) |
1106                    (8 << DSPFW_CURSORB_SHIFT) |
1107                    (8 << DSPFW_PLANEB_SHIFT) |
1108                    (8 << DSPFW_PLANEA_SHIFT));
1109         I915_WRITE(DSPFW2, (8 << DSPFW_CURSORA_SHIFT) |
1110                    (8 << DSPFW_PLANEC_SHIFT_OLD));
1111         /* update cursor SR watermark */
1112         I915_WRITE(DSPFW3, (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
1113
1114         if (cxsr_enabled)
1115                 intel_set_memory_cxsr(dev_priv, true);
1116 }
1117
1118 static void i9xx_update_wm(struct drm_crtc *unused_crtc)
1119 {
1120         struct drm_device *dev = unused_crtc->dev;
1121         struct drm_i915_private *dev_priv = dev->dev_private;
1122         const struct intel_watermark_params *wm_info;
1123         uint32_t fwater_lo;
1124         uint32_t fwater_hi;
1125         int cwm, srwm = 1;
1126         int fifo_size;
1127         int planea_wm, planeb_wm;
1128         struct drm_crtc *crtc, *enabled = NULL;
1129
1130         if (IS_I945GM(dev))
1131                 wm_info = &i945_wm_info;
1132         else if (!IS_GEN2(dev))
1133                 wm_info = &i915_wm_info;
1134         else
1135                 wm_info = &i830_a_wm_info;
1136
1137         fifo_size = dev_priv->display.get_fifo_size(dev, 0);
1138         crtc = intel_get_crtc_for_plane(dev, 0);
1139         if (intel_crtc_active(crtc)) {
1140                 const struct drm_display_mode *adjusted_mode;
1141                 int cpp = crtc->primary->fb->bits_per_pixel / 8;
1142                 if (IS_GEN2(dev))
1143                         cpp = 4;
1144
1145                 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1146                 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1147                                                wm_info, fifo_size, cpp,
1148                                                pessimal_latency_ns);
1149                 enabled = crtc;
1150         } else {
1151                 planea_wm = fifo_size - wm_info->guard_size;
1152                 if (planea_wm > (long)wm_info->max_wm)
1153                         planea_wm = wm_info->max_wm;
1154         }
1155
1156         if (IS_GEN2(dev))
1157                 wm_info = &i830_bc_wm_info;
1158
1159         fifo_size = dev_priv->display.get_fifo_size(dev, 1);
1160         crtc = intel_get_crtc_for_plane(dev, 1);
1161         if (intel_crtc_active(crtc)) {
1162                 const struct drm_display_mode *adjusted_mode;
1163                 int cpp = crtc->primary->fb->bits_per_pixel / 8;
1164                 if (IS_GEN2(dev))
1165                         cpp = 4;
1166
1167                 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1168                 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1169                                                wm_info, fifo_size, cpp,
1170                                                pessimal_latency_ns);
1171                 if (enabled == NULL)
1172                         enabled = crtc;
1173                 else
1174                         enabled = NULL;
1175         } else {
1176                 planeb_wm = fifo_size - wm_info->guard_size;
1177                 if (planeb_wm > (long)wm_info->max_wm)
1178                         planeb_wm = wm_info->max_wm;
1179         }
1180
1181         DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
1182
1183         if (IS_I915GM(dev) && enabled) {
1184                 struct drm_i915_gem_object *obj;
1185
1186                 obj = intel_fb_obj(enabled->primary->fb);
1187
1188                 /* self-refresh seems busted with untiled */
1189                 if (obj->tiling_mode == I915_TILING_NONE)
1190                         enabled = NULL;
1191         }
1192
1193         /*
1194          * Overlay gets an aggressive default since video jitter is bad.
1195          */
1196         cwm = 2;
1197
1198         /* Play safe and disable self-refresh before adjusting watermarks. */
1199         intel_set_memory_cxsr(dev_priv, false);
1200
1201         /* Calc SR entries for single-plane configs */
1202         if (HAS_FW_BLC(dev) && enabled) {
1203                 /* self-refresh has much higher latency */
1204                 static const int sr_latency_ns = 6000;
1205                 const struct drm_display_mode *adjusted_mode =
1206                         &to_intel_crtc(enabled)->config->base.adjusted_mode;
1207                 int clock = adjusted_mode->crtc_clock;
1208                 int htotal = adjusted_mode->crtc_htotal;
1209                 int hdisplay = to_intel_crtc(enabled)->config->pipe_src_w;
1210                 int pixel_size = enabled->primary->fb->bits_per_pixel / 8;
1211                 unsigned long line_time_us;
1212                 int entries;
1213
1214                 line_time_us = max(htotal * 1000 / clock, 1);
1215
1216                 /* Use ns/us then divide to preserve precision */
1217                 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1218                         pixel_size * hdisplay;
1219                 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
1220                 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
1221                 srwm = wm_info->fifo_size - entries;
1222                 if (srwm < 0)
1223                         srwm = 1;
1224
1225                 if (IS_I945G(dev) || IS_I945GM(dev))
1226                         I915_WRITE(FW_BLC_SELF,
1227                                    FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
1228                 else if (IS_I915GM(dev))
1229                         I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
1230         }
1231
1232         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
1233                       planea_wm, planeb_wm, cwm, srwm);
1234
1235         fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
1236         fwater_hi = (cwm & 0x1f);
1237
1238         /* Set request length to 8 cachelines per fetch */
1239         fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
1240         fwater_hi = fwater_hi | (1 << 8);
1241
1242         I915_WRITE(FW_BLC, fwater_lo);
1243         I915_WRITE(FW_BLC2, fwater_hi);
1244
1245         if (enabled)
1246                 intel_set_memory_cxsr(dev_priv, true);
1247 }
1248
1249 static void i845_update_wm(struct drm_crtc *unused_crtc)
1250 {
1251         struct drm_device *dev = unused_crtc->dev;
1252         struct drm_i915_private *dev_priv = dev->dev_private;
1253         struct drm_crtc *crtc;
1254         const struct drm_display_mode *adjusted_mode;
1255         uint32_t fwater_lo;
1256         int planea_wm;
1257
1258         crtc = single_enabled_crtc(dev);
1259         if (crtc == NULL)
1260                 return;
1261
1262         adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1263         planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1264                                        &i845_wm_info,
1265                                        dev_priv->display.get_fifo_size(dev, 0),
1266                                        4, pessimal_latency_ns);
1267         fwater_lo = I915_READ(FW_BLC) & ~0xfff;
1268         fwater_lo |= (3<<8) | planea_wm;
1269
1270         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
1271
1272         I915_WRITE(FW_BLC, fwater_lo);
1273 }
1274
1275 static uint32_t ilk_pipe_pixel_rate(struct drm_device *dev,
1276                                     struct drm_crtc *crtc)
1277 {
1278         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1279         uint32_t pixel_rate;
1280
1281         pixel_rate = intel_crtc->config->base.adjusted_mode.crtc_clock;
1282
1283         /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
1284          * adjust the pixel_rate here. */
1285
1286         if (intel_crtc->config->pch_pfit.enabled) {
1287                 uint64_t pipe_w, pipe_h, pfit_w, pfit_h;
1288                 uint32_t pfit_size = intel_crtc->config->pch_pfit.size;
1289
1290                 pipe_w = intel_crtc->config->pipe_src_w;
1291                 pipe_h = intel_crtc->config->pipe_src_h;
1292                 pfit_w = (pfit_size >> 16) & 0xFFFF;
1293                 pfit_h = pfit_size & 0xFFFF;
1294                 if (pipe_w < pfit_w)
1295                         pipe_w = pfit_w;
1296                 if (pipe_h < pfit_h)
1297                         pipe_h = pfit_h;
1298
1299                 pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h,
1300                                      pfit_w * pfit_h);
1301         }
1302
1303         return pixel_rate;
1304 }
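/*
 * Example (illustrative numbers): a 1920x1080 pipe downscaled by the PCH
 * panel fitter to 1280x720 (pfit_size carrying 1280 in the high 16 bits and
 * 720 in the low 16 bits) has its pixel rate scaled by
 * (1920 * 1080) / (1280 * 720) = 2.25, so a 148500 kHz crtc_clock becomes
 * 334125 kHz.  Upscaling leaves the rate untouched because pipe_w/pipe_h
 * are clamped up to the panel fitter dimensions first.
 */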
1305
1306 /* latency must be in 0.1us units. */
1307 static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
1308                                uint32_t latency)
1309 {
1310         uint64_t ret;
1311
1312         if (WARN(latency == 0, "Latency value missing\n"))
1313                 return UINT_MAX;
1314
1315         ret = (uint64_t) pixel_rate * bytes_per_pixel * latency;
1316         ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;
1317
1318         return ret;
1319 }
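
/*
 * Worked example for method 1 (hypothetical numbers, not from the original
 * source): pixel_rate = 148500 kHz, bytes_per_pixel = 4, latency = 120
 * (12 us): 148500 * 4 * 120 = 71280000, DIV_ROUND_UP by (64 * 10000)
 * gives 112 FIFO blocks, plus the 2 extra blocks = 114.
 */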
1320
1321 /* latency must be in 0.1us units. */
1322 static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
1323                                uint32_t horiz_pixels, uint8_t bytes_per_pixel,
1324                                uint32_t latency)
1325 {
1326         uint32_t ret;
1327
1328         if (WARN(latency == 0, "Latency value missing\n"))
1329                 return UINT_MAX;
1330
1331         ret = (latency * pixel_rate) / (pipe_htotal * 10000);
1332         ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
1333         ret = DIV_ROUND_UP(ret, 64) + 2;
1334         return ret;
1335 }
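
/*
 * Worked example for method 2 (hypothetical numbers): latency = 120 (12 us),
 * pixel_rate = 148500 kHz, pipe_htotal = 2200, horiz_pixels = 1920,
 * bytes_per_pixel = 4. (120 * 148500) / (2200 * 10000) = 0 complete lines
 * elapse during the latency, so (0 + 1) * 1920 * 4 = 7680 bytes are needed,
 * i.e. DIV_ROUND_UP(7680, 64) + 2 = 122 blocks.
 */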
1336
1337 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
1338                            uint8_t bytes_per_pixel)
1339 {
1340         return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
1341 }
1342
1343 struct skl_pipe_wm_parameters {
1344         bool active;
1345         uint32_t pipe_htotal;
1346         uint32_t pixel_rate; /* in KHz */
1347         struct intel_plane_wm_parameters plane[I915_MAX_PLANES];
1348         struct intel_plane_wm_parameters cursor;
1349 };
1350
1351 struct ilk_pipe_wm_parameters {
1352         bool active;
1353         uint32_t pipe_htotal;
1354         uint32_t pixel_rate;
1355         struct intel_plane_wm_parameters pri;
1356         struct intel_plane_wm_parameters spr;
1357         struct intel_plane_wm_parameters cur;
1358 };
1359
1360 struct ilk_wm_maximums {
1361         uint16_t pri;
1362         uint16_t spr;
1363         uint16_t cur;
1364         uint16_t fbc;
1365 };
1366
1367 /* used in computing the new watermarks state */
1368 struct intel_wm_config {
1369         unsigned int num_pipes_active;
1370         bool sprites_enabled;
1371         bool sprites_scaled;
1372 };
1373
1374 /*
1375  * For both WM_PIPE and WM_LP.
1376  * mem_value must be in 0.1us units.
1377  */
1378 static uint32_t ilk_compute_pri_wm(const struct ilk_pipe_wm_parameters *params,
1379                                    uint32_t mem_value,
1380                                    bool is_lp)
1381 {
1382         uint32_t method1, method2;
1383
1384         if (!params->active || !params->pri.enabled)
1385                 return 0;
1386
1387         method1 = ilk_wm_method1(params->pixel_rate,
1388                                  params->pri.bytes_per_pixel,
1389                                  mem_value);
1390
1391         if (!is_lp)
1392                 return method1;
1393
1394         method2 = ilk_wm_method2(params->pixel_rate,
1395                                  params->pipe_htotal,
1396                                  params->pri.horiz_pixels,
1397                                  params->pri.bytes_per_pixel,
1398                                  mem_value);
1399
1400         return min(method1, method2);
1401 }
1402
1403 /*
1404  * For both WM_PIPE and WM_LP.
1405  * mem_value must be in 0.1us units.
1406  */
1407 static uint32_t ilk_compute_spr_wm(const struct ilk_pipe_wm_parameters *params,
1408                                    uint32_t mem_value)
1409 {
1410         uint32_t method1, method2;
1411
1412         if (!params->active || !params->spr.enabled)
1413                 return 0;
1414
1415         method1 = ilk_wm_method1(params->pixel_rate,
1416                                  params->spr.bytes_per_pixel,
1417                                  mem_value);
1418         method2 = ilk_wm_method2(params->pixel_rate,
1419                                  params->pipe_htotal,
1420                                  params->spr.horiz_pixels,
1421                                  params->spr.bytes_per_pixel,
1422                                  mem_value);
1423         return min(method1, method2);
1424 }
1425
1426 /*
1427  * For both WM_PIPE and WM_LP.
1428  * mem_value must be in 0.1us units.
1429  */
1430 static uint32_t ilk_compute_cur_wm(const struct ilk_pipe_wm_parameters *params,
1431                                    uint32_t mem_value)
1432 {
1433         if (!params->active || !params->cur.enabled)
1434                 return 0;
1435
1436         return ilk_wm_method2(params->pixel_rate,
1437                               params->pipe_htotal,
1438                               params->cur.horiz_pixels,
1439                               params->cur.bytes_per_pixel,
1440                               mem_value);
1441 }
1442
1443 /* Only for WM_LP. */
1444 static uint32_t ilk_compute_fbc_wm(const struct ilk_pipe_wm_parameters *params,
1445                                    uint32_t pri_val)
1446 {
1447         if (!params->active || !params->pri.enabled)
1448                 return 0;
1449
1450         return ilk_wm_fbc(pri_val,
1451                           params->pri.horiz_pixels,
1452                           params->pri.bytes_per_pixel);
1453 }
1454
1455 static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
1456 {
1457         if (INTEL_INFO(dev)->gen >= 8)
1458                 return 3072;
1459         else if (INTEL_INFO(dev)->gen >= 7)
1460                 return 768;
1461         else
1462                 return 512;
1463 }
1464
1465 static unsigned int ilk_plane_wm_reg_max(const struct drm_device *dev,
1466                                          int level, bool is_sprite)
1467 {
1468         if (INTEL_INFO(dev)->gen >= 8)
1469                 /* BDW primary/sprite plane watermarks */
1470                 return level == 0 ? 255 : 2047;
1471         else if (INTEL_INFO(dev)->gen >= 7)
1472                 /* IVB/HSW primary/sprite plane watermarks */
1473                 return level == 0 ? 127 : 1023;
1474         else if (!is_sprite)
1475                 /* ILK/SNB primary plane watermarks */
1476                 return level == 0 ? 127 : 511;
1477         else
1478                 /* ILK/SNB sprite plane watermarks */
1479                 return level == 0 ? 63 : 255;
1480 }
1481
1482 static unsigned int ilk_cursor_wm_reg_max(const struct drm_device *dev,
1483                                           int level)
1484 {
1485         if (INTEL_INFO(dev)->gen >= 7)
1486                 return level == 0 ? 63 : 255;
1487         else
1488                 return level == 0 ? 31 : 63;
1489 }
1490
1491 static unsigned int ilk_fbc_wm_reg_max(const struct drm_device *dev)
1492 {
1493         if (INTEL_INFO(dev)->gen >= 8)
1494                 return 31;
1495         else
1496                 return 15;
1497 }
1498
1499 /* Calculate the maximum primary/sprite plane watermark */
1500 static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
1501                                      int level,
1502                                      const struct intel_wm_config *config,
1503                                      enum intel_ddb_partitioning ddb_partitioning,
1504                                      bool is_sprite)
1505 {
1506         unsigned int fifo_size = ilk_display_fifo_size(dev);
1507
1508         /* if sprites aren't enabled, sprites get nothing */
1509         if (is_sprite && !config->sprites_enabled)
1510                 return 0;
1511
1512         /* HSW allows LP1+ watermarks even with multiple pipes */
1513         if (level == 0 || config->num_pipes_active > 1) {
1514                 fifo_size /= INTEL_INFO(dev)->num_pipes;
1515
1516                 /*
1517                  * For some reason the non self refresh
1518                  * FIFO size is only half of the self
1519                  * refresh FIFO size on ILK/SNB.
1520                  */
1521                 if (INTEL_INFO(dev)->gen <= 6)
1522                         fifo_size /= 2;
1523         }
1524
1525         if (config->sprites_enabled) {
1526                 /* level 0 is always calculated with 1:1 split */
1527                 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
1528                         if (is_sprite)
1529                                 fifo_size *= 5;
1530                         fifo_size /= 6;
1531                 } else {
1532                         fifo_size /= 2;
1533                 }
1534         }
1535
1536         /* clamp to max that the registers can hold */
1537         return min(fifo_size, ilk_plane_wm_reg_max(dev, level, is_sprite));
1538 }
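
/*
 * Example (hypothetical configuration): IVB, a single active pipe, an LP
 * level with sprites enabled. fifo_size starts at 768 blocks; with 1:1
 * partitioning both primary and sprite are capped at 768 / 2 = 384, while
 * with 5/6 partitioning the sprite may use 768 * 5 / 6 = 640 and the
 * primary 768 / 6 = 128, all below the 1023 register maximum for LP levels.
 */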
1539
1540 /* Calculate the maximum cursor plane watermark */
1541 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
1542                                       int level,
1543                                       const struct intel_wm_config *config)
1544 {
1545         /* HSW LP1+ watermarks w/ multiple pipes */
1546         if (level > 0 && config->num_pipes_active > 1)
1547                 return 64;
1548
1549         /* otherwise just report max that registers can hold */
1550         return ilk_cursor_wm_reg_max(dev, level);
1551 }
1552
1553 static void ilk_compute_wm_maximums(const struct drm_device *dev,
1554                                     int level,
1555                                     const struct intel_wm_config *config,
1556                                     enum intel_ddb_partitioning ddb_partitioning,
1557                                     struct ilk_wm_maximums *max)
1558 {
1559         max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
1560         max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
1561         max->cur = ilk_cursor_wm_max(dev, level, config);
1562         max->fbc = ilk_fbc_wm_reg_max(dev);
1563 }
1564
1565 static void ilk_compute_wm_reg_maximums(struct drm_device *dev,
1566                                         int level,
1567                                         struct ilk_wm_maximums *max)
1568 {
1569         max->pri = ilk_plane_wm_reg_max(dev, level, false);
1570         max->spr = ilk_plane_wm_reg_max(dev, level, true);
1571         max->cur = ilk_cursor_wm_reg_max(dev, level);
1572         max->fbc = ilk_fbc_wm_reg_max(dev);
1573 }
1574
1575 static bool ilk_validate_wm_level(int level,
1576                                   const struct ilk_wm_maximums *max,
1577                                   struct intel_wm_level *result)
1578 {
1579         bool ret;
1580
1581         /* already determined to be invalid? */
1582         if (!result->enable)
1583                 return false;
1584
1585         result->enable = result->pri_val <= max->pri &&
1586                          result->spr_val <= max->spr &&
1587                          result->cur_val <= max->cur;
1588
1589         ret = result->enable;
1590
1591         /*
1592          * HACK until we can pre-compute everything,
1593          * and thus fail gracefully if LP0 watermarks
1594          * are exceeded...
1595          */
1596         if (level == 0 && !result->enable) {
1597                 if (result->pri_val > max->pri)
1598                         DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
1599                                       level, result->pri_val, max->pri);
1600                 if (result->spr_val > max->spr)
1601                         DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
1602                                       level, result->spr_val, max->spr);
1603                 if (result->cur_val > max->cur)
1604                         DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
1605                                       level, result->cur_val, max->cur);
1606
1607                 result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
1608                 result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
1609                 result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
1610                 result->enable = true;
1611         }
1612
1613         return ret;
1614 }
1615
1616 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
1617                                  int level,
1618                                  const struct ilk_pipe_wm_parameters *p,
1619                                  struct intel_wm_level *result)
1620 {
1621         uint16_t pri_latency = dev_priv->wm.pri_latency[level];
1622         uint16_t spr_latency = dev_priv->wm.spr_latency[level];
1623         uint16_t cur_latency = dev_priv->wm.cur_latency[level];
1624
1625         /* WM1+ latency values stored in 0.5us units */
1626         if (level > 0) {
1627                 pri_latency *= 5;
1628                 spr_latency *= 5;
1629                 cur_latency *= 5;
1630         }
1631
1632         result->pri_val = ilk_compute_pri_wm(p, pri_latency, level);
1633         result->spr_val = ilk_compute_spr_wm(p, spr_latency);
1634         result->cur_val = ilk_compute_cur_wm(p, cur_latency);
1635         result->fbc_val = ilk_compute_fbc_wm(p, result->pri_val);
1636         result->enable = true;
1637 }
1638
1639 static uint32_t
1640 hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
1641 {
1642         struct drm_i915_private *dev_priv = dev->dev_private;
1643         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1644         struct drm_display_mode *mode = &intel_crtc->config->base.adjusted_mode;
1645         u32 linetime, ips_linetime;
1646
1647         if (!intel_crtc_active(crtc))
1648                 return 0;
1649
1650         /* The WMs are computed based on how long it takes to fill a single
1651          * row at the given clock rate, multiplied by 8.
1652          */
1653         linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
1654                                      mode->crtc_clock);
1655         ips_linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
1656                                          intel_ddi_get_cdclk_freq(dev_priv));
1657
1658         return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
1659                PIPE_WM_LINETIME_TIME(linetime);
1660 }
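
/*
 * Example with a hypothetical mode: crtc_htotal = 2200, crtc_clock =
 * 148500 kHz gives a line time of ~14.8 us, so linetime =
 * DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500) = 119 in 1/8 us units.
 */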
1661
1662 static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8])
1663 {
1664         struct drm_i915_private *dev_priv = dev->dev_private;
1665
1666         if (IS_GEN9(dev)) {
1667                 uint32_t val;
1668                 int ret, i;
1669                 int level, max_level = ilk_wm_max_level(dev);
1670
1671                 /* read the first set of memory latencies[0:3] */
1672                 val = 0; /* data0 to be programmed to 0 for first set */
1673                 mutex_lock(&dev_priv->rps.hw_lock);
1674                 ret = sandybridge_pcode_read(dev_priv,
1675                                              GEN9_PCODE_READ_MEM_LATENCY,
1676                                              &val);
1677                 mutex_unlock(&dev_priv->rps.hw_lock);
1678
1679                 if (ret) {
1680                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
1681                         return;
1682                 }
1683
1684                 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
1685                 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
1686                                 GEN9_MEM_LATENCY_LEVEL_MASK;
1687                 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
1688                                 GEN9_MEM_LATENCY_LEVEL_MASK;
1689                 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
1690                                 GEN9_MEM_LATENCY_LEVEL_MASK;
1691
1692                 /* read the second set of memory latencies[4:7] */
1693                 val = 1; /* data0 to be programmed to 1 for second set */
1694                 mutex_lock(&dev_priv->rps.hw_lock);
1695                 ret = sandybridge_pcode_read(dev_priv,
1696                                              GEN9_PCODE_READ_MEM_LATENCY,
1697                                              &val);
1698                 mutex_unlock(&dev_priv->rps.hw_lock);
1699                 if (ret) {
1700                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
1701                         return;
1702                 }
1703
1704                 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
1705                 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
1706                                 GEN9_MEM_LATENCY_LEVEL_MASK;
1707                 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
1708                                 GEN9_MEM_LATENCY_LEVEL_MASK;
1709                 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
1710                                 GEN9_MEM_LATENCY_LEVEL_MASK;
1711
1712                 /*
1713                  * punit doesn't take into account the read latency so we need
1714                  * to add 2us to the various latency levels we retrieve from
1715                  * the punit.
1716                  *   - WM0 is a bit special in that it's the only level that
1717                  *   can't be disabled if we want to have display working, so
1718                  *   we always add 2us there.
1719                  *   - For levels >=1, punit returns 0us latency when they are
1720                  *   disabled, so we respect that and don't add 2us then.
1721                  *
1722                  * Additionally, if a level n (n >= 1) has a 0us latency, all
1723                  * levels m (m >= n) need to be disabled. We make sure to
1724                  * sanitize the values out of the punit to satisfy this
1725                  * requirement.
1726                  */
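                /*
                 * For example (hypothetical punit values): raw latencies
                 * {4, 6, 10, 0, 14, ...} become {6, 8, 12, 0, 0, 0, 0, 0}
                 * after the +2us fixup and the zero-truncation below.
                 */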
1727                 wm[0] += 2;
1728                 for (level = 1; level <= max_level; level++)
1729                         if (wm[level] != 0)
1730                                 wm[level] += 2;
1731                         else {
1732                                 for (i = level + 1; i <= max_level; i++)
1733                                         wm[i] = 0;
1734
1735                                 break;
1736                         }
1737         } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
1738                 uint64_t sskpd = I915_READ64(MCH_SSKPD);
1739
1740                 wm[0] = (sskpd >> 56) & 0xFF;
1741                 if (wm[0] == 0)
1742                         wm[0] = sskpd & 0xF;
1743                 wm[1] = (sskpd >> 4) & 0xFF;
1744                 wm[2] = (sskpd >> 12) & 0xFF;
1745                 wm[3] = (sskpd >> 20) & 0x1FF;
1746                 wm[4] = (sskpd >> 32) & 0x1FF;
1747         } else if (INTEL_INFO(dev)->gen >= 6) {
1748                 uint32_t sskpd = I915_READ(MCH_SSKPD);
1749
1750                 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
1751                 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
1752                 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
1753                 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
1754         } else if (INTEL_INFO(dev)->gen >= 5) {
1755                 uint32_t mltr = I915_READ(MLTR_ILK);
1756
1757                 /* ILK primary LP0 latency is 700 ns */
1758                 wm[0] = 7;
1759                 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
1760                 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
1761         }
1762 }
1763
1764 static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5])
1765 {
1766         /* ILK sprite LP0 latency is 1300 ns */
1767         if (INTEL_INFO(dev)->gen == 5)
1768                 wm[0] = 13;
1769 }
1770
1771 static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
1772 {
1773         /* ILK cursor LP0 latency is 1300 ns */
1774         if (INTEL_INFO(dev)->gen == 5)
1775                 wm[0] = 13;
1776
1777         /* WaDoubleCursorLP3Latency:ivb */
1778         if (IS_IVYBRIDGE(dev))
1779                 wm[3] *= 2;
1780 }
1781
1782 int ilk_wm_max_level(const struct drm_device *dev)
1783 {
1784         /* how many WM levels are we expecting */
1785         if (IS_GEN9(dev))
1786                 return 7;
1787         else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
1788                 return 4;
1789         else if (INTEL_INFO(dev)->gen >= 6)
1790                 return 3;
1791         else
1792                 return 2;
1793 }
1794
1795 static void intel_print_wm_latency(struct drm_device *dev,
1796                                    const char *name,
1797                                    const uint16_t wm[8])
1798 {
1799         int level, max_level = ilk_wm_max_level(dev);
1800
1801         for (level = 0; level <= max_level; level++) {
1802                 unsigned int latency = wm[level];
1803
1804                 if (latency == 0) {
1805                         DRM_ERROR("%s WM%d latency not provided\n",
1806                                   name, level);
1807                         continue;
1808                 }
1809
1810                 /*
1811                  * - latencies are in us on gen9.
1812                  * - before then, WM1+ latency values are in 0.5us units
1813                  */
1814                 if (IS_GEN9(dev))
1815                         latency *= 10;
1816                 else if (level > 0)
1817                         latency *= 5;
1818
1819                 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
1820                               name, level, wm[level],
1821                               latency / 10, latency % 10);
1822         }
1823 }
1824
1825 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
1826                                     uint16_t wm[5], uint16_t min)
1827 {
1828         int level, max_level = ilk_wm_max_level(dev_priv->dev);
1829
1830         if (wm[0] >= min)
1831                 return false;
1832
1833         wm[0] = max(wm[0], min);
1834         for (level = 1; level <= max_level; level++)
1835                 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5));
1836
1837         return true;
1838 }
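
/*
 * Example (hypothetical latencies): with min = 12 (1.2 us in WM0's 0.1 us
 * units) and wm = {7, 2, 3, 4}, WM0 is raised to 12 and WM1+ to at least
 * DIV_ROUND_UP(12, 5) = 3 (1.5 us in their 0.5 us units), giving
 * {12, 3, 3, 4}.
 */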
1839
1840 static void snb_wm_latency_quirk(struct drm_device *dev)
1841 {
1842         struct drm_i915_private *dev_priv = dev->dev_private;
1843         bool changed;
1844
1845         /*
1846          * The BIOS provided WM memory latency values are often
1847          * inadequate for high resolution displays. Adjust them.
1848          */
1849         changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
1850                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
1851                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
1852
1853         if (!changed)
1854                 return;
1855
1856         DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
1857         intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
1858         intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
1859         intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
1860 }
1861
1862 static void ilk_setup_wm_latency(struct drm_device *dev)
1863 {
1864         struct drm_i915_private *dev_priv = dev->dev_private;
1865
1866         intel_read_wm_latency(dev, dev_priv->wm.pri_latency);
1867
1868         memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
1869                sizeof(dev_priv->wm.pri_latency));
1870         memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
1871                sizeof(dev_priv->wm.pri_latency));
1872
1873         intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency);
1874         intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency);
1875
1876         intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
1877         intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
1878         intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
1879
1880         if (IS_GEN6(dev))
1881                 snb_wm_latency_quirk(dev);
1882 }
1883
1884 static void skl_setup_wm_latency(struct drm_device *dev)
1885 {
1886         struct drm_i915_private *dev_priv = dev->dev_private;
1887
1888         intel_read_wm_latency(dev, dev_priv->wm.skl_latency);
1889         intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency);
1890 }
1891
1892 static void ilk_compute_wm_parameters(struct drm_crtc *crtc,
1893                                       struct ilk_pipe_wm_parameters *p)
1894 {
1895         struct drm_device *dev = crtc->dev;
1896         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1897         enum i915_pipe pipe = intel_crtc->pipe;
1898         struct drm_plane *plane;
1899
1900         if (!intel_crtc_active(crtc))
1901                 return;
1902
1903         p->active = true;
1904         p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;
1905         p->pixel_rate = ilk_pipe_pixel_rate(dev, crtc);
1906         p->pri.bytes_per_pixel = crtc->primary->fb->bits_per_pixel / 8;
1907         p->cur.bytes_per_pixel = 4;
1908         p->pri.horiz_pixels = intel_crtc->config->pipe_src_w;
1909         p->cur.horiz_pixels = intel_crtc->cursor_width;
1910         /* TODO: for now, assume primary and cursor planes are always enabled. */
1911         p->pri.enabled = true;
1912         p->cur.enabled = true;
1913
1914         drm_for_each_legacy_plane(plane, &dev->mode_config.plane_list) {
1915                 struct intel_plane *intel_plane = to_intel_plane(plane);
1916
1917                 if (intel_plane->pipe == pipe) {
1918                         p->spr = intel_plane->wm;
1919                         break;
1920                 }
1921         }
1922 }
1923
1924 static void ilk_compute_wm_config(struct drm_device *dev,
1925                                   struct intel_wm_config *config)
1926 {
1927         struct intel_crtc *intel_crtc;
1928
1929         /* Compute the currently _active_ config */
1930         for_each_intel_crtc(dev, intel_crtc) {
1931                 const struct intel_pipe_wm *wm = &intel_crtc->wm.active;
1932
1933                 if (!wm->pipe_enabled)
1934                         continue;
1935
1936                 config->sprites_enabled |= wm->sprites_enabled;
1937                 config->sprites_scaled |= wm->sprites_scaled;
1938                 config->num_pipes_active++;
1939         }
1940 }
1941
1942 /* Compute new watermarks for the pipe */
1943 static bool intel_compute_pipe_wm(struct drm_crtc *crtc,
1944                                   const struct ilk_pipe_wm_parameters *params,
1945                                   struct intel_pipe_wm *pipe_wm)
1946 {
1947         struct drm_device *dev = crtc->dev;
1948         const struct drm_i915_private *dev_priv = dev->dev_private;
1949         int level, max_level = ilk_wm_max_level(dev);
1950         /* LP0 watermark maximums depend on this pipe alone */
1951         struct intel_wm_config config = {
1952                 .num_pipes_active = 1,
1953                 .sprites_enabled = params->spr.enabled,
1954                 .sprites_scaled = params->spr.scaled,
1955         };
1956         struct ilk_wm_maximums max;
1957
1958         pipe_wm->pipe_enabled = params->active;
1959         pipe_wm->sprites_enabled = params->spr.enabled;
1960         pipe_wm->sprites_scaled = params->spr.scaled;
1961
1962         /* ILK/SNB: LP2+ watermarks only w/o sprites */
1963         if (INTEL_INFO(dev)->gen <= 6 && params->spr.enabled)
1964                 max_level = 1;
1965
1966         /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
1967         if (params->spr.scaled)
1968                 max_level = 0;
1969
1970         ilk_compute_wm_level(dev_priv, 0, params, &pipe_wm->wm[0]);
1971
1972         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
1973                 pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc);
1974
1975         /* LP0 watermarks always use 1/2 DDB partitioning */
1976         ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
1977
1978         /* At least LP0 must be valid */
1979         if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0]))
1980                 return false;
1981
1982         ilk_compute_wm_reg_maximums(dev, 1, &max);
1983
1984         for (level = 1; level <= max_level; level++) {
1985                 struct intel_wm_level wm = {};
1986
1987                 ilk_compute_wm_level(dev_priv, level, params, &wm);
1988
1989                 /*
1990                  * Disable any watermark level that exceeds the
1991                  * register maximums since such watermarks are
1992                  * always invalid.
1993                  */
1994                 if (!ilk_validate_wm_level(level, &max, &wm))
1995                         break;
1996
1997                 pipe_wm->wm[level] = wm;
1998         }
1999
2000         return true;
2001 }
2002
2003 /*
2004  * Merge the watermarks from all active pipes for a specific level.
2005  */
2006 static void ilk_merge_wm_level(struct drm_device *dev,
2007                                int level,
2008                                struct intel_wm_level *ret_wm)
2009 {
2010         struct intel_crtc *intel_crtc;
2011
2012         ret_wm->enable = true;
2013
2014         for_each_intel_crtc(dev, intel_crtc) {
2015                 const struct intel_pipe_wm *active = &intel_crtc->wm.active;
2016                 const struct intel_wm_level *wm = &active->wm[level];
2017
2018                 if (!active->pipe_enabled)
2019                         continue;
2020
2021                 /*
2022                  * The watermark values may have been used in the past,
2023                  * so we must maintain them in the registers for some
2024                  * time even if the level is now disabled.
2025                  */
2026                 if (!wm->enable)
2027                         ret_wm->enable = false;
2028
2029                 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
2030                 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
2031                 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
2032                 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
2033         }
2034 }
2035
2036 /*
2037  * Merge all low power watermarks for all active pipes.
2038  */
2039 static void ilk_wm_merge(struct drm_device *dev,
2040                          const struct intel_wm_config *config,
2041                          const struct ilk_wm_maximums *max,
2042                          struct intel_pipe_wm *merged)
2043 {
2044         int level, max_level = ilk_wm_max_level(dev);
2045         int last_enabled_level = max_level;
2046
2047         /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
2048         if ((INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) &&
2049             config->num_pipes_active > 1)
2050                 return;
2051
2052         /* ILK: FBC WM must be disabled always */
2053         merged->fbc_wm_enabled = INTEL_INFO(dev)->gen >= 6;
2054
2055         /* merge each WM1+ level */
2056         for (level = 1; level <= max_level; level++) {
2057                 struct intel_wm_level *wm = &merged->wm[level];
2058
2059                 ilk_merge_wm_level(dev, level, wm);
2060
2061                 if (level > last_enabled_level)
2062                         wm->enable = false;
2063                 else if (!ilk_validate_wm_level(level, max, wm))
2064                         /* make sure all following levels get disabled */
2065                         last_enabled_level = level - 1;
2066
2067                 /*
2068                  * The spec says it is preferred to disable
2069                  * FBC WMs instead of disabling a WM level.
2070                  */
2071                 if (wm->fbc_val > max->fbc) {
2072                         if (wm->enable)
2073                                 merged->fbc_wm_enabled = false;
2074                         wm->fbc_val = 0;
2075                 }
2076         }
2077
2078         /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
2079         /*
2080          * FIXME this is racy. FBC might get enabled later.
2081          * What we should check here is whether FBC can be
2082          * enabled sometime later.
2083          */
2084         if (IS_GEN5(dev) && !merged->fbc_wm_enabled && intel_fbc_enabled(dev)) {
2085                 for (level = 2; level <= max_level; level++) {
2086                         struct intel_wm_level *wm = &merged->wm[level];
2087
2088                         wm->enable = false;
2089                 }
2090         }
2091 }
2092
2093 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
2094 {
2095         /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
2096         return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
2097 }
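
/*
 * E.g. on HSW/BDW with five levels (0-4): if wm[4] is enabled the
 * LP1/LP2/LP3 registers are fed from levels 1/3/4, otherwise from 1/2/3.
 */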
2098
2099 /* The value we need to program into the WM_LPx latency field */
2100 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
2101 {
2102         struct drm_i915_private *dev_priv = dev->dev_private;
2103
2104         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2105                 return 2 * level;
2106         else
2107                 return dev_priv->wm.pri_latency[level];
2108 }
2109
2110 static void ilk_compute_wm_results(struct drm_device *dev,
2111                                    const struct intel_pipe_wm *merged,
2112                                    enum intel_ddb_partitioning partitioning,
2113                                    struct ilk_wm_values *results)
2114 {
2115         struct intel_crtc *intel_crtc;
2116         int level, wm_lp;
2117
2118         results->enable_fbc_wm = merged->fbc_wm_enabled;
2119         results->partitioning = partitioning;
2120
2121         /* LP1+ register values */
2122         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2123                 const struct intel_wm_level *r;
2124
2125                 level = ilk_wm_lp_to_level(wm_lp, merged);
2126
2127                 r = &merged->wm[level];
2128
2129                 /*
2130                  * Maintain the watermark values even if the level is
2131                  * disabled. Doing otherwise could cause underruns.
2132                  */
2133                 results->wm_lp[wm_lp - 1] =
2134                         (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
2135                         (r->pri_val << WM1_LP_SR_SHIFT) |
2136                         r->cur_val;
2137
2138                 if (r->enable)
2139                         results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
2140
2141                 if (INTEL_INFO(dev)->gen >= 8)
2142                         results->wm_lp[wm_lp - 1] |=
2143                                 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
2144                 else
2145                         results->wm_lp[wm_lp - 1] |=
2146                                 r->fbc_val << WM1_LP_FBC_SHIFT;
2147
2148                 /*
2149                  * Always set WM1S_LP_EN when spr_val != 0, even if the
2150                  * level is disabled. Doing otherwise could cause underruns.
2151                  */
2152                 if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) {
2153                         WARN_ON(wm_lp != 1);
2154                         results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
2155                 } else
2156                         results->wm_lp_spr[wm_lp - 1] = r->spr_val;
2157         }
2158
2159         /* LP0 register values */
2160         for_each_intel_crtc(dev, intel_crtc) {
2161                 enum i915_pipe pipe = intel_crtc->pipe;
2162                 const struct intel_wm_level *r =
2163                         &intel_crtc->wm.active.wm[0];
2164
2165                 if (WARN_ON(!r->enable))
2166                         continue;
2167
2168                 results->wm_linetime[pipe] = intel_crtc->wm.active.linetime;
2169
2170                 results->wm_pipe[pipe] =
2171                         (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
2172                         (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
2173                         r->cur_val;
2174         }
2175 }
2176
2177 /* Find the result with the highest level enabled. If both peak at the same
2178  * level, prefer the one with enable_fbc_wm set; if that is equal too, prefer r1. */
2179 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
2180                                                   struct intel_pipe_wm *r1,
2181                                                   struct intel_pipe_wm *r2)
2182 {
2183         int level, max_level = ilk_wm_max_level(dev);
2184         int level1 = 0, level2 = 0;
2185
2186         for (level = 1; level <= max_level; level++) {
2187                 if (r1->wm[level].enable)
2188                         level1 = level;
2189                 if (r2->wm[level].enable)
2190                         level2 = level;
2191         }
2192
2193         if (level1 == level2) {
2194                 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
2195                         return r2;
2196                 else
2197                         return r1;
2198         } else if (level1 > level2) {
2199                 return r1;
2200         } else {
2201                 return r2;
2202         }
2203 }
2204
2205 /* dirty bits used to track which watermarks need changes */
2206 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
2207 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
2208 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
2209 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
2210 #define WM_DIRTY_FBC (1 << 24)
2211 #define WM_DIRTY_DDB (1 << 25)
2212
2213 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
2214                                          const struct ilk_wm_values *old,
2215                                          const struct ilk_wm_values *new)
2216 {
2217         unsigned int dirty = 0;
2218         enum i915_pipe pipe;
2219         int wm_lp;
2220
2221         for_each_pipe(dev_priv, pipe) {
2222                 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
2223                         dirty |= WM_DIRTY_LINETIME(pipe);
2224                         /* Must disable LP1+ watermarks too */
2225                         dirty |= WM_DIRTY_LP_ALL;
2226                 }
2227
2228                 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
2229                         dirty |= WM_DIRTY_PIPE(pipe);
2230                         /* Must disable LP1+ watermarks too */
2231                         dirty |= WM_DIRTY_LP_ALL;
2232                 }
2233         }
2234
2235         if (old->enable_fbc_wm != new->enable_fbc_wm) {
2236                 dirty |= WM_DIRTY_FBC;
2237                 /* Must disable LP1+ watermarks too */
2238                 dirty |= WM_DIRTY_LP_ALL;
2239         }
2240
2241         if (old->partitioning != new->partitioning) {
2242                 dirty |= WM_DIRTY_DDB;
2243                 /* Must disable LP1+ watermarks too */
2244                 dirty |= WM_DIRTY_LP_ALL;
2245         }
2246
2247         /* LP1+ watermarks already deemed dirty, no need to continue */
2248         if (dirty & WM_DIRTY_LP_ALL)
2249                 return dirty;
2250
2251         /* Find the lowest numbered LP1+ watermark in need of an update... */
2252         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2253                 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
2254                     old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
2255                         break;
2256         }
2257
2258         /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
2259         for (; wm_lp <= 3; wm_lp++)
2260                 dirty |= WM_DIRTY_LP(wm_lp);
2261
2262         return dirty;
2263 }
2264
2265 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
2266                                unsigned int dirty)
2267 {
2268         struct ilk_wm_values *previous = &dev_priv->wm.hw;
2269         bool changed = false;
2270
2271         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
2272                 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
2273                 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
2274                 changed = true;
2275         }
2276         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
2277                 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
2278                 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
2279                 changed = true;
2280         }
2281         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
2282                 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
2283                 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
2284                 changed = true;
2285         }
2286
2287         /*
2288          * Don't touch WM1S_LP_EN here.
2289          * Doing so could cause underruns.
2290          */
2291
2292         return changed;
2293 }
2294
2295 /*
2296  * The spec says we shouldn't write when we don't need to, because every write
2297  * causes WMs to be re-evaluated, expending some power.
2298  */
2299 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
2300                                 struct ilk_wm_values *results)
2301 {
2302         struct drm_device *dev = dev_priv->dev;
2303         struct ilk_wm_values *previous = &dev_priv->wm.hw;
2304         unsigned int dirty;
2305         uint32_t val;
2306
2307         dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
2308         if (!dirty)
2309                 return;
2310
2311         _ilk_disable_lp_wm(dev_priv, dirty);
2312
2313         if (dirty & WM_DIRTY_PIPE(PIPE_A))
2314                 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
2315         if (dirty & WM_DIRTY_PIPE(PIPE_B))
2316                 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
2317         if (dirty & WM_DIRTY_PIPE(PIPE_C))
2318                 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
2319
2320         if (dirty & WM_DIRTY_LINETIME(PIPE_A))
2321                 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
2322         if (dirty & WM_DIRTY_LINETIME(PIPE_B))
2323                 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
2324         if (dirty & WM_DIRTY_LINETIME(PIPE_C))
2325                 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
2326
2327         if (dirty & WM_DIRTY_DDB) {
2328                 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2329                         val = I915_READ(WM_MISC);
2330                         if (results->partitioning == INTEL_DDB_PART_1_2)
2331                                 val &= ~WM_MISC_DATA_PARTITION_5_6;
2332                         else
2333                                 val |= WM_MISC_DATA_PARTITION_5_6;
2334                         I915_WRITE(WM_MISC, val);
2335                 } else {
2336                         val = I915_READ(DISP_ARB_CTL2);
2337                         if (results->partitioning == INTEL_DDB_PART_1_2)
2338                                 val &= ~DISP_DATA_PARTITION_5_6;
2339                         else
2340                                 val |= DISP_DATA_PARTITION_5_6;
2341                         I915_WRITE(DISP_ARB_CTL2, val);
2342                 }
2343         }
2344
2345         if (dirty & WM_DIRTY_FBC) {
2346                 val = I915_READ(DISP_ARB_CTL);
2347                 if (results->enable_fbc_wm)
2348                         val &= ~DISP_FBC_WM_DIS;
2349                 else
2350                         val |= DISP_FBC_WM_DIS;
2351                 I915_WRITE(DISP_ARB_CTL, val);
2352         }
2353
2354         if (dirty & WM_DIRTY_LP(1) &&
2355             previous->wm_lp_spr[0] != results->wm_lp_spr[0])
2356                 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
2357
2358         if (INTEL_INFO(dev)->gen >= 7) {
2359                 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
2360                         I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
2361                 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
2362                         I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
2363         }
2364
2365         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
2366                 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
2367         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
2368                 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
2369         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
2370                 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
2371
2372         dev_priv->wm.hw = *results;
2373 }
2374
2375 static bool ilk_disable_lp_wm(struct drm_device *dev)
2376 {
2377         struct drm_i915_private *dev_priv = dev->dev_private;
2378
2379         return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
2380 }
2381
2382 /*
2383  * On gen9, we need to allocate Display Data Buffer (DDB) portions to the
2384  * different active planes.
2385  */
2386
2387 #define SKL_DDB_SIZE            896     /* in blocks */
2388
2389 static void
2390 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
2391                                    struct drm_crtc *for_crtc,
2392                                    const struct intel_wm_config *config,
2393                                    const struct skl_pipe_wm_parameters *params,
2394                                    struct skl_ddb_entry *alloc /* out */)
2395 {
2396         struct drm_crtc *crtc;
2397         unsigned int pipe_size, ddb_size;
2398         int nth_active_pipe;
2399
2400         if (!params->active) {
2401                 alloc->start = 0;
2402                 alloc->end = 0;
2403                 return;
2404         }
2405
2406         ddb_size = SKL_DDB_SIZE;
2407
2408         ddb_size -= 4; /* 4 blocks for bypass path allocation */
2409
2410         nth_active_pipe = 0;
2411         for_each_crtc(dev, crtc) {
2412                 if (!intel_crtc_active(crtc))
2413                         continue;
2414
2415                 if (crtc == for_crtc)
2416                         break;
2417
2418                 nth_active_pipe++;
2419         }
2420
2421         pipe_size = ddb_size / config->num_pipes_active;
2422         alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active;
2423         alloc->end = alloc->start + pipe_size;
2424 }
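
/*
 * Example (hypothetical two-pipe setup): the usable DDB is 896 - 4 = 892
 * blocks, so pipe_size = 446 and the second active pipe ends up with
 * start = 446, end = 892.
 */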
2425
2426 static unsigned int skl_cursor_allocation(const struct intel_wm_config *config)
2427 {
2428         if (config->num_pipes_active == 1)
2429                 return 32;
2430
2431         return 8;
2432 }
2433
2434 static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
2435 {
2436         entry->start = reg & 0x3ff;
2437         entry->end = (reg >> 16) & 0x3ff;
2438         if (entry->end)
2439                 entry->end += 1;
2440 }
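
/*
 * E.g. a (hypothetical) register value of 0x00bf0060 decodes to
 * start = 0x060 = 96 and end = 0x0bf + 1 = 192, i.e. 96 blocks.
 */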
2441
2442 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
2443                           struct skl_ddb_allocation *ddb /* out */)
2444 {
2445         struct drm_device *dev = dev_priv->dev;
2446         enum i915_pipe pipe;
2447         int plane;
2448         u32 val;
2449
2450         for_each_pipe(dev_priv, pipe) {
2451                 for_each_plane(pipe, plane) {
2452                         val = I915_READ(PLANE_BUF_CFG(pipe, plane));
2453                         skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane],
2454                                                    val);
2455                 }
2456
2457                 val = I915_READ(CUR_BUF_CFG(pipe));
2458                 skl_ddb_entry_init_from_hw(&ddb->cursor[pipe], val);
2459         }
2460 }
2461
2462 static unsigned int
2463 skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p)
2464 {
2465         return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
2466 }
2467
2468 /*
2469  * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
2470  * an 8192x4096@32bpp framebuffer:
2471  *   3 * 4096 * 8192 * 4 < 2^32
2472  */
2473 static unsigned int
2474 skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
2475                                  const struct skl_pipe_wm_parameters *params)
2476 {
2477         unsigned int total_data_rate = 0;
2478         int plane;
2479
2480         for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
2481                 const struct intel_plane_wm_parameters *p;
2482
2483                 p = &params->plane[plane];
2484                 if (!p->enabled)
2485                         continue;
2486
2487                 total_data_rate += skl_plane_relative_data_rate(p);
2488         }
2489
2490         return total_data_rate;
2491 }
2492
2493 static void
2494 skl_allocate_pipe_ddb(struct drm_crtc *crtc,
2495                       const struct intel_wm_config *config,
2496                       const struct skl_pipe_wm_parameters *params,
2497                       struct skl_ddb_allocation *ddb /* out */)
2498 {
2499         struct drm_device *dev = crtc->dev;
2500         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2501         enum i915_pipe pipe = intel_crtc->pipe;
2502         struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
2503         uint16_t alloc_size, start, cursor_blocks;
2504         unsigned int total_data_rate;
2505         int plane;
2506
2507         skl_ddb_get_pipe_allocation_limits(dev, crtc, config, params, alloc);
2508         alloc_size = skl_ddb_entry_size(alloc);
2509         if (alloc_size == 0) {
2510                 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
2511                 memset(&ddb->cursor[pipe], 0, sizeof(ddb->cursor[pipe]));
2512                 return;
2513         }
2514
2515         cursor_blocks = skl_cursor_allocation(config);
2516         ddb->cursor[pipe].start = alloc->end - cursor_blocks;
2517         ddb->cursor[pipe].end = alloc->end;
2518
2519         alloc_size -= cursor_blocks;
2520         alloc->end -= cursor_blocks;
2521
2522         /*
2523          * Each active plane gets a portion of the remaining space, in
2524          * proportion to the amount of data it needs to fetch from memory.
2525          *
2526          * FIXME: we may not allocate every single block here.
2527          */
2528         total_data_rate = skl_get_total_relative_data_rate(intel_crtc, params);
2529
2530         start = alloc->start;
2531         for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
2532                 const struct intel_plane_wm_parameters *p;
2533                 unsigned int data_rate;
2534                 uint16_t plane_blocks;
2535
2536                 p = &params->plane[plane];
2537                 if (!p->enabled)
2538                         continue;
2539
2540                 data_rate = skl_plane_relative_data_rate(p);
2541
2542                 /*
2543                  * Promote the expression to 64 bits to avoid overflow; the
2544                  * result is < alloc_size since data_rate / total_data_rate < 1.
2545                  */
2546                 plane_blocks = div_u64((uint64_t)alloc_size * data_rate,
2547                                        total_data_rate);
2548
2549                 ddb->plane[pipe][plane].start = start;
2550                 ddb->plane[pipe][plane].end = start + plane_blocks;
2551
2552                 start += plane_blocks;
2553         }
2554
2555 }
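
/*
 * Example of the proportional split above (hypothetical numbers): a single
 * active pipe has 892 - 32 (cursor) = 860 blocks left, with a 1920x1080
 * primary and a 640x480 sprite, both at 4 bytes per pixel. The relative
 * data rates are 8294400 and 1228800, so the primary gets
 * 860 * 8294400 / 9523200 = 749 blocks and the sprite 110, leaving one
 * block unallocated (hence the FIXME above).
 */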
2556
2557 static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config)
2558 {
2559         /* TODO: Take into account the scalers once we support them */
2560         return config->base.adjusted_mode.crtc_clock;
2561 }
2562
2563 /*
2564  * The max latency should be 257 (max the punit can code is 255 and we add 2us
2565  * for the read latency) and bytes_per_pixel should always be <= 8, so that
2566  * should allow pixel_rate up to ~2 GHz which seems sufficient since max
2567  * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
2568  */
2569 static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
2570                                uint32_t latency)
2571 {
2572         uint32_t wm_intermediate_val, ret;
2573
2574         if (latency == 0)
2575                 return UINT_MAX;
2576
2577         wm_intermediate_val = latency * pixel_rate * bytes_per_pixel;
2578         ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
2579
2580         return ret;
2581 }
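
/*
 * Example (hypothetical numbers): latency = 15 us, pixel_rate = 148500 kHz,
 * bytes_per_pixel = 4: DIV_ROUND_UP(15 * 148500 * 4, 1000) = 8910 bytes
 * fetched during the latency window.
 */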
2582
2583 static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
2584                                uint32_t horiz_pixels, uint8_t bytes_per_pixel,
2585                                uint32_t latency)
2586 {
2587         uint32_t ret, plane_bytes_per_line, wm_intermediate_val;
2588
2589         if (latency == 0)
2590                 return UINT_MAX;
2591
2592         plane_bytes_per_line = horiz_pixels * bytes_per_pixel;
2593         wm_intermediate_val = latency * pixel_rate;
2594         ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) *
2595                                 plane_bytes_per_line;
2596
2597         return ret;
2598 }
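
/*
 * Example (same hypothetical numbers as above, pipe_htotal = 2200,
 * horiz_pixels = 1920): DIV_ROUND_UP(15 * 148500, 2200 * 1000) = 2 lines
 * are in flight during the latency, i.e. 2 * 1920 * 4 = 15360 bytes.
 */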
2599
2600 static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb,
2601                                        const struct intel_crtc *intel_crtc)
2602 {
2603         struct drm_device *dev = intel_crtc->base.dev;
2604         struct drm_i915_private *dev_priv = dev->dev_private;
2605         const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
2606         enum i915_pipe pipe = intel_crtc->pipe;
2607
2608         if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe],
2609                    sizeof(new_ddb->plane[pipe])))
2610                 return true;
2611
2612         if (memcmp(&new_ddb->cursor[pipe], &cur_ddb->cursor[pipe],
2613                     sizeof(new_ddb->cursor[pipe])))
2614                 return true;
2615
2616         return false;
2617 }
2618
2619 static void skl_compute_wm_global_parameters(struct drm_device *dev,
2620                                              struct intel_wm_config *config)
2621 {
2622         struct drm_crtc *crtc;
2623         struct drm_plane *plane;
2624
2625         list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
2626                 config->num_pipes_active += intel_crtc_active(crtc);
2627
2628         /* FIXME: I don't think we need those two global parameters on SKL */
2629         list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
2630                 struct intel_plane *intel_plane = to_intel_plane(plane);
2631
2632                 config->sprites_enabled |= intel_plane->wm.enabled;
2633                 config->sprites_scaled |= intel_plane->wm.scaled;
2634         }
2635 }
2636
2637 static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
2638                                            struct skl_pipe_wm_parameters *p)
2639 {
2640         struct drm_device *dev = crtc->dev;
2641         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2642         enum i915_pipe pipe = intel_crtc->pipe;
2643         struct drm_plane *plane;
2644         int i = 1; /* Index for sprite planes start */
2645
2646         p->active = intel_crtc_active(crtc);
2647         if (p->active) {
2648                 p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;
2649                 p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);
2650
2651                 /*
2652                  * For now, assume primary and cursor planes are always enabled.
2653                  */
2654                 p->plane[0].enabled = true;
2655                 p->plane[0].bytes_per_pixel =
2656                         crtc->primary->fb->bits_per_pixel / 8;
2657                 p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;
2658                 p->plane[0].vert_pixels = intel_crtc->config->pipe_src_h;
2659
2660                 p->cursor.enabled = true;
2661                 p->cursor.bytes_per_pixel = 4;
2662                 p->cursor.horiz_pixels = intel_crtc->cursor_width ?
2663                                          intel_crtc->cursor_width : 64;
2664         }
2665
2666         list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
2667                 struct intel_plane *intel_plane = to_intel_plane(plane);
2668
2669                 if (intel_plane->pipe == pipe &&
2670                         plane->type == DRM_PLANE_TYPE_OVERLAY)
2671                         p->plane[i++] = intel_plane->wm;
2672         }
2673 }
2674
2675 static bool skl_compute_plane_wm(struct skl_pipe_wm_parameters *p,
2676                                  struct intel_plane_wm_parameters *p_params,
2677                                  uint16_t ddb_allocation,
2678                                  uint32_t mem_value,
2679                                  uint16_t *out_blocks, /* out */
2680                                  uint8_t *out_lines /* out */)
2681 {
2682         uint32_t method1, method2, plane_bytes_per_line, res_blocks, res_lines;
2683         uint32_t result_bytes;
2684
2685         if (mem_value == 0 || !p->active || !p_params->enabled)
2686                 return false;
2687
2688         method1 = skl_wm_method1(p->pixel_rate,
2689                                  p_params->bytes_per_pixel,
2690                                  mem_value);
2691         method2 = skl_wm_method2(p->pixel_rate,
2692                                  p->pipe_htotal,
2693                                  p_params->horiz_pixels,
2694                                  p_params->bytes_per_pixel,
2695                                  mem_value);
2696
2697         plane_bytes_per_line = p_params->horiz_pixels *
2698                                         p_params->bytes_per_pixel;
2699
2700         /* For now, this only handles X-tiled and linear surfaces */
2701         if (((ddb_allocation * 512) / plane_bytes_per_line) >= 1)
2702                 result_bytes = min(method1, method2);
2703         else
2704                 result_bytes = method1;
2705
2706         res_blocks = DIV_ROUND_UP(result_bytes, 512) + 1;
2707         res_lines = DIV_ROUND_UP(result_bytes, plane_bytes_per_line);
2708
2709         if (res_blocks > ddb_allocation || res_lines > 31)
2710                 return false;
2711
2712         *out_blocks = res_blocks;
2713         *out_lines = res_lines;
2714
2715         return true;
2716 }
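
/*
 * Illustrative arithmetic for the blocks/lines conversion above (assumed
 * numbers, not taken from any spec): a 1920 pixel wide, 4 bpp plane has
 * plane_bytes_per_line = 7680.  If the selected method yields, say, 3000
 * bytes, that becomes DIV_ROUND_UP(3000, 512) + 1 = 7 blocks and
 * DIV_ROUND_UP(3000, 7680) = 1 line; both must fit within the plane's DDB
 * allocation and the 31-line limit or this watermark level stays disabled.
 */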
2717
2718 static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,
2719                                  struct skl_ddb_allocation *ddb,
2720                                  struct skl_pipe_wm_parameters *p,
2721                                  enum i915_pipe pipe,
2722                                  int level,
2723                                  int num_planes,
2724                                  struct skl_wm_level *result)
2725 {
2726         uint16_t latency = dev_priv->wm.skl_latency[level];
2727         uint16_t ddb_blocks;
2728         int i;
2729
2730         for (i = 0; i < num_planes; i++) {
2731                 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]);
2732
2733                 result->plane_en[i] = skl_compute_plane_wm(p, &p->plane[i],
2734                                                 ddb_blocks,
2735                                                 latency,
2736                                                 &result->plane_res_b[i],
2737                                                 &result->plane_res_l[i]);
2738         }
2739
2740         ddb_blocks = skl_ddb_entry_size(&ddb->cursor[pipe]);
2741         result->cursor_en = skl_compute_plane_wm(p, &p->cursor, ddb_blocks,
2742                                                  latency, &result->cursor_res_b,
2743                                                  &result->cursor_res_l);
2744 }
2745
2746 static uint32_t
2747 skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p)
2748 {
2749         if (!intel_crtc_active(crtc))
2750                 return 0;
2751
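        /*
         * Assuming pixel_rate is in kHz (as mode clocks are elsewhere in the
         * driver), htotal * 1000 / pixel_rate is the line time in
         * microseconds; the factor of 8 expresses it in 1/8 us units, which
         * we take to be the granularity of the PIPE_WM_LINETIME field.
         */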
2752         return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
2753
2754 }
2755
2756 static void skl_compute_transition_wm(struct drm_crtc *crtc,
2757                                       struct skl_pipe_wm_parameters *params,
2758                                       struct skl_wm_level *trans_wm /* out */)
2759 {
2760         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2761         int i;
2762
2763         if (!params->active)
2764                 return;
2765
2766         /* Until we know more, just disable transition WMs */
2767         for (i = 0; i < intel_num_planes(intel_crtc); i++)
2768                 trans_wm->plane_en[i] = false;
2769         trans_wm->cursor_en = false;
2770 }
2771
2772 static void skl_compute_pipe_wm(struct drm_crtc *crtc,
2773                                 struct skl_ddb_allocation *ddb,
2774                                 struct skl_pipe_wm_parameters *params,
2775                                 struct skl_pipe_wm *pipe_wm)
2776 {
2777         struct drm_device *dev = crtc->dev;
2778         const struct drm_i915_private *dev_priv = dev->dev_private;
2779         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2780         int level, max_level = ilk_wm_max_level(dev);
2781
2782         for (level = 0; level <= max_level; level++) {
2783                 skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe,
2784                                      level, intel_num_planes(intel_crtc),
2785                                      &pipe_wm->wm[level]);
2786         }
2787         pipe_wm->linetime = skl_compute_linetime_wm(crtc, params);
2788
2789         skl_compute_transition_wm(crtc, params, &pipe_wm->trans_wm);
2790 }
2791
2792 static void skl_compute_wm_results(struct drm_device *dev,
2793                                    struct skl_pipe_wm_parameters *p,
2794                                    struct skl_pipe_wm *p_wm,
2795                                    struct skl_wm_values *r,
2796                                    struct intel_crtc *intel_crtc)
2797 {
2798         int level, max_level = ilk_wm_max_level(dev);
2799         enum i915_pipe pipe = intel_crtc->pipe;
2800         uint32_t temp;
2801         int i;
2802
2803         for (level = 0; level <= max_level; level++) {
2804                 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
2805                         temp = 0;
2806
2807                         temp |= p_wm->wm[level].plane_res_l[i] <<
2808                                         PLANE_WM_LINES_SHIFT;
2809                         temp |= p_wm->wm[level].plane_res_b[i];
2810                         if (p_wm->wm[level].plane_en[i])
2811                                 temp |= PLANE_WM_EN;
2812
2813                         r->plane[pipe][i][level] = temp;
2814                 }
2815
2816                 temp = 0;
2817
2818                 temp |= p_wm->wm[level].cursor_res_l << PLANE_WM_LINES_SHIFT;
2819                 temp |= p_wm->wm[level].cursor_res_b;
2820
2821                 if (p_wm->wm[level].cursor_en)
2822                         temp |= PLANE_WM_EN;
2823
2824                 r->cursor[pipe][level] = temp;
2825
2826         }
2827
2828         /* transition WMs */
2829         for (i = 0; i < intel_num_planes(intel_crtc); i++) {
2830                 temp = 0;
2831                 temp |= p_wm->trans_wm.plane_res_l[i] << PLANE_WM_LINES_SHIFT;
2832                 temp |= p_wm->trans_wm.plane_res_b[i];
2833                 if (p_wm->trans_wm.plane_en[i])
2834                         temp |= PLANE_WM_EN;
2835
2836                 r->plane_trans[pipe][i] = temp;
2837         }
2838
2839         temp = 0;
2840         temp |= p_wm->trans_wm.cursor_res_l << PLANE_WM_LINES_SHIFT;
2841         temp |= p_wm->trans_wm.cursor_res_b;
2842         if (p_wm->trans_wm.cursor_en)
2843                 temp |= PLANE_WM_EN;
2844
2845         r->cursor_trans[pipe] = temp;
2846
2847         r->wm_linetime[pipe] = p_wm->linetime;
2848 }
2849
2850 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, uint32_t reg,
2851                                 const struct skl_ddb_entry *entry)
2852 {
2853         if (entry->end)
2854                 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
2855         else
2856                 I915_WRITE(reg, 0);
2857 }
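
/*
 * The buffer config registers written above pack a DDB entry as
 * ((end - 1) << 16 | start); an empty entry (end == 0) is simply written as
 * zero, i.e. the plane gets no DDB space at all.
 */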
2858
2859 static void skl_write_wm_values(struct drm_i915_private *dev_priv,
2860                                 const struct skl_wm_values *new)
2861 {
2862         struct drm_device *dev = dev_priv->dev;
2863         struct intel_crtc *crtc;
2864
2865         list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
2866                 int i, level, max_level = ilk_wm_max_level(dev);
2867                 enum i915_pipe pipe = crtc->pipe;
2868
2869                 if (!new->dirty[pipe])
2870                         continue;
2871
2872                 I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]);
2873
2874                 for (level = 0; level <= max_level; level++) {
2875                         for (i = 0; i < intel_num_planes(crtc); i++)
2876                                 I915_WRITE(PLANE_WM(pipe, i, level),
2877                                            new->plane[pipe][i][level]);
2878                         I915_WRITE(CUR_WM(pipe, level),
2879                                    new->cursor[pipe][level]);
2880                 }
2881                 for (i = 0; i < intel_num_planes(crtc); i++)
2882                         I915_WRITE(PLANE_WM_TRANS(pipe, i),
2883                                    new->plane_trans[pipe][i]);
2884                 I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]);
2885
2886                 for (i = 0; i < intel_num_planes(crtc); i++)
2887                         skl_ddb_entry_write(dev_priv,
2888                                             PLANE_BUF_CFG(pipe, i),
2889                                             &new->ddb.plane[pipe][i]);
2890
2891                 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
2892                                     &new->ddb.cursor[pipe]);
2893         }
2894 }
2895
2896 /*
2897  * When setting up a new DDB allocation arrangement, we need to correctly
2898  * sequence the times at which the new allocations for the pipes are taken into
2899  * account or we'll have pipes fetching from space previously allocated to
2900  * another pipe.
2901  *
2902  * Roughly the sequence looks like:
2903  *  1. re-allocate the pipe(s) whose allocation is being reduced and does not
2904  *     overlap with a previously lit-up pipe (another way to put it is:
2905  *     pipes with their new allocation strictly contained within their old ones).
2906  *  2. re-allocate the other pipes that get their allocation reduced
2907  *  3. allocate the pipes having their allocation increased
2908  *
2909  * Steps 1. and 2. are here to take care of the following case:
2910  * - Initially DDB looks like this:
2911  *     |   B    |   C    |
2912  * - enable pipe A.
2913  * - pipe B has a reduced DDB allocation that overlaps with the old pipe C
2914  *   allocation
2915  *     |  A  |  B  |  C  |
2916  *
2917  * We need to sequence the re-allocation: C, B, A (and not B, C, A).
2918  */
2919
2920 static void
2921 skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum i915_pipe pipe, int pass)
2922 {
2923         struct drm_device *dev = dev_priv->dev;
2924         int plane;
2925
2926         DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass);
2927
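        /*
         * Re-writing the plane surface base registers with their current
         * values should (as we understand it) re-arm the double-buffered
         * plane registers, so the new DDB/WM programming for this pipe is
         * latched at the following vblank.
         */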
2928         for_each_plane(pipe, plane) {
2929                 I915_WRITE(PLANE_SURF(pipe, plane),
2930                            I915_READ(PLANE_SURF(pipe, plane)));
2931         }
2932         I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe)));
2933 }
2934
2935 static bool
2936 skl_ddb_allocation_included(const struct skl_ddb_allocation *old,
2937                             const struct skl_ddb_allocation *new,
2938                             enum i915_pipe pipe)
2939 {
2940         uint16_t old_size, new_size;
2941
2942         old_size = skl_ddb_entry_size(&old->pipe[pipe]);
2943         new_size = skl_ddb_entry_size(&new->pipe[pipe]);
2944
2945         return old_size != new_size &&
2946                new->pipe[pipe].start >= old->pipe[pipe].start &&
2947                new->pipe[pipe].end <= old->pipe[pipe].end;
2948 }
2949
2950 static void skl_flush_wm_values(struct drm_i915_private *dev_priv,
2951                                 struct skl_wm_values *new_values)
2952 {
2953         struct drm_device *dev = dev_priv->dev;
2954         struct skl_ddb_allocation *cur_ddb, *new_ddb;
2955         bool reallocated[I915_MAX_PIPES] = {false, false, false};
2956         struct intel_crtc *crtc;
2957         enum i915_pipe pipe;
2958
2959         new_ddb = &new_values->ddb;
2960         cur_ddb = &dev_priv->wm.skl_hw.ddb;
2961
2962         /*
2963          * First pass: flush the pipes whose new allocation is contained within
2964          * their old space.
2965          *
2966          * We'll wait for the vblank on those pipes to ensure we can safely
2967          * re-allocate the freed space without this pipe fetching from it.
2968          */
2969         for_each_intel_crtc(dev, crtc) {
2970                 if (!crtc->active)
2971                         continue;
2972
2973                 pipe = crtc->pipe;
2974
2975                 if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe))
2976                         continue;
2977
2978                 skl_wm_flush_pipe(dev_priv, pipe, 1);
2979                 intel_wait_for_vblank(dev, pipe);
2980
2981                 reallocated[pipe] = true;
2982         }
2983
2984
2985         /*
2986          * Second pass: flush the pipes that are having their allocation
2987          * reduced, but overlapping with a previous allocation.
2988          *
2989          * Here as well we need to wait for the vblank to make sure the freed
2990          * space is not used anymore.
2991          */
2992         for_each_intel_crtc(dev, crtc) {
2993                 if (!crtc->active)
2994                         continue;
2995
2996                 pipe = crtc->pipe;
2997
2998                 if (reallocated[pipe])
2999                         continue;
3000
3001                 if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) <
3002                     skl_ddb_entry_size(&cur_ddb->pipe[pipe])) {
3003                         skl_wm_flush_pipe(dev_priv, pipe, 2);
3004                         intel_wait_for_vblank(dev, pipe);
3005                         reallocated[pipe] = true;
3006                 }
3007         }
3008
3009         /*
3010          * Third pass: flush the pipes that got more space allocated.
3011          *
3012          * We don't need to actively wait for the update here, next vblank
3013          * will just get more DDB space with the correct WM values.
3014          */
3015         for_each_intel_crtc(dev, crtc) {
3016                 if (!crtc->active)
3017                         continue;
3018
3019                 pipe = crtc->pipe;
3020
3021                 /*
3022                  * At this point, only the pipes that got more space than
3023                  * before are left to re-allocate.
3024                  */
3025                 if (reallocated[pipe])
3026                         continue;
3027
3028                 skl_wm_flush_pipe(dev_priv, pipe, 3);
3029         }
3030 }
3031
3032 static bool skl_update_pipe_wm(struct drm_crtc *crtc,
3033                                struct skl_pipe_wm_parameters *params,
3034                                struct intel_wm_config *config,
3035                                struct skl_ddb_allocation *ddb, /* out */
3036                                struct skl_pipe_wm *pipe_wm /* out */)
3037 {
3038         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3039
3040         skl_compute_wm_pipe_parameters(crtc, params);
3041         skl_allocate_pipe_ddb(crtc, config, params, ddb);
3042         skl_compute_pipe_wm(crtc, ddb, params, pipe_wm);
3043
3044         if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm)))
3045                 return false;
3046
3047         intel_crtc->wm.skl_active = *pipe_wm;
3048         return true;
3049 }
3050
3051 static void skl_update_other_pipe_wm(struct drm_device *dev,
3052                                      struct drm_crtc *crtc,
3053                                      struct intel_wm_config *config,
3054                                      struct skl_wm_values *r)
3055 {
3056         struct intel_crtc *intel_crtc;
3057         struct intel_crtc *this_crtc = to_intel_crtc(crtc);
3058
3059         /*
3060          * If the WM update hasn't changed the allocation for this_crtc (the
3061          * crtc we are currently computing the new WM values for), other
3062          * enabled crtcs will keep the same allocation and we don't need to
3063          * recompute anything for them.
3064          */
3065         if (!skl_ddb_allocation_changed(&r->ddb, this_crtc))
3066                 return;
3067
3068         /*
3069          * Otherwise, because of this_crtc being freshly enabled/disabled, the
3070          * other active pipes need new DDB allocation and WM values.
3071          */
3072         list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
3073                                 base.head) {
3074                 struct skl_pipe_wm_parameters params = {};
3075                 struct skl_pipe_wm pipe_wm = {};
3076                 bool wm_changed;
3077
3078                 if (this_crtc->pipe == intel_crtc->pipe)
3079                         continue;
3080
3081                 if (!intel_crtc->active)
3082                         continue;
3083
3084                 wm_changed = skl_update_pipe_wm(&intel_crtc->base,
3085                                                 &params, config,
3086                                                 &r->ddb, &pipe_wm);
3087
3088                 /*
3089                  * If we end up re-computing the other pipe WM values, it's
3090                  * because it was really needed, so we expect the WM values to
3091                  * be different.
3092                  */
3093                 WARN_ON(!wm_changed);
3094
3095                 skl_compute_wm_results(dev, &params, &pipe_wm, r, intel_crtc);
3096                 r->dirty[intel_crtc->pipe] = true;
3097         }
3098 }
3099
3100 static void skl_update_wm(struct drm_crtc *crtc)
3101 {
3102         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3103         struct drm_device *dev = crtc->dev;
3104         struct drm_i915_private *dev_priv = dev->dev_private;
3105         struct skl_pipe_wm_parameters params = {};
3106         struct skl_wm_values *results = &dev_priv->wm.skl_results;
3107         struct skl_pipe_wm pipe_wm = {};
3108         struct intel_wm_config config = {};
3109
3110         memset(results, 0, sizeof(*results));
3111
3112         skl_compute_wm_global_parameters(dev, &config);
3113
3114         if (!skl_update_pipe_wm(crtc, &params, &config,
3115                                 &results->ddb, &pipe_wm))
3116                 return;
3117
3118         skl_compute_wm_results(dev, &params, &pipe_wm, results, intel_crtc);
3119         results->dirty[intel_crtc->pipe] = true;
3120
3121         skl_update_other_pipe_wm(dev, crtc, &config, results);
3122         skl_write_wm_values(dev_priv, results);
3123         skl_flush_wm_values(dev_priv, results);
3124
3125         /* store the new configuration */
3126         dev_priv->wm.skl_hw = *results;
3127 }
3128
3129 static void
3130 skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
3131                      uint32_t sprite_width, uint32_t sprite_height,
3132                      int pixel_size, bool enabled, bool scaled)
3133 {
3134         struct intel_plane *intel_plane = to_intel_plane(plane);
3135
3136         intel_plane->wm.enabled = enabled;
3137         intel_plane->wm.scaled = scaled;
3138         intel_plane->wm.horiz_pixels = sprite_width;
3139         intel_plane->wm.vert_pixels = sprite_height;
3140         intel_plane->wm.bytes_per_pixel = pixel_size;
3141
3142         skl_update_wm(crtc);
3143 }
3144
3145 static void ilk_update_wm(struct drm_crtc *crtc)
3146 {
3147         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3148         struct drm_device *dev = crtc->dev;
3149         struct drm_i915_private *dev_priv = dev->dev_private;
3150         struct ilk_wm_maximums max;
3151         struct ilk_pipe_wm_parameters params = {};
3152         struct ilk_wm_values results = {};
3153         enum intel_ddb_partitioning partitioning;
3154         struct intel_pipe_wm pipe_wm = {};
3155         struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
3156         struct intel_wm_config config = {};
3157
3158         ilk_compute_wm_parameters(crtc, &params);
3159
3160         intel_compute_pipe_wm(crtc, &params, &pipe_wm);
3161
3162         if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm)))
3163                 return;
3164
3165         intel_crtc->wm.active = pipe_wm;
3166
3167         ilk_compute_wm_config(dev, &config);
3168
3169         ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
3170         ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
3171
3172         /* 5/6 split only in single pipe config on IVB+ */
3173         if (INTEL_INFO(dev)->gen >= 7 &&
3174             config.num_pipes_active == 1 && config.sprites_enabled) {
3175                 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
3176                 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
3177
3178                 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
3179         } else {
3180                 best_lp_wm = &lp_wm_1_2;
3181         }
3182
3183         partitioning = (best_lp_wm == &lp_wm_1_2) ?
3184                        INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
3185
3186         ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
3187
3188         ilk_write_wm_values(dev_priv, &results);
3189 }
3190
3191 static void
3192 ilk_update_sprite_wm(struct drm_plane *plane,
3193                      struct drm_crtc *crtc,
3194                      uint32_t sprite_width, uint32_t sprite_height,
3195                      int pixel_size, bool enabled, bool scaled)
3196 {
3197         struct drm_device *dev = plane->dev;
3198         struct intel_plane *intel_plane = to_intel_plane(plane);
3199
3200         intel_plane->wm.enabled = enabled;
3201         intel_plane->wm.scaled = scaled;
3202         intel_plane->wm.horiz_pixels = sprite_width;
3203         intel_plane->wm.vert_pixels = sprite_height;
3204         intel_plane->wm.bytes_per_pixel = pixel_size;
3205
3206         /*
3207          * IVB workaround: must disable low power watermarks for at least
3208          * one frame before enabling scaling.  LP watermarks can be re-enabled
3209          * when scaling is disabled.
3210          *
3211          * WaCxSRDisabledForSpriteScaling:ivb
3212          */
3213         if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev))
3214                 intel_wait_for_vblank(dev, intel_plane->pipe);
3215
3216         ilk_update_wm(crtc);
3217 }
3218
3219 static void skl_pipe_wm_active_state(uint32_t val,
3220                                      struct skl_pipe_wm *active,
3221                                      bool is_transwm,
3222                                      bool is_cursor,
3223                                      int i,
3224                                      int level)
3225 {
3226         bool is_enabled = (val & PLANE_WM_EN) != 0;
3227
3228         if (!is_transwm) {
3229                 if (!is_cursor) {
3230                         active->wm[level].plane_en[i] = is_enabled;
3231                         active->wm[level].plane_res_b[i] =
3232                                         val & PLANE_WM_BLOCKS_MASK;
3233                         active->wm[level].plane_res_l[i] =
3234                                         (val >> PLANE_WM_LINES_SHIFT) &
3235                                                 PLANE_WM_LINES_MASK;
3236                 } else {
3237                         active->wm[level].cursor_en = is_enabled;
3238                         active->wm[level].cursor_res_b =
3239                                         val & PLANE_WM_BLOCKS_MASK;
3240                         active->wm[level].cursor_res_l =
3241                                         (val >> PLANE_WM_LINES_SHIFT) &
3242                                                 PLANE_WM_LINES_MASK;
3243                 }
3244         } else {
3245                 if (!is_cursor) {
3246                         active->trans_wm.plane_en[i] = is_enabled;
3247                         active->trans_wm.plane_res_b[i] =
3248                                         val & PLANE_WM_BLOCKS_MASK;
3249                         active->trans_wm.plane_res_l[i] =
3250                                         (val >> PLANE_WM_LINES_SHIFT) &
3251                                                 PLANE_WM_LINES_MASK;
3252                 } else {
3253                         active->trans_wm.cursor_en = is_enabled;
3254                         active->trans_wm.cursor_res_b =
3255                                         val & PLANE_WM_BLOCKS_MASK;
3256                         active->trans_wm.cursor_res_l =
3257                                         (val >> PLANE_WM_LINES_SHIFT) &
3258                                                 PLANE_WM_LINES_MASK;
3259                 }
3260         }
3261 }
3262
3263 static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3264 {
3265         struct drm_device *dev = crtc->dev;
3266         struct drm_i915_private *dev_priv = dev->dev_private;
3267         struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
3268         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3269         struct skl_pipe_wm *active = &intel_crtc->wm.skl_active;
3270         enum i915_pipe pipe = intel_crtc->pipe;
3271         int level, i, max_level;
3272         uint32_t temp;
3273
3274         max_level = ilk_wm_max_level(dev);
3275
3276         hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3277
3278         for (level = 0; level <= max_level; level++) {
3279                 for (i = 0; i < intel_num_planes(intel_crtc); i++)
3280                         hw->plane[pipe][i][level] =
3281                                         I915_READ(PLANE_WM(pipe, i, level));
3282                 hw->cursor[pipe][level] = I915_READ(CUR_WM(pipe, level));
3283         }
3284
3285         for (i = 0; i < intel_num_planes(intel_crtc); i++)
3286                 hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i));
3287         hw->cursor_trans[pipe] = I915_READ(CUR_WM_TRANS(pipe));
3288
3289         if (!intel_crtc_active(crtc))
3290                 return;
3291
3292         hw->dirty[pipe] = true;
3293
3294         active->linetime = hw->wm_linetime[pipe];
3295
3296         for (level = 0; level <= max_level; level++) {
3297                 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3298                         temp = hw->plane[pipe][i][level];
3299                         skl_pipe_wm_active_state(temp, active, false,
3300                                                 false, i, level);
3301                 }
3302                 temp = hw->cursor[pipe][level];
3303                 skl_pipe_wm_active_state(temp, active, false, true, i, level);
3304         }
3305
3306         for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3307                 temp = hw->plane_trans[pipe][i];
3308                 skl_pipe_wm_active_state(temp, active, true, false, i, 0);
3309         }
3310
3311         temp = hw->cursor_trans[pipe];
3312         skl_pipe_wm_active_state(temp, active, true, true, i, 0);
3313 }
3314
3315 void skl_wm_get_hw_state(struct drm_device *dev)
3316 {
3317         struct drm_i915_private *dev_priv = dev->dev_private;
3318         struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
3319         struct drm_crtc *crtc;
3320
3321         skl_ddb_get_hw_state(dev_priv, ddb);
3322         list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
3323                 skl_pipe_wm_get_hw_state(crtc);
3324 }
3325
3326 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3327 {
3328         struct drm_device *dev = crtc->dev;
3329         struct drm_i915_private *dev_priv = dev->dev_private;
3330         struct ilk_wm_values *hw = &dev_priv->wm.hw;
3331         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3332         struct intel_pipe_wm *active = &intel_crtc->wm.active;
3333         enum i915_pipe pipe = intel_crtc->pipe;
3334         static const unsigned int wm0_pipe_reg[] = {
3335                 [PIPE_A] = WM0_PIPEA_ILK,
3336                 [PIPE_B] = WM0_PIPEB_ILK,
3337                 [PIPE_C] = WM0_PIPEC_IVB,
3338         };
3339
3340         hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
3341         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
3342                 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3343
3344         active->pipe_enabled = intel_crtc_active(crtc);
3345
3346         if (active->pipe_enabled) {
3347                 u32 tmp = hw->wm_pipe[pipe];
3348
3349                 /*
3350                  * For active pipes LP0 watermark is marked as
3351                  * enabled, and LP1+ watermarks as disabled since
3352                  * we can't really reverse compute them in case
3353                  * multiple pipes are active.
3354                  */
3355                 active->wm[0].enable = true;
3356                 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
3357                 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
3358                 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
3359                 active->linetime = hw->wm_linetime[pipe];
3360         } else {
3361                 int level, max_level = ilk_wm_max_level(dev);
3362
3363                 /*
3364                  * For inactive pipes, all watermark levels
3365                  * should be marked as enabled but zeroed,
3366                  * which is what we'd compute them to.
3367                  */
3368                 for (level = 0; level <= max_level; level++)
3369                         active->wm[level].enable = true;
3370         }
3371 }
3372
3373 void ilk_wm_get_hw_state(struct drm_device *dev)
3374 {
3375         struct drm_i915_private *dev_priv = dev->dev_private;
3376         struct ilk_wm_values *hw = &dev_priv->wm.hw;
3377         struct drm_crtc *crtc;
3378
3379         for_each_crtc(dev, crtc)
3380                 ilk_pipe_wm_get_hw_state(crtc);
3381
3382         hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
3383         hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
3384         hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
3385
3386         hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
3387         if (INTEL_INFO(dev)->gen >= 7) {
3388                 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
3389                 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
3390         }
3391
3392         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
3393                 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
3394                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
3395         else if (IS_IVYBRIDGE(dev))
3396                 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
3397                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
3398
3399         hw->enable_fbc_wm =
3400                 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
3401 }
3402
3403 /**
3404  * intel_update_watermarks - update FIFO watermark values based on current modes
3405  *
3406  * Calculate watermark values for the various WM regs based on current mode
3407  * and plane configuration.
3408  *
3409  * There are several cases to deal with here:
3410  *   - normal (i.e. non-self-refresh)
3411  *   - self-refresh (SR) mode
3412  *   - lines are large relative to FIFO size (buffer can hold up to 2)
3413  *   - lines are small relative to FIFO size (buffer can hold more than 2
3414  *     lines), so need to account for TLB latency
3415  *
3416  *   The normal calculation is:
3417  *     watermark = dotclock * bytes per pixel * latency
3418  *   where latency is platform & configuration dependent (we assume pessimal
3419  *   values here).
3420  *
3421  *   The SR calculation is:
3422  *     watermark = (trunc(latency/line time)+1) * surface width *
3423  *       bytes per pixel
3424  *   where
3425  *     line time = htotal / dotclock
3426  *     surface width = hdisplay for normal plane and 64 for cursor
3427  *   and latency is assumed to be high, as above.
3428  *
3429  * The final value programmed to the register should always be rounded up,
3430  * and include an extra 2 entries to account for clock crossings.
3431  *
3432  * We don't use the sprite, so we can ignore that.  And on Crestline we have
3433  * to set the non-SR watermarks to 8.
3434  */
3435 void intel_update_watermarks(struct drm_crtc *crtc)
3436 {
3437         struct drm_i915_private *dev_priv = crtc->dev->dev_private;
3438
3439         if (dev_priv->display.update_wm)
3440                 dev_priv->display.update_wm(crtc);
3441 }
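
/*
 * Worked example for the formulas documented above (illustrative numbers
 * only, not taken from any platform): a 1920x1080 mode with a 148500 kHz
 * dot clock, 4 bytes per pixel and an assumed 20 us latency gives
 *   normal watermark ~= 148.5 Mpix/s * 4 B/pix * 20 us ~= 11880 bytes,
 * and with htotal = 2200 the line time is 2200 / 148500 kHz ~= 14.8 us, so
 *   SR watermark = (trunc(20 / 14.8) + 1) * 1920 * 4 = 15360 bytes.
 * Both results would then be converted to FIFO entries, rounded up and
 * padded by the two extra entries mentioned above before being programmed.
 */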
3442
3443 void intel_update_sprite_watermarks(struct drm_plane *plane,
3444                                     struct drm_crtc *crtc,
3445                                     uint32_t sprite_width,
3446                                     uint32_t sprite_height,
3447                                     int pixel_size,
3448                                     bool enabled, bool scaled)
3449 {
3450         struct drm_i915_private *dev_priv = plane->dev->dev_private;
3451
3452         if (dev_priv->display.update_sprite_wm)
3453                 dev_priv->display.update_sprite_wm(plane, crtc,
3454                                                    sprite_width, sprite_height,
3455                                                    pixel_size, enabled, scaled);
3456 }
3457
3458 static struct drm_i915_gem_object *
3459 intel_alloc_context_page(struct drm_device *dev)
3460 {
3461         struct drm_i915_gem_object *ctx;
3462         int ret;
3463
3464         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
3465
3466         ctx = i915_gem_alloc_object(dev, 4096);
3467         if (!ctx) {
3468                 DRM_DEBUG("failed to alloc power context, RC6 disabled\n");
3469                 return NULL;
3470         }
3471
3472         ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0);
3473         if (ret) {
3474                 DRM_ERROR("failed to pin power context: %d\n", ret);
3475                 goto err_unref;
3476         }
3477
3478         ret = i915_gem_object_set_to_gtt_domain(ctx, 1);
3479         if (ret) {
3480                 DRM_ERROR("failed to set-domain on power context: %d\n", ret);
3481                 goto err_unpin;
3482         }
3483
3484         return ctx;
3485
3486 err_unpin:
3487         i915_gem_object_ggtt_unpin(ctx);
3488 err_unref:
3489         drm_gem_object_unreference(&ctx->base);
3490         return NULL;
3491 }
3492
3493 /**
3494  * Lock protecting IPS related data structures
3495  */
3496 struct lock mchdev_lock;
3497 LOCK_SYSINIT(mchdev, &mchdev_lock, "mchdev", LK_CANRECURSE);
3498
3499 /* Global for IPS driver to get at the current i915 device. Protected by
3500  * mchdev_lock. */
3501 static struct drm_i915_private *i915_mch_dev;
3502
3503 bool ironlake_set_drps(struct drm_device *dev, u8 val)
3504 {
3505         struct drm_i915_private *dev_priv = dev->dev_private;
3506         u16 rgvswctl;
3507
3508         assert_spin_locked(&mchdev_lock);
3509
3510         rgvswctl = I915_READ16(MEMSWCTL);
3511         if (rgvswctl & MEMCTL_CMD_STS) {
3512                 DRM_DEBUG("gpu busy, RCS change rejected\n");
3513                 return false; /* still busy with another command */
3514         }
3515
3516         rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
3517                 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
3518         I915_WRITE16(MEMSWCTL, rgvswctl);
3519         POSTING_READ16(MEMSWCTL);
3520
3521         rgvswctl |= MEMCTL_CMD_STS;
3522         I915_WRITE16(MEMSWCTL, rgvswctl);
3523
3524         return true;
3525 }
3526
3527 static void ironlake_enable_drps(struct drm_device *dev)
3528 {
3529         struct drm_i915_private *dev_priv = dev->dev_private;
3530         u32 rgvmodectl = I915_READ(MEMMODECTL);
3531         u8 fmax, fmin, fstart, vstart;
3532
3533         lockmgr(&mchdev_lock, LK_EXCLUSIVE);
3534
3535         /* Enable temp reporting */
3536         I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
3537         I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
3538
3539         /* 100ms RC evaluation intervals */
3540         I915_WRITE(RCUPEI, 100000);
3541         I915_WRITE(RCDNEI, 100000);
3542
3543         /* Set max/min thresholds to 90ms and 80ms respectively */
3544         I915_WRITE(RCBMAXAVG, 90000);
3545         I915_WRITE(RCBMINAVG, 80000);
3546
3547         I915_WRITE(MEMIHYST, 1);
3548
3549         /* Set up min, max, and cur for interrupt handling */
3550         fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
3551         fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
3552         fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
3553                 MEMMODE_FSTART_SHIFT;
3554
3555         vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >>
3556                 PXVFREQ_PX_SHIFT;
3557
3558         dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
3559         dev_priv->ips.fstart = fstart;
3560
3561         dev_priv->ips.max_delay = fstart;
3562         dev_priv->ips.min_delay = fmin;
3563         dev_priv->ips.cur_delay = fstart;
3564
3565         DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
3566                          fmax, fmin, fstart);
3567
3568         I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
3569
3570         /*
3571          * Interrupts will be enabled in ironlake_irq_postinstall
3572          */
3573
3574         I915_WRITE(VIDSTART, vstart);
3575         POSTING_READ(VIDSTART);
3576
3577         rgvmodectl |= MEMMODE_SWMODE_EN;
3578         I915_WRITE(MEMMODECTL, rgvmodectl);
3579
3580         if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
3581                 DRM_ERROR("stuck trying to change perf mode\n");
3582         mdelay(1);
3583
3584         ironlake_set_drps(dev, fstart);
3585
3586         dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) +
3587                 I915_READ(0x112e0);
3588         dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
3589         dev_priv->ips.last_count2 = I915_READ(0x112f4);
3590         dev_priv->ips.last_time2 = ktime_get_raw_ns();
3591
3592         lockmgr(&mchdev_lock, LK_RELEASE);
3593 }
3594
3595 static void ironlake_disable_drps(struct drm_device *dev)
3596 {
3597         struct drm_i915_private *dev_priv = dev->dev_private;
3598         u16 rgvswctl;
3599
3600         lockmgr(&mchdev_lock, LK_EXCLUSIVE);
3601
3602         rgvswctl = I915_READ16(MEMSWCTL);
3603
3604         /* Ack interrupts, disable EFC interrupt */
3605         I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
3606         I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
3607         I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
3608         I915_WRITE(DEIIR, DE_PCU_EVENT);
3609         I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
3610
3611         /* Go back to the starting frequency */
3612         ironlake_set_drps(dev, dev_priv->ips.fstart);
3613         mdelay(1);
3614         rgvswctl |= MEMCTL_CMD_STS;
3615         I915_WRITE(MEMSWCTL, rgvswctl);
3616         mdelay(1);
3617
3618         lockmgr(&mchdev_lock, LK_RELEASE);
3619 }
3620
3621 /* There's a funny hw issue where the hw returns all 0 when reading from
3622  * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
3623  * ourselves, instead of doing an rmw cycle (which might result in us clearing
3624  * all limits and the gpu getting stuck at whatever frequency it currently runs at).
3625  */
3626 static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 val)
3627 {
3628         u32 limits;
3629
3630         /* Only set the down limit when we've reached the lowest level to avoid
3631          * getting more interrupts, otherwise leave this clear. This prevents a
3632          * race in the hw when coming out of rc6: There's a tiny window where
3633          * the hw runs at the minimal clock before selecting the desired
3634          * frequency, if the down threshold expires in that window we will not
3635          * receive a down interrupt. */
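        /*
         * Layout assumed here, mirroring the shifts below: max frequency
         * softlimit in bits 31:24 and, only once we have reached the floor,
         * min frequency softlimit in bits 23:16.
         */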
3636         limits = dev_priv->rps.max_freq_softlimit << 24;
3637         if (val <= dev_priv->rps.min_freq_softlimit)
3638                 limits |= dev_priv->rps.min_freq_softlimit << 16;
3639
3640         return limits;
3641 }
3642
3643 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
3644 {
3645         int new_power;
3646
3647         new_power = dev_priv->rps.power;
3648         switch (dev_priv->rps.power) {
3649         case LOW_POWER:
3650                 if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq)
3651                         new_power = BETWEEN;
3652                 break;
3653
3654         case BETWEEN:
3655                 if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq)
3656                         new_power = LOW_POWER;
3657                 else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq)
3658                         new_power = HIGH_POWER;
3659                 break;
3660
3661         case HIGH_POWER:
3662                 if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq)
3663                         new_power = BETWEEN;
3664                 break;
3665         }
3666         /* Max/min bins are special */
3667         if (val == dev_priv->rps.min_freq_softlimit)
3668                 new_power = LOW_POWER;
3669         if (val == dev_priv->rps.max_freq_softlimit)
3670                 new_power = HIGH_POWER;
3671         if (new_power == dev_priv->rps.power)
3672                 return;
3673
3674         /* Note the units here are not exactly 1us, but 1280ns. */
3675         switch (new_power) {
3676         case LOW_POWER:
3677                 /* Upclock if more than 95% busy over 16ms */
3678                 I915_WRITE(GEN6_RP_UP_EI, 12500);
3679                 I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800);
3680
3681                 /* Downclock if less than 85% busy over 32ms */
3682                 I915_WRITE(GEN6_RP_DOWN_EI, 25000);
3683                 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250);
3684
3685                 I915_WRITE(GEN6_RP_CONTROL,
3686                            GEN6_RP_MEDIA_TURBO |
3687                            GEN6_RP_MEDIA_HW_NORMAL_MODE |
3688                            GEN6_RP_MEDIA_IS_GFX |
3689                            GEN6_RP_ENABLE |
3690                            GEN6_RP_UP_BUSY_AVG |
3691                            GEN6_RP_DOWN_IDLE_AVG);
3692                 break;
3693
3694         case BETWEEN:
3695                 /* Upclock if more than 90% busy over 13ms */
3696                 I915_WRITE(GEN6_RP_UP_EI, 10250);
3697                 I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225);
3698
3699                 /* Downclock if less than 75% busy over 32ms */
3700                 I915_WRITE(GEN6_RP_DOWN_EI, 25000);
3701                 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750);
3702
3703                 I915_WRITE(GEN6_RP_CONTROL,
3704                            GEN6_RP_MEDIA_TURBO |
3705                            GEN6_RP_MEDIA_HW_NORMAL_MODE |
3706                            GEN6_RP_MEDIA_IS_GFX |
3707                            GEN6_RP_ENABLE |
3708                            GEN6_RP_UP_BUSY_AVG |
3709                            GEN6_RP_DOWN_IDLE_AVG);
3710                 break;
3711
3712         case HIGH_POWER:
3713                 /* Upclock if more than 85% busy over 10ms */
3714                 I915_WRITE(GEN6_RP_UP_EI, 8000);
3715                 I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800);
3716
3717                 /* Downclock if less than 60% busy over 32ms */
3718                 I915_WRITE(GEN6_RP_DOWN_EI, 25000);
3719                 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000);
3720
3721                 I915_WRITE(GEN6_RP_CONTROL,
3722                            GEN6_RP_MEDIA_TURBO |
3723                            GEN6_RP_MEDIA_HW_NORMAL_MODE |
3724                            GEN6_RP_MEDIA_IS_GFX |
3725                            GEN6_RP_ENABLE |
3726                            GEN6_RP_UP_BUSY_AVG |
3727                            GEN6_RP_DOWN_IDLE_AVG);
3728                 break;
3729         }
3730
3731         dev_priv->rps.power = new_power;
3732         dev_priv->rps.last_adj = 0;
3733 }
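
/*
 * Sanity check on the numbers above (illustrative arithmetic based on the
 * 1280ns unit noted in the function): for LOW_POWER the up evaluation
 * interval is 12500 * 1.28us ~= 16ms and the up threshold is 11800/12500
 * ~= 94%, which matches the "more than 95% busy over 16ms" comment to
 * within rounding.
 */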
3734
3735 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
3736 {
3737         u32 mask = 0;
3738
3739         if (val > dev_priv->rps.min_freq_softlimit)
3740                 mask |= GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
3741         if (val < dev_priv->rps.max_freq_softlimit)
3742                 mask |= GEN6_PM_RP_UP_THRESHOLD;
3743
3744         mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);
3745         mask &= dev_priv->pm_rps_events;
3746
3747         return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
3748 }
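
/*
 * Effect of the mask built above: down-threshold/timeout interrupts are only
 * left enabled while we sit above the soft floor, and up-threshold interrupts
 * only while we sit below the soft ceiling, so we stop being woken up once
 * there is nowhere further to move in that direction.
 */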
3749
3750 /* gen6_set_rps is called to update the frequency request, but should also be
3751  * called when the range (min_delay and max_delay) is modified so that we can
3752  * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
3753 void gen6_set_rps(struct drm_device *dev, u8 val)
3754 {
3755         struct drm_i915_private *dev_priv = dev->dev_private;
3756
3757         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3758         WARN_ON(val > dev_priv->rps.max_freq_softlimit);
3759         WARN_ON(val < dev_priv->rps.min_freq_softlimit);
3760
3761         /* min/max delay may still have been modified so be sure to
3762          * write the limits value.
3763          */
3764         if (val != dev_priv->rps.cur_freq) {
3765                 gen6_set_rps_thresholds(dev_priv, val);
3766
3767                 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
3768                         I915_WRITE(GEN6_RPNSWREQ,
3769                                    HSW_FREQUENCY(val));
3770                 else
3771                         I915_WRITE(GEN6_RPNSWREQ,
3772                                    GEN6_FREQUENCY(val) |
3773                                    GEN6_OFFSET(0) |
3774                                    GEN6_AGGRESSIVE_TURBO);
3775         }
3776
3777         /* Make sure we continue to get interrupts
3778          * until we hit the minimum or maximum frequencies.
3779          */
3780         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, gen6_rps_limits(dev_priv, val));
3781         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
3782
3783         POSTING_READ(GEN6_RPNSWREQ);
3784
3785         dev_priv->rps.cur_freq = val;
3786         trace_intel_gpu_freq_change(val * 50);
3787 }
3788
3789 /* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down
3790  *
3791  * If Gfx is idle, then:
3792  * 1. Mask Turbo interrupts
3793  * 2. Bring up the Gfx clock
3794  * 3. Change the freq to Rpn and wait until the P-Unit updates the freq
3795  * 4. Clear the Force GFX CLK ON bit so that the Gfx clock can go down again
3796  * 5. Unmask Turbo interrupts
3797  */
3798 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
3799 {
3800         int revision;
3801
3802         struct drm_device *dev = dev_priv->dev;
3803
3804         /* CHV and latest VLV don't need to force the gfx clock */
3805         revision = pci_read_config(dev->dev, PCIR_REVID, 1);
3806         if (IS_CHERRYVIEW(dev) || revision >= 0xd) {
3807                 valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
3808                 return;
3809         }
3810
3811         /*
3812          * When we are idle, drop to the minimum voltage state.
3813          */
3814
3815         if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit)
3816                 return;
3817
3818         /* Mask turbo interrupts so that they will not fire in between */
3819         I915_WRITE(GEN6_PMINTRMSK,
3820                    gen6_sanitize_rps_pm_mask(dev_priv, ~0));
3821
3822         vlv_force_gfx_clock(dev_priv, true);
3823
3824         dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit;
3825
3826         vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ,
3827                                         dev_priv->rps.min_freq_softlimit);
3828
3829         if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
3830                                 & GENFREQSTATUS) == 0, 100))
3831                 DRM_ERROR("timed out waiting for Punit\n");
3832
3833         vlv_force_gfx_clock(dev_priv, false);
3834
3835         I915_WRITE(GEN6_PMINTRMSK,
3836                    gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
3837 }
3838
3839 void gen6_rps_idle(struct drm_i915_private *dev_priv)
3840 {
3841         struct drm_device *dev = dev_priv->dev;
3842
3843         mutex_lock(&dev_priv->rps.hw_lock);
3844         if (dev_priv->rps.enabled) {
3845                 if (IS_VALLEYVIEW(dev))
3846                         vlv_set_rps_idle(dev_priv);
3847                 else
3848                         gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
3849                 dev_priv->rps.last_adj = 0;
3850         }
3851         mutex_unlock(&dev_priv->rps.hw_lock);
3852 }
3853
3854 void gen6_rps_boost(struct drm_i915_private *dev_priv)
3855 {
3856         struct drm_device *dev = dev_priv->dev;
3857
3858         mutex_lock(&dev_priv->rps.hw_lock);
3859         if (dev_priv->rps.enabled) {
3860                 if (IS_VALLEYVIEW(dev))
3861                         valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
3862                 else
3863                         gen6_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
3864                 dev_priv->rps.last_adj = 0;
3865         }
3866         mutex_unlock(&dev_priv->rps.hw_lock);
3867 }
3868
3869 void valleyview_set_rps(struct drm_device *dev, u8 val)
3870 {
3871         struct drm_i915_private *dev_priv = dev->dev_private;
3872
3873         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3874         WARN_ON(val > dev_priv->rps.max_freq_softlimit);
3875         WARN_ON(val < dev_priv->rps.min_freq_softlimit);
3876
3877         if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
3878                       "Odd GPU freq value\n"))
3879                 val &= ~1;
3880
3881         if (val != dev_priv->rps.cur_freq)
3882                 vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
3883
3884         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
3885
3886         dev_priv->rps.cur_freq = val;
3887         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
3888 }
3889
3890 static void gen9_disable_rps(struct drm_device *dev)
3891 {
3892         struct drm_i915_private *dev_priv = dev->dev_private;
3893
3894         I915_WRITE(GEN6_RC_CONTROL, 0);
3895         I915_WRITE(GEN9_PG_ENABLE, 0);
3896 }
3897
3898 static void gen6_disable_rps(struct drm_device *dev)
3899 {
3900         struct drm_i915_private *dev_priv = dev->dev_private;
3901
3902         I915_WRITE(GEN6_RC_CONTROL, 0);
3903         I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
3904 }
3905
3906 static void cherryview_disable_rps(struct drm_device *dev)
3907 {
3908         struct drm_i915_private *dev_priv = dev->dev_private;
3909
3910         I915_WRITE(GEN6_RC_CONTROL, 0);
3911 }
3912
3913 static void valleyview_disable_rps(struct drm_device *dev)
3914 {
3915         struct drm_i915_private *dev_priv = dev->dev_private;
3916
3917         /* We're grabbing forcewake before disabling RC6; this is what
3918          * the BIOS expects when going into suspend. */
3919         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
3920
3921         I915_WRITE(GEN6_RC_CONTROL, 0);
3922
3923         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
3924 }
3925
3926 static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
3927 {
3928         if (IS_VALLEYVIEW(dev)) {
3929                 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))
3930                         mode = GEN6_RC_CTL_RC6_ENABLE;
3931                 else
3932                         mode = 0;
3933         }
3934         if (HAS_RC6p(dev))
3935                 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n",
3936                               (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
3937                               (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
3938                               (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
3939
3940         else
3941                 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n",
3942                               (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off");
3943 }
3944
3945 static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
3946 {
3947         /* No RC6 before Ironlake */
3948         if (INTEL_INFO(dev)->gen < 5)
3949                 return 0;
3950
3951         /* RC6 is only on Ironlake mobile, not on desktop */
3952         if (INTEL_INFO(dev)->gen == 5 && !IS_IRONLAKE_M(dev))
3953                 return 0;
3954
3955         /* Respect the kernel parameter if it is set */
3956         if (enable_rc6 >= 0) {
3957                 int mask;
3958
3959                 if (HAS_RC6p(dev))
3960                         mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE |
3961                                INTEL_RC6pp_ENABLE;
3962                 else
3963                         mask = INTEL_RC6_ENABLE;
3964
3965                 if ((enable_rc6 & mask) != enable_rc6)
3966                         DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n",
3967                                       enable_rc6 & mask, enable_rc6, mask);
3968
3969                 return enable_rc6 & mask;
3970         }
3971
3972         /* Disable RC6 on Ironlake */
3973         if (INTEL_INFO(dev)->gen == 5)
3974                 return 0;
3975
3976         if (IS_IVYBRIDGE(dev))
3977                 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
3978
3979         return INTEL_RC6_ENABLE;
3980 }
3981
3982 int intel_enable_rc6(const struct drm_device *dev)
3983 {
3984         return i915.enable_rc6;
3985 }
3986
3987 static void gen6_init_rps_frequencies(struct drm_device *dev)
3988 {
3989         struct drm_i915_private *dev_priv = dev->dev_private;
3990         uint32_t rp_state_cap;
3991         u32 ddcc_status = 0;
3992         int ret;
3993
3994         rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
3995         /* All of these values are in units of 50MHz */
3996         dev_priv->rps.cur_freq          = 0;
3997         /* static values from HW: RP0 > RP1 > RPn (min_freq) */
3998         dev_priv->rps.rp0_freq          = (rp_state_cap >>  0) & 0xff;
3999         dev_priv->rps.rp1_freq          = (rp_state_cap >>  8) & 0xff;
4000         dev_priv->rps.min_freq          = (rp_state_cap >> 16) & 0xff;
4001         /* hw_max = RP0 until we check for overclocking */
4002         dev_priv->rps.max_freq          = dev_priv->rps.rp0_freq;
4003
4004         dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
4005         if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
4006                 ret = sandybridge_pcode_read(dev_priv,
4007                                         HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
4008                                         &ddcc_status);
4009                 if (ret == 0)
4010                         dev_priv->rps.efficient_freq =
4011                                 clamp_t(u8,
4012                                         ((ddcc_status >> 8) & 0xff),
4013                                         dev_priv->rps.min_freq,
4014                                         dev_priv->rps.max_freq);
4015         }
4016
4017         /* Preserve min/max settings in case of re-init */
4018         if (dev_priv->rps.max_freq_softlimit == 0)
4019                 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
4020
4021         if (dev_priv->rps.min_freq_softlimit == 0) {
4022                 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4023                         dev_priv->rps.min_freq_softlimit =
4024                                 /* max(RPe, 450 MHz) */
4025                                 max(dev_priv->rps.efficient_freq, (u8) 9);
4026                 else
4027                         dev_priv->rps.min_freq_softlimit =
4028                                 dev_priv->rps.min_freq;
4029         }
4030 }
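
/*
 * Illustrative decode of the RP_STATE_CAP fields read above (made-up value,
 * not from any real part): rp_state_cap = 0x070c16 would yield RP0 = 0x16
 * (22 * 50 MHz = 1100 MHz), RP1 = 0x0c (600 MHz) and RPn = 0x07 (350 MHz),
 * with RP1 used as the initial "efficient" frequency unless the pcode read
 * above overrides it.
 */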
4031
4032 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
4033 static void gen9_enable_rps(struct drm_device *dev)
4034 {
4035         struct drm_i915_private *dev_priv = dev->dev_private;
4036
4037         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4038
4039         gen6_init_rps_frequencies(dev);
4040
4041         I915_WRITE(GEN6_RPNSWREQ, 0xc800000);
4042         I915_WRITE(GEN6_RC_VIDEO_FREQ, 0xc800000);
4043
4044         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
4045         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, 0x12060000);
4046         I915_WRITE(GEN6_RP_UP_THRESHOLD, 0xe808);
4047         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 0x3bd08);
4048         I915_WRITE(GEN6_RP_UP_EI, 0x101d0);
4049         I915_WRITE(GEN6_RP_DOWN_EI, 0x55730);
4050         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
4051         I915_WRITE(GEN6_PMINTRMSK, 0x6);
4052         I915_WRITE(GEN6_RP_CONTROL, GEN6_RP_MEDIA_TURBO |
4053                    GEN6_RP_MEDIA_HW_MODE | GEN6_RP_MEDIA_IS_GFX |
4054                    GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG |
4055                    GEN6_RP_DOWN_IDLE_AVG);
4056
4057         gen6_enable_rps_interrupts(dev);
4058
4059         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4060 }
4061
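/*
 * Gen9 RC6 bring-up: take forcewake, disable all RC states, program the
 * wake rate limit, evaluation interval and idle thresholds, then re-enable
 * RC6 only if the sanitized module option requests it.  Coarse power
 * gating is enabled strictly as a function of RC6 being enabled.
 */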
4062 static void gen9_enable_rc6(struct drm_device *dev)
4063 {
4064         struct drm_i915_private *dev_priv = dev->dev_private;
4065         struct intel_engine_cs *ring;
4066         uint32_t rc6_mask = 0;
4067         int unused;
4068
4069         /* 1a: Software RC state - RC0 */
4070         I915_WRITE(GEN6_RC_STATE, 0);
4071
4072         /* 1b: Get forcewake during program sequence. Although the driver
4073          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
4074         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4075
4076         /* 2a: Disable RC states. */
4077         I915_WRITE(GEN6_RC_CONTROL, 0);
4078
4079         /* 2b: Program RC6 thresholds.*/
4080         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
4081         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4082         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4083         for_each_ring(ring, dev_priv, unused)
4084                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4085         I915_WRITE(GEN6_RC_SLEEP, 0);
4086         I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
4087
4088         /* 2c: Program Coarse Power Gating Policies. */
4089         I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25);
4090         I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25);
4091
4092         /* 3a: Enable RC6 */
4093         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4094                 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4095         DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
4096                         "on" : "off");
4097         I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4098                                    GEN6_RC_CTL_EI_MODE(1) |
4099                                    rc6_mask);
4100
4101         /* 3b: Enable Coarse Power Gating only when RC6 is enabled */
4102         I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 3 : 0);
4103
4104         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4105
4106 }
4107
4108 static void gen8_enable_rps(struct drm_device *dev)
4109 {
4110         struct drm_i915_private *dev_priv = dev->dev_private;
4111         struct intel_engine_cs *ring;
4112         uint32_t rc6_mask = 0;
4113         int unused;
4114
4115         /* 1a: Software RC state - RC0 */
4116         I915_WRITE(GEN6_RC_STATE, 0);
4117
4118         /* 1c & 1d: Get forcewake during program sequence. Although the driver
4119          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
4120         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4121
4122         /* 2a: Disable RC states. */
4123         I915_WRITE(GEN6_RC_CONTROL, 0);
4124
4125         /* Initialize rps frequencies */
4126         gen6_init_rps_frequencies(dev);
4127
4128         /* 2b: Program RC6 thresholds.*/
4129         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
4130         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4131         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4132         for_each_ring(ring, dev_priv, unused)
4133                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4134         I915_WRITE(GEN6_RC_SLEEP, 0);
4135         if (IS_BROADWELL(dev))
4136                 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
4137         else
4138                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
4139
4140         /* 3: Enable RC6 */
4141         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4142                 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4143         intel_print_rc6_info(dev, rc6_mask);
4144         if (IS_BROADWELL(dev))
4145                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4146                                 GEN7_RC_CTL_TO_MODE |
4147                                 rc6_mask);
4148         else
4149                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4150                                 GEN6_RC_CTL_EI_MODE(1) |
4151                                 rc6_mask);
4152
4153         /* 4: Program defaults and thresholds for RPS */
4154         I915_WRITE(GEN6_RPNSWREQ,
4155                    HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4156         I915_WRITE(GEN6_RC_VIDEO_FREQ,
4157                    HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4158         /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
4159         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
4160
4161         /* Docs recommend 900MHz, and 300 MHz respectively */
4162         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
4163                    dev_priv->rps.max_freq_softlimit << 24 |
4164                    dev_priv->rps.min_freq_softlimit << 16);
4165
4166         I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
4167         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
4168         I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
4169         I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
4170
4171         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
4172
4173         /* 5: Enable RPS */
4174         I915_WRITE(GEN6_RP_CONTROL,
4175                    GEN6_RP_MEDIA_TURBO |
4176                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
4177                    GEN6_RP_MEDIA_IS_GFX |
4178                    GEN6_RP_ENABLE |
4179                    GEN6_RP_UP_BUSY_AVG |
4180                    GEN6_RP_DOWN_IDLE_AVG);
4181
4182         /* 6: Ring frequency + overclocking (our driver does this later) */
4183
4184         dev_priv->rps.power = HIGH_POWER; /* force a reset */
4185         gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
4186
4187         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4188 }
4189
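/*
 * Despite the name, this path also covers Ivybridge and Haswell (note the
 * IS_IVYBRIDGE/IS_HASWELL special cases below); Broadwell and Gen9 use the
 * dedicated routines above instead.
 */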
4190 static void gen6_enable_rps(struct drm_device *dev)
4191 {
4192         struct drm_i915_private *dev_priv = dev->dev_private;
4193         struct intel_engine_cs *ring;
4194         u32 rc6vids, pcu_mbox = 0, rc6_mask = 0;
4195         u32 gtfifodbg;
4196         int rc6_mode;
4197         int i, ret;
4198
4199         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4200
4201         /* Here begins a magic sequence of register writes to enable
4202          * auto-downclocking.
4203          *
4204          * Perhaps there might be some value in exposing these to
4205          * userspace...
4206          */
4207         I915_WRITE(GEN6_RC_STATE, 0);
4208
4209         /* Clear the DBG now so we don't confuse earlier errors */
4210         if ((gtfifodbg = I915_READ(GTFIFODBG))) {
4211                 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
4212                 I915_WRITE(GTFIFODBG, gtfifodbg);
4213         }
4214
4215         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4216
4217         /* Initialize rps frequencies */
4218         gen6_init_rps_frequencies(dev);
4219
4220         /* disable the counters and set deterministic thresholds */
4221         I915_WRITE(GEN6_RC_CONTROL, 0);
4222
4223         I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
4224         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
4225         I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
4226         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
4227         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
4228
4229         for_each_ring(ring, dev_priv, i)
4230                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4231
4232         I915_WRITE(GEN6_RC_SLEEP, 0);
4233         I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
4234         if (IS_IVYBRIDGE(dev))
4235                 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
4236         else
4237                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
4238         I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
4239         I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
4240
4241         /* Check if we are enabling RC6 */
4242         rc6_mode = intel_enable_rc6(dev_priv->dev);
4243         if (rc6_mode & INTEL_RC6_ENABLE)
4244                 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
4245
4246         /* We don't use those on Haswell */
4247         if (!IS_HASWELL(dev)) {
4248                 if (rc6_mode & INTEL_RC6p_ENABLE)
4249                         rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
4250
4251                 if (rc6_mode & INTEL_RC6pp_ENABLE)
4252                         rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
4253         }
4254
4255         intel_print_rc6_info(dev, rc6_mask);
4256
4257         I915_WRITE(GEN6_RC_CONTROL,
4258                    rc6_mask |
4259                    GEN6_RC_CTL_EI_MODE(1) |
4260                    GEN6_RC_CTL_HW_ENABLE);
4261
4262         /* Power down if completely idle for over 50ms */
4263         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
4264         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
4265
4266         ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
4267         if (ret)
4268                 DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
4269
4270         ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
4271         if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
4272                 DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
4273                                  (dev_priv->rps.max_freq_softlimit & 0xff) * 50,
4274                                  (pcu_mbox & 0xff) * 50);
4275                 dev_priv->rps.max_freq = pcu_mbox & 0xff;
4276         }
4277
4278         dev_priv->rps.power = HIGH_POWER; /* force a reset */
4279         gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
4280
4281         rc6vids = 0;
4282         ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
4283         if (IS_GEN6(dev) && ret) {
4284                 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
4285         } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
4286                 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
4287                           GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
4288                 rc6vids &= 0xffff00;
4289                 rc6vids |= GEN6_ENCODE_RC6_VID(450);
4290                 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
4291                 if (ret)
4292                         DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
4293         }
4294
4295         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4296 }
4297
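/*
 * Build the ring/IA frequency table: walk every GPU frequency from max
 * down to min and, for each, tell the PCU via the MIN_FREQ_TABLE mailbox
 * which IA and ring frequencies it should pair with that GPU frequency.
 */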
4298 static void __gen6_update_ring_freq(struct drm_device *dev)
4299 {
4300         struct drm_i915_private *dev_priv = dev->dev_private;
4301         int min_freq = 15;
4302         unsigned int gpu_freq;
4303         unsigned int max_ia_freq, min_ring_freq;
4304         int scaling_factor = 180;
4305
4306         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4307
4308 #if 0
4309         policy = cpufreq_cpu_get(0);
4310         if (policy) {
4311                 max_ia_freq = policy->cpuinfo.max_freq;
4312                 cpufreq_cpu_put(policy);
4313         } else {
4314                 /*
4315                  * Default to measured freq if none found, PCU will ensure we
4316                  * don't go over
4317                  */
4318                 max_ia_freq = tsc_khz;
4319         }
4320 #else
4321         max_ia_freq = tsc_frequency / 1000;
4322 #endif
4323
4324         /* Convert from kHz to MHz */
4325         max_ia_freq /= 1000;
4326
4327         min_ring_freq = I915_READ(DCLK) & 0xf;
4328         /* convert DDR frequency from units of 266.6MHz to bandwidth */
4329         min_ring_freq = mult_frac(min_ring_freq, 8, 3);
4330
4331         /*
4332          * For each potential GPU frequency, load a ring frequency we'd like
4333          * to use for memory access.  We do this by specifying the IA frequency
4334          * the PCU should use as a reference to determine the ring frequency.
4335          */
4336         for (gpu_freq = dev_priv->rps.max_freq; gpu_freq >= dev_priv->rps.min_freq;
4337              gpu_freq--) {
4338                 int diff = dev_priv->rps.max_freq - gpu_freq;
4339                 unsigned int ia_freq = 0, ring_freq = 0;
4340
4341                 if (INTEL_INFO(dev)->gen >= 8) {
4342                         /* max(2 * GT, DDR). NB: GT is 50MHz units */
4343                         ring_freq = max(min_ring_freq, gpu_freq);
4344                 } else if (IS_HASWELL(dev)) {
4345                         ring_freq = mult_frac(gpu_freq, 5, 4);
4346                         ring_freq = max(min_ring_freq, ring_freq);
4347                         /* leave ia_freq as the default, chosen by cpufreq */
4348                 } else {
4349                         /* On older processors, there is no separate ring
4350                          * clock domain, so in order to boost the bandwidth
4351                          * of the ring, we need to upclock the CPU (ia_freq).
4352                          *
4353                          * For GPU frequencies less than 750MHz,
4354                          * just use the lowest ring freq.
4355                          */
4356                         if (gpu_freq < min_freq)
4357                                 ia_freq = 800;
4358                         else
4359                                 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
4360                         ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
4361                 }
4362
4363                 sandybridge_pcode_write(dev_priv,
4364                                         GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
4365                                         ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
4366                                         ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
4367                                         gpu_freq);
4368         }
4369 }
4370
4371 void gen6_update_ring_freq(struct drm_device *dev)
4372 {
4373         struct drm_i915_private *dev_priv = dev->dev_private;
4374
4375         if (INTEL_INFO(dev)->gen < 6 || IS_VALLEYVIEW(dev))
4376                 return;
4377
4378         mutex_lock(&dev_priv->rps.hw_lock);
4379         __gen6_update_ring_freq(dev);
4380         mutex_unlock(&dev_priv->rps.hw_lock);
4381 }
4382
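/*
 * On Cherryview the RP0/RP1/RPn operating points come from punit fuses.
 * Production steppings (revision >= 0x20) select the Fmax-at-Vmax fuse
 * field by total EU count; pre-production parts fall back to the GPU
 * status register instead.
 */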
4383 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
4384 {
4385         struct drm_device *dev = dev_priv->dev;
4386         u32 val, rp0;
4387
4388         int revision = pci_read_config(dev->dev, PCIR_REVID, 1);
4389         if (revision >= 0x20) {
4390                 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
4391
4392                 switch (INTEL_INFO(dev)->eu_total) {
4393                 case 8:
4394                         /* (2 * 4) config */
4395                         rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
4396                         break;
4397                 case 12:
4398                         /* (2 * 6) config */
4399                         rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
4400                         break;
4401                 case 16:
4402                         /* (2 * 8) config */
4403                 default:
4404                         /* Setting (2 * 8) Min RP0 for any other combination */
4405                         rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
4406                         break;
4407                 }
4408                 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
4409         } else {
4410                 /* For pre-production hardware */
4411                 val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
4412                 rp0 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) &
4413                        PUNIT_GPU_STATUS_MAX_FREQ_MASK;
4414         }
4415         return rp0;
4416 }
4417
4418 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
4419 {
4420         u32 val, rpe;
4421
4422         val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
4423         rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
4424
4425         return rpe;
4426 }
4427
4428 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
4429 {
4430         struct drm_device *dev = dev_priv->dev;
4431         u32 val, rp1;
4432
4433         int revision = pci_read_config(dev->dev, PCIR_REVID, 1);
4434         if (revision >= 0x20) {
4435                 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
4436                 rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
4437         } else {
4438                 /* For pre-production hardware */
4439                 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
4440                 rp1 = ((val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) &
4441                        PUNIT_GPU_STATUS_MAX_FREQ_MASK);
4442         }
4443         return rp1;
4444 }
4445
4446 static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
4447 {
4448         struct drm_device *dev = dev_priv->dev;
4449         u32 val, rpn;
4450
4451         int revision = pci_read_config(dev->dev, PCIR_REVID, 1);
4452         if (revision >= 0x20) {
4453                 val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
4454                 rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
4455                        FB_GFX_FREQ_FUSE_MASK);
4456         } else { /* For pre-production hardware */
4457                 val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
4458                 rpn = ((val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) &
4459                        PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK);
4460         }
4461
4462         return rpn;
4463 }
4464
4465 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
4466 {
4467         u32 val, rp1;
4468
4469         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
4470
4471         rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
4472
4473         return rp1;
4474 }
4475
4476 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
4477 {
4478         u32 val, rp0;
4479
4480         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
4481
4482         rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
4483         /* Clamp to max */
4484         rp0 = min_t(u32, rp0, 0xea);
4485
4486         return rp0;
4487 }
4488
4489 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
4490 {
4491         u32 val, rpe;
4492
4493         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
4494         rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
4495         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
4496         rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
4497
4498         return rpe;
4499 }
4500
4501 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
4502 {
4503         return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
4504 }
4505
4506 /* Check that the pctx buffer wasn't moved under us. */
4507 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
4508 {
4509         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
4510
4511         /* DragonFly - if EDID fails vlv_pctx can wind up NULL */
4512         if (WARN_ON(!dev_priv->vlv_pctx))
4513                 return;
4514
4515         WARN_ON(pctx_addr != dev_priv->mm.stolen_base +
4516                              dev_priv->vlv_pctx->stolen->start);
4517 }
4518
4519
4520 /* Check that the pcbr address is not empty. */
4521 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
4522 {
4523         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
4524
4525         WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
4526 }
4527
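/*
 * CHV relies on the BIOS to allocate the power context in stolen memory.
 * If PCBR was left unprogrammed, carve the 32k context out of the top of
 * stolen ourselves and point PCBR at it.
 */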
4528 static void cherryview_setup_pctx(struct drm_device *dev)
4529 {
4530         struct drm_i915_private *dev_priv = dev->dev_private;
4531         unsigned long pctx_paddr, paddr;
4532         struct i915_gtt *gtt = &dev_priv->gtt;
4533         u32 pcbr;
4534         int pctx_size = 32*1024;
4535
4536         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
4537
4538         pcbr = I915_READ(VLV_PCBR);
4539         if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
4540                 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
4541                 paddr = (dev_priv->mm.stolen_base +
4542                          (gtt->stolen_size - pctx_size));
4543
4544                 pctx_paddr = (paddr & (~4095));
4545                 I915_WRITE(VLV_PCBR, pctx_paddr);
4546         }
4547
4548         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
4549 }
4550
4551 static void valleyview_setup_pctx(struct drm_device *dev)
4552 {
4553         struct drm_i915_private *dev_priv = dev->dev_private;
4554         struct drm_i915_gem_object *pctx;
4555         unsigned long pctx_paddr;
4556         u32 pcbr;
4557         int pctx_size = 24*1024;
4558
4559         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
4560
4561         pcbr = I915_READ(VLV_PCBR);
4562         if (pcbr) {
4563                 /* BIOS set it up already, grab the pre-alloc'd space */
4564                 int pcbr_offset;
4565
4566                 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
4567                 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev,
4568                                                                       pcbr_offset,
4569                                                                       I915_GTT_OFFSET_NONE,
4570                                                                       pctx_size);
4571                 goto out;
4572         }
4573
4574         DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
4575
4576         /*
4577          * From the Gunit register HAS:
4578          * The Gfx driver is expected to program this register and ensure
4579          * proper allocation within Gfx stolen memory.  For example, this
4580          * register should be programmed such that the PCBR range does not
4581          * overlap with other ranges, such as the frame buffer, protected
4582          * memory, or any other relevant ranges.
4583          */
4584         pctx = i915_gem_object_create_stolen(dev, pctx_size);
4585         if (!pctx) {
4586                 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
4587                 return;
4588         }
4589
4590         pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
4591         I915_WRITE(VLV_PCBR, pctx_paddr);
4592
4593 out:
4594         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
4595         dev_priv->vlv_pctx = pctx;
4596 }
4597
4598 static void valleyview_cleanup_pctx(struct drm_device *dev)
4599 {
4600         struct drm_i915_private *dev_priv = dev->dev_private;
4601
4602         if (WARN_ON(!dev_priv->vlv_pctx))
4603                 return;
4604
4605         drm_gem_object_unreference(&dev_priv->vlv_pctx->base);
4606         dev_priv->vlv_pctx = NULL;
4607 }
4608
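/*
 * One-time VLV powersave init: set up the power context, decode the DDR
 * speed from bits 7:6 of PUNIT_REG_GPU_FREQ_STS and cache the RP0, RPe,
 * RP1 and RPn operating points read from the punit/NC fuses.
 */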
4609 static void valleyview_init_gt_powersave(struct drm_device *dev)
4610 {
4611         struct drm_i915_private *dev_priv = dev->dev_private;
4612         u32 val;
4613
4614         valleyview_setup_pctx(dev);
4615
4616         mutex_lock(&dev_priv->rps.hw_lock);
4617
4618         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
4619         switch ((val >> 6) & 3) {
4620         case 0:
4621         case 1:
4622                 dev_priv->mem_freq = 800;
4623                 break;
4624         case 2:
4625                 dev_priv->mem_freq = 1066;
4626                 break;
4627         case 3:
4628                 dev_priv->mem_freq = 1333;
4629                 break;
4630         }
4631         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
4632
4633         dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
4634         dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
4635         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
4636                          intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
4637                          dev_priv->rps.max_freq);
4638
4639         dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv);
4640         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
4641                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
4642                          dev_priv->rps.efficient_freq);
4643
4644         dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv);
4645         DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
4646                          intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
4647                          dev_priv->rps.rp1_freq);
4648
4649         dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv);
4650         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
4651                          intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
4652                          dev_priv->rps.min_freq);
4653
4654         /* Preserve min/max settings in case of re-init */
4655         if (dev_priv->rps.max_freq_softlimit == 0)
4656                 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
4657
4658         if (dev_priv->rps.min_freq_softlimit == 0)
4659                 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
4660
4661         mutex_unlock(&dev_priv->rps.hw_lock);
4662 }
4663
4664 static void cherryview_init_gt_powersave(struct drm_device *dev)
4665 {
4666         struct drm_i915_private *dev_priv = dev->dev_private;
4667         u32 val;
4668
4669         cherryview_setup_pctx(dev);
4670
4671         mutex_lock(&dev_priv->rps.hw_lock);
4672
4673         mutex_lock(&dev_priv->dpio_lock);
4674         val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
4675         mutex_unlock(&dev_priv->dpio_lock);
4676
4677         switch ((val >> 2) & 0x7) {
4678         case 0:
4679         case 1:
4680                 dev_priv->rps.cz_freq = 200;
4681                 dev_priv->mem_freq = 1600;
4682                 break;
4683         case 2:
4684                 dev_priv->rps.cz_freq = 267;
4685                 dev_priv->mem_freq = 1600;
4686                 break;
4687         case 3:
4688                 dev_priv->rps.cz_freq = 333;
4689                 dev_priv->mem_freq = 2000;
4690                 break;
4691         case 4:
4692                 dev_priv->rps.cz_freq = 320;
4693                 dev_priv->mem_freq = 1600;
4694                 break;
4695         case 5:
4696                 dev_priv->rps.cz_freq = 400;
4697                 dev_priv->mem_freq = 1600;
4698                 break;
4699         }
4700         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
4701
4702         dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv);
4703         dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
4704         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
4705                          intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
4706                          dev_priv->rps.max_freq);
4707
4708         dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv);
4709         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
4710                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
4711                          dev_priv->rps.efficient_freq);
4712
4713         dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv);
4714         DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
4715                          intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
4716                          dev_priv->rps.rp1_freq);
4717
4718         dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv);
4719         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
4720                          intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
4721                          dev_priv->rps.min_freq);
4722
4723         WARN_ONCE((dev_priv->rps.max_freq |
4724                    dev_priv->rps.efficient_freq |
4725                    dev_priv->rps.rp1_freq |
4726                    dev_priv->rps.min_freq) & 1,
4727                   "Odd GPU freq values\n");
4728
4729         /* Preserve min/max settings in case of re-init */
4730         if (dev_priv->rps.max_freq_softlimit == 0)
4731                 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
4732
4733         if (dev_priv->rps.min_freq_softlimit == 0)
4734                 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
4735
4736         mutex_unlock(&dev_priv->rps.hw_lock);
4737 }
4738
4739 static void valleyview_cleanup_gt_powersave(struct drm_device *dev)
4740 {
4741         valleyview_cleanup_pctx(dev);
4742 }
4743
4744 static void cherryview_enable_rps(struct drm_device *dev)
4745 {
4746         struct drm_i915_private *dev_priv = dev->dev_private;
4747         struct intel_engine_cs *ring;
4748         u32 gtfifodbg, val, rc6_mode = 0, pcbr;
4749         int i;
4750
4751         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4752
4753         gtfifodbg = I915_READ(GTFIFODBG);
4754         if (gtfifodbg) {
4755                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
4756                                  gtfifodbg);
4757                 I915_WRITE(GTFIFODBG, gtfifodbg);
4758         }
4759
4760         cherryview_check_pctx(dev_priv);
4761
4762         /* 1a & 1b: Get forcewake during program sequence. Although the driver
4763          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
4764         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4765
4766         /*  Disable RC states. */
4767         I915_WRITE(GEN6_RC_CONTROL, 0);
4768
4769         /* 2a: Program RC6 thresholds.*/
4770         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
4771         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4772         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4773
4774         for_each_ring(ring, dev_priv, i)
4775                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4776         I915_WRITE(GEN6_RC_SLEEP, 0);
4777
4778         /* TO threshold set to 1750 us ( 0x557 * 1.28 us) */
4779         I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
4780
4781         /* allows RC6 residency counter to work */
4782         I915_WRITE(VLV_COUNTER_CONTROL,
4783                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
4784                                       VLV_MEDIA_RC6_COUNT_EN |
4785                                       VLV_RENDER_RC6_COUNT_EN));
4786
4787         /* For now we assume BIOS is allocating and populating the PCBR */
4788         pcbr = I915_READ(VLV_PCBR);
4789
4790         /* 3: Enable RC6 */
4791         if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) &&
4792                                                 (pcbr >> VLV_PCBR_ADDR_SHIFT))
4793                 rc6_mode = GEN7_RC_CTL_TO_MODE;
4794
4795         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
4796
4797         /* 4: Program defaults and thresholds for RPS */
4798         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
4799         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
4800         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
4801         I915_WRITE(GEN6_RP_UP_EI, 66000);
4802         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
4803
4804         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
4805
4806         /* 5: Enable RPS */
4807         I915_WRITE(GEN6_RP_CONTROL,
4808                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
4809                    GEN6_RP_MEDIA_IS_GFX |
4810                    GEN6_RP_ENABLE |
4811                    GEN6_RP_UP_BUSY_AVG |
4812                    GEN6_RP_DOWN_IDLE_AVG);
4813
4814         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
4815
4816         /* RPS code assumes GPLL is used */
4817         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
4818
4819         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? "yes" : "no");
4820         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
4821
4822         dev_priv->rps.cur_freq = (val >> 8) & 0xff;
4823         DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
4824                          intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
4825                          dev_priv->rps.cur_freq);
4826
4827         DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
4828                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
4829                          dev_priv->rps.efficient_freq);
4830
4831         valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
4832
4833         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4834 }
4835
4836 static void valleyview_enable_rps(struct drm_device *dev)
4837 {
4838         struct drm_i915_private *dev_priv = dev->dev_private;
4839         struct intel_engine_cs *ring;
4840         u32 gtfifodbg, val, rc6_mode = 0;
4841         int i;
4842
4843         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4844
4845         valleyview_check_pctx(dev_priv);
4846
4847         if ((gtfifodbg = I915_READ(GTFIFODBG))) {
4848                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
4849                                  gtfifodbg);
4850                 I915_WRITE(GTFIFODBG, gtfifodbg);
4851         }
4852
4853         /* If VLV, Forcewake all wells, else re-direct to regular path */
4854         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4855
4856         /*  Disable RC states. */
4857         I915_WRITE(GEN6_RC_CONTROL, 0);
4858
4859         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
4860         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
4861         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
4862         I915_WRITE(GEN6_RP_UP_EI, 66000);
4863         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
4864
4865         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
4866
4867         I915_WRITE(GEN6_RP_CONTROL,
4868                    GEN6_RP_MEDIA_TURBO |
4869                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
4870                    GEN6_RP_MEDIA_IS_GFX |
4871                    GEN6_RP_ENABLE |
4872                    GEN6_RP_UP_BUSY_AVG |
4873                    GEN6_RP_DOWN_IDLE_CONT);
4874
4875         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
4876         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
4877         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
4878
4879         for_each_ring(ring, dev_priv, i)
4880                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4881
4882         I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
4883
4884         /* allows RC6 residency counter to work */
4885         I915_WRITE(VLV_COUNTER_CONTROL,
4886                    _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
4887                                       VLV_RENDER_RC0_COUNT_EN |
4888                                       VLV_MEDIA_RC6_COUNT_EN |
4889                                       VLV_RENDER_RC6_COUNT_EN));
4890
4891         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4892                 rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
4893
4894         intel_print_rc6_info(dev, rc6_mode);
4895
4896         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
4897
4898         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
4899
4900         /* RPS code assumes GPLL is used */
4901         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
4902
4903         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? "yes" : "no");
4904         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
4905
4906         dev_priv->rps.cur_freq = (val >> 8) & 0xff;
4907         DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
4908                          intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
4909                          dev_priv->rps.cur_freq);
4910
4911         DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
4912                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
4913                          dev_priv->rps.efficient_freq);
4914
4915         valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
4916
4917         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4918 }
4919
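/*
 * Release the pinned render-context and power-context pages backing
 * Ironlake RC6; ironlake_setup_rc6() below re-allocates them on demand.
 */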
4920 void ironlake_teardown_rc6(struct drm_device *dev)
4921 {
4922         struct drm_i915_private *dev_priv = dev->dev_private;
4923
4924         if (dev_priv->ips.renderctx) {
4925                 i915_gem_object_ggtt_unpin(dev_priv->ips.renderctx);
4926                 drm_gem_object_unreference(&dev_priv->ips.renderctx->base);
4927                 dev_priv->ips.renderctx = NULL;
4928         }
4929
4930         if (dev_priv->ips.pwrctx) {
4931                 i915_gem_object_ggtt_unpin(dev_priv->ips.pwrctx);
4932                 drm_gem_object_unreference(&dev_priv->ips.pwrctx->base);
4933                 dev_priv->ips.pwrctx = NULL;
4934         }
4935 }
4936
4937 static void ironlake_disable_rc6(struct drm_device *dev)
4938 {
4939         struct drm_i915_private *dev_priv = dev->dev_private;
4940
4941         if (I915_READ(PWRCTXA)) {
4942                 /* Wake the GPU, prevent RC6, then restore RSTDBYCTL */
4943                 I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) | RCX_SW_EXIT);
4944                 wait_for(((I915_READ(RSTDBYCTL) & RSX_STATUS_MASK) == RSX_STATUS_ON),
4945                          50);
4946
4947                 I915_WRITE(PWRCTXA, 0);
4948                 POSTING_READ(PWRCTXA);
4949
4950                 I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
4951                 POSTING_READ(RSTDBYCTL);
4952         }
4953 }
4954
4955 static int ironlake_setup_rc6(struct drm_device *dev)
4956 {
4957         struct drm_i915_private *dev_priv = dev->dev_private;
4958
4959         if (dev_priv->ips.renderctx == NULL)
4960                 dev_priv->ips.renderctx = intel_alloc_context_page(dev);
4961         if (!dev_priv->ips.renderctx)
4962                 return -ENOMEM;
4963
4964         if (dev_priv->ips.pwrctx == NULL)
4965                 dev_priv->ips.pwrctx = intel_alloc_context_page(dev);
4966         if (!dev_priv->ips.pwrctx) {
4967                 ironlake_teardown_rc6(dev);
4968                 return -ENOMEM;
4969         }
4970
4971         return 0;
4972 }
4973
4974 static void ironlake_enable_rc6(struct drm_device *dev)
4975 {
4976         struct drm_i915_private *dev_priv = dev->dev_private;
4977         struct intel_engine_cs *ring = &dev_priv->ring[RCS];
4978         bool was_interruptible;
4979         int ret;
4980
4981         /* rc6 disabled by default due to repeated reports of hanging during
4982          * boot and resume.
4983          */
4984         if (!intel_enable_rc6(dev))
4985                 return;
4986
4987         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
4988
4989         ret = ironlake_setup_rc6(dev);
4990         if (ret)
4991                 return;
4992
4993         was_interruptible = dev_priv->mm.interruptible;
4994         dev_priv->mm.interruptible = false;
4995
4996         /*
4997          * GPU can automatically power down the render unit if given a page
4998          * to save state.
4999          */
5000         ret = intel_ring_begin(ring, 6);
5001         if (ret) {
5002                 ironlake_teardown_rc6(dev);
5003                 dev_priv->mm.interruptible = was_interruptible;
5004                 return;
5005         }
5006
5007         intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
5008         intel_ring_emit(ring, MI_SET_CONTEXT);
5009         intel_ring_emit(ring, i915_gem_obj_ggtt_offset(dev_priv->ips.renderctx) |
5010                         MI_MM_SPACE_GTT |
5011                         MI_SAVE_EXT_STATE_EN |
5012                         MI_RESTORE_EXT_STATE_EN |
5013                         MI_RESTORE_INHIBIT);
5014         intel_ring_emit(ring, MI_SUSPEND_FLUSH);
5015         intel_ring_emit(ring, MI_NOOP);
5016         intel_ring_emit(ring, MI_FLUSH);
5017         intel_ring_advance(ring);
5018
5019         /*
5020          * Wait for the command parser to advance past MI_SET_CONTEXT. The HW
5021          * does an implicit flush there; combined with the MI_FLUSH above, it
5022          * should be safe to assume that renderctx is valid.
5023          */
5024         ret = intel_ring_idle(ring);
5025         dev_priv->mm.interruptible = was_interruptible;
5026         if (ret) {
5027                 DRM_ERROR("failed to enable ironlake power savings\n");
5028                 ironlake_teardown_rc6(dev);
5029                 return;
5030         }
5031
5032         I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN);
5033         I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
5034
5035         intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE);
5036 }
5037
5038 static unsigned long intel_pxfreq(u32 vidfreq)
5039 {
5040         unsigned long freq;
5041         int div = (vidfreq & 0x3f0000) >> 16;
5042         int post = (vidfreq & 0x3000) >> 12;
5043         int pre = (vidfreq & 0x7);
5044
5045         if (!pre)
5046                 return 0;
5047
5048         freq = ((div * 133333) / ((1<<post) * pre));
5049
5050         return freq;
5051 }
5052
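/*
 * Ironlake IPS chipset power constants: each entry is keyed on the cached
 * ips.c_m / ips.r_t memory parameters and supplies the slope (m) and
 * intercept (c) that __i915_chipset_val() below uses to turn the energy
 * counter delta into a power estimate.
 */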
5053 static const struct cparams {
5054         u16 i;
5055         u16 t;
5056         u16 m;
5057         u16 c;
5058 } cparams[] = {
5059         { 1, 1333, 301, 28664 },
5060         { 1, 1066, 294, 24460 },
5061         { 1, 800, 294, 25192 },
5062         { 0, 1333, 276, 27605 },
5063         { 0, 1066, 276, 27605 },
5064         { 0, 800, 231, 23784 },
5065 };
5066
5067 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
5068 {
5069         u64 total_count, diff, ret;
5070         u32 count1, count2, count3, m = 0, c = 0;
5071         unsigned long now = jiffies_to_msecs(jiffies), diff1;
5072         int i;
5073
5074         assert_spin_locked(&mchdev_lock);
5075
5076         diff1 = now - dev_priv->ips.last_time1;
5077
5078         /* Prevent division-by-zero if we are asking too fast.
5079          * Also, we don't get interesting results if we are polling
5080          * faster than once in 10ms, so just return the saved value
5081          * in such cases.
5082          */
5083         if (diff1 <= 10)
5084                 return dev_priv->ips.chipset_power;
5085
5086         count1 = I915_READ(DMIEC);
5087         count2 = I915_READ(DDREC);
5088         count3 = I915_READ(CSIEC);
5089
5090         total_count = count1 + count2 + count3;
5091
5092         /* FIXME: handle per-counter overflow */
5093         if (total_count < dev_priv->ips.last_count1) {
5094                 diff = ~0UL - dev_priv->ips.last_count1;
5095                 diff += total_count;
5096         } else {
5097                 diff = total_count - dev_priv->ips.last_count1;
5098         }
5099
5100         for (i = 0; i < ARRAY_SIZE(cparams); i++) {
5101                 if (cparams[i].i == dev_priv->ips.c_m &&
5102                     cparams[i].t == dev_priv->ips.r_t) {
5103                         m = cparams[i].m;
5104                         c = cparams[i].c;
5105                         break;
5106                 }
5107         }
5108
5109         diff = div_u64(diff, diff1);
5110         ret = ((m * diff) + c);
5111         ret = div_u64(ret, 10);
5112
5113         dev_priv->ips.last_count1 = total_count;
5114         dev_priv->ips.last_time1 = now;
5115
5116         dev_priv->ips.chipset_power = ret;
5117
5118         return ret;
5119 }
5120
5121 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
5122 {
5123         struct drm_device *dev = dev_priv->dev;
5124         unsigned long val;
5125
5126         if (INTEL_INFO(dev)->gen != 5)
5127                 return 0;
5128
5129         lockmgr(&mchdev_lock, LK_EXCLUSIVE);
5130
5131         val = __i915_chipset_val(dev_priv);
5132
5133         lockmgr(&mchdev_lock, LK_RELEASE);
5134
5135         return val;
5136 }
5137
5138 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
5139 {
5140         unsigned long m, x, b;
5141         u32 tsfs;
5142
5143         tsfs = I915_READ(TSFS);
5144
5145         m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
5146         x = I915_READ8(TR1);
5147
5148         b = tsfs & TSFS_INTR_MASK;
5149
5150         return ((m * x) / 127) - b;
5151 }
5152
5153 static int _pxvid_to_vd(u8 pxvid)
5154 {
5155         if (pxvid == 0)
5156                 return 0;
5157
5158         if (pxvid >= 8 && pxvid < 31)
5159                 pxvid = 31;
5160
5161         return (pxvid + 2) * 125;
5162 }
5163
5164 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
5165 {
5166         struct drm_device *dev = dev_priv->dev;
5167         const int vd = _pxvid_to_vd(pxvid);
5168         const int vm = vd - 1125;
5169
5170         if (INTEL_INFO(dev)->is_mobile)
5171                 return vm > 0 ? vm : 0;
5172
5173         return vd;
5174 }
5175
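/*
 * Sample the GFXEC energy counter, turn the delta since the last sample
 * into a rate over the elapsed milliseconds (scaled by the magic 1181/10
 * factor) and stash the result in ips.gfx_power for __i915_gfx_val().
 */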
5176 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
5177 {
5178         u64 now, diff, diffms;
5179         u32 count;
5180
5181         assert_spin_locked(&mchdev_lock);
5182
5183         now = ktime_get_raw_ns();
5184         diffms = now - dev_priv->ips.last_time2;
5185         do_div(diffms, NSEC_PER_MSEC);
5186
5187         /* Don't divide by 0 */
5188         if (!diffms)
5189                 return;
5190
5191         count = I915_READ(GFXEC);
5192
5193         if (count < dev_priv->ips.last_count2) {
5194                 diff = ~0UL - dev_priv->ips.last_count2;
5195                 diff += count;
5196         } else {
5197                 diff = count - dev_priv->ips.last_count2;
5198         }
5199
5200         dev_priv->ips.last_count2 = count;
5201         dev_priv->ips.last_time2 = now;
5202
5203         /* More magic constants... */
5204         diff = diff * 1181;
5205         diff = div_u64(diff, diffms * 10);
5206         dev_priv->ips.gfx_power = diff;
5207 }
5208
5209 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
5210 {
5211         struct drm_device *dev = dev_priv->dev;
5212
5213         if (INTEL_INFO(dev)->gen != 5)
5214                 return;
5215
5216         lockmgr(&mchdev_lock, LK_EXCLUSIVE);
5217
5218         __i915_update_gfx_val(dev_priv);
5219
5220         lockmgr(&mchdev_lock, LK_RELEASE);
5221 }
5222
5223 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
5224 {
5225         unsigned long t, corr, state1, corr2, state2;
5226         u32 pxvid, ext_v;
5227
5228         assert_spin_locked(&mchdev_lock);
5229
5230         pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_freq * 4));
5231         pxvid = (pxvid >> 24) & 0x7f;
5232         ext_v = pvid_to_extvid(dev_priv, pxvid);
5233
5234         state1 = ext_v;
5235
5236         t = i915_mch_val(dev_priv);
5237
5238         /* Revel in the empirically derived constants */
5239
5240         /* Correction factor in 1/100000 units */
5241         if (t > 80)
5242                 corr = ((t * 2349) + 135940);
5243         else if (t >= 50)
5244                 corr = ((t * 964) + 29317);
5245         else /* < 50 */
5246                 corr = ((t * 301) + 1004);
5247
5248         corr = corr * ((150142 * state1) / 10000 - 78642);
5249         corr /= 100000;
5250         corr2 = (corr * dev_priv->ips.corr);
5251
5252         state2 = (corr2 * state1) / 10000;
5253         state2 /= 100; /* convert to mW */
5254
5255         __i915_update_gfx_val(dev_priv);
5256
5257         return dev_priv->ips.gfx_power + state2;
5258 }
5259
5260 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
5261 {
5262         struct drm_device *dev = dev_priv->dev;
5263         unsigned long val;
5264
5265         if (INTEL_INFO(dev)->gen != 5)
5266                 return 0;
5267
5268         lockmgr(&mchdev_lock, LK_EXCLUSIVE);
5269
5270         val = __i915_gfx_val(dev_priv);
5271
5272         lockmgr(&mchdev_lock, LK_RELEASE);
5273
5274         return val;
5275 }
5276
5277 /**
5278  * i915_read_mch_val - return value for IPS use
5279  *
5280  * Calculate and return a value for the IPS driver to use when deciding whether
5281  * we have thermal and power headroom to increase CPU or GPU power budget.
5282  */
5283 unsigned long i915_read_mch_val(void)
5284 {
5285         struct drm_i915_private *dev_priv;
5286         unsigned long chipset_val, graphics_val, ret = 0;
5287
5288         lockmgr(&mchdev_lock, LK_EXCLUSIVE);
5289         if (!i915_mch_dev)
5290                 goto out_unlock;
5291         dev_priv = i915_mch_dev;
5292
5293         chipset_val = __i915_chipset_val(dev_priv);
5294         graphics_val = __i915_gfx_val(dev_priv);
5295
5296         ret = chipset_val + graphics_val;
5297
5298 out_unlock:
5299         lockmgr(&mchdev_lock, LK_RELEASE);
5300
5301         return ret;
5302 }
5303
5304 /**
5305  * i915_gpu_raise - raise GPU frequency limit
5306  *
5307  * Raise the limit; IPS indicates we have thermal headroom.
5308  */
5309 bool i915_gpu_raise(void)
5310 {
5311         struct drm_i915_private *dev_priv;
5312         bool ret = true;
5313
5314         lockmgr(&mchdev_lock, LK_EXCLUSIVE);
5315         if (!i915_mch_dev) {
5316                 ret = false;
5317                 goto out_unlock;
5318         }
5319         dev_priv = i915_mch_dev;
5320
5321         if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
5322                 dev_priv->ips.max_delay--;
5323
5324 out_unlock:
5325         lockmgr(&mchdev_lock, LK_RELEASE);
5326
5327         return ret;
5328 }
5329
5330 /**
5331  * i915_gpu_lower - lower GPU frequency limit
5332  *
5333  * IPS indicates we're close to a thermal limit, so throttle back the GPU
5334  * frequency maximum.
5335  */
5336 bool i915_gpu_lower(void)
5337 {
5338         struct drm_i915_private *dev_priv;
5339         bool ret = true;
5340
5341         lockmgr(&mchdev_lock, LK_EXCLUSIVE);
5342         if (!i915_mch_dev) {
5343                 ret = false;
5344                 goto out_unlock;
5345         }
5346         dev_priv = i915_mch_dev;
5347
5348         if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
5349                 dev_priv->ips.max_delay++;
5350
5351 out_unlock:
5352         lockmgr(&mchdev_lock, LK_RELEASE);
5353
5354         return ret;
5355 }
5356
5357 /**
5358  * i915_gpu_busy - indicate GPU business to IPS
5359  *
5360  * Tell the IPS driver whether or not the GPU is busy.
5361  */
5362 bool i915_gpu_busy(void)
5363 {
5364         struct drm_i915_private *dev_priv;
5365         struct intel_engine_cs *ring;
5366         bool ret = false;
5367         int i;
5368
5369         lockmgr(&mchdev_lock, LK_EXCLUSIVE);
5370         if (!i915_mch_dev)
5371                 goto out_unlock;
5372         dev_priv = i915_mch_dev;
5373
5374         for_each_ring(ring, dev_priv, i)
5375                 ret |= !list_empty(&ring->request_list);
5376
5377 out_unlock:
5378         lockmgr(&mchdev_lock, LK_RELEASE);
5379
5380         return ret;
5381 }
5382
5383 /**
5384  * i915_gpu_turbo_disable - disable graphics turbo
5385  *
5386  * Disable graphics turbo by resetting the max frequency and setting the
5387  * current frequency to the default.
5388  */
5389 bool i915_gpu_turbo_disable(void)
5390 {
5391         struct drm_i915_private *dev_priv;
5392         bool ret = true;
5393
5394         lockmgr(&mchdev_lock, LK_EXCLUSIVE);
5395         if (!i915_mch_dev) {
5396                 ret = false;
5397                 goto out_unlock;
5398         }
5399         dev_priv = i915_mch_dev;
5400
5401         dev_priv->ips.max_delay = dev_priv->ips.fstart;
5402
5403         if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart))
5404                 ret = false;
5405
5406 out_unlock:
5407         lockmgr(&mchdev_lock, LK_RELEASE);
5408
5409         return ret;
5410 }
5411
5412 #if 0
5413 /**
5414  * Tells the intel_ips driver that the i915 driver is now loaded, if
5415  * IPS got loaded first.
5416  *
5417  * This awkward dance is so that neither module has to depend on the
5418  * other in order for IPS to do the appropriate communication of
5419  * GPU turbo limits to i915.
5420  */
5421 static void
5422 ips_ping_for_i915_load(void)
5423 {
5424         void (*link)(void);
5425
5426         link = symbol_get(ips_link_to_i915_driver);
5427         if (link) {
5428                 link();
5429                 symbol_put(ips_link_to_i915_driver);
5430         }
5431 }
5432 #endif
5433
5434 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
5435 {
5436         /* We only register the i915 ips part with intel-ips once everything is
5437          * set up, to avoid intel-ips sneaking in and reading bogus values. */
5438         lockmgr(&mchdev_lock, LK_EXCLUSIVE);
5439         i915_mch_dev = dev_priv;
5440         lockmgr(&mchdev_lock, LK_RELEASE);
5441 }
5442
5443 void intel_gpu_ips_teardown(void)
5444 {
5445         lockmgr(&mchdev_lock, LK_EXCLUSIVE);
5446         i915_mch_dev = NULL;
5447         lockmgr(&mchdev_lock, LK_RELEASE);
5448 }
5449
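/*
 * Program the Ironlake energy monitor: load the per-event energy weights,
 * derive a P-state weight for each of the 16 PX entries from its VID and
 * frequency, then enable PMON and read back the correction fuse used by
 * the IPS power estimates.
 */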
5450 static void intel_init_emon(struct drm_device *dev)
5451 {
5452         struct drm_i915_private *dev_priv = dev->dev_private;
5453         u32 lcfuse;
5454         u8 pxw[16];
5455         int i;
5456
5457         /* Disable PMON while we program the event weights */
5458         I915_WRITE(ECR, 0);
5459         POSTING_READ(ECR);
5460
5461         /* Program energy weights for various events */
5462         I915_WRITE(SDEW, 0x15040d00);
5463         I915_WRITE(CSIEW0, 0x007f0000);
5464         I915_WRITE(CSIEW1, 0x1e220004);
5465         I915_WRITE(CSIEW2, 0x04000004);
5466
5467         for (i = 0; i < 5; i++)
5468                 I915_WRITE(PEW + (i * 4), 0);
5469         for (i = 0; i < 3; i++)
5470                 I915_WRITE(DEW + (i * 4), 0);
5471
5472         /* Program P-state weights to account for frequency power adjustment */
5473         for (i = 0; i < 16; i++) {
5474                 u32 pxvidfreq = I915_READ(PXVFREQ_BASE + (i * 4));
5475                 unsigned long freq = intel_pxfreq(pxvidfreq);
5476                 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
5477                         PXVFREQ_PX_SHIFT;
5478                 unsigned long val;
5479
5480                 val = vid * vid;
5481                 val *= (freq / 1000);
5482                 val *= 255;
5483                 val /= (127*127*900);
5484                 if (val > 0xff)
5485                         DRM_ERROR("bad pxval: %ld\n", val);
5486                 pxw[i] = val;
5487         }
5488         /* Render standby states get 0 weight */
5489         pxw[14] = 0;
5490         pxw[15] = 0;
5491
5492         for (i = 0; i < 4; i++) {
5493                 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
5494                         (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
5495                 I915_WRITE(PXW + (i * 4), val);
5496         }
5497
5498         /* Adjust magic regs to magic values (more experimental results) */
5499         I915_WRITE(OGW0, 0);
5500         I915_WRITE(OGW1, 0);
5501         I915_WRITE(EG0, 0x00007f00);
5502         I915_WRITE(EG1, 0x0000000e);
5503         I915_WRITE(EG2, 0x000e0000);
5504         I915_WRITE(EG3, 0x68000300);
5505         I915_WRITE(EG4, 0x42000000);
5506         I915_WRITE(EG5, 0x00140031);
5507         I915_WRITE(EG6, 0);
5508         I915_WRITE(EG7, 0);
5509
5510         for (i = 0; i < 8; i++)
5511                 I915_WRITE(PXWL + (i * 4), 0);
5512
5513         /* Enable PMON + select events */
5514         I915_WRITE(ECR, 0x80000019);
5515
5516         lcfuse = I915_READ(LCFUSE02);
5517
5518         dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
5519 }
5520
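/*
 * Sanitize the RC6 module option once at init time.  Only VLV/CHV need
 * platform setup this early, to locate or allocate their power context in
 * stolen memory.
 */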
5521 void intel_init_gt_powersave(struct drm_device *dev)
5522 {
5523         i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6);
5524
5525         if (IS_CHERRYVIEW(dev))
5526                 cherryview_init_gt_powersave(dev);
5527         else if (IS_VALLEYVIEW(dev))
5528                 valleyview_init_gt_powersave(dev);
5529 }
5530
5531 void intel_cleanup_gt_powersave(struct drm_device *dev)
5532 {
5533         if (IS_CHERRYVIEW(dev))
5534                 return;
5535         else if (IS_VALLEYVIEW(dev))
5536                 valleyview_cleanup_gt_powersave(dev);
5537 }
5538
5539 static void gen6_suspend_rps(struct drm_device *dev)
5540 {
5541 #if 0
5542         struct drm_i915_private *dev_priv = dev->dev_private;
5543
5544         flush_delayed_work(&dev_priv->rps.delayed_resume_work);
5545 #endif
5546
5547         /*
5548          * TODO: disable RPS interrupts on GEN9+ too once RPS support
5549          * is added for it.
5550          */
5551         if (INTEL_INFO(dev)->gen < 9)
5552                 gen6_disable_rps_interrupts(dev);
5553 }
5554
5555 /**
5556  * intel_suspend_gt_powersave - suspend PM work and helper threads
5557  * @dev: drm device
5558  *
5559  * We don't want to disable RC6 or other features here, we just want
5560  * to make sure any work we've queued has finished and won't bother
5561  * us while we're suspended.
5562  */
5563 void intel_suspend_gt_powersave(struct drm_device *dev)
5564 {
5565         struct drm_i915_private *dev_priv = dev->dev_private;
5566
5567         if (INTEL_INFO(dev)->gen < 6)
5568                 return;
5569
5570         gen6_suspend_rps(dev);
5571
5572         /* Force GPU to min freq during suspend */
5573         gen6_rps_idle(dev_priv);
5574 }
5575
5576 void intel_disable_gt_powersave(struct drm_device *dev)
5577 {
5578         struct drm_i915_private *dev_priv = dev->dev_private;
5579
5580         if (IS_IRONLAKE_M(dev)) {
5581                 ironlake_disable_drps(dev);
5582                 ironlake_disable_rc6(dev);
5583         } else if (INTEL_INFO(dev)->gen >= 6) {
5584                 intel_suspend_gt_powersave(dev);
5585
5586                 mutex_lock(&dev_priv->rps.hw_lock);
5587                 if (INTEL_INFO(dev)->gen >= 9)
5588                         gen9_disable_rps(dev);
5589                 else if (IS_CHERRYVIEW(dev))
5590                         cherryview_disable_rps(dev);
5591                 else if (IS_VALLEYVIEW(dev))
5592                         valleyview_disable_rps(dev);
5593                 else
5594                         gen6_disable_rps(dev);
5595
5596                 dev_priv->rps.enabled = false;
5597                 mutex_unlock(&dev_priv->rps.hw_lock);
5598         }
5599 }
5600
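/*
 * Deferred RPS/RC6 bring-up: this runs from rps.delayed_resume_work, which
 * intel_enable_gt_powersave() below schedules about a second after enabling,
 * keeping the slow PCU programming off the init/resume fast path.
 */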
5601 static void intel_gen6_powersave_work(struct work_struct *work)
5602 {
5603         struct drm_i915_private *dev_priv =
5604                 container_of(work, struct drm_i915_private,
5605                              rps.delayed_resume_work.work);
5606         struct drm_device *dev = dev_priv->dev;
5607
5608         mutex_lock(&dev_priv->rps.hw_lock);
5609
5610         /*
5611          * TODO: reset/enable RPS interrupts on GEN9+ too, once RPS support is
5612          * added for it.
5613          */
5614         if (INTEL_INFO(dev)->gen < 9)
5615                 gen6_reset_rps_interrupts(dev);
5616
5617         if (IS_CHERRYVIEW(dev)) {
5618                 cherryview_enable_rps(dev);
5619         } else if (IS_VALLEYVIEW(dev)) {
5620                 valleyview_enable_rps(dev);
5621         } else if (INTEL_INFO(dev)->gen >= 9) {
5622                 gen9_enable_rc6(dev);
5623                 gen9_enable_rps(dev);
5624                 __gen6_update_ring_freq(dev);
5625         } else if (IS_BROADWELL(dev)) {
5626                 gen8_enable_rps(dev);
5627                 __gen6_update_ring_freq(dev);
5628         } else {
5629                 gen6_enable_rps(dev);
5630                 __gen6_update_ring_freq(dev);
5631         }
5632         dev_priv->rps.enabled = true;
5633
5634         if (INTEL_INFO(dev)->gen < 9)
5635                 gen6_enable_rps_interrupts(dev);
5636
5637         mutex_unlock(&dev_priv->rps.hw_lock);
5638
5639         intel_runtime_pm_put(dev_priv);
5640 }
5641
5642 void intel_enable_gt_powersave(struct drm_device *dev)
5643 {
5644         struct drm_i915_private *dev_priv = dev->dev_private;
5645
5646         if (IS_IRONLAKE_M(dev)) {
5647                 mutex_lock(&dev->struct_mutex);
5648                 ironlake_enable_drps(dev);
5649                 ironlake_enable_rc6(dev);
5650                 intel_init_emon(dev);
5651                 mutex_unlock(&dev->struct_mutex);
5652         } else if (INTEL_INFO(dev)->gen >= 6) {
5653                 /*
5654                  * PCU communication is slow and this doesn't need to be
5655                  * done at any specific time, so do this out of our fast path
5656                  * to make resume and init faster.
5657                  *
5658                  * We depend on the HW RC6 power context save/restore
5659                  * mechanism when entering D3 through runtime PM suspend. So
5660                  * disable RPM until RPS/RC6 is properly setup. We can only
5661                  * get here via the driver load/system resume/runtime resume
5662                  * paths, so the _noresume version is enough (and in case of
5663                  * runtime resume it's necessary).
5664                  */
5665                 if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
5666                                            round_jiffies_up_relative(HZ)))
5667                         intel_runtime_pm_get_noresume(dev_priv);
5668         }
5669 }
5670
5671 void intel_reset_gt_powersave(struct drm_device *dev)
5672 {
5673         struct drm_i915_private *dev_priv = dev->dev_private;
5674
5675         if (INTEL_INFO(dev)->gen < 6)
5676                 return;
5677
5678         gen6_suspend_rps(dev);
5679         dev_priv->rps.enabled = false;
5680 }
5681
5682 static void ibx_init_clock_gating(struct drm_device *dev)
5683 {
5684         struct drm_i915_private *dev_priv = dev->dev_private;
5685
5686         /*
5687          * On Ibex Peak and Cougar Point, we need to disable clock
5688          * gating for the panel power sequencer or it will fail to
5689          * start up when no ports are active.
5690          */
5691         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
5692 }
5693
5694 static void g4x_disable_trickle_feed(struct drm_device *dev)
5695 {
5696         struct drm_i915_private *dev_priv = dev->dev_private;
5697         int pipe;
5698
5699         for_each_pipe(dev_priv, pipe) {
5700                 I915_WRITE(DSPCNTR(pipe),
5701                            I915_READ(DSPCNTR(pipe)) |
5702                            DISPPLANE_TRICKLE_FEED_DISABLE);
5703                 intel_flush_primary_plane(dev_priv, pipe);
5704         }
5705 }
5706
5707 static void ilk_init_lp_watermarks(struct drm_device *dev)
5708 {
5709         struct drm_i915_private *dev_priv = dev->dev_private;
5710
5711         I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
5712         I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
5713         I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
5714
5715         /*
5716          * Don't touch WM1S_LP_EN here.
5717          * Doing so could cause underruns.
5718          */
5719 }
5720
5721 static void ironlake_init_clock_gating(struct drm_device *dev)
5722 {
5723         struct drm_i915_private *dev_priv = dev->dev_private;
5724         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
5725
5726         /*
5727          * Required for FBC
5728          * WaFbcDisableDpfcClockGating:ilk
5729          */
5730         dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
5731                    ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
5732                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
5733
5734         I915_WRITE(PCH_3DCGDIS0,
5735                    MARIUNIT_CLOCK_GATE_DISABLE |
5736                    SVSMUNIT_CLOCK_GATE_DISABLE);
5737         I915_WRITE(PCH_3DCGDIS1,
5738                    VFMUNIT_CLOCK_GATE_DISABLE);
5739
5740         /*
5741          * According to the spec, the following bits should be set in
5742          * order to enable memory self-refresh:
5743          * Bits 22 and 21 of 0x42004
5744          * Bit 5 of 0x42020
5745          * Bit 15 of 0x45000
5746          */
5747         I915_WRITE(ILK_DISPLAY_CHICKEN2,
5748                    (I915_READ(ILK_DISPLAY_CHICKEN2) |
5749                     ILK_DPARB_GATE | ILK_VSDPFD_FULL));
5750         dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
5751         I915_WRITE(DISP_ARB_CTL,
5752                    (I915_READ(DISP_ARB_CTL) |
5753                     DISP_FBC_WM_DIS));
5754
5755         ilk_init_lp_watermarks(dev);
5756
5757         /*
5758          * According to the hardware documentation, the following bits
5759          * should be set unconditionally in order to enable FBC:
5760          * Bit 22 of 0x42000
5761          * Bit 22 of 0x42004
5762          * Bits 7, 8 and 9 of 0x42020.
5763          */
5764         if (IS_IRONLAKE_M(dev)) {
5765                 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
5766                 I915_WRITE(ILK_DISPLAY_CHICKEN1,
5767                            I915_READ(ILK_DISPLAY_CHICKEN1) |
5768                            ILK_FBCQ_DIS);
5769                 I915_WRITE(ILK_DISPLAY_CHICKEN2,
5770                            I915_READ(ILK_DISPLAY_CHICKEN2) |
5771                            ILK_DPARB_GATE);
5772         }
5773
5774         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
5775
5776         I915_WRITE(ILK_DISPLAY_CHICKEN2,
5777                    I915_READ(ILK_DISPLAY_CHICKEN2) |
5778                    ILK_ELPIN_409_SELECT);
5779         I915_WRITE(_3D_CHICKEN2,
5780                    _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
5781                    _3D_CHICKEN2_WM_READ_PIPELINED);
5782
5783         /* WaDisableRenderCachePipelinedFlush:ilk */
5784         I915_WRITE(CACHE_MODE_0,
5785                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
5786
5787         /* WaDisable_RenderCache_OperationalFlush:ilk */
5788         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
5789
5790         g4x_disable_trickle_feed(dev);
5791
5792         ibx_init_clock_gating(dev);
5793 }
5794
5795 static void cpt_init_clock_gating(struct drm_device *dev)
5796 {
5797         struct drm_i915_private *dev_priv = dev->dev_private;
5798         int pipe;
5799         uint32_t val;
5800
5801         /*
5802          * On Ibex Peak and Cougar Point, we need to disable clock
5803          * gating for the panel power sequencer or it will fail to
5804          * start up when no ports are active.
5805          */
5806         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
5807                    PCH_DPLUNIT_CLOCK_GATE_DISABLE |
5808                    PCH_CPUNIT_CLOCK_GATE_DISABLE);
5809         I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
5810                    DPLS_EDP_PPS_FIX_DIS);
5811         /* The following fixes a display corruption (a few pixels shifted
5812          * downward) seen only on the LVDS panel of some HP Ivy Bridge laptops.
5813          */
5814         for_each_pipe(dev_priv, pipe) {
5815                 val = I915_READ(TRANS_CHICKEN2(pipe));
5816                 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
5817                 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
5818                 if (dev_priv->vbt.fdi_rx_polarity_inverted)
5819                         val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
5820                 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
5821                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
5822                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
5823                 I915_WRITE(TRANS_CHICKEN2(pipe), val);
5824         }
5825         /* WADP0ClockGatingDisable */
5826         for_each_pipe(dev_priv, pipe) {
5827                 I915_WRITE(TRANS_CHICKEN1(pipe),
5828                            TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
5829         }
5830 }
5831
5832 static void gen6_check_mch_setup(struct drm_device *dev)
5833 {
5834         struct drm_i915_private *dev_priv = dev->dev_private;
5835         uint32_t tmp;
5836
5837         tmp = I915_READ(MCH_SSKPD);
5838         if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
5839                 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x; this can cause underruns.\n",
5840                               tmp);
5841 }
5842
5843 static void gen6_init_clock_gating(struct drm_device *dev)
5844 {
5845         struct drm_i915_private *dev_priv = dev->dev_private;
5846         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
5847
5848         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
5849
5850         I915_WRITE(ILK_DISPLAY_CHICKEN2,
5851                    I915_READ(ILK_DISPLAY_CHICKEN2) |
5852                    ILK_ELPIN_409_SELECT);
5853
5854         /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
5855         I915_WRITE(_3D_CHICKEN,
5856                    _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
5857
5858         /* WaDisable_RenderCache_OperationalFlush:snb */
5859         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
5860
5861         /*
5862          * BSpec recommends 8x4 when MSAA is used,
5863          * however in practice 16x4 seems fastest.
5864          *
5865          * Note that PS/WM thread counts depend on the WIZ hashing
5866          * disable bit, which we don't touch here, but it's good
5867          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
5868          */
5869         I915_WRITE(GEN6_GT_MODE,
5870                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
5871
5872         ilk_init_lp_watermarks(dev);
5873
5874         I915_WRITE(CACHE_MODE_0,
5875                    _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
5876
5877         I915_WRITE(GEN6_UCGCTL1,
5878                    I915_READ(GEN6_UCGCTL1) |
5879                    GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
5880                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
5881
5882         /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
5883          * gating disable must be set.  Failure to set it results in
5884          * flickering pixels due to Z write ordering failures after
5885          * some amount of runtime in the Mesa "fire" demo, and Unigine
5886          * Sanctuary and Tropics, and apparently anything else with
5887          * alpha test or pixel discard.
5888          *
5889          * According to the spec, bit 11 (RCCUNIT) must also be set,
5890          * though no testcase demonstrating the need for it has been debugged.
5891          *
5892          * WaDisableRCCUnitClockGating:snb
5893          * WaDisableRCPBUnitClockGating:snb
5894          */
5895         I915_WRITE(GEN6_UCGCTL2,
5896                    GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
5897                    GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
5898
5899         /* WaStripsFansDisableFastClipPerformanceFix:snb */
5900         I915_WRITE(_3D_CHICKEN3,
5901                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
5902
5903         /*
5904          * BSpec says:
5905          * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
5906          * 3DSTATE_SF number of SF output attributes is more than 16."
5907          */
5908         I915_WRITE(_3D_CHICKEN3,
5909                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
5910
5911         /*
5912          * According to the spec, the following bits should be
5913          * set in order to enable memory self-refresh and FBC:
5914          * Bits 21 and 22 of 0x42000
5915          * Bits 21 and 22 of 0x42004
5916          * Bits 5 and 7 of 0x42020
5917          * Bit 14 of 0x70180
5918          * Bit 14 of 0x71180
5919          *
5920          * WaFbcAsynchFlipDisableFbcQueue:snb
5921          */
5922         I915_WRITE(ILK_DISPLAY_CHICKEN1,
5923                    I915_READ(ILK_DISPLAY_CHICKEN1) |
5924                    ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
5925         I915_WRITE(ILK_DISPLAY_CHICKEN2,
5926                    I915_READ(ILK_DISPLAY_CHICKEN2) |
5927                    ILK_DPARB_GATE | ILK_VSDPFD_FULL);
5928         I915_WRITE(ILK_DSPCLK_GATE_D,
5929                    I915_READ(ILK_DSPCLK_GATE_D) |
5930                    ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
5931                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
5932
5933         g4x_disable_trickle_feed(dev);
5934
5935         cpt_init_clock_gating(dev);
5936
5937         gen6_check_mch_setup(dev);
5938 }
5939
5940 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
5941 {
5942         uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
5943
5944         /*
5945          * WaVSThreadDispatchOverride:ivb,vlv
5946          *
5947          * This actually overrides the dispatch
5948          * mode for all thread types.
5949          */
5950         reg &= ~GEN7_FF_SCHED_MASK;
5951         reg |= GEN7_FF_TS_SCHED_HW;
5952         reg |= GEN7_FF_VS_SCHED_HW;
5953         reg |= GEN7_FF_DS_SCHED_HW;
5954
5955         I915_WRITE(GEN7_FF_THREAD_MODE, reg);
5956 }
5957
5958 static void lpt_init_clock_gating(struct drm_device *dev)
5959 {
5960         struct drm_i915_private *dev_priv = dev->dev_private;
5961
5962         /*
5963          * TODO: this bit should only be enabled when really needed, then
5964          * disabled when not needed anymore in order to save power.
5965          */
5966         if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)
5967                 I915_WRITE(SOUTH_DSPCLK_GATE_D,
5968                            I915_READ(SOUTH_DSPCLK_GATE_D) |
5969                            PCH_LP_PARTITION_LEVEL_DISABLE);
5970
5971         /* WADPOClockGatingDisable:hsw */
5972         I915_WRITE(_TRANSA_CHICKEN1,
5973                    I915_READ(_TRANSA_CHICKEN1) |
5974                    TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
5975 }
5976
5977 static void lpt_suspend_hw(struct drm_device *dev)
5978 {
5979         struct drm_i915_private *dev_priv = dev->dev_private;
5980
5981         if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
5982                 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
5983
5984                 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
5985                 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
5986         }
5987 }
5988
5989 static void broadwell_init_clock_gating(struct drm_device *dev)
5990 {
5991         struct drm_i915_private *dev_priv = dev->dev_private;
5992         enum i915_pipe pipe;
5993
5994         I915_WRITE(WM3_LP_ILK, 0);
5995         I915_WRITE(WM2_LP_ILK, 0);
5996         I915_WRITE(WM1_LP_ILK, 0);
5997
5998         /* WaSwitchSolVfFArbitrationPriority:bdw */
5999         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6000
6001         /* WaPsrDPAMaskVBlankInSRD:bdw */
6002         I915_WRITE(CHICKEN_PAR1_1,
6003                    I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
6004
6005         /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
6006         for_each_pipe(dev_priv, pipe) {
6007                 I915_WRITE(CHICKEN_PIPESL_1(pipe),
6008                            I915_READ(CHICKEN_PIPESL_1(pipe)) |
6009                            BDW_DPRS_MASK_VBLANK_SRD);
6010         }
6011
6012         /* WaVSRefCountFullforceMissDisable:bdw */
6013         /* WaDSRefCountFullforceMissDisable:bdw */
6014         I915_WRITE(GEN7_FF_THREAD_MODE,
6015                    I915_READ(GEN7_FF_THREAD_MODE) &
6016                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6017
6018         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6019                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6020
6021         /* WaDisableSDEUnitClockGating:bdw */
6022         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6023                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6024
6025         lpt_init_clock_gating(dev);
6026 }
6027
6028 static void haswell_init_clock_gating(struct drm_device *dev)
6029 {
6030         struct drm_i915_private *dev_priv = dev->dev_private;
6031
6032         ilk_init_lp_watermarks(dev);
6033
6034         /* L3 caching of data atomics doesn't work -- disable it. */
6035         I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
6036         I915_WRITE(HSW_ROW_CHICKEN3,
6037                    _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
6038
6039         /* This is required by WaCatErrorRejectionIssue:hsw */
6040         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6041                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6042                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6043
6044         /* WaVSRefCountFullforceMissDisable:hsw */
6045         I915_WRITE(GEN7_FF_THREAD_MODE,
6046                    I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
6047
6048         /* WaDisable_RenderCache_OperationalFlush:hsw */
6049         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6050
6051         /* enable HiZ Raw Stall Optimization */
6052         I915_WRITE(CACHE_MODE_0_GEN7,
6053                    _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6054
6055         /* WaDisable4x2SubspanOptimization:hsw */
6056         I915_WRITE(CACHE_MODE_1,
6057                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6058
6059         /*
6060          * BSpec recommends 8x4 when MSAA is used,
6061          * however in practice 16x4 seems fastest.
6062          *
6063          * Note that PS/WM thread counts depend on the WIZ hashing
6064          * disable bit, which we don't touch here, but it's good
6065          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6066          */
6067         I915_WRITE(GEN7_GT_MODE,
6068                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6069
6070         /* WaSampleCChickenBitEnable:hsw */
6071         I915_WRITE(HALF_SLICE_CHICKEN3,
6072                    _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
6073
6074         /* WaSwitchSolVfFArbitrationPriority:hsw */
6075         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6076
6077         /* WaRsPkgCStateDisplayPMReq:hsw */
6078         I915_WRITE(CHICKEN_PAR1_1,
6079                    I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
6080
6081         lpt_init_clock_gating(dev);
6082 }
6083
6084 static void ivybridge_init_clock_gating(struct drm_device *dev)
6085 {
6086         struct drm_i915_private *dev_priv = dev->dev_private;
6087         uint32_t snpcr;
6088
6089         ilk_init_lp_watermarks(dev);
6090
6091         I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
6092
6093         /* WaDisableEarlyCull:ivb */
6094         I915_WRITE(_3D_CHICKEN3,
6095                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6096
6097         /* WaDisableBackToBackFlipFix:ivb */
6098         I915_WRITE(IVB_CHICKEN3,
6099                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6100                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
6101
6102         /* WaDisablePSDDualDispatchEnable:ivb */
6103         if (IS_IVB_GT1(dev))
6104                 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6105                            _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6106
6107         /* WaDisable_RenderCache_OperationalFlush:ivb */
6108         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6109
6110         /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
6111         I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
6112                    GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
6113
6114         /* WaApplyL3ControlAndL3ChickenMode:ivb */
6115         I915_WRITE(GEN7_L3CNTLREG1,
6116                         GEN7_WA_FOR_GEN7_L3_CONTROL);
6117         I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
6118                    GEN7_WA_L3_CHICKEN_MODE);
6119         if (IS_IVB_GT1(dev))
6120                 I915_WRITE(GEN7_ROW_CHICKEN2,
6121                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6122         else {
6123                 /* must write both registers */
6124                 I915_WRITE(GEN7_ROW_CHICKEN2,
6125                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6126                 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
6127                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6128         }
6129
6130         /* WaForceL3Serialization:ivb */
6131         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6132                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
6133
6134         /*
6135          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
6136          * This implements the WaDisableRCZUnitClockGating:ivb workaround.
6137          */
6138         I915_WRITE(GEN6_UCGCTL2,
6139                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
6140
6141         /* This is required by WaCatErrorRejectionIssue:ivb */
6142         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6143                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6144                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6145
6146         g4x_disable_trickle_feed(dev);
6147
6148         gen7_setup_fixed_func_scheduler(dev_priv);
6149
6150         if (0) { /* causes HiZ corruption on ivb:gt1 */
6151                 /* enable HiZ Raw Stall Optimization */
6152                 I915_WRITE(CACHE_MODE_0_GEN7,
6153                            _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6154         }
6155
6156         /* WaDisable4x2SubspanOptimization:ivb */
6157         I915_WRITE(CACHE_MODE_1,
6158                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6159
6160         /*
6161          * BSpec recommends 8x4 when MSAA is used,
6162          * however in practice 16x4 seems fastest.
6163          *
6164          * Note that PS/WM thread counts depend on the WIZ hashing
6165          * disable bit, which we don't touch here, but it's good
6166          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6167          */
6168         I915_WRITE(GEN7_GT_MODE,
6169                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6170
6171         snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
6172         snpcr &= ~GEN6_MBC_SNPCR_MASK;
6173         snpcr |= GEN6_MBC_SNPCR_MED;
6174         I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
6175
6176         if (!HAS_PCH_NOP(dev))
6177                 cpt_init_clock_gating(dev);
6178
6179         gen6_check_mch_setup(dev);
6180 }
6181
6182 static void valleyview_init_clock_gating(struct drm_device *dev)
6183 {
6184         struct drm_i915_private *dev_priv = dev->dev_private;
6185
6186         I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
6187
6188         /* WaDisableEarlyCull:vlv */
6189         I915_WRITE(_3D_CHICKEN3,
6190                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6191
6192         /* WaDisableBackToBackFlipFix:vlv */
6193         I915_WRITE(IVB_CHICKEN3,
6194                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6195                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
6196
6197         /* WaPsdDispatchEnable:vlv */
6198         /* WaDisablePSDDualDispatchEnable:vlv */
6199         I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6200                    _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
6201                                       GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6202
6203         /* WaDisable_RenderCache_OperationalFlush:vlv */
6204         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6205
6206         /* WaForceL3Serialization:vlv */
6207         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6208                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
6209
6210         /* WaDisableDopClockGating:vlv */
6211         I915_WRITE(GEN7_ROW_CHICKEN2,
6212                    _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6213
6214         /* This is required by WaCatErrorRejectionIssue:vlv */
6215         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6216                    I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6217                    GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6218
6219         gen7_setup_fixed_func_scheduler(dev_priv);
6220
6221         /*
6222          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
6223          * This implements the WaDisableRCZUnitClockGating:vlv workaround.
6224          */
6225         I915_WRITE(GEN6_UCGCTL2,
6226                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
6227
6228         /* WaDisableL3Bank2xClockGate:vlv
6229          * Disable L3 clock gating: MMIO 0x940c bit 25 = 1.
6230          * Setting bit 25 disables L3_BANK_2x_CLK_GATING. */
6231         I915_WRITE(GEN7_UCGCTL4,
6232                    I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
6233
6234         I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
6235
6236         /*
6237          * BSpec says this must be set, even though
6238          * WaDisable4x2SubspanOptimization isn't listed for VLV.
6239          */
6240         I915_WRITE(CACHE_MODE_1,
6241                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6242
6243         /*
6244          * BSpec recommends 8x4 when MSAA is used,
6245          * however in practice 16x4 seems fastest.
6246          *
6247          * Note that PS/WM thread counts depend on the WIZ hashing
6248          * disable bit, which we don't touch here, but it's good
6249          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6250          */
6251         I915_WRITE(GEN7_GT_MODE,
6252                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6253
6254         /*
6255          * WaIncreaseL3CreditsForVLVB0:vlv
6256          * This is actually the hardware default.
6257          */
6258         I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
6259
6260         /*
6261          * WaDisableVLVClockGating_VBIIssue:vlv
6262          * Disable clock gating on the GCFG unit to prevent a delay
6263          * in the reporting of vblank events.
6264          */
6265         I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
6266 }
6267
6268 static void cherryview_init_clock_gating(struct drm_device *dev)
6269 {
6270         struct drm_i915_private *dev_priv = dev->dev_private;
6271
6272         I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
6273
6274         I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
6275
6276         /* WaVSRefCountFullforceMissDisable:chv */
6277         /* WaDSRefCountFullforceMissDisable:chv */
6278         I915_WRITE(GEN7_FF_THREAD_MODE,
6279                    I915_READ(GEN7_FF_THREAD_MODE) &
6280                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6281
6282         /* WaDisableSemaphoreAndSyncFlipWait:chv */
6283         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6284                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6285
6286         /* WaDisableCSUnitClockGating:chv */
6287         I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
6288                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
6289
6290         /* WaDisableSDEUnitClockGating:chv */
6291         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6292                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6293 }
6294
6295 static void g4x_init_clock_gating(struct drm_device *dev)
6296 {
6297         struct drm_i915_private *dev_priv = dev->dev_private;
6298         uint32_t dspclk_gate;
6299
6300         I915_WRITE(RENCLK_GATE_D1, 0);
6301         I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
6302                    GS_UNIT_CLOCK_GATE_DISABLE |
6303                    CL_UNIT_CLOCK_GATE_DISABLE);
6304         I915_WRITE(RAMCLK_GATE_D, 0);
6305         dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
6306                 OVRUNIT_CLOCK_GATE_DISABLE |
6307                 OVCUNIT_CLOCK_GATE_DISABLE;
6308         if (IS_GM45(dev))
6309                 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
6310         I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
6311
6312         /* WaDisableRenderCachePipelinedFlush */
6313         I915_WRITE(CACHE_MODE_0,
6314                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
6315
6316         /* WaDisable_RenderCache_OperationalFlush:g4x */
6317         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6318
6319         g4x_disable_trickle_feed(dev);
6320 }
6321
6322 static void crestline_init_clock_gating(struct drm_device *dev)
6323 {
6324         struct drm_i915_private *dev_priv = dev->dev_private;
6325
6326         I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
6327         I915_WRITE(RENCLK_GATE_D2, 0);
6328         I915_WRITE(DSPCLK_GATE_D, 0);
6329         I915_WRITE(RAMCLK_GATE_D, 0);
6330         I915_WRITE16(DEUC, 0);
6331         I915_WRITE(MI_ARB_STATE,
6332                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
6333
6334         /* WaDisable_RenderCache_OperationalFlush:gen4 */
6335         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6336 }
6337
6338 static void broadwater_init_clock_gating(struct drm_device *dev)
6339 {
6340         struct drm_i915_private *dev_priv = dev->dev_private;
6341
6342         I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
6343                    I965_RCC_CLOCK_GATE_DISABLE |
6344                    I965_RCPB_CLOCK_GATE_DISABLE |
6345                    I965_ISC_CLOCK_GATE_DISABLE |
6346                    I965_FBC_CLOCK_GATE_DISABLE);
6347         I915_WRITE(RENCLK_GATE_D2, 0);
6348         I915_WRITE(MI_ARB_STATE,
6349                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
6350
6351         /* WaDisable_RenderCache_OperationalFlush:gen4 */
6352         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6353 }
6354
6355 static void gen3_init_clock_gating(struct drm_device *dev)
6356 {
6357         struct drm_i915_private *dev_priv = dev->dev_private;
6358         u32 dstate = I915_READ(D_STATE);
6359
6360         dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
6361                 DSTATE_DOT_CLOCK_GATING;
6362         I915_WRITE(D_STATE, dstate);
6363
6364         if (IS_PINEVIEW(dev))
6365                 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
6366
6367         /* IIR "flip pending" means done if this bit is set */
6368         I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
6369
6370         /* interrupts should cause a wake up from C3 */
6371         I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
6372
6373         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
6374         I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
6375
6376         I915_WRITE(MI_ARB_STATE,
6377                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
6378 }
6379
6380 static void i85x_init_clock_gating(struct drm_device *dev)
6381 {
6382         struct drm_i915_private *dev_priv = dev->dev_private;
6383
6384         I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
6385
6386         /* interrupts should cause a wake up from C3 */
6387         I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
6388                    _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
6389
6390         I915_WRITE(MEM_MODE,
6391                    _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
6392 }
6393
6394 static void i830_init_clock_gating(struct drm_device *dev)
6395 {
6396         struct drm_i915_private *dev_priv = dev->dev_private;
6397
6398         I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
6399
6400         I915_WRITE(MEM_MODE,
6401                    _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
6402                    _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
6403 }
6404
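/* Invokes the per-platform hook selected in intel_init_pm() below. */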
6405 void intel_init_clock_gating(struct drm_device *dev)
6406 {
6407         struct drm_i915_private *dev_priv = dev->dev_private;
6408
6409         dev_priv->display.init_clock_gating(dev);
6410 }
6411
6412 void intel_suspend_hw(struct drm_device *dev)
6413 {
6414         if (HAS_PCH_LPT(dev))
6415                 lpt_suspend_hw(dev);
6416 }
6417
6418 /* Set up chip specific power management-related functions */
6419 void intel_init_pm(struct drm_device *dev)
6420 {
6421         struct drm_i915_private *dev_priv = dev->dev_private;
6422
6423         intel_fbc_init(dev_priv);
6424
6425         /* For CxSR (self-refresh) */
6426         if (IS_PINEVIEW(dev))
6427                 i915_pineview_get_mem_freq(dev);
6428         else if (IS_GEN5(dev))
6429                 i915_ironlake_get_mem_freq(dev);
6430
6431         /* For FIFO watermark updates */
6432         if (INTEL_INFO(dev)->gen >= 9) {
6433                 skl_setup_wm_latency(dev);
6434
6435                 dev_priv->display.init_clock_gating = gen9_init_clock_gating;
6436                 dev_priv->display.update_wm = skl_update_wm;
6437                 dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
6438         } else if (HAS_PCH_SPLIT(dev)) {
6439                 ilk_setup_wm_latency(dev);
6440
6441                 if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] &&
6442                      dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
6443                     (!IS_GEN5(dev) && dev_priv->wm.pri_latency[0] &&
6444                      dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
6445                         dev_priv->display.update_wm = ilk_update_wm;
6446                         dev_priv->display.update_sprite_wm = ilk_update_sprite_wm;
6447                 } else {
6448                         DRM_DEBUG_KMS("Failed to read display plane latency. "
6449                                       "Disable CxSR\n");
6450                 }
6451
6452                 if (IS_GEN5(dev))
6453                         dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
6454                 else if (IS_GEN6(dev))
6455                         dev_priv->display.init_clock_gating = gen6_init_clock_gating;
6456                 else if (IS_IVYBRIDGE(dev))
6457                         dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
6458                 else if (IS_HASWELL(dev))
6459                         dev_priv->display.init_clock_gating = haswell_init_clock_gating;
6460                 else if (INTEL_INFO(dev)->gen == 8)
6461                         dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
6462         } else if (IS_CHERRYVIEW(dev)) {
6463                 dev_priv->display.update_wm = cherryview_update_wm;
6464                 dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
6465                 dev_priv->display.init_clock_gating =
6466                         cherryview_init_clock_gating;
6467         } else if (IS_VALLEYVIEW(dev)) {
6468                 dev_priv->display.update_wm = valleyview_update_wm;
6469                 dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
6470                 dev_priv->display.init_clock_gating =
6471                         valleyview_init_clock_gating;
6472         } else if (IS_PINEVIEW(dev)) {
6473                 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev),
6474                                             dev_priv->is_ddr3,
6475                                             dev_priv->fsb_freq,
6476                                             dev_priv->mem_freq)) {
6477                         DRM_INFO("failed to find known CxSR latency "
6478                                  "(found ddr%s fsb freq %d, mem freq %d), "
6479                                  "disabling CxSR\n",
6480                                  (dev_priv->is_ddr3 == 1) ? "3" : "2",
6481                                  dev_priv->fsb_freq, dev_priv->mem_freq);
6482                         /* Disable CxSR and never update its watermark again */
6483                         intel_set_memory_cxsr(dev_priv, false);
6484                         dev_priv->display.update_wm = NULL;
6485                 } else
6486                         dev_priv->display.update_wm = pineview_update_wm;
6487                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
6488         } else if (IS_G4X(dev)) {
6489                 dev_priv->display.update_wm = g4x_update_wm;
6490                 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
6491         } else if (IS_GEN4(dev)) {
6492                 dev_priv->display.update_wm = i965_update_wm;
6493                 if (IS_CRESTLINE(dev))
6494                         dev_priv->display.init_clock_gating = crestline_init_clock_gating;
6495                 else if (IS_BROADWATER(dev))
6496                         dev_priv->display.init_clock_gating = broadwater_init_clock_gating;
6497         } else if (IS_GEN3(dev)) {
6498                 dev_priv->display.update_wm = i9xx_update_wm;
6499                 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
6500                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
6501         } else if (IS_GEN2(dev)) {
6502                 if (INTEL_INFO(dev)->num_pipes == 1) {
6503                         dev_priv->display.update_wm = i845_update_wm;
6504                         dev_priv->display.get_fifo_size = i845_get_fifo_size;
6505                 } else {
6506                         dev_priv->display.update_wm = i9xx_update_wm;
6507                         dev_priv->display.get_fifo_size = i830_get_fifo_size;
6508                 }
6509
6510                 if (IS_I85X(dev) || IS_I865G(dev))
6511                         dev_priv->display.init_clock_gating = i85x_init_clock_gating;
6512                 else
6513                         dev_priv->display.init_clock_gating = i830_init_clock_gating;
6514         } else {
6515                 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
6516         }
6517 }
6518
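/*
 * PCU mailbox handshake, as implemented by the two helpers below: write the
 * request data to GEN6_PCODE_DATA, write the command with GEN6_PCODE_READY
 * set into GEN6_PCODE_MAILBOX, poll (up to 500 ms here) until the hardware
 * clears GEN6_PCODE_READY, then read the reply from GEN6_PCODE_DATA.
 */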
6519 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
6520 {
6521         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
6522
6523         if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
6524                 DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
6525                 return -EAGAIN;
6526         }
6527
6528         I915_WRITE(GEN6_PCODE_DATA, *val);
6529         I915_WRITE(GEN6_PCODE_DATA1, 0);
6530         I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
6531
6532         if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
6533                      500)) {
6534                 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
6535                 return -ETIMEDOUT;
6536         }
6537
6538         *val = I915_READ(GEN6_PCODE_DATA);
6539         I915_WRITE(GEN6_PCODE_DATA, 0);
6540
6541         return 0;
6542 }
6543
6544 int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val)
6545 {
6546         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
6547
6548         if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
6549                 DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
6550                 return -EAGAIN;
6551         }
6552
6553         I915_WRITE(GEN6_PCODE_DATA, val);
6554         I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
6555
6556         if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
6557                      500)) {
6558                 DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
6559                 return -ETIMEDOUT;
6560         }
6561
6562         I915_WRITE(GEN6_PCODE_DATA, 0);
6563
6564         return 0;
6565 }
6566
6567 static int vlv_gpu_freq_div(unsigned int czclk_freq)
6568 {
6569         switch (czclk_freq) {
6570         case 200:
6571                 return 10;
6572         case 267:
6573                 return 12;
6574         case 320:
6575         case 333:
6576                 return 16;
6577         case 400:
6578                 return 20;
6579         default:
6580                 return -1;
6581         }
6582 }
6583
6584 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
6585 {
6586         int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4);
6587
6588         div = vlv_gpu_freq_div(czclk_freq);
6589         if (div < 0)
6590                 return div;
6591
6592         return DIV_ROUND_CLOSEST(czclk_freq * (val + 6 - 0xbd), div);
6593 }
6594
6595 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
6596 {
6597         int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4);
6598
6599         mul = vlv_gpu_freq_div(czclk_freq);
6600         if (mul < 0)
6601                 return mul;
6602
6603         return DIV_ROUND_CLOSEST(mul * val, czclk_freq) + 0xbd - 6;
6604 }
6605
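/*
 * Worked example (illustrative numbers only, derived from the arithmetic
 * above): with czclk_freq = 320 the divider is 16, so byt_gpu_freq() maps
 * opcode 0xc8 to DIV_ROUND_CLOSEST(320 * (0xc8 + 6 - 0xbd), 16) = 340
 * (MHz, assuming czclk_freq is in MHz); byt_freq_opcode() is the inverse.
 */
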
6606 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
6607 {
6608         int div, czclk_freq = dev_priv->rps.cz_freq;
6609
6610         div = vlv_gpu_freq_div(czclk_freq);
6611         if (div < 0)    /* check before any scaling: -1 / 2 truncates to 0 */
6612                 return div;
6613
6614         return DIV_ROUND_CLOSEST(czclk_freq * val, div) / 2;
6615 }
6616
6617 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
6618 {
6619         int mul, czclk_freq = dev_priv->rps.cz_freq;
6620
6621         mul = vlv_gpu_freq_div(czclk_freq);
6622         if (mul < 0)    /* check before any scaling: -1 / 2 truncates to 0 */
6623                 return mul;
6624
6625         /* CHV needs even values */
6626         return DIV_ROUND_CLOSEST(val * mul, czclk_freq) * 2;
6627 }
6628
6629 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
6630 {
6631         if (IS_CHERRYVIEW(dev_priv->dev))
6632                 return chv_gpu_freq(dev_priv, val);
6633         else if (IS_VALLEYVIEW(dev_priv->dev))
6634                 return byt_gpu_freq(dev_priv, val);
6635         else
6636                 return val * GT_FREQUENCY_MULTIPLIER;
6637 }
6638
6639 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
6640 {
6641         if (IS_CHERRYVIEW(dev_priv->dev))
6642                 return chv_freq_opcode(dev_priv, val);
6643         else if (IS_VALLEYVIEW(dev_priv->dev))
6644                 return byt_freq_opcode(dev_priv, val);
6645         else
6646                 return val / GT_FREQUENCY_MULTIPLIER;
6647 }
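
/*
 * On platforms other than VLV/CHV the two helpers above are a plain
 * multiply/divide by GT_FREQUENCY_MULTIPLIER (defined as 50 elsewhere in
 * this driver), e.g. opcode 18 corresponds to 18 * 50 = 900 MHz.
 */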
6648
6649 void intel_pm_setup(struct drm_device *dev)
6650 {
6651         struct drm_i915_private *dev_priv = dev->dev_private;
6652
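        /*
         * DragonFly port note: lockinit() sets up the lockmgr lock backing
         * rps.hw_lock (Linux uses mutex_init() here); LK_CANRECURSE permits
         * recursive acquisition.
         */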
6653         lockinit(&dev_priv->rps.hw_lock, "i915 rps.hw_lock", 0, LK_CANRECURSE);
6654
6655         INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
6656                           intel_gen6_powersave_work);
6657
6658         dev_priv->pm.suspended = false;
6659 }