From: Alex Deucher Date: Thu, 28 Jun 2012 21:50:34 +0000 (-0400) Subject: drm/radeon: clean up CS functions in r100.c X-Git-Tag: v3.6-rc1~83^2~41 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=0242f74d29df00ea97a6377e3c66f14efbb340d3;p=~emulex%2Finfiniband.git drm/radeon: clean up CS functions in r100.c Consolidate the CS functions to one section of the file. Previously they were spread all around. Signed-off-by: Alex Deucher Reviewed-by: Jerome Glisse Signed-off-by: Dave Airlie --- diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 35825bf1e79..3fa82e1b942 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -103,112 +103,6 @@ void r100_wait_for_vblank(struct radeon_device *rdev, int crtc) * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 */ -int r100_reloc_pitch_offset(struct radeon_cs_parser *p, - struct radeon_cs_packet *pkt, - unsigned idx, - unsigned reg) -{ - int r; - u32 tile_flags = 0; - u32 tmp; - struct radeon_cs_reloc *reloc; - u32 value; - - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); - r100_cs_dump_packet(p, pkt); - return r; - } - - value = radeon_get_ib_value(p, idx); - tmp = value & 0x003fffff; - tmp += (((u32)reloc->lobj.gpu_offset) >> 10); - - if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) - tile_flags |= RADEON_DST_TILE_MACRO; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { - if (reg == RADEON_SRC_PITCH_OFFSET) { - DRM_ERROR("Cannot src blit from microtiled surface\n"); - r100_cs_dump_packet(p, pkt); - return -EINVAL; - } - tile_flags |= RADEON_DST_TILE_MICRO; - } - - tmp |= tile_flags; - p->ib.ptr[idx] = (value & 0x3fc00000) | tmp; - } else - p->ib.ptr[idx] = (value & 0xffc00000) | tmp; - return 0; -} - -int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, - struct radeon_cs_packet *pkt, - int idx) -{ - unsigned c, i; - struct radeon_cs_reloc *reloc; - struct r100_cs_track *track; - int r = 0; - volatile uint32_t *ib; - u32 idx_value; - - ib = p->ib.ptr; - track = (struct r100_cs_track *)p->track; - c = radeon_get_ib_value(p, idx++) & 0x1F; - if (c > 16) { - DRM_ERROR("Only 16 vertex buffers are allowed %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return -EINVAL; - } - track->num_arrays = c; - for (i = 0; i < (c - 1); i+=2, idx+=3) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); - - track->arrays[i + 0].esize = idx_value >> 8; - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize &= 0x7F; - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 1].robj = reloc->robj; - track->arrays[i + 1].esize = idx_value >> 24; - track->arrays[i + 1].esize &= 0x7F; - } - if (c & 1) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize = idx_value >> 8; - track->arrays[i + 0].esize &= 0x7F; - } - return r; -} - void r100_pre_page_flip(struct radeon_device *rdev, int crtc) { /* enable the pflip int */ @@ -1206,6 +1100,112 @@ void r100_cp_disable(struct radeon_device *rdev) /* * CS functions */ +int r100_reloc_pitch_offset(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx, + unsigned reg) +{ + int r; + u32 tile_flags = 0; + u32 tmp; + struct radeon_cs_reloc *reloc; + u32 value; + + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + + value = radeon_get_ib_value(p, idx); + tmp = value & 0x003fffff; + tmp += (((u32)reloc->lobj.gpu_offset) >> 10); + + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= RADEON_DST_TILE_MACRO; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { + if (reg == RADEON_SRC_PITCH_OFFSET) { + DRM_ERROR("Cannot src blit from microtiled surface\n"); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + tile_flags |= RADEON_DST_TILE_MICRO; + } + + tmp |= tile_flags; + p->ib.ptr[idx] = (value & 0x3fc00000) | tmp; + } else + p->ib.ptr[idx] = (value & 0xffc00000) | tmp; + return 0; +} + +int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + int idx) +{ + unsigned c, i; + struct radeon_cs_reloc *reloc; + struct r100_cs_track *track; + int r = 0; + volatile uint32_t *ib; + u32 idx_value; + + ib = p->ib.ptr; + track = (struct r100_cs_track *)p->track; + c = radeon_get_ib_value(p, idx++) & 0x1F; + if (c > 16) { + DRM_ERROR("Only 16 vertex buffers are allowed %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + track->num_arrays = c; + for (i = 0; i < (c - 1); i+=2, idx+=3) { + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + idx_value = radeon_get_ib_value(p, idx); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + + track->arrays[i + 0].esize = idx_value >> 8; + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize &= 0x7F; + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 1].robj = reloc->robj; + track->arrays[i + 1].esize = idx_value >> 24; + track->arrays[i + 1].esize &= 0x7F; + } + if (c & 1) { + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + idx_value = radeon_get_ib_value(p, idx); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize = idx_value >> 8; + track->arrays[i + 0].esize &= 0x7F; + } + return r; +} + int r100_cs_parse_packet0(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, const unsigned *auth, unsigned n, @@ -2031,1590 +2031,1589 @@ int r100_cs_parse(struct radeon_cs_parser *p) return 0; } - -/* - * Global GPU functions - */ -void r100_errata(struct radeon_device *rdev) +static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) { - rdev->pll_errata = 0; - - if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) { - rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS; - } - - if (rdev->family == CHIP_RV100 || - rdev->family == CHIP_RS100 || - rdev->family == CHIP_RS200) { - rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY; - } + DRM_ERROR("pitch %d\n", t->pitch); + DRM_ERROR("use_pitch %d\n", t->use_pitch); + DRM_ERROR("width %d\n", t->width); + DRM_ERROR("width_11 %d\n", t->width_11); + DRM_ERROR("height %d\n", t->height); + DRM_ERROR("height_11 %d\n", t->height_11); + DRM_ERROR("num levels %d\n", t->num_levels); + DRM_ERROR("depth %d\n", t->txdepth); + DRM_ERROR("bpp %d\n", t->cpp); + DRM_ERROR("coordinate type %d\n", t->tex_coord_type); + DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); + DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); + DRM_ERROR("compress format %d\n", t->compress_format); } -/* Wait for vertical sync on primary CRTC */ -void r100_gpu_wait_for_vsync(struct radeon_device *rdev) +static int r100_track_compress_size(int compress_format, int w, int h) { - uint32_t crtc_gen_cntl, tmp; - int i; + int block_width, block_height, block_bytes; + int wblocks, hblocks; + int min_wblocks; + int sz; - crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL); - if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) || - !(crtc_gen_cntl & RADEON_CRTC_EN)) { - return; - } - /* Clear the CRTC_VBLANK_SAVE bit */ - WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR); - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(RADEON_CRTC_STATUS); - if (tmp & RADEON_CRTC_VBLANK_SAVE) { - return; - } - DRM_UDELAY(1); + block_width = 4; + block_height = 4; + + switch (compress_format) { + case R100_TRACK_COMP_DXT1: + block_bytes = 8; + min_wblocks = 4; + break; + default: + case R100_TRACK_COMP_DXT35: + block_bytes = 16; + min_wblocks = 2; + break; } + + hblocks = (h + block_height - 1) / block_height; + wblocks = (w + block_width - 1) / block_width; + if (wblocks < min_wblocks) + wblocks = min_wblocks; + sz = wblocks * hblocks * block_bytes; + return sz; } -/* Wait for vertical sync on secondary CRTC */ -void r100_gpu_wait_for_vsync2(struct radeon_device *rdev) +static int r100_cs_track_cube(struct radeon_device *rdev, + struct r100_cs_track *track, unsigned idx) { - uint32_t crtc2_gen_cntl, tmp; - int i; + unsigned face, w, h; + struct radeon_bo *cube_robj; + unsigned long size; + unsigned compress_format = track->textures[idx].compress_format; - crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL); - if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) || - !(crtc2_gen_cntl & RADEON_CRTC2_EN)) - return; + for (face = 0; face < 5; face++) { + cube_robj = track->textures[idx].cube_info[face].robj; + w = track->textures[idx].cube_info[face].width; + h = track->textures[idx].cube_info[face].height; - /* Clear the CRTC_VBLANK_SAVE bit */ - WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR); - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(RADEON_CRTC2_STATUS); - if (tmp & RADEON_CRTC2_VBLANK_SAVE) { - return; + if (compress_format) { + size = r100_track_compress_size(compress_format, w, h); + } else + size = w * h; + size *= track->textures[idx].cpp; + + size += track->textures[idx].cube_info[face].offset; + + if (size > radeon_bo_size(cube_robj)) { + DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", + size, radeon_bo_size(cube_robj)); + r100_cs_track_texture_print(&track->textures[idx]); + return -1; } - DRM_UDELAY(1); } + return 0; } -int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n) +static int r100_cs_track_texture_check(struct radeon_device *rdev, + struct r100_cs_track *track) { - unsigned i; - uint32_t tmp; + struct radeon_bo *robj; + unsigned long size; + unsigned u, i, w, h, d; + int ret; - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK; - if (tmp >= n) { - return 0; + for (u = 0; u < track->num_texture; u++) { + if (!track->textures[u].enabled) + continue; + if (track->textures[u].lookup_disable) + continue; + robj = track->textures[u].robj; + if (robj == NULL) { + DRM_ERROR("No texture bound to unit %u\n", u); + return -EINVAL; } - DRM_UDELAY(1); - } - return -1; -} + size = 0; + for (i = 0; i <= track->textures[u].num_levels; i++) { + if (track->textures[u].use_pitch) { + if (rdev->family < CHIP_R300) + w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i); + else + w = track->textures[u].pitch / (1 << i); + } else { + w = track->textures[u].width; + if (rdev->family >= CHIP_RV515) + w |= track->textures[u].width_11; + w = w / (1 << i); + if (track->textures[u].roundup_w) + w = roundup_pow_of_two(w); + } + h = track->textures[u].height; + if (rdev->family >= CHIP_RV515) + h |= track->textures[u].height_11; + h = h / (1 << i); + if (track->textures[u].roundup_h) + h = roundup_pow_of_two(h); + if (track->textures[u].tex_coord_type == 1) { + d = (1 << track->textures[u].txdepth) / (1 << i); + if (!d) + d = 1; + } else { + d = 1; + } + if (track->textures[u].compress_format) { -int r100_gui_wait_for_idle(struct radeon_device *rdev) -{ - unsigned i; - uint32_t tmp; + size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d; + /* compressed textures are block based */ + } else + size += w * h * d; + } + size *= track->textures[u].cpp; - if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) { - printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !" - " Bad things might happen.\n"); - } - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(RADEON_RBBM_STATUS); - if (!(tmp & RADEON_RBBM_ACTIVE)) { - return 0; + switch (track->textures[u].tex_coord_type) { + case 0: + case 1: + break; + case 2: + if (track->separate_cube) { + ret = r100_cs_track_cube(rdev, track, u); + if (ret) + return ret; + } else + size *= 6; + break; + default: + DRM_ERROR("Invalid texture coordinate type %u for unit " + "%u\n", track->textures[u].tex_coord_type, u); + return -EINVAL; + } + if (size > radeon_bo_size(robj)) { + DRM_ERROR("Texture of unit %u needs %lu bytes but is " + "%lu\n", u, size, radeon_bo_size(robj)); + r100_cs_track_texture_print(&track->textures[u]); + return -EINVAL; } - DRM_UDELAY(1); } - return -1; + return 0; } -int r100_mc_wait_for_idle(struct radeon_device *rdev) +int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) { unsigned i; - uint32_t tmp; + unsigned long size; + unsigned prim_walk; + unsigned nverts; + unsigned num_cb = track->cb_dirty ? track->num_cb : 0; - for (i = 0; i < rdev->usec_timeout; i++) { - /* read MC_STATUS */ - tmp = RREG32(RADEON_MC_STATUS); - if (tmp & RADEON_MC_IDLE) { - return 0; + if (num_cb && !track->zb_cb_clear && !track->color_channel_mask && + !track->blend_read_enable) + num_cb = 0; + + for (i = 0; i < num_cb; i++) { + if (track->cb[i].robj == NULL) { + DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); + return -EINVAL; + } + size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; + size += track->cb[i].offset; + if (size > radeon_bo_size(track->cb[i].robj)) { + DRM_ERROR("[drm] Buffer too small for color buffer %d " + "(need %lu have %lu) !\n", i, size, + radeon_bo_size(track->cb[i].robj)); + DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", + i, track->cb[i].pitch, track->cb[i].cpp, + track->cb[i].offset, track->maxy); + return -EINVAL; } - DRM_UDELAY(1); } - return -1; -} - -bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) -{ - u32 rbbm_status; + track->cb_dirty = false; - rbbm_status = RREG32(R_000E40_RBBM_STATUS); - if (!G_000E40_GUI_ACTIVE(rbbm_status)) { - radeon_ring_lockup_update(ring); - return false; + if (track->zb_dirty && track->z_enabled) { + if (track->zb.robj == NULL) { + DRM_ERROR("[drm] No buffer for z buffer !\n"); + return -EINVAL; + } + size = track->zb.pitch * track->zb.cpp * track->maxy; + size += track->zb.offset; + if (size > radeon_bo_size(track->zb.robj)) { + DRM_ERROR("[drm] Buffer too small for z buffer " + "(need %lu have %lu) !\n", size, + radeon_bo_size(track->zb.robj)); + DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", + track->zb.pitch, track->zb.cpp, + track->zb.offset, track->maxy); + return -EINVAL; + } } - /* force CP activities */ - radeon_ring_force_activity(rdev, ring); - return radeon_ring_test_lockup(rdev, ring); -} + track->zb_dirty = false; -void r100_bm_disable(struct radeon_device *rdev) -{ - u32 tmp; + if (track->aa_dirty && track->aaresolve) { + if (track->aa.robj == NULL) { + DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i); + return -EINVAL; + } + /* I believe the format comes from colorbuffer0. */ + size = track->aa.pitch * track->cb[0].cpp * track->maxy; + size += track->aa.offset; + if (size > radeon_bo_size(track->aa.robj)) { + DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d " + "(need %lu have %lu) !\n", i, size, + radeon_bo_size(track->aa.robj)); + DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n", + i, track->aa.pitch, track->cb[0].cpp, + track->aa.offset, track->maxy); + return -EINVAL; + } + } + track->aa_dirty = false; - /* disable bus mastering */ - tmp = RREG32(R_000030_BUS_CNTL); - WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044); - mdelay(1); - WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042); - mdelay(1); - WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040); - tmp = RREG32(RADEON_BUS_CNTL); - mdelay(1); - pci_clear_master(rdev->pdev); - mdelay(1); -} - -int r100_asic_reset(struct radeon_device *rdev) -{ - struct r100_mc_save save; - u32 status, tmp; - int ret = 0; - - status = RREG32(R_000E40_RBBM_STATUS); - if (!G_000E40_GUI_ACTIVE(status)) { - return 0; - } - r100_mc_stop(rdev, &save); - status = RREG32(R_000E40_RBBM_STATUS); - dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); - /* stop CP */ - WREG32(RADEON_CP_CSQ_CNTL, 0); - tmp = RREG32(RADEON_CP_RB_CNTL); - WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA); - WREG32(RADEON_CP_RB_RPTR_WR, 0); - WREG32(RADEON_CP_RB_WPTR, 0); - WREG32(RADEON_CP_RB_CNTL, tmp); - /* save PCI state */ - pci_save_state(rdev->pdev); - /* disable bus mastering */ - r100_bm_disable(rdev); - WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) | - S_0000F0_SOFT_RESET_RE(1) | - S_0000F0_SOFT_RESET_PP(1) | - S_0000F0_SOFT_RESET_RB(1)); - RREG32(R_0000F0_RBBM_SOFT_RESET); - mdelay(500); - WREG32(R_0000F0_RBBM_SOFT_RESET, 0); - mdelay(1); - status = RREG32(R_000E40_RBBM_STATUS); - dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); - /* reset CP */ - WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1)); - RREG32(R_0000F0_RBBM_SOFT_RESET); - mdelay(500); - WREG32(R_0000F0_RBBM_SOFT_RESET, 0); - mdelay(1); - status = RREG32(R_000E40_RBBM_STATUS); - dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); - /* restore PCI & busmastering */ - pci_restore_state(rdev->pdev); - r100_enable_bm(rdev); - /* Check if GPU is idle */ - if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) || - G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) { - dev_err(rdev->dev, "failed to reset GPU\n"); - ret = -1; - } else - dev_info(rdev->dev, "GPU reset succeed\n"); - r100_mc_resume(rdev, &save); - return ret; -} - -void r100_set_common_regs(struct radeon_device *rdev) -{ - struct drm_device *dev = rdev->ddev; - bool force_dac2 = false; - u32 tmp; - - /* set these so they don't interfere with anything */ - WREG32(RADEON_OV0_SCALE_CNTL, 0); - WREG32(RADEON_SUBPIC_CNTL, 0); - WREG32(RADEON_VIPH_CONTROL, 0); - WREG32(RADEON_I2C_CNTL_1, 0); - WREG32(RADEON_DVI_I2C_CNTL_1, 0); - WREG32(RADEON_CAP0_TRIG_CNTL, 0); - WREG32(RADEON_CAP1_TRIG_CNTL, 0); - - /* always set up dac2 on rn50 and some rv100 as lots - * of servers seem to wire it up to a VGA port but - * don't report it in the bios connector - * table. - */ - switch (dev->pdev->device) { - /* RN50 */ - case 0x515e: - case 0x5969: - force_dac2 = true; - break; - /* RV100*/ - case 0x5159: - case 0x515a: - /* DELL triple head servers */ - if ((dev->pdev->subsystem_vendor == 0x1028 /* DELL */) && - ((dev->pdev->subsystem_device == 0x016c) || - (dev->pdev->subsystem_device == 0x016d) || - (dev->pdev->subsystem_device == 0x016e) || - (dev->pdev->subsystem_device == 0x016f) || - (dev->pdev->subsystem_device == 0x0170) || - (dev->pdev->subsystem_device == 0x017d) || - (dev->pdev->subsystem_device == 0x017e) || - (dev->pdev->subsystem_device == 0x0183) || - (dev->pdev->subsystem_device == 0x018a) || - (dev->pdev->subsystem_device == 0x019a))) - force_dac2 = true; - break; - } - - if (force_dac2) { - u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG); - u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL); - u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2); - - /* For CRT on DAC2, don't turn it on if BIOS didn't - enable it, even it's detected. - */ - - /* force it to crtc0 */ - dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL; - dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL; - disp_hw_debug |= RADEON_CRT2_DISP1_SEL; - - /* set up the TV DAC */ - tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL | - RADEON_TV_DAC_STD_MASK | - RADEON_TV_DAC_RDACPD | - RADEON_TV_DAC_GDACPD | - RADEON_TV_DAC_BDACPD | - RADEON_TV_DAC_BGADJ_MASK | - RADEON_TV_DAC_DACADJ_MASK); - tv_dac_cntl |= (RADEON_TV_DAC_NBLANK | - RADEON_TV_DAC_NHOLD | - RADEON_TV_DAC_STD_PS2 | - (0x58 << 16)); - - WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl); - WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug); - WREG32(RADEON_DAC_CNTL2, dac2_cntl); + prim_walk = (track->vap_vf_cntl >> 4) & 0x3; + if (track->vap_vf_cntl & (1 << 14)) { + nverts = track->vap_alt_nverts; + } else { + nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; } - - /* switch PM block to ACPI mode */ - tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL); - tmp &= ~RADEON_PM_MODE_SEL; - WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp); - -} - -/* - * VRAM info - */ -static void r100_vram_get_type(struct radeon_device *rdev) -{ - uint32_t tmp; - - rdev->mc.vram_is_ddr = false; - if (rdev->flags & RADEON_IS_IGP) - rdev->mc.vram_is_ddr = true; - else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR) - rdev->mc.vram_is_ddr = true; - if ((rdev->family == CHIP_RV100) || - (rdev->family == CHIP_RS100) || - (rdev->family == CHIP_RS200)) { - tmp = RREG32(RADEON_MEM_CNTL); - if (tmp & RV100_HALF_MODE) { - rdev->mc.vram_width = 32; - } else { - rdev->mc.vram_width = 64; + switch (prim_walk) { + case 1: + for (i = 0; i < track->num_arrays; i++) { + size = track->arrays[i].esize * track->max_indx * 4; + if (track->arrays[i].robj == NULL) { + DRM_ERROR("(PW %u) Vertex array %u no buffer " + "bound\n", prim_walk, i); + return -EINVAL; + } + if (size > radeon_bo_size(track->arrays[i].robj)) { + dev_err(rdev->dev, "(PW %u) Vertex array %u " + "need %lu dwords have %lu dwords\n", + prim_walk, i, size >> 2, + radeon_bo_size(track->arrays[i].robj) + >> 2); + DRM_ERROR("Max indices %u\n", track->max_indx); + return -EINVAL; + } } - if (rdev->flags & RADEON_SINGLE_CRTC) { - rdev->mc.vram_width /= 4; - rdev->mc.vram_is_ddr = true; + break; + case 2: + for (i = 0; i < track->num_arrays; i++) { + size = track->arrays[i].esize * (nverts - 1) * 4; + if (track->arrays[i].robj == NULL) { + DRM_ERROR("(PW %u) Vertex array %u no buffer " + "bound\n", prim_walk, i); + return -EINVAL; + } + if (size > radeon_bo_size(track->arrays[i].robj)) { + dev_err(rdev->dev, "(PW %u) Vertex array %u " + "need %lu dwords have %lu dwords\n", + prim_walk, i, size >> 2, + radeon_bo_size(track->arrays[i].robj) + >> 2); + return -EINVAL; + } } - } else if (rdev->family <= CHIP_RV280) { - tmp = RREG32(RADEON_MEM_CNTL); - if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) { - rdev->mc.vram_width = 128; - } else { - rdev->mc.vram_width = 64; + break; + case 3: + size = track->vtx_size * nverts; + if (size != track->immd_dwords) { + DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n", + track->immd_dwords, size); + DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", + nverts, track->vtx_size); + return -EINVAL; } - } else { - /* newer IGPs */ - rdev->mc.vram_width = 128; - } -} - -static u32 r100_get_accessible_vram(struct radeon_device *rdev) -{ - u32 aper_size; - u8 byte; - - aper_size = RREG32(RADEON_CONFIG_APER_SIZE); - - /* Set HDP_APER_CNTL only on cards that are known not to be broken, - * that is has the 2nd generation multifunction PCI interface - */ - if (rdev->family == CHIP_RV280 || - rdev->family >= CHIP_RV350) { - WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL, - ~RADEON_HDP_APER_CNTL); - DRM_INFO("Generation 2 PCI interface, using max accessible memory\n"); - return aper_size * 2; + break; + default: + DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", + prim_walk); + return -EINVAL; } - /* Older cards have all sorts of funny issues to deal with. First - * check if it's a multifunction card by reading the PCI config - * header type... Limit those to one aperture size - */ - pci_read_config_byte(rdev->pdev, 0xe, &byte); - if (byte & 0x80) { - DRM_INFO("Generation 1 PCI interface in multifunction mode\n"); - DRM_INFO("Limiting VRAM to one aperture\n"); - return aper_size; + if (track->tex_dirty) { + track->tex_dirty = false; + return r100_cs_track_texture_check(rdev, track); } - - /* Single function older card. We read HDP_APER_CNTL to see how the BIOS - * have set it up. We don't write this as it's broken on some ASICs but - * we expect the BIOS to have done the right thing (might be too optimistic...) - */ - if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL) - return aper_size * 2; - return aper_size; + return 0; } -void r100_vram_init_sizes(struct radeon_device *rdev) +void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) { - u64 config_aper_size; + unsigned i, face; - /* work out accessible VRAM */ - rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0); - rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0); - rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev); - /* FIXME we don't use the second aperture yet when we could use it */ - if (rdev->mc.visible_vram_size > rdev->mc.aper_size) - rdev->mc.visible_vram_size = rdev->mc.aper_size; - config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE); - if (rdev->flags & RADEON_IS_IGP) { - uint32_t tom; - /* read NB_TOM to get the amount of ram stolen for the GPU */ - tom = RREG32(RADEON_NB_TOM); - rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16); - WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); - rdev->mc.mc_vram_size = rdev->mc.real_vram_size; - } else { - rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE); - /* Some production boards of m6 will report 0 - * if it's 8 MB - */ - if (rdev->mc.real_vram_size == 0) { - rdev->mc.real_vram_size = 8192 * 1024; - WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); - } - /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - - * Novell bug 204882 + along with lots of ubuntu ones - */ - if (rdev->mc.aper_size > config_aper_size) - config_aper_size = rdev->mc.aper_size; + track->cb_dirty = true; + track->zb_dirty = true; + track->tex_dirty = true; + track->aa_dirty = true; - if (config_aper_size > rdev->mc.real_vram_size) - rdev->mc.mc_vram_size = config_aper_size; + if (rdev->family < CHIP_R300) { + track->num_cb = 1; + if (rdev->family <= CHIP_RS200) + track->num_texture = 3; else - rdev->mc.mc_vram_size = rdev->mc.real_vram_size; - } -} - -void r100_vga_set_state(struct radeon_device *rdev, bool state) -{ - uint32_t temp; - - temp = RREG32(RADEON_CONFIG_CNTL); - if (state == false) { - temp &= ~RADEON_CFG_VGA_RAM_EN; - temp |= RADEON_CFG_VGA_IO_DIS; + track->num_texture = 6; + track->maxy = 2048; + track->separate_cube = 1; } else { - temp &= ~RADEON_CFG_VGA_IO_DIS; + track->num_cb = 4; + track->num_texture = 16; + track->maxy = 4096; + track->separate_cube = 0; + track->aaresolve = false; + track->aa.robj = NULL; } - WREG32(RADEON_CONFIG_CNTL, temp); -} - -void r100_mc_init(struct radeon_device *rdev) -{ - u64 base; - r100_vram_get_type(rdev); - r100_vram_init_sizes(rdev); - base = rdev->mc.aper_base; - if (rdev->flags & RADEON_IS_IGP) - base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16; - radeon_vram_location(rdev, &rdev->mc, base); - rdev->mc.gtt_base_align = 0; - if (!(rdev->flags & RADEON_IS_AGP)) - radeon_gtt_location(rdev, &rdev->mc); - radeon_update_bandwidth_info(rdev); -} - - -/* - * Indirect registers accessor - */ -void r100_pll_errata_after_index(struct radeon_device *rdev) -{ - if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) { - (void)RREG32(RADEON_CLOCK_CNTL_DATA); - (void)RREG32(RADEON_CRTC_GEN_CNTL); + for (i = 0; i < track->num_cb; i++) { + track->cb[i].robj = NULL; + track->cb[i].pitch = 8192; + track->cb[i].cpp = 16; + track->cb[i].offset = 0; } -} - -static void r100_pll_errata_after_data(struct radeon_device *rdev) -{ - /* This workarounds is necessary on RV100, RS100 and RS200 chips - * or the chip could hang on a subsequent access - */ - if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) { - mdelay(5); + track->z_enabled = true; + track->zb.robj = NULL; + track->zb.pitch = 8192; + track->zb.cpp = 4; + track->zb.offset = 0; + track->vtx_size = 0x7F; + track->immd_dwords = 0xFFFFFFFFUL; + track->num_arrays = 11; + track->max_indx = 0x00FFFFFFUL; + for (i = 0; i < track->num_arrays; i++) { + track->arrays[i].robj = NULL; + track->arrays[i].esize = 0x7F; } - - /* This function is required to workaround a hardware bug in some (all?) - * revisions of the R300. This workaround should be called after every - * CLOCK_CNTL_INDEX register access. If not, register reads afterward - * may not be correct. - */ - if (rdev->pll_errata & CHIP_ERRATA_R300_CG) { - uint32_t save, tmp; - - save = RREG32(RADEON_CLOCK_CNTL_INDEX); - tmp = save & ~(0x3f | RADEON_PLL_WR_EN); - WREG32(RADEON_CLOCK_CNTL_INDEX, tmp); - tmp = RREG32(RADEON_CLOCK_CNTL_DATA); - WREG32(RADEON_CLOCK_CNTL_INDEX, save); + for (i = 0; i < track->num_texture; i++) { + track->textures[i].compress_format = R100_TRACK_COMP_NONE; + track->textures[i].pitch = 16536; + track->textures[i].width = 16536; + track->textures[i].height = 16536; + track->textures[i].width_11 = 1 << 11; + track->textures[i].height_11 = 1 << 11; + track->textures[i].num_levels = 12; + if (rdev->family <= CHIP_RS200) { + track->textures[i].tex_coord_type = 0; + track->textures[i].txdepth = 0; + } else { + track->textures[i].txdepth = 16; + track->textures[i].tex_coord_type = 1; + } + track->textures[i].cpp = 64; + track->textures[i].robj = NULL; + /* CS IB emission code makes sure texture unit are disabled */ + track->textures[i].enabled = false; + track->textures[i].lookup_disable = false; + track->textures[i].roundup_w = true; + track->textures[i].roundup_h = true; + if (track->separate_cube) + for (face = 0; face < 5; face++) { + track->textures[i].cube_info[face].robj = NULL; + track->textures[i].cube_info[face].width = 16536; + track->textures[i].cube_info[face].height = 16536; + track->textures[i].cube_info[face].offset = 0; + } } } -uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg) +/* + * Global GPU functions + */ +void r100_errata(struct radeon_device *rdev) { - uint32_t data; + rdev->pll_errata = 0; - WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f); - r100_pll_errata_after_index(rdev); - data = RREG32(RADEON_CLOCK_CNTL_DATA); - r100_pll_errata_after_data(rdev); - return data; -} + if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) { + rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS; + } -void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) -{ - WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN)); - r100_pll_errata_after_index(rdev); - WREG32(RADEON_CLOCK_CNTL_DATA, v); - r100_pll_errata_after_data(rdev); + if (rdev->family == CHIP_RV100 || + rdev->family == CHIP_RS100 || + rdev->family == CHIP_RS200) { + rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY; + } } -void r100_set_safe_registers(struct radeon_device *rdev) +/* Wait for vertical sync on primary CRTC */ +void r100_gpu_wait_for_vsync(struct radeon_device *rdev) { - if (ASIC_IS_RN50(rdev)) { - rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm; - rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm); - } else if (rdev->family < CHIP_R200) { - rdev->config.r100.reg_safe_bm = r100_reg_safe_bm; - rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm); - } else { - r200_set_safe_registers(rdev); + uint32_t crtc_gen_cntl, tmp; + int i; + + crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL); + if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) || + !(crtc_gen_cntl & RADEON_CRTC_EN)) { + return; + } + /* Clear the CRTC_VBLANK_SAVE bit */ + WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR); + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(RADEON_CRTC_STATUS); + if (tmp & RADEON_CRTC_VBLANK_SAVE) { + return; + } + DRM_UDELAY(1); } } -/* - * Debugfs info - */ -#if defined(CONFIG_DEBUG_FS) -static int r100_debugfs_rbbm_info(struct seq_file *m, void *data) +/* Wait for vertical sync on secondary CRTC */ +void r100_gpu_wait_for_vsync2(struct radeon_device *rdev) { - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; - struct radeon_device *rdev = dev->dev_private; - uint32_t reg, value; - unsigned i; + uint32_t crtc2_gen_cntl, tmp; + int i; - seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS)); - seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C)); - seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); - for (i = 0; i < 64; i++) { - WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100); - reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2; - WREG32(RADEON_RBBM_CMDFIFO_ADDR, i); - value = RREG32(RADEON_RBBM_CMDFIFO_DATA); - seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value); + crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL); + if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) || + !(crtc2_gen_cntl & RADEON_CRTC2_EN)) + return; + + /* Clear the CRTC_VBLANK_SAVE bit */ + WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR); + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(RADEON_CRTC2_STATUS); + if (tmp & RADEON_CRTC2_VBLANK_SAVE) { + return; + } + DRM_UDELAY(1); } - return 0; } -static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data) +int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n) { - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; - struct radeon_device *rdev = dev->dev_private; - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - uint32_t rdp, wdp; - unsigned count, i, j; + unsigned i; + uint32_t tmp; - radeon_ring_free_size(rdev, ring); - rdp = RREG32(RADEON_CP_RB_RPTR); - wdp = RREG32(RADEON_CP_RB_WPTR); - count = (rdp + ring->ring_size - wdp) & ring->ptr_mask; - seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); - seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp); - seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp); - seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); - seq_printf(m, "%u dwords in ring\n", count); - for (j = 0; j <= count; j++) { - i = (rdp + j) & ring->ptr_mask; - seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]); + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK; + if (tmp >= n) { + return 0; + } + DRM_UDELAY(1); } - return 0; + return -1; } - -static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data) +int r100_gui_wait_for_idle(struct radeon_device *rdev) { - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; - struct radeon_device *rdev = dev->dev_private; - uint32_t csq_stat, csq2_stat, tmp; - unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr; unsigned i; + uint32_t tmp; - seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); - seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE)); - csq_stat = RREG32(RADEON_CP_CSQ_STAT); - csq2_stat = RREG32(RADEON_CP_CSQ2_STAT); - r_rptr = (csq_stat >> 0) & 0x3ff; - r_wptr = (csq_stat >> 10) & 0x3ff; - ib1_rptr = (csq_stat >> 20) & 0x3ff; - ib1_wptr = (csq2_stat >> 0) & 0x3ff; - ib2_rptr = (csq2_stat >> 10) & 0x3ff; - ib2_wptr = (csq2_stat >> 20) & 0x3ff; - seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat); - seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat); - seq_printf(m, "Ring rptr %u\n", r_rptr); - seq_printf(m, "Ring wptr %u\n", r_wptr); - seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr); - seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr); - seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr); - seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr); - /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms - * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */ - seq_printf(m, "Ring fifo:\n"); - for (i = 0; i < 256; i++) { - WREG32(RADEON_CP_CSQ_ADDR, i << 2); - tmp = RREG32(RADEON_CP_CSQ_DATA); - seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp); - } - seq_printf(m, "Indirect1 fifo:\n"); - for (i = 256; i <= 512; i++) { - WREG32(RADEON_CP_CSQ_ADDR, i << 2); - tmp = RREG32(RADEON_CP_CSQ_DATA); - seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp); + if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) { + printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !" + " Bad things might happen.\n"); } - seq_printf(m, "Indirect2 fifo:\n"); - for (i = 640; i < ib1_wptr; i++) { - WREG32(RADEON_CP_CSQ_ADDR, i << 2); - tmp = RREG32(RADEON_CP_CSQ_DATA); - seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp); + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(RADEON_RBBM_STATUS); + if (!(tmp & RADEON_RBBM_ACTIVE)) { + return 0; + } + DRM_UDELAY(1); } - return 0; + return -1; } -static int r100_debugfs_mc_info(struct seq_file *m, void *data) +int r100_mc_wait_for_idle(struct radeon_device *rdev) { - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; - struct radeon_device *rdev = dev->dev_private; + unsigned i; uint32_t tmp; - tmp = RREG32(RADEON_CONFIG_MEMSIZE); - seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp); - tmp = RREG32(RADEON_MC_FB_LOCATION); - seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp); - tmp = RREG32(RADEON_BUS_CNTL); - seq_printf(m, "BUS_CNTL 0x%08x\n", tmp); - tmp = RREG32(RADEON_MC_AGP_LOCATION); - seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp); - tmp = RREG32(RADEON_AGP_BASE); - seq_printf(m, "AGP_BASE 0x%08x\n", tmp); - tmp = RREG32(RADEON_HOST_PATH_CNTL); - seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp); - tmp = RREG32(0x01D0); - seq_printf(m, "AIC_CTRL 0x%08x\n", tmp); - tmp = RREG32(RADEON_AIC_LO_ADDR); - seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp); - tmp = RREG32(RADEON_AIC_HI_ADDR); - seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp); - tmp = RREG32(0x01E4); - seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp); - return 0; + for (i = 0; i < rdev->usec_timeout; i++) { + /* read MC_STATUS */ + tmp = RREG32(RADEON_MC_STATUS); + if (tmp & RADEON_MC_IDLE) { + return 0; + } + DRM_UDELAY(1); + } + return -1; } -static struct drm_info_list r100_debugfs_rbbm_list[] = { - {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL}, -}; - -static struct drm_info_list r100_debugfs_cp_list[] = { - {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL}, - {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL}, -}; - -static struct drm_info_list r100_debugfs_mc_info_list[] = { - {"r100_mc_info", r100_debugfs_mc_info, 0, NULL}, -}; -#endif - -int r100_debugfs_rbbm_init(struct radeon_device *rdev) +bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) { -#if defined(CONFIG_DEBUG_FS) - return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1); -#else - return 0; -#endif -} + u32 rbbm_status; -int r100_debugfs_cp_init(struct radeon_device *rdev) -{ -#if defined(CONFIG_DEBUG_FS) - return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2); -#else - return 0; -#endif + rbbm_status = RREG32(R_000E40_RBBM_STATUS); + if (!G_000E40_GUI_ACTIVE(rbbm_status)) { + radeon_ring_lockup_update(ring); + return false; + } + /* force CP activities */ + radeon_ring_force_activity(rdev, ring); + return radeon_ring_test_lockup(rdev, ring); } -int r100_debugfs_mc_info_init(struct radeon_device *rdev) +void r100_bm_disable(struct radeon_device *rdev) { -#if defined(CONFIG_DEBUG_FS) - return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1); -#else - return 0; -#endif + u32 tmp; + + /* disable bus mastering */ + tmp = RREG32(R_000030_BUS_CNTL); + WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044); + mdelay(1); + WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042); + mdelay(1); + WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040); + tmp = RREG32(RADEON_BUS_CNTL); + mdelay(1); + pci_clear_master(rdev->pdev); + mdelay(1); } -int r100_set_surface_reg(struct radeon_device *rdev, int reg, - uint32_t tiling_flags, uint32_t pitch, - uint32_t offset, uint32_t obj_size) +int r100_asic_reset(struct radeon_device *rdev) { - int surf_index = reg * 16; - int flags = 0; + struct r100_mc_save save; + u32 status, tmp; + int ret = 0; - if (rdev->family <= CHIP_RS200) { - if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) - == (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) - flags |= RADEON_SURF_TILE_COLOR_BOTH; - if (tiling_flags & RADEON_TILING_MACRO) - flags |= RADEON_SURF_TILE_COLOR_MACRO; - } else if (rdev->family <= CHIP_RV280) { - if (tiling_flags & (RADEON_TILING_MACRO)) - flags |= R200_SURF_TILE_COLOR_MACRO; - if (tiling_flags & RADEON_TILING_MICRO) - flags |= R200_SURF_TILE_COLOR_MICRO; - } else { - if (tiling_flags & RADEON_TILING_MACRO) - flags |= R300_SURF_TILE_MACRO; - if (tiling_flags & RADEON_TILING_MICRO) - flags |= R300_SURF_TILE_MICRO; + status = RREG32(R_000E40_RBBM_STATUS); + if (!G_000E40_GUI_ACTIVE(status)) { + return 0; } + r100_mc_stop(rdev, &save); + status = RREG32(R_000E40_RBBM_STATUS); + dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); + /* stop CP */ + WREG32(RADEON_CP_CSQ_CNTL, 0); + tmp = RREG32(RADEON_CP_RB_CNTL); + WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA); + WREG32(RADEON_CP_RB_RPTR_WR, 0); + WREG32(RADEON_CP_RB_WPTR, 0); + WREG32(RADEON_CP_RB_CNTL, tmp); + /* save PCI state */ + pci_save_state(rdev->pdev); + /* disable bus mastering */ + r100_bm_disable(rdev); + WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) | + S_0000F0_SOFT_RESET_RE(1) | + S_0000F0_SOFT_RESET_PP(1) | + S_0000F0_SOFT_RESET_RB(1)); + RREG32(R_0000F0_RBBM_SOFT_RESET); + mdelay(500); + WREG32(R_0000F0_RBBM_SOFT_RESET, 0); + mdelay(1); + status = RREG32(R_000E40_RBBM_STATUS); + dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); + /* reset CP */ + WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1)); + RREG32(R_0000F0_RBBM_SOFT_RESET); + mdelay(500); + WREG32(R_0000F0_RBBM_SOFT_RESET, 0); + mdelay(1); + status = RREG32(R_000E40_RBBM_STATUS); + dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); + /* restore PCI & busmastering */ + pci_restore_state(rdev->pdev); + r100_enable_bm(rdev); + /* Check if GPU is idle */ + if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) || + G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) { + dev_err(rdev->dev, "failed to reset GPU\n"); + ret = -1; + } else + dev_info(rdev->dev, "GPU reset succeed\n"); + r100_mc_resume(rdev, &save); + return ret; +} - if (tiling_flags & RADEON_TILING_SWAP_16BIT) - flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP; - if (tiling_flags & RADEON_TILING_SWAP_32BIT) - flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP; - - /* when we aren't tiling the pitch seems to needs to be furtherdivided down. - tested on power5 + rn50 server */ - if (tiling_flags & (RADEON_TILING_SWAP_16BIT | RADEON_TILING_SWAP_32BIT)) { - if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) - if (ASIC_IS_RN50(rdev)) - pitch /= 16; - } +void r100_set_common_regs(struct radeon_device *rdev) +{ + struct drm_device *dev = rdev->ddev; + bool force_dac2 = false; + u32 tmp; - /* r100/r200 divide by 16 */ - if (rdev->family < CHIP_R300) - flags |= pitch / 16; - else - flags |= pitch / 8; + /* set these so they don't interfere with anything */ + WREG32(RADEON_OV0_SCALE_CNTL, 0); + WREG32(RADEON_SUBPIC_CNTL, 0); + WREG32(RADEON_VIPH_CONTROL, 0); + WREG32(RADEON_I2C_CNTL_1, 0); + WREG32(RADEON_DVI_I2C_CNTL_1, 0); + WREG32(RADEON_CAP0_TRIG_CNTL, 0); + WREG32(RADEON_CAP1_TRIG_CNTL, 0); + /* always set up dac2 on rn50 and some rv100 as lots + * of servers seem to wire it up to a VGA port but + * don't report it in the bios connector + * table. + */ + switch (dev->pdev->device) { + /* RN50 */ + case 0x515e: + case 0x5969: + force_dac2 = true; + break; + /* RV100*/ + case 0x5159: + case 0x515a: + /* DELL triple head servers */ + if ((dev->pdev->subsystem_vendor == 0x1028 /* DELL */) && + ((dev->pdev->subsystem_device == 0x016c) || + (dev->pdev->subsystem_device == 0x016d) || + (dev->pdev->subsystem_device == 0x016e) || + (dev->pdev->subsystem_device == 0x016f) || + (dev->pdev->subsystem_device == 0x0170) || + (dev->pdev->subsystem_device == 0x017d) || + (dev->pdev->subsystem_device == 0x017e) || + (dev->pdev->subsystem_device == 0x0183) || + (dev->pdev->subsystem_device == 0x018a) || + (dev->pdev->subsystem_device == 0x019a))) + force_dac2 = true; + break; + } - DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1); - WREG32(RADEON_SURFACE0_INFO + surf_index, flags); - WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset); - WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1); - return 0; -} + if (force_dac2) { + u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG); + u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL); + u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2); -void r100_clear_surface_reg(struct radeon_device *rdev, int reg) -{ - int surf_index = reg * 16; - WREG32(RADEON_SURFACE0_INFO + surf_index, 0); -} + /* For CRT on DAC2, don't turn it on if BIOS didn't + enable it, even it's detected. + */ -void r100_bandwidth_update(struct radeon_device *rdev) -{ - fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff; - fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff; - fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff; - uint32_t temp, data, mem_trcd, mem_trp, mem_tras; - fixed20_12 memtcas_ff[8] = { - dfixed_init(1), - dfixed_init(2), - dfixed_init(3), - dfixed_init(0), - dfixed_init_half(1), - dfixed_init_half(2), - dfixed_init(0), - }; - fixed20_12 memtcas_rs480_ff[8] = { - dfixed_init(0), - dfixed_init(1), - dfixed_init(2), - dfixed_init(3), - dfixed_init(0), - dfixed_init_half(1), - dfixed_init_half(2), - dfixed_init_half(3), - }; - fixed20_12 memtcas2_ff[8] = { - dfixed_init(0), - dfixed_init(1), - dfixed_init(2), - dfixed_init(3), - dfixed_init(4), - dfixed_init(5), - dfixed_init(6), - dfixed_init(7), - }; - fixed20_12 memtrbs[8] = { - dfixed_init(1), - dfixed_init_half(1), - dfixed_init(2), - dfixed_init_half(2), - dfixed_init(3), - dfixed_init_half(3), - dfixed_init(4), - dfixed_init_half(4) - }; - fixed20_12 memtrbs_r4xx[8] = { - dfixed_init(4), - dfixed_init(5), - dfixed_init(6), - dfixed_init(7), - dfixed_init(8), - dfixed_init(9), - dfixed_init(10), - dfixed_init(11) - }; - fixed20_12 min_mem_eff; - fixed20_12 mc_latency_sclk, mc_latency_mclk, k1; - fixed20_12 cur_latency_mclk, cur_latency_sclk; - fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate, - disp_drain_rate2, read_return_rate; - fixed20_12 time_disp1_drop_priority; - int c; - int cur_size = 16; /* in octawords */ - int critical_point = 0, critical_point2; -/* uint32_t read_return_rate, time_disp1_drop_priority; */ - int stop_req, max_stop_req; - struct drm_display_mode *mode1 = NULL; - struct drm_display_mode *mode2 = NULL; - uint32_t pixel_bytes1 = 0; - uint32_t pixel_bytes2 = 0; + /* force it to crtc0 */ + dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL; + dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL; + disp_hw_debug |= RADEON_CRT2_DISP1_SEL; - radeon_update_display_priority(rdev); + /* set up the TV DAC */ + tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL | + RADEON_TV_DAC_STD_MASK | + RADEON_TV_DAC_RDACPD | + RADEON_TV_DAC_GDACPD | + RADEON_TV_DAC_BDACPD | + RADEON_TV_DAC_BGADJ_MASK | + RADEON_TV_DAC_DACADJ_MASK); + tv_dac_cntl |= (RADEON_TV_DAC_NBLANK | + RADEON_TV_DAC_NHOLD | + RADEON_TV_DAC_STD_PS2 | + (0x58 << 16)); - if (rdev->mode_info.crtcs[0]->base.enabled) { - mode1 = &rdev->mode_info.crtcs[0]->base.mode; - pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8; + WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl); + WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug); + WREG32(RADEON_DAC_CNTL2, dac2_cntl); } - if (!(rdev->flags & RADEON_SINGLE_CRTC)) { - if (rdev->mode_info.crtcs[1]->base.enabled) { - mode2 = &rdev->mode_info.crtcs[1]->base.mode; - pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8; + + /* switch PM block to ACPI mode */ + tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL); + tmp &= ~RADEON_PM_MODE_SEL; + WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp); + +} + +/* + * VRAM info + */ +static void r100_vram_get_type(struct radeon_device *rdev) +{ + uint32_t tmp; + + rdev->mc.vram_is_ddr = false; + if (rdev->flags & RADEON_IS_IGP) + rdev->mc.vram_is_ddr = true; + else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR) + rdev->mc.vram_is_ddr = true; + if ((rdev->family == CHIP_RV100) || + (rdev->family == CHIP_RS100) || + (rdev->family == CHIP_RS200)) { + tmp = RREG32(RADEON_MEM_CNTL); + if (tmp & RV100_HALF_MODE) { + rdev->mc.vram_width = 32; + } else { + rdev->mc.vram_width = 64; + } + if (rdev->flags & RADEON_SINGLE_CRTC) { + rdev->mc.vram_width /= 4; + rdev->mc.vram_is_ddr = true; } + } else if (rdev->family <= CHIP_RV280) { + tmp = RREG32(RADEON_MEM_CNTL); + if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) { + rdev->mc.vram_width = 128; + } else { + rdev->mc.vram_width = 64; + } + } else { + /* newer IGPs */ + rdev->mc.vram_width = 128; } +} - min_mem_eff.full = dfixed_const_8(0); - /* get modes */ - if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) { - uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER); - mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT); - mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT); - /* check crtc enables */ - if (mode2) - mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT); - if (mode1) - mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT); - WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer); +static u32 r100_get_accessible_vram(struct radeon_device *rdev) +{ + u32 aper_size; + u8 byte; + + aper_size = RREG32(RADEON_CONFIG_APER_SIZE); + + /* Set HDP_APER_CNTL only on cards that are known not to be broken, + * that is has the 2nd generation multifunction PCI interface + */ + if (rdev->family == CHIP_RV280 || + rdev->family >= CHIP_RV350) { + WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL, + ~RADEON_HDP_APER_CNTL); + DRM_INFO("Generation 2 PCI interface, using max accessible memory\n"); + return aper_size * 2; } - /* - * determine is there is enough bw for current mode + /* Older cards have all sorts of funny issues to deal with. First + * check if it's a multifunction card by reading the PCI config + * header type... Limit those to one aperture size */ - sclk_ff = rdev->pm.sclk; - mclk_ff = rdev->pm.mclk; + pci_read_config_byte(rdev->pdev, 0xe, &byte); + if (byte & 0x80) { + DRM_INFO("Generation 1 PCI interface in multifunction mode\n"); + DRM_INFO("Limiting VRAM to one aperture\n"); + return aper_size; + } - temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1); - temp_ff.full = dfixed_const(temp); - mem_bw.full = dfixed_mul(mclk_ff, temp_ff); + /* Single function older card. We read HDP_APER_CNTL to see how the BIOS + * have set it up. We don't write this as it's broken on some ASICs but + * we expect the BIOS to have done the right thing (might be too optimistic...) + */ + if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL) + return aper_size * 2; + return aper_size; +} - pix_clk.full = 0; - pix_clk2.full = 0; - peak_disp_bw.full = 0; - if (mode1) { - temp_ff.full = dfixed_const(1000); - pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */ - pix_clk.full = dfixed_div(pix_clk, temp_ff); - temp_ff.full = dfixed_const(pixel_bytes1); - peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff); - } - if (mode2) { - temp_ff.full = dfixed_const(1000); - pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */ - pix_clk2.full = dfixed_div(pix_clk2, temp_ff); - temp_ff.full = dfixed_const(pixel_bytes2); - peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff); - } +void r100_vram_init_sizes(struct radeon_device *rdev) +{ + u64 config_aper_size; - mem_bw.full = dfixed_mul(mem_bw, min_mem_eff); - if (peak_disp_bw.full >= mem_bw.full) { - DRM_ERROR("You may not have enough display bandwidth for current mode\n" - "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n"); + /* work out accessible VRAM */ + rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0); + rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0); + rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev); + /* FIXME we don't use the second aperture yet when we could use it */ + if (rdev->mc.visible_vram_size > rdev->mc.aper_size) + rdev->mc.visible_vram_size = rdev->mc.aper_size; + config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE); + if (rdev->flags & RADEON_IS_IGP) { + uint32_t tom; + /* read NB_TOM to get the amount of ram stolen for the GPU */ + tom = RREG32(RADEON_NB_TOM); + rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16); + WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); + rdev->mc.mc_vram_size = rdev->mc.real_vram_size; + } else { + rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE); + /* Some production boards of m6 will report 0 + * if it's 8 MB + */ + if (rdev->mc.real_vram_size == 0) { + rdev->mc.real_vram_size = 8192 * 1024; + WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); + } + /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - + * Novell bug 204882 + along with lots of ubuntu ones + */ + if (rdev->mc.aper_size > config_aper_size) + config_aper_size = rdev->mc.aper_size; + + if (config_aper_size > rdev->mc.real_vram_size) + rdev->mc.mc_vram_size = config_aper_size; + else + rdev->mc.mc_vram_size = rdev->mc.real_vram_size; } +} - /* Get values from the EXT_MEM_CNTL register...converting its contents. */ - temp = RREG32(RADEON_MEM_TIMING_CNTL); - if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */ - mem_trcd = ((temp >> 2) & 0x3) + 1; - mem_trp = ((temp & 0x3)) + 1; - mem_tras = ((temp & 0x70) >> 4) + 1; - } else if (rdev->family == CHIP_R300 || - rdev->family == CHIP_R350) { /* r300, r350 */ - mem_trcd = (temp & 0x7) + 1; - mem_trp = ((temp >> 8) & 0x7) + 1; - mem_tras = ((temp >> 11) & 0xf) + 4; - } else if (rdev->family == CHIP_RV350 || - rdev->family <= CHIP_RV380) { - /* rv3x0 */ - mem_trcd = (temp & 0x7) + 3; - mem_trp = ((temp >> 8) & 0x7) + 3; - mem_tras = ((temp >> 11) & 0xf) + 6; - } else if (rdev->family == CHIP_R420 || - rdev->family == CHIP_R423 || - rdev->family == CHIP_RV410) { - /* r4xx */ - mem_trcd = (temp & 0xf) + 3; - if (mem_trcd > 15) - mem_trcd = 15; - mem_trp = ((temp >> 8) & 0xf) + 3; - if (mem_trp > 15) - mem_trp = 15; - mem_tras = ((temp >> 12) & 0x1f) + 6; - if (mem_tras > 31) - mem_tras = 31; - } else { /* RV200, R200 */ - mem_trcd = (temp & 0x7) + 1; - mem_trp = ((temp >> 8) & 0x7) + 1; - mem_tras = ((temp >> 12) & 0xf) + 4; +void r100_vga_set_state(struct radeon_device *rdev, bool state) +{ + uint32_t temp; + + temp = RREG32(RADEON_CONFIG_CNTL); + if (state == false) { + temp &= ~RADEON_CFG_VGA_RAM_EN; + temp |= RADEON_CFG_VGA_IO_DIS; + } else { + temp &= ~RADEON_CFG_VGA_IO_DIS; } - /* convert to FF */ - trcd_ff.full = dfixed_const(mem_trcd); - trp_ff.full = dfixed_const(mem_trp); - tras_ff.full = dfixed_const(mem_tras); + WREG32(RADEON_CONFIG_CNTL, temp); +} - /* Get values from the MEM_SDRAM_MODE_REG register...converting its */ - temp = RREG32(RADEON_MEM_SDRAM_MODE_REG); - data = (temp & (7 << 20)) >> 20; - if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) { - if (rdev->family == CHIP_RS480) /* don't think rs400 */ - tcas_ff = memtcas_rs480_ff[data]; - else - tcas_ff = memtcas_ff[data]; - } else - tcas_ff = memtcas2_ff[data]; +void r100_mc_init(struct radeon_device *rdev) +{ + u64 base; - if (rdev->family == CHIP_RS400 || - rdev->family == CHIP_RS480) { - /* extra cas latency stored in bits 23-25 0-4 clocks */ - data = (temp >> 23) & 0x7; - if (data < 5) - tcas_ff.full += dfixed_const(data); + r100_vram_get_type(rdev); + r100_vram_init_sizes(rdev); + base = rdev->mc.aper_base; + if (rdev->flags & RADEON_IS_IGP) + base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16; + radeon_vram_location(rdev, &rdev->mc, base); + rdev->mc.gtt_base_align = 0; + if (!(rdev->flags & RADEON_IS_AGP)) + radeon_gtt_location(rdev, &rdev->mc); + radeon_update_bandwidth_info(rdev); +} + + +/* + * Indirect registers accessor + */ +void r100_pll_errata_after_index(struct radeon_device *rdev) +{ + if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) { + (void)RREG32(RADEON_CLOCK_CNTL_DATA); + (void)RREG32(RADEON_CRTC_GEN_CNTL); } +} - if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) { - /* on the R300, Tcas is included in Trbs. - */ - temp = RREG32(RADEON_MEM_CNTL); - data = (R300_MEM_NUM_CHANNELS_MASK & temp); - if (data == 1) { - if (R300_MEM_USE_CD_CH_ONLY & temp) { - temp = RREG32(R300_MC_IND_INDEX); - temp &= ~R300_MC_IND_ADDR_MASK; - temp |= R300_MC_READ_CNTL_CD_mcind; - WREG32(R300_MC_IND_INDEX, temp); - temp = RREG32(R300_MC_IND_DATA); - data = (R300_MEM_RBS_POSITION_C_MASK & temp); - } else { - temp = RREG32(R300_MC_READ_CNTL_AB); - data = (R300_MEM_RBS_POSITION_A_MASK & temp); - } - } else { - temp = RREG32(R300_MC_READ_CNTL_AB); - data = (R300_MEM_RBS_POSITION_A_MASK & temp); - } - if (rdev->family == CHIP_RV410 || - rdev->family == CHIP_R420 || - rdev->family == CHIP_R423) - trbs_ff = memtrbs_r4xx[data]; - else - trbs_ff = memtrbs[data]; - tcas_ff.full += trbs_ff.full; +static void r100_pll_errata_after_data(struct radeon_device *rdev) +{ + /* This workarounds is necessary on RV100, RS100 and RS200 chips + * or the chip could hang on a subsequent access + */ + if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) { + mdelay(5); } - sclk_eff_ff.full = sclk_ff.full; + /* This function is required to workaround a hardware bug in some (all?) + * revisions of the R300. This workaround should be called after every + * CLOCK_CNTL_INDEX register access. If not, register reads afterward + * may not be correct. + */ + if (rdev->pll_errata & CHIP_ERRATA_R300_CG) { + uint32_t save, tmp; - if (rdev->flags & RADEON_IS_AGP) { - fixed20_12 agpmode_ff; - agpmode_ff.full = dfixed_const(radeon_agpmode); - temp_ff.full = dfixed_const_666(16); - sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff); + save = RREG32(RADEON_CLOCK_CNTL_INDEX); + tmp = save & ~(0x3f | RADEON_PLL_WR_EN); + WREG32(RADEON_CLOCK_CNTL_INDEX, tmp); + tmp = RREG32(RADEON_CLOCK_CNTL_DATA); + WREG32(RADEON_CLOCK_CNTL_INDEX, save); } - /* TODO PCIE lanes may affect this - agpmode == 16?? */ +} - if (ASIC_IS_R300(rdev)) { - sclk_delay_ff.full = dfixed_const(250); - } else { - if ((rdev->family == CHIP_RV100) || - rdev->flags & RADEON_IS_IGP) { - if (rdev->mc.vram_is_ddr) - sclk_delay_ff.full = dfixed_const(41); - else - sclk_delay_ff.full = dfixed_const(33); - } else { - if (rdev->mc.vram_width == 128) - sclk_delay_ff.full = dfixed_const(57); - else - sclk_delay_ff.full = dfixed_const(41); - } - } +uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg) +{ + uint32_t data; - mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff); + WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f); + r100_pll_errata_after_index(rdev); + data = RREG32(RADEON_CLOCK_CNTL_DATA); + r100_pll_errata_after_data(rdev); + return data; +} - if (rdev->mc.vram_is_ddr) { - if (rdev->mc.vram_width == 32) { - k1.full = dfixed_const(40); - c = 3; - } else { - k1.full = dfixed_const(20); - c = 1; - } +void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) +{ + WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN)); + r100_pll_errata_after_index(rdev); + WREG32(RADEON_CLOCK_CNTL_DATA, v); + r100_pll_errata_after_data(rdev); +} + +void r100_set_safe_registers(struct radeon_device *rdev) +{ + if (ASIC_IS_RN50(rdev)) { + rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm; + rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm); + } else if (rdev->family < CHIP_R200) { + rdev->config.r100.reg_safe_bm = r100_reg_safe_bm; + rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm); } else { - k1.full = dfixed_const(40); - c = 3; + r200_set_safe_registers(rdev); } +} - temp_ff.full = dfixed_const(2); - mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff); - temp_ff.full = dfixed_const(c); - mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff); - temp_ff.full = dfixed_const(4); - mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff); - mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff); - mc_latency_mclk.full += k1.full; +/* + * Debugfs info + */ +#if defined(CONFIG_DEBUG_FS) +static int r100_debugfs_rbbm_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct radeon_device *rdev = dev->dev_private; + uint32_t reg, value; + unsigned i; - mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff); - mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff); + seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS)); + seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C)); + seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); + for (i = 0; i < 64; i++) { + WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100); + reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2; + WREG32(RADEON_RBBM_CMDFIFO_ADDR, i); + value = RREG32(RADEON_RBBM_CMDFIFO_DATA); + seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value); + } + return 0; +} - /* - HW cursor time assuming worst case of full size colour cursor. - */ - temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1)))); - temp_ff.full += trcd_ff.full; - if (temp_ff.full < tras_ff.full) - temp_ff.full = tras_ff.full; - cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff); +static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct radeon_device *rdev = dev->dev_private; + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + uint32_t rdp, wdp; + unsigned count, i, j; - temp_ff.full = dfixed_const(cur_size); - cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff); - /* - Find the total latency for the display data. - */ - disp_latency_overhead.full = dfixed_const(8); - disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff); - mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full; - mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full; + radeon_ring_free_size(rdev, ring); + rdp = RREG32(RADEON_CP_RB_RPTR); + wdp = RREG32(RADEON_CP_RB_WPTR); + count = (rdp + ring->ring_size - wdp) & ring->ptr_mask; + seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); + seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp); + seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp); + seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); + seq_printf(m, "%u dwords in ring\n", count); + for (j = 0; j <= count; j++) { + i = (rdp + j) & ring->ptr_mask; + seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]); + } + return 0; +} - if (mc_latency_mclk.full > mc_latency_sclk.full) - disp_latency.full = mc_latency_mclk.full; - else - disp_latency.full = mc_latency_sclk.full; - /* setup Max GRPH_STOP_REQ default value */ - if (ASIC_IS_RV100(rdev)) - max_stop_req = 0x5c; - else - max_stop_req = 0x7c; +static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct radeon_device *rdev = dev->dev_private; + uint32_t csq_stat, csq2_stat, tmp; + unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr; + unsigned i; - if (mode1) { - /* CRTC1 - Set GRPH_BUFFER_CNTL register using h/w defined optimal values. - GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ] - */ - stop_req = mode1->hdisplay * pixel_bytes1 / 16; + seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); + seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE)); + csq_stat = RREG32(RADEON_CP_CSQ_STAT); + csq2_stat = RREG32(RADEON_CP_CSQ2_STAT); + r_rptr = (csq_stat >> 0) & 0x3ff; + r_wptr = (csq_stat >> 10) & 0x3ff; + ib1_rptr = (csq_stat >> 20) & 0x3ff; + ib1_wptr = (csq2_stat >> 0) & 0x3ff; + ib2_rptr = (csq2_stat >> 10) & 0x3ff; + ib2_wptr = (csq2_stat >> 20) & 0x3ff; + seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat); + seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat); + seq_printf(m, "Ring rptr %u\n", r_rptr); + seq_printf(m, "Ring wptr %u\n", r_wptr); + seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr); + seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr); + seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr); + seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr); + /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms + * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */ + seq_printf(m, "Ring fifo:\n"); + for (i = 0; i < 256; i++) { + WREG32(RADEON_CP_CSQ_ADDR, i << 2); + tmp = RREG32(RADEON_CP_CSQ_DATA); + seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp); + } + seq_printf(m, "Indirect1 fifo:\n"); + for (i = 256; i <= 512; i++) { + WREG32(RADEON_CP_CSQ_ADDR, i << 2); + tmp = RREG32(RADEON_CP_CSQ_DATA); + seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp); + } + seq_printf(m, "Indirect2 fifo:\n"); + for (i = 640; i < ib1_wptr; i++) { + WREG32(RADEON_CP_CSQ_ADDR, i << 2); + tmp = RREG32(RADEON_CP_CSQ_DATA); + seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp); + } + return 0; +} + +static int r100_debugfs_mc_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct radeon_device *rdev = dev->dev_private; + uint32_t tmp; - if (stop_req > max_stop_req) - stop_req = max_stop_req; + tmp = RREG32(RADEON_CONFIG_MEMSIZE); + seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp); + tmp = RREG32(RADEON_MC_FB_LOCATION); + seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp); + tmp = RREG32(RADEON_BUS_CNTL); + seq_printf(m, "BUS_CNTL 0x%08x\n", tmp); + tmp = RREG32(RADEON_MC_AGP_LOCATION); + seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp); + tmp = RREG32(RADEON_AGP_BASE); + seq_printf(m, "AGP_BASE 0x%08x\n", tmp); + tmp = RREG32(RADEON_HOST_PATH_CNTL); + seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp); + tmp = RREG32(0x01D0); + seq_printf(m, "AIC_CTRL 0x%08x\n", tmp); + tmp = RREG32(RADEON_AIC_LO_ADDR); + seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp); + tmp = RREG32(RADEON_AIC_HI_ADDR); + seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp); + tmp = RREG32(0x01E4); + seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp); + return 0; +} - /* - Find the drain rate of the display buffer. - */ - temp_ff.full = dfixed_const((16/pixel_bytes1)); - disp_drain_rate.full = dfixed_div(pix_clk, temp_ff); +static struct drm_info_list r100_debugfs_rbbm_list[] = { + {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL}, +}; - /* - Find the critical point of the display buffer. - */ - crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency); - crit_point_ff.full += dfixed_const_half(0); +static struct drm_info_list r100_debugfs_cp_list[] = { + {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL}, + {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL}, +}; - critical_point = dfixed_trunc(crit_point_ff); +static struct drm_info_list r100_debugfs_mc_info_list[] = { + {"r100_mc_info", r100_debugfs_mc_info, 0, NULL}, +}; +#endif - if (rdev->disp_priority == 2) { - critical_point = 0; - } +int r100_debugfs_rbbm_init(struct radeon_device *rdev) +{ +#if defined(CONFIG_DEBUG_FS) + return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1); +#else + return 0; +#endif +} - /* - The critical point should never be above max_stop_req-4. Setting - GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time. - */ - if (max_stop_req - critical_point < 4) - critical_point = 0; +int r100_debugfs_cp_init(struct radeon_device *rdev) +{ +#if defined(CONFIG_DEBUG_FS) + return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2); +#else + return 0; +#endif +} - if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) { - /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/ - critical_point = 0x10; - } +int r100_debugfs_mc_info_init(struct radeon_device *rdev) +{ +#if defined(CONFIG_DEBUG_FS) + return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1); +#else + return 0; +#endif +} - temp = RREG32(RADEON_GRPH_BUFFER_CNTL); - temp &= ~(RADEON_GRPH_STOP_REQ_MASK); - temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); - temp &= ~(RADEON_GRPH_START_REQ_MASK); - if ((rdev->family == CHIP_R350) && - (stop_req > 0x15)) { - stop_req -= 0x10; - } - temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); - temp |= RADEON_GRPH_BUFFER_SIZE; - temp &= ~(RADEON_GRPH_CRITICAL_CNTL | - RADEON_GRPH_CRITICAL_AT_SOF | - RADEON_GRPH_STOP_CNTL); - /* - Write the result into the register. - */ - WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) | - (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT))); +int r100_set_surface_reg(struct radeon_device *rdev, int reg, + uint32_t tiling_flags, uint32_t pitch, + uint32_t offset, uint32_t obj_size) +{ + int surf_index = reg * 16; + int flags = 0; -#if 0 - if ((rdev->family == CHIP_RS400) || - (rdev->family == CHIP_RS480)) { - /* attempt to program RS400 disp regs correctly ??? */ - temp = RREG32(RS400_DISP1_REG_CNTL); - temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK | - RS400_DISP1_STOP_REQ_LEVEL_MASK); - WREG32(RS400_DISP1_REQ_CNTL1, (temp | - (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) | - (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); - temp = RREG32(RS400_DMIF_MEM_CNTL1); - temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK | - RS400_DISP1_CRITICAL_POINT_STOP_MASK); - WREG32(RS400_DMIF_MEM_CNTL1, (temp | - (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) | - (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT))); - } -#endif + if (rdev->family <= CHIP_RS200) { + if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) + == (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) + flags |= RADEON_SURF_TILE_COLOR_BOTH; + if (tiling_flags & RADEON_TILING_MACRO) + flags |= RADEON_SURF_TILE_COLOR_MACRO; + } else if (rdev->family <= CHIP_RV280) { + if (tiling_flags & (RADEON_TILING_MACRO)) + flags |= R200_SURF_TILE_COLOR_MACRO; + if (tiling_flags & RADEON_TILING_MICRO) + flags |= R200_SURF_TILE_COLOR_MICRO; + } else { + if (tiling_flags & RADEON_TILING_MACRO) + flags |= R300_SURF_TILE_MACRO; + if (tiling_flags & RADEON_TILING_MICRO) + flags |= R300_SURF_TILE_MICRO; + } - DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n", - /* (unsigned int)info->SavedReg->grph_buffer_cntl, */ - (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL)); + if (tiling_flags & RADEON_TILING_SWAP_16BIT) + flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP; + if (tiling_flags & RADEON_TILING_SWAP_32BIT) + flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP; + + /* when we aren't tiling the pitch seems to needs to be furtherdivided down. - tested on power5 + rn50 server */ + if (tiling_flags & (RADEON_TILING_SWAP_16BIT | RADEON_TILING_SWAP_32BIT)) { + if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) + if (ASIC_IS_RN50(rdev)) + pitch /= 16; } - if (mode2) { - u32 grph2_cntl; - stop_req = mode2->hdisplay * pixel_bytes2 / 16; + /* r100/r200 divide by 16 */ + if (rdev->family < CHIP_R300) + flags |= pitch / 16; + else + flags |= pitch / 8; - if (stop_req > max_stop_req) - stop_req = max_stop_req; - /* - Find the drain rate of the display buffer. - */ - temp_ff.full = dfixed_const((16/pixel_bytes2)); - disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff); + DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1); + WREG32(RADEON_SURFACE0_INFO + surf_index, flags); + WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset); + WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1); + return 0; +} - grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL); - grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK); - grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); - grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK); - if ((rdev->family == CHIP_R350) && - (stop_req > 0x15)) { - stop_req -= 0x10; - } - grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); - grph2_cntl |= RADEON_GRPH_BUFFER_SIZE; - grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL | - RADEON_GRPH_CRITICAL_AT_SOF | - RADEON_GRPH_STOP_CNTL); +void r100_clear_surface_reg(struct radeon_device *rdev, int reg) +{ + int surf_index = reg * 16; + WREG32(RADEON_SURFACE0_INFO + surf_index, 0); +} - if ((rdev->family == CHIP_RS100) || - (rdev->family == CHIP_RS200)) - critical_point2 = 0; - else { - temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128; - temp_ff.full = dfixed_const(temp); - temp_ff.full = dfixed_mul(mclk_ff, temp_ff); - if (sclk_ff.full < temp_ff.full) - temp_ff.full = sclk_ff.full; +void r100_bandwidth_update(struct radeon_device *rdev) +{ + fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff; + fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff; + fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff; + uint32_t temp, data, mem_trcd, mem_trp, mem_tras; + fixed20_12 memtcas_ff[8] = { + dfixed_init(1), + dfixed_init(2), + dfixed_init(3), + dfixed_init(0), + dfixed_init_half(1), + dfixed_init_half(2), + dfixed_init(0), + }; + fixed20_12 memtcas_rs480_ff[8] = { + dfixed_init(0), + dfixed_init(1), + dfixed_init(2), + dfixed_init(3), + dfixed_init(0), + dfixed_init_half(1), + dfixed_init_half(2), + dfixed_init_half(3), + }; + fixed20_12 memtcas2_ff[8] = { + dfixed_init(0), + dfixed_init(1), + dfixed_init(2), + dfixed_init(3), + dfixed_init(4), + dfixed_init(5), + dfixed_init(6), + dfixed_init(7), + }; + fixed20_12 memtrbs[8] = { + dfixed_init(1), + dfixed_init_half(1), + dfixed_init(2), + dfixed_init_half(2), + dfixed_init(3), + dfixed_init_half(3), + dfixed_init(4), + dfixed_init_half(4) + }; + fixed20_12 memtrbs_r4xx[8] = { + dfixed_init(4), + dfixed_init(5), + dfixed_init(6), + dfixed_init(7), + dfixed_init(8), + dfixed_init(9), + dfixed_init(10), + dfixed_init(11) + }; + fixed20_12 min_mem_eff; + fixed20_12 mc_latency_sclk, mc_latency_mclk, k1; + fixed20_12 cur_latency_mclk, cur_latency_sclk; + fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate, + disp_drain_rate2, read_return_rate; + fixed20_12 time_disp1_drop_priority; + int c; + int cur_size = 16; /* in octawords */ + int critical_point = 0, critical_point2; +/* uint32_t read_return_rate, time_disp1_drop_priority; */ + int stop_req, max_stop_req; + struct drm_display_mode *mode1 = NULL; + struct drm_display_mode *mode2 = NULL; + uint32_t pixel_bytes1 = 0; + uint32_t pixel_bytes2 = 0; - read_return_rate.full = temp_ff.full; + radeon_update_display_priority(rdev); - if (mode1) { - temp_ff.full = read_return_rate.full - disp_drain_rate.full; - time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff); - } else { - time_disp1_drop_priority.full = 0; - } - crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full; - crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2); - crit_point_ff.full += dfixed_const_half(0); + if (rdev->mode_info.crtcs[0]->base.enabled) { + mode1 = &rdev->mode_info.crtcs[0]->base.mode; + pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8; + } + if (!(rdev->flags & RADEON_SINGLE_CRTC)) { + if (rdev->mode_info.crtcs[1]->base.enabled) { + mode2 = &rdev->mode_info.crtcs[1]->base.mode; + pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8; + } + } - critical_point2 = dfixed_trunc(crit_point_ff); + min_mem_eff.full = dfixed_const_8(0); + /* get modes */ + if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) { + uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER); + mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT); + mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT); + /* check crtc enables */ + if (mode2) + mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT); + if (mode1) + mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT); + WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer); + } - if (rdev->disp_priority == 2) { - critical_point2 = 0; - } + /* + * determine is there is enough bw for current mode + */ + sclk_ff = rdev->pm.sclk; + mclk_ff = rdev->pm.mclk; - if (max_stop_req - critical_point2 < 4) - critical_point2 = 0; + temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1); + temp_ff.full = dfixed_const(temp); + mem_bw.full = dfixed_mul(mclk_ff, temp_ff); - } + pix_clk.full = 0; + pix_clk2.full = 0; + peak_disp_bw.full = 0; + if (mode1) { + temp_ff.full = dfixed_const(1000); + pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */ + pix_clk.full = dfixed_div(pix_clk, temp_ff); + temp_ff.full = dfixed_const(pixel_bytes1); + peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff); + } + if (mode2) { + temp_ff.full = dfixed_const(1000); + pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */ + pix_clk2.full = dfixed_div(pix_clk2, temp_ff); + temp_ff.full = dfixed_const(pixel_bytes2); + peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff); + } - if (critical_point2 == 0 && rdev->family == CHIP_R300) { - /* some R300 cards have problem with this set to 0 */ - critical_point2 = 0x10; - } + mem_bw.full = dfixed_mul(mem_bw, min_mem_eff); + if (peak_disp_bw.full >= mem_bw.full) { + DRM_ERROR("You may not have enough display bandwidth for current mode\n" + "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n"); + } - WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) | - (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT))); + /* Get values from the EXT_MEM_CNTL register...converting its contents. */ + temp = RREG32(RADEON_MEM_TIMING_CNTL); + if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */ + mem_trcd = ((temp >> 2) & 0x3) + 1; + mem_trp = ((temp & 0x3)) + 1; + mem_tras = ((temp & 0x70) >> 4) + 1; + } else if (rdev->family == CHIP_R300 || + rdev->family == CHIP_R350) { /* r300, r350 */ + mem_trcd = (temp & 0x7) + 1; + mem_trp = ((temp >> 8) & 0x7) + 1; + mem_tras = ((temp >> 11) & 0xf) + 4; + } else if (rdev->family == CHIP_RV350 || + rdev->family <= CHIP_RV380) { + /* rv3x0 */ + mem_trcd = (temp & 0x7) + 3; + mem_trp = ((temp >> 8) & 0x7) + 3; + mem_tras = ((temp >> 11) & 0xf) + 6; + } else if (rdev->family == CHIP_R420 || + rdev->family == CHIP_R423 || + rdev->family == CHIP_RV410) { + /* r4xx */ + mem_trcd = (temp & 0xf) + 3; + if (mem_trcd > 15) + mem_trcd = 15; + mem_trp = ((temp >> 8) & 0xf) + 3; + if (mem_trp > 15) + mem_trp = 15; + mem_tras = ((temp >> 12) & 0x1f) + 6; + if (mem_tras > 31) + mem_tras = 31; + } else { /* RV200, R200 */ + mem_trcd = (temp & 0x7) + 1; + mem_trp = ((temp >> 8) & 0x7) + 1; + mem_tras = ((temp >> 12) & 0xf) + 4; + } + /* convert to FF */ + trcd_ff.full = dfixed_const(mem_trcd); + trp_ff.full = dfixed_const(mem_trp); + tras_ff.full = dfixed_const(mem_tras); - if ((rdev->family == CHIP_RS400) || - (rdev->family == CHIP_RS480)) { -#if 0 - /* attempt to program RS400 disp2 regs correctly ??? */ - temp = RREG32(RS400_DISP2_REQ_CNTL1); - temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK | - RS400_DISP2_STOP_REQ_LEVEL_MASK); - WREG32(RS400_DISP2_REQ_CNTL1, (temp | - (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) | - (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); - temp = RREG32(RS400_DISP2_REQ_CNTL2); - temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK | - RS400_DISP2_CRITICAL_POINT_STOP_MASK); - WREG32(RS400_DISP2_REQ_CNTL2, (temp | - (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) | - (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT))); -#endif - WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC); - WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000); - WREG32(RS400_DMIF_MEM_CNTL1, 0x29CA71DC); - WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC); - } + /* Get values from the MEM_SDRAM_MODE_REG register...converting its */ + temp = RREG32(RADEON_MEM_SDRAM_MODE_REG); + data = (temp & (7 << 20)) >> 20; + if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) { + if (rdev->family == CHIP_RS480) /* don't think rs400 */ + tcas_ff = memtcas_rs480_ff[data]; + else + tcas_ff = memtcas_ff[data]; + } else + tcas_ff = memtcas2_ff[data]; - DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n", - (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL)); + if (rdev->family == CHIP_RS400 || + rdev->family == CHIP_RS480) { + /* extra cas latency stored in bits 23-25 0-4 clocks */ + data = (temp >> 23) & 0x7; + if (data < 5) + tcas_ff.full += dfixed_const(data); } -} -static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) -{ - DRM_ERROR("pitch %d\n", t->pitch); - DRM_ERROR("use_pitch %d\n", t->use_pitch); - DRM_ERROR("width %d\n", t->width); - DRM_ERROR("width_11 %d\n", t->width_11); - DRM_ERROR("height %d\n", t->height); - DRM_ERROR("height_11 %d\n", t->height_11); - DRM_ERROR("num levels %d\n", t->num_levels); - DRM_ERROR("depth %d\n", t->txdepth); - DRM_ERROR("bpp %d\n", t->cpp); - DRM_ERROR("coordinate type %d\n", t->tex_coord_type); - DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); - DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); - DRM_ERROR("compress format %d\n", t->compress_format); -} + if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) { + /* on the R300, Tcas is included in Trbs. + */ + temp = RREG32(RADEON_MEM_CNTL); + data = (R300_MEM_NUM_CHANNELS_MASK & temp); + if (data == 1) { + if (R300_MEM_USE_CD_CH_ONLY & temp) { + temp = RREG32(R300_MC_IND_INDEX); + temp &= ~R300_MC_IND_ADDR_MASK; + temp |= R300_MC_READ_CNTL_CD_mcind; + WREG32(R300_MC_IND_INDEX, temp); + temp = RREG32(R300_MC_IND_DATA); + data = (R300_MEM_RBS_POSITION_C_MASK & temp); + } else { + temp = RREG32(R300_MC_READ_CNTL_AB); + data = (R300_MEM_RBS_POSITION_A_MASK & temp); + } + } else { + temp = RREG32(R300_MC_READ_CNTL_AB); + data = (R300_MEM_RBS_POSITION_A_MASK & temp); + } + if (rdev->family == CHIP_RV410 || + rdev->family == CHIP_R420 || + rdev->family == CHIP_R423) + trbs_ff = memtrbs_r4xx[data]; + else + trbs_ff = memtrbs[data]; + tcas_ff.full += trbs_ff.full; + } -static int r100_track_compress_size(int compress_format, int w, int h) -{ - int block_width, block_height, block_bytes; - int wblocks, hblocks; - int min_wblocks; - int sz; + sclk_eff_ff.full = sclk_ff.full; - block_width = 4; - block_height = 4; + if (rdev->flags & RADEON_IS_AGP) { + fixed20_12 agpmode_ff; + agpmode_ff.full = dfixed_const(radeon_agpmode); + temp_ff.full = dfixed_const_666(16); + sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff); + } + /* TODO PCIE lanes may affect this - agpmode == 16?? */ - switch (compress_format) { - case R100_TRACK_COMP_DXT1: - block_bytes = 8; - min_wblocks = 4; - break; - default: - case R100_TRACK_COMP_DXT35: - block_bytes = 16; - min_wblocks = 2; - break; + if (ASIC_IS_R300(rdev)) { + sclk_delay_ff.full = dfixed_const(250); + } else { + if ((rdev->family == CHIP_RV100) || + rdev->flags & RADEON_IS_IGP) { + if (rdev->mc.vram_is_ddr) + sclk_delay_ff.full = dfixed_const(41); + else + sclk_delay_ff.full = dfixed_const(33); + } else { + if (rdev->mc.vram_width == 128) + sclk_delay_ff.full = dfixed_const(57); + else + sclk_delay_ff.full = dfixed_const(41); + } } - hblocks = (h + block_height - 1) / block_height; - wblocks = (w + block_width - 1) / block_width; - if (wblocks < min_wblocks) - wblocks = min_wblocks; - sz = wblocks * hblocks * block_bytes; - return sz; -} + mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff); -static int r100_cs_track_cube(struct radeon_device *rdev, - struct r100_cs_track *track, unsigned idx) -{ - unsigned face, w, h; - struct radeon_bo *cube_robj; - unsigned long size; - unsigned compress_format = track->textures[idx].compress_format; + if (rdev->mc.vram_is_ddr) { + if (rdev->mc.vram_width == 32) { + k1.full = dfixed_const(40); + c = 3; + } else { + k1.full = dfixed_const(20); + c = 1; + } + } else { + k1.full = dfixed_const(40); + c = 3; + } - for (face = 0; face < 5; face++) { - cube_robj = track->textures[idx].cube_info[face].robj; - w = track->textures[idx].cube_info[face].width; - h = track->textures[idx].cube_info[face].height; + temp_ff.full = dfixed_const(2); + mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff); + temp_ff.full = dfixed_const(c); + mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff); + temp_ff.full = dfixed_const(4); + mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff); + mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff); + mc_latency_mclk.full += k1.full; - if (compress_format) { - size = r100_track_compress_size(compress_format, w, h); - } else - size = w * h; - size *= track->textures[idx].cpp; + mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff); + mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff); - size += track->textures[idx].cube_info[face].offset; + /* + HW cursor time assuming worst case of full size colour cursor. + */ + temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1)))); + temp_ff.full += trcd_ff.full; + if (temp_ff.full < tras_ff.full) + temp_ff.full = tras_ff.full; + cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff); - if (size > radeon_bo_size(cube_robj)) { - DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", - size, radeon_bo_size(cube_robj)); - r100_cs_track_texture_print(&track->textures[idx]); - return -1; - } - } - return 0; -} + temp_ff.full = dfixed_const(cur_size); + cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff); + /* + Find the total latency for the display data. + */ + disp_latency_overhead.full = dfixed_const(8); + disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff); + mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full; + mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full; -static int r100_cs_track_texture_check(struct radeon_device *rdev, - struct r100_cs_track *track) -{ - struct radeon_bo *robj; - unsigned long size; - unsigned u, i, w, h, d; - int ret; + if (mc_latency_mclk.full > mc_latency_sclk.full) + disp_latency.full = mc_latency_mclk.full; + else + disp_latency.full = mc_latency_sclk.full; - for (u = 0; u < track->num_texture; u++) { - if (!track->textures[u].enabled) - continue; - if (track->textures[u].lookup_disable) - continue; - robj = track->textures[u].robj; - if (robj == NULL) { - DRM_ERROR("No texture bound to unit %u\n", u); - return -EINVAL; - } - size = 0; - for (i = 0; i <= track->textures[u].num_levels; i++) { - if (track->textures[u].use_pitch) { - if (rdev->family < CHIP_R300) - w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i); - else - w = track->textures[u].pitch / (1 << i); - } else { - w = track->textures[u].width; - if (rdev->family >= CHIP_RV515) - w |= track->textures[u].width_11; - w = w / (1 << i); - if (track->textures[u].roundup_w) - w = roundup_pow_of_two(w); - } - h = track->textures[u].height; - if (rdev->family >= CHIP_RV515) - h |= track->textures[u].height_11; - h = h / (1 << i); - if (track->textures[u].roundup_h) - h = roundup_pow_of_two(h); - if (track->textures[u].tex_coord_type == 1) { - d = (1 << track->textures[u].txdepth) / (1 << i); - if (!d) - d = 1; - } else { - d = 1; - } - if (track->textures[u].compress_format) { + /* setup Max GRPH_STOP_REQ default value */ + if (ASIC_IS_RV100(rdev)) + max_stop_req = 0x5c; + else + max_stop_req = 0x7c; - size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d; - /* compressed textures are block based */ - } else - size += w * h * d; - } - size *= track->textures[u].cpp; + if (mode1) { + /* CRTC1 + Set GRPH_BUFFER_CNTL register using h/w defined optimal values. + GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ] + */ + stop_req = mode1->hdisplay * pixel_bytes1 / 16; - switch (track->textures[u].tex_coord_type) { - case 0: - case 1: - break; - case 2: - if (track->separate_cube) { - ret = r100_cs_track_cube(rdev, track, u); - if (ret) - return ret; - } else - size *= 6; - break; - default: - DRM_ERROR("Invalid texture coordinate type %u for unit " - "%u\n", track->textures[u].tex_coord_type, u); - return -EINVAL; - } - if (size > radeon_bo_size(robj)) { - DRM_ERROR("Texture of unit %u needs %lu bytes but is " - "%lu\n", u, size, radeon_bo_size(robj)); - r100_cs_track_texture_print(&track->textures[u]); - return -EINVAL; - } - } - return 0; -} + if (stop_req > max_stop_req) + stop_req = max_stop_req; -int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) -{ - unsigned i; - unsigned long size; - unsigned prim_walk; - unsigned nverts; - unsigned num_cb = track->cb_dirty ? track->num_cb : 0; + /* + Find the drain rate of the display buffer. + */ + temp_ff.full = dfixed_const((16/pixel_bytes1)); + disp_drain_rate.full = dfixed_div(pix_clk, temp_ff); - if (num_cb && !track->zb_cb_clear && !track->color_channel_mask && - !track->blend_read_enable) - num_cb = 0; + /* + Find the critical point of the display buffer. + */ + crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency); + crit_point_ff.full += dfixed_const_half(0); - for (i = 0; i < num_cb; i++) { - if (track->cb[i].robj == NULL) { - DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); - return -EINVAL; - } - size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; - size += track->cb[i].offset; - if (size > radeon_bo_size(track->cb[i].robj)) { - DRM_ERROR("[drm] Buffer too small for color buffer %d " - "(need %lu have %lu) !\n", i, size, - radeon_bo_size(track->cb[i].robj)); - DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", - i, track->cb[i].pitch, track->cb[i].cpp, - track->cb[i].offset, track->maxy); - return -EINVAL; + critical_point = dfixed_trunc(crit_point_ff); + + if (rdev->disp_priority == 2) { + critical_point = 0; } - } - track->cb_dirty = false; - if (track->zb_dirty && track->z_enabled) { - if (track->zb.robj == NULL) { - DRM_ERROR("[drm] No buffer for z buffer !\n"); - return -EINVAL; + /* + The critical point should never be above max_stop_req-4. Setting + GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time. + */ + if (max_stop_req - critical_point < 4) + critical_point = 0; + + if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) { + /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/ + critical_point = 0x10; } - size = track->zb.pitch * track->zb.cpp * track->maxy; - size += track->zb.offset; - if (size > radeon_bo_size(track->zb.robj)) { - DRM_ERROR("[drm] Buffer too small for z buffer " - "(need %lu have %lu) !\n", size, - radeon_bo_size(track->zb.robj)); - DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", - track->zb.pitch, track->zb.cpp, - track->zb.offset, track->maxy); - return -EINVAL; + + temp = RREG32(RADEON_GRPH_BUFFER_CNTL); + temp &= ~(RADEON_GRPH_STOP_REQ_MASK); + temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); + temp &= ~(RADEON_GRPH_START_REQ_MASK); + if ((rdev->family == CHIP_R350) && + (stop_req > 0x15)) { + stop_req -= 0x10; } - } - track->zb_dirty = false; + temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); + temp |= RADEON_GRPH_BUFFER_SIZE; + temp &= ~(RADEON_GRPH_CRITICAL_CNTL | + RADEON_GRPH_CRITICAL_AT_SOF | + RADEON_GRPH_STOP_CNTL); + /* + Write the result into the register. + */ + WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) | + (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT))); - if (track->aa_dirty && track->aaresolve) { - if (track->aa.robj == NULL) { - DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i); - return -EINVAL; - } - /* I believe the format comes from colorbuffer0. */ - size = track->aa.pitch * track->cb[0].cpp * track->maxy; - size += track->aa.offset; - if (size > radeon_bo_size(track->aa.robj)) { - DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d " - "(need %lu have %lu) !\n", i, size, - radeon_bo_size(track->aa.robj)); - DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n", - i, track->aa.pitch, track->cb[0].cpp, - track->aa.offset, track->maxy); - return -EINVAL; +#if 0 + if ((rdev->family == CHIP_RS400) || + (rdev->family == CHIP_RS480)) { + /* attempt to program RS400 disp regs correctly ??? */ + temp = RREG32(RS400_DISP1_REG_CNTL); + temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK | + RS400_DISP1_STOP_REQ_LEVEL_MASK); + WREG32(RS400_DISP1_REQ_CNTL1, (temp | + (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) | + (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); + temp = RREG32(RS400_DMIF_MEM_CNTL1); + temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK | + RS400_DISP1_CRITICAL_POINT_STOP_MASK); + WREG32(RS400_DMIF_MEM_CNTL1, (temp | + (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) | + (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT))); } - } - track->aa_dirty = false; +#endif - prim_walk = (track->vap_vf_cntl >> 4) & 0x3; - if (track->vap_vf_cntl & (1 << 14)) { - nverts = track->vap_alt_nverts; - } else { - nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; + DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n", + /* (unsigned int)info->SavedReg->grph_buffer_cntl, */ + (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL)); } - switch (prim_walk) { - case 1: - for (i = 0; i < track->num_arrays; i++) { - size = track->arrays[i].esize * track->max_indx * 4; - if (track->arrays[i].robj == NULL) { - DRM_ERROR("(PW %u) Vertex array %u no buffer " - "bound\n", prim_walk, i); - return -EINVAL; - } - if (size > radeon_bo_size(track->arrays[i].robj)) { - dev_err(rdev->dev, "(PW %u) Vertex array %u " - "need %lu dwords have %lu dwords\n", - prim_walk, i, size >> 2, - radeon_bo_size(track->arrays[i].robj) - >> 2); - DRM_ERROR("Max indices %u\n", track->max_indx); - return -EINVAL; - } + + if (mode2) { + u32 grph2_cntl; + stop_req = mode2->hdisplay * pixel_bytes2 / 16; + + if (stop_req > max_stop_req) + stop_req = max_stop_req; + + /* + Find the drain rate of the display buffer. + */ + temp_ff.full = dfixed_const((16/pixel_bytes2)); + disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff); + + grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL); + grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK); + grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); + grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK); + if ((rdev->family == CHIP_R350) && + (stop_req > 0x15)) { + stop_req -= 0x10; } - break; - case 2: - for (i = 0; i < track->num_arrays; i++) { - size = track->arrays[i].esize * (nverts - 1) * 4; - if (track->arrays[i].robj == NULL) { - DRM_ERROR("(PW %u) Vertex array %u no buffer " - "bound\n", prim_walk, i); - return -EINVAL; + grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); + grph2_cntl |= RADEON_GRPH_BUFFER_SIZE; + grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL | + RADEON_GRPH_CRITICAL_AT_SOF | + RADEON_GRPH_STOP_CNTL); + + if ((rdev->family == CHIP_RS100) || + (rdev->family == CHIP_RS200)) + critical_point2 = 0; + else { + temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128; + temp_ff.full = dfixed_const(temp); + temp_ff.full = dfixed_mul(mclk_ff, temp_ff); + if (sclk_ff.full < temp_ff.full) + temp_ff.full = sclk_ff.full; + + read_return_rate.full = temp_ff.full; + + if (mode1) { + temp_ff.full = read_return_rate.full - disp_drain_rate.full; + time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff); + } else { + time_disp1_drop_priority.full = 0; } - if (size > radeon_bo_size(track->arrays[i].robj)) { - dev_err(rdev->dev, "(PW %u) Vertex array %u " - "need %lu dwords have %lu dwords\n", - prim_walk, i, size >> 2, - radeon_bo_size(track->arrays[i].robj) - >> 2); - return -EINVAL; + crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full; + crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2); + crit_point_ff.full += dfixed_const_half(0); + + critical_point2 = dfixed_trunc(crit_point_ff); + + if (rdev->disp_priority == 2) { + critical_point2 = 0; } - } - break; - case 3: - size = track->vtx_size * nverts; - if (size != track->immd_dwords) { - DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n", - track->immd_dwords, size); - DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", - nverts, track->vtx_size); - return -EINVAL; - } - break; - default: - DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", - prim_walk); - return -EINVAL; - } - if (track->tex_dirty) { - track->tex_dirty = false; - return r100_cs_track_texture_check(rdev, track); - } - return 0; -} + if (max_stop_req - critical_point2 < 4) + critical_point2 = 0; -void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) -{ - unsigned i, face; + } - track->cb_dirty = true; - track->zb_dirty = true; - track->tex_dirty = true; - track->aa_dirty = true; + if (critical_point2 == 0 && rdev->family == CHIP_R300) { + /* some R300 cards have problem with this set to 0 */ + critical_point2 = 0x10; + } - if (rdev->family < CHIP_R300) { - track->num_cb = 1; - if (rdev->family <= CHIP_RS200) - track->num_texture = 3; - else - track->num_texture = 6; - track->maxy = 2048; - track->separate_cube = 1; - } else { - track->num_cb = 4; - track->num_texture = 16; - track->maxy = 4096; - track->separate_cube = 0; - track->aaresolve = false; - track->aa.robj = NULL; - } + WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) | + (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT))); - for (i = 0; i < track->num_cb; i++) { - track->cb[i].robj = NULL; - track->cb[i].pitch = 8192; - track->cb[i].cpp = 16; - track->cb[i].offset = 0; - } - track->z_enabled = true; - track->zb.robj = NULL; - track->zb.pitch = 8192; - track->zb.cpp = 4; - track->zb.offset = 0; - track->vtx_size = 0x7F; - track->immd_dwords = 0xFFFFFFFFUL; - track->num_arrays = 11; - track->max_indx = 0x00FFFFFFUL; - for (i = 0; i < track->num_arrays; i++) { - track->arrays[i].robj = NULL; - track->arrays[i].esize = 0x7F; - } - for (i = 0; i < track->num_texture; i++) { - track->textures[i].compress_format = R100_TRACK_COMP_NONE; - track->textures[i].pitch = 16536; - track->textures[i].width = 16536; - track->textures[i].height = 16536; - track->textures[i].width_11 = 1 << 11; - track->textures[i].height_11 = 1 << 11; - track->textures[i].num_levels = 12; - if (rdev->family <= CHIP_RS200) { - track->textures[i].tex_coord_type = 0; - track->textures[i].txdepth = 0; - } else { - track->textures[i].txdepth = 16; - track->textures[i].tex_coord_type = 1; + if ((rdev->family == CHIP_RS400) || + (rdev->family == CHIP_RS480)) { +#if 0 + /* attempt to program RS400 disp2 regs correctly ??? */ + temp = RREG32(RS400_DISP2_REQ_CNTL1); + temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK | + RS400_DISP2_STOP_REQ_LEVEL_MASK); + WREG32(RS400_DISP2_REQ_CNTL1, (temp | + (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) | + (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); + temp = RREG32(RS400_DISP2_REQ_CNTL2); + temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK | + RS400_DISP2_CRITICAL_POINT_STOP_MASK); + WREG32(RS400_DISP2_REQ_CNTL2, (temp | + (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) | + (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT))); +#endif + WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC); + WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000); + WREG32(RS400_DMIF_MEM_CNTL1, 0x29CA71DC); + WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC); } - track->textures[i].cpp = 64; - track->textures[i].robj = NULL; - /* CS IB emission code makes sure texture unit are disabled */ - track->textures[i].enabled = false; - track->textures[i].lookup_disable = false; - track->textures[i].roundup_w = true; - track->textures[i].roundup_h = true; - if (track->separate_cube) - for (face = 0; face < 5; face++) { - track->textures[i].cube_info[face].robj = NULL; - track->textures[i].cube_info[face].width = 16536; - track->textures[i].cube_info[face].height = 16536; - track->textures[i].cube_info[face].offset = 0; - } + + DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n", + (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL)); } }