bober007's picture

Texture atlas class using vbos

I've been working on a tile-based game and encountered a serious performance problem. I was using immediate mode to draw my map and called GL.BindTexture for each tile. Even though I was drawing only the tiles visible on the screen, the frame rate was terrible with small tiles at a high resolution.
Then I thought of a way to optimize drawing many tiles and came up with the following TextureAtlas class:

using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using OpenTK;
using OpenTK.Graphics.OpenGL;
namespace Explorer.Graphics
{
    /// <summary>
    /// A texture split into a grid of equally sized square tiles.
    /// One texture object and one vertex buffer hold every tile; each tile
    /// additionally owns a small element buffer that selects its four vertices.
    /// Instances are created through <see cref="Load"/> and cached per file name.
    /// </summary>
    /// <remarks>
    /// All members issue OpenGL calls and therefore need a current GL context.
    /// The class does not enable the vertex / texture-coordinate client arrays
    /// or texturing itself; the caller is expected to have done so.
    /// </remarks>
    public class TextureAtlas
    {
        // Cache of loaded atlases keyed by the file name passed to Load.
        private static Dictionary<string, TextureAtlas> dict = new Dictionary<string, TextureAtlas>();

        private int texId;    // GL texture object holding the whole atlas image
        private int vboId;    // GL buffer with interleaved (position, texcoord) vertices
        private int[] iboIds; // one GL element buffer per tile, row-major order

        private TextureAtlas() { }

        /// <summary>Number of tiles in the atlas (one element buffer per tile).</summary>
        public int TileCount { get { return iboIds.Length; } }

        /// <summary>
        /// Binds the atlas' vertex buffer and texture and sets up the vertex and
        /// texture-coordinate pointers. Call once before a run of
        /// <see cref="DrawQuad"/> calls.
        /// </summary>
        public void Bind()
        {
            // Each vertex is 4 floats: x, y, u, v.
            int stride = 4 * sizeof(float);

            GL.BindBuffer(BufferTarget.ArrayBuffer, vboId);
            GL.VertexPointer(2, VertexPointerType.Float, stride, (IntPtr)(0));
            GL.TexCoordPointer(2, TexCoordPointerType.Float, stride, (IntPtr)(2 * sizeof(float)));
            GL.BindTexture(TextureTarget.Texture2D, texId);
        }

        /// <summary>
        /// Draws one tile as a unit quad transformed by <paramref name="transformation"/>.
        /// </summary>
        /// <param name="tile">
        /// Tile index; wrapped into the range [0, TileCount), so negative and
        /// too-large values are accepted.
        /// </param>
        /// <param name="transformation">
        /// Matrix multiplied onto the current GL matrix. The matrix stack is not
        /// saved or restored here, so the caller must push/pop (or reload) the
        /// matrix around the call if the transformation should not accumulate.
        /// </param>
        public void DrawQuad(int tile, Matrix4 transformation)
        {
            int count = TileCount;
            if (count == 0)
            {
                // Nothing to draw; also avoids a division by zero below.
                return;
            }

            // C#'s % keeps the sign of the dividend, so normalise negatives too.
            tile = ((tile % count) + count) % count;

            GL.MultMatrix(ref transformation);
            GL.BindBuffer(BufferTarget.ElementArrayBuffer, iboIds[tile]);
            GL.DrawElements(BeginMode.Quads, 4, DrawElementsType.UnsignedInt, 0);
        }

        /// <summary>
        /// Creates the GL texture object and uploads <paramref name="bitmap"/> into it
        /// using nearest-neighbour filtering and clamped wrapping.
        /// </summary>
        /// <param name="bitmap">Source image; read as 32-bit ARGB.</param>
        private void CreateTexture(Bitmap bitmap)
        {
            texId = GL.GenTexture();
            GL.BindTexture(TextureTarget.Texture2D, texId);

            // Set the sampling parameters before uploading the image so the
            // texture is created with its final state.
            GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureMinFilter, (int)TextureMinFilter.Nearest);
            GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureMagFilter, (int)TextureMagFilter.Nearest);
            GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureWrapS, (int)TextureWrapMode.Clamp);
            GL.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureWrapT, (int)TextureWrapMode.Clamp);

            BitmapData bmp_data = bitmap.LockBits(new Rectangle(0, 0, bitmap.Width, bitmap.Height),
                ImageLockMode.ReadOnly, System.Drawing.Imaging.PixelFormat.Format32bppArgb);
            try
            {
                GL.TexImage2D(TextureTarget.Texture2D, 0, PixelInternalFormat.Rgba, bmp_data.Width, bmp_data.Height, 0,
                    OpenTK.Graphics.OpenGL.PixelFormat.Bgra, PixelType.UnsignedByte, bmp_data.Scan0);
            }
            finally
            {
                // Always release the lock, even if the upload throws.
                bitmap.UnlockBits(bmp_data);
            }
        }

        /// <summary>
        /// Builds the shared vertex buffer and the per-tile element buffers.
        /// </summary>
        /// <param name="bitmapSize">Size of the atlas image in pixels.</param>
        /// <param name="tileSize">Edge length of one square tile in pixels; must be positive.</param>
        private void CreateBuffers(Size bitmapSize, int tileSize)
        {
            // Every tile is drawn as the same unit quad; only the texcoords differ.
            Vector2 posLeftTop = new Vector2(0.0f, 0.0f);
            Vector2 posLeftBottom = new Vector2(0.0f, 1.0f);
            Vector2 posRightBottom = new Vector2(1.0f, 1.0f);
            Vector2 posRightTop = new Vector2(1.0f, 0.0f);

            int tilesOnWidth = bitmapSize.Width / tileSize;
            int tilesOnHeight = bitmapSize.Height / tileSize;
            int tileTotal = tilesOnWidth * tilesOnHeight;

            // Interleaved layout expected by Bind(): position, texcoord, position, ...
            // i.e. two Vector2 entries per vertex and four vertices per tile.
            List<Vector2> vertexList = new List<Vector2>(tileTotal * 8);
            uint[][] indexArr = new uint[tileTotal][];
            uint currentIndex = 0;

            for (int tileY = 0; tileY < tilesOnHeight; tileY++)
            {
                for (int tileX = 0; tileX < tilesOnWidth; tileX++)
                {
                    Vector2 texLeftTop = new Vector2((float)tileX / (float)tilesOnWidth, (float)tileY / (float)tilesOnHeight);
                    Vector2 texLeftBottom = new Vector2((float)tileX / (float)tilesOnWidth, (float)(tileY + 1) / (float)tilesOnHeight);
                    Vector2 texRightBottom = new Vector2((float)(tileX + 1) / (float)tilesOnWidth, (float)(tileY + 1) / (float)tilesOnHeight);
                    Vector2 texRightTop = new Vector2((float)(tileX + 1) / (float)tilesOnWidth, (float)tileY / (float)tilesOnHeight);

                    vertexList.Add(posLeftTop);
                    vertexList.Add(texLeftTop);
                    vertexList.Add(posLeftBottom);
                    vertexList.Add(texLeftBottom);
                    vertexList.Add(posRightBottom);
                    vertexList.Add(texRightBottom);
                    vertexList.Add(posRightTop);
                    vertexList.Add(texRightTop);

                    // Indices count whole vertices (position + texcoord pairs).
                    indexArr[tileX + tileY * tilesOnWidth] = new uint[]
                    {
                        currentIndex, currentIndex + 1, currentIndex + 2, currentIndex + 3
                    };
                    currentIndex += 4;
                }
            }

            GL.GenBuffers(1, out vboId);
            GL.BindBuffer(BufferTarget.ArrayBuffer, vboId);
            GL.BufferData(BufferTarget.ArrayBuffer, (IntPtr)(vertexList.Count * Vector2.SizeInBytes), vertexList.ToArray(), BufferUsageHint.StaticDraw);

            iboIds = new int[tileTotal];
            for (int i = 0; i < iboIds.Length; i++)
            {
                uint[] indices = indexArr[i];

                GL.GenBuffers(1, out iboIds[i]);
                GL.BindBuffer(BufferTarget.ElementArrayBuffer, iboIds[i]);
                // Size is in bytes of the index element type, not of a vertex.
                GL.BufferData(BufferTarget.ElementArrayBuffer, (IntPtr)(indices.Length * sizeof(uint)),
                    indices, BufferUsageHint.StaticDraw);
            }
        }

        /// <summary>Deletes the GL texture and buffers owned by this atlas.</summary>
        private void Cleanup()
        {
            GL.DeleteTexture(texId);
            GL.DeleteBuffers(1, ref vboId);
            GL.DeleteBuffers(iboIds.Length, iboIds);
        }

        /// <summary>
        /// Loads an atlas from an image file, or returns the cached instance if the
        /// same file name was loaded before.
        /// </summary>
        /// <param name="fileName">Path of the image file; also used as the cache key.</param>
        /// <param name="tileSize">
        /// Edge length of one square tile in pixels. Ignored when the cached
        /// instance is returned.
        /// </param>
        /// <returns>The atlas, or <c>null</c> if the file does not exist.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="tileSize"/> is zero or negative and a new atlas has to be built.
        /// </exception>
        public static TextureAtlas Load(string fileName, int tileSize)
        {
            if (!System.IO.File.Exists(fileName))
            {
                Console.WriteLine("ERROR: File not found: " + fileName);
                return null;
            }

            TextureAtlas cached;
            if (dict.TryGetValue(fileName, out cached))
            {
                Console.WriteLine("WARNING: File already loaded " + fileName);
                return cached;
            }

            if (tileSize <= 0)
            {
                throw new ArgumentOutOfRangeException("tileSize", "Tile size must be positive.");
            }

            TextureAtlas texAtlas = new TextureAtlas();
            // The bitmap is only needed for the upload; release it afterwards.
            using (Bitmap bitmap = new Bitmap(fileName))
            {
                texAtlas.CreateTexture(bitmap);
                texAtlas.CreateBuffers(bitmap.Size, tileSize);
            }

            dict.Add(fileName, texAtlas);
            return texAtlas;
        }

        /// <summary>
        /// Releases the GL resources of every loaded atlas and empties the cache.
        /// </summary>
        public static void CleanupAll()
        {
            foreach (TextureAtlas ta in dict.Values)
            {
                ta.Cleanup();
            }

            // Drop the stale instances so a later Load rebuilds them.
            dict.Clear();
        }
    }
}

The idea is to have one texture object and one vertex buffer object for the whole map and bind them once. The class can also be used for sprite animation.
Are there any obvious mistakes? Is there any way to further optimize the process? Your opinion will be appreciated.


Comment viewing options

Select your preferred way to display the comments and click "Save settings" to activate your changes.
the Fiddler's picture

This looks good and should give you a significant performance boost! In principle, you could go even faster with hardware instancing (OpenGL 3.x/4.x), but it's probably not worth the effort for a tile-based game. Plain OpenGL 1.x VBOs should already be fast enough.

A few random suggestions:

Do not call GL.EnableClientState(ArrayCap.IndexArray): it does not do what the name suggests, and you will (probably) get a crash if you leave it enabled. The correct approach is to allocate a separate ElementArrayBuffer and use GL.DrawElements (which is exactly what you are doing!).

Move the GL.TexParameters call before GL.TexImage2D. If you place them after, the driver may have to re-allocate and convert the texture (probably not in this specific case, but it's a good idea in general.)

Unless you are rendering millions of tiles, use a ushort ElementArrayBuffer. This is especially important on mobile devices, which do not support uint.

Replace BeginMode with PrimitiveType. Khronos has renamed this enum in OpenGL 4.x (since GL.Begin() is no longer available). OpenTK provides both names, so it's mostly a matter of style.

If you ever plan on moving to OpenGL 3.x or OpenGL ES, you will not be able to use BeginMode.Quads. Use PrimitiveType.Triangles or TriangleStrip instead.

bober007's picture

Thank You very much for the quick and comprehensive response!

Performance boost is significant indeed. It is even possible to bruteforce draw the entire map now and it runs smoothly.

"Move the GL.TexParameters call before GL.TexImage2D. If you place them after, the driver may have to re-allocate and convert the texture (probably not in this specific case, but it's a good idea in general.)"
I thought that the driver having to convert the texture was the reason why GL.TexParameters should be called after GL.TexImage2D and GL.GenerateMipmaps, but I must have gotten something wrong.

I found one more mistake in:

GL.BufferData(BufferTarget.ElementArrayBuffer, (IntPtr)(indexListArr[i].Count * Vector2.SizeInBytes),
                    indexListArr[i].ToArray(), BufferUsageHint.StaticDraw);

Buffer size should be:

indexListArr[i].Count * sizeof(uint)

or

indexListArr[i].Count * sizeof(ushort)

depending on the ElementArrayBuffer index type.

the Fiddler's picture

The driver will allocate memory and convert your texture data during the GL.TexImage2D call, according to the current texture parameters. If you change those parameters afterwards, the driver may have to recreate the texture to match. I remember a discussion with an AMD driver developer who suggested setting the parameters beforehand.

Instead of changing sizeof(uint) / sizeof(ushort), you can instead use:

indexListArr[i].Count * BlittableValueType.StrideOf(indexListArr[i][0])

This will work correctly for every element type, including Vector[234], Matrix[234]x[234] and their combinations. For example:

// Example: a custom interleaved vertex type and its stride in bytes.
struct Vertex
{
    public Vector3 Position;
    public Vector3 Normal;
    public Vector2 TexCoord;
}

var buffer = new Vertex[1024];
int stride = BlittableValueType.StrideOf(buffer); // (3 + 3 + 2) * sizeof(float) = 32
int bytes = buffer.Length * stride;
winterhell's picture

For dynamic sprite rendering from a spritesheet I'm using a normalized Quad that is 1x1 size and texcoordinates from (0,0) to (1,1).
Then I pass to a shader 2 uniform Vector4's. The first one contains the xy positions and sizes of the quad.
The other one contains the number of sprites in each direction on the sheet, and which of those sprites we are drawing.
Then in the shader you transform the vertices and texture coordinates accordingly.
This way you skip texture binding and VBO generation+binding.
I haven't tried batching/instancing multiple quads in a single draw call, but drawing quads this way resulted in more than a million sprites per second, CPU/API bound. On the plus side, it works with OpenGL 2.x.

P.S. The texture uniform can store the top left and bottom right texture coordinates of the sprite, for spritesheets with non-uniform sprite sizes and distributions.

bober007's picture

@the Fiddler
BlittableValueType is great! How could I not know about it before?

I'm not into GLSL and this kind of low-level stuff yet, but this sounds very good in principle and a million sprites per second is impressive. I can see this beautifully efficient particle engine already ;).
Thanks for the hint, I'll give it a try sooner or later. For now a quarter of this efficiency is more than enough for my needs.