Flash Player 11’s new Stage3D hardware-accelerated graphics API not only allows you to write shaders (custom code to position vertices and color pixels), it downright requires you to do so. To get the lowest level access (and therefore most power) out of your shaders, you write them in an assembly language called AGAL. Read on for a test app that compares the speed of these shader instructions, the fundamental building blocks of all Stage3D apps.

The following test app does a very simple task: it draws two triangles that take up the whole screen. To draw even more pixels (and therefore increase the intensity of the test), it can also draw the same two full-screen triangles to an off-screen 2048×2048 texture, as many extra times per frame as you choose. A UI is provided to switch between all of the AGAL instructions (except tex and kil) and to toggle between hardware-accelerated mode (if your system supports it) and software rendering. CAUTION: too many “extra renders” (to the off-screen texture) can cause system instability.

Drawing the pixels themselves only takes one vertex shader instruction (mov op, va0) and one fragment shader instruction (mov oc, fc0). The vertex shader also copies a constant into v0 (mov v0, vc0) so that the padding instructions have a varying register to read. The bulk of the fragment shader is then padded out to the maximum 200 instructions with 199 copies of the instruction you choose. So if you choose the add instruction, you’ll get a shader like this:

add ft0, v0, fc0
add ft0, v0, fc0
add ft0, v0, fc0
add ft0, v0, fc0
add ft0, v0, fc0
// ^ 194 more of this instruction
mov oc, fc0 // final output

Therefore, each pixel of the stage as well as each pixel of the off-screen texture will have 199 of the chosen instruction executed.

Without further ado, here is the source code for the test app:

package
{
	import com.adobe.utils.*;
	import flash.display3D.textures.*;
	import flash.display3D.*;
	import flash.display.*;
	import flash.filters.*;
	import flash.events.*;
	import flash.text.*;
	import flash.utils.*;
 
	/**
	 * Stage3D micro-benchmark for AGAL fragment shader instructions.
	 *
	 * Every frame a full-screen quad is drawn with a fragment shader that
	 * consists of PAD_INSTRUCTION_COUNT copies of the selected instruction
	 * followed by a final "mov oc, fc0". The same quad can additionally be
	 * drawn any number of times into an off-screen texture ("extra renders")
	 * to increase the load. Buttons select the instruction, toggle between
	 * the automatic and software render modes, and adjust the number of
	 * extra renders. The measured frame rate is shown in the top-left corner.
	 */
	public class Stage3DShaderTest extends Sprite 
	{
		/** Positions (x, y, z) of the four corners of the quad. */
		private static const VERT_DATA:Vector.<Number> = new <Number>[
			-1, -1, 0,
			1, -1, 0,
			1, 1, 0,
			-1, 1, 0
		];

		/** Two triangles that together cover the quad. */
		private static const TRIS:Vector.<uint> = new <uint>[
			0, 1, 2,
			0, 2, 3
		];

		/**
		 * Fragment constants fc0..fc3. fc0 is the output color; the other
		 * three registers are zero-filled (presumably so that the matrix
		 * instructions, which take fc0 as their matrix operand, have
		 * defined input in the following registers -- confirm against the
		 * AGAL reference).
		 */
		private static const FRAG_CONST:Vector.<Number> = new <Number>[
			0.9296875, 0.9140625, 0.84765625, 1, // color
			0, 0, 0, 0,
			0, 0, 0, 0,
			0, 0, 0, 0
		];

		/** Vertex constant vc0, copied into the varying v0 by the vertex shader. */
		private static const VERT_CONST:Vector.<Number> = new <Number>[
			0, 0, 0, 0
		];

		/** Vertex shader: pass the position through and give v0 a value. */
		private static const VERT_SOURCE:String = "mov op, va0\nmov v0, vc0\n";

		/** Last fragment instruction; it writes the actual output color. */
		private static const FINAL_FRAG_SOURCE:String = "mov oc, fc0";

		/** How many copies of the selected instruction precede the final one. */
		private static const PAD_INSTRUCTION_COUNT:int = 199;

		/** Width and height of the off-screen render target. */
		private static const TEXTURE_SIZE:int = 2048;

		/** Instance name of the TextField inside every button. */
		private static const LABEL_NAME:String = "lbl";

		private static const DEFAULT_MODE:String = "mov";
		private static const TOGGLE_HARDWARE:String = "Toggle Hardware";
		private static const EXTRA_RENDERS_MORE:String = "Extra Renders +";
		private static const EXTRA_RENDERS_LESS:String = "Extra Renders -";

		/** Maps an instruction button label to the AGAL line used as padding. */
		private static const PAD_SOURCES:Object = {
			"mov": "mov ft0, fc0",
			"add": "add ft0, v0, fc0",
			"sub": "sub ft0, v0, fc0",
			"mul": "mul ft0, v0, fc0",
			"div": "div ft0, v0, fc0",
			"rcp": "rcp ft0, v0",
			"min": "min ft0, v0, fc0",
			"max": "max ft0, v0, fc0",
			"frc": "frc ft0, v0",
			"sqt": "sqt ft0, v0",
			"rsq": "rsq ft0, v0",
			"pow": "pow ft0, v0, fc0",
			"log": "log ft0, v0",
			"exp": "exp ft0, v0",
			"nrm": "nrm ft0.xyz, v0",
			"sin": "sin ft0, v0",
			"cos": "cos ft0, v0",
			"crs": "crs ft0.xyz, v0, fc0",
			"dp3": "dp3 ft0, v0, fc0",
			"dp4": "dp4 ft0, v0, fc0",
			"abs": "abs ft0, v0",
			"neg": "neg ft0, v0",
			"sat": "sat ft0, v0",
			"m33": "m33 ft0.xyz, v0, fc0",
			"m44": "m44 ft0, v0, fc0",
			"m34": "m34 ft0.xyz, v0, fc0",
			"sge": "sge ft0, v0, fc0",
			"slt": "slt ft0, v0, fc0",
			"seq": "seq ft0, v0, fc0",
			"sne": "sne ft0, v0, fc0"
		};

		// Rendering state. All of it is null while no context is available
		// (before the first context arrives and while a toggle is pending).
		private var context3D:Context3D;
		private var vertexBuffer:VertexBuffer3D;
		private var indexBuffer:IndexBuffer3D; 
		private var program:Program3D;
		private var texture:Texture;

		// UI
		private var fps:TextField = new TextField();
		private var driver:TextField = new TextField();
		private var extraRendersText:TextField = new TextField();
		/** True once the buttons and text fields have been added. */
		private var uiReady:Boolean;

		// Frame rate measurement window
		private var lastFPSUpdateTime:int;
		private var frameCount:uint;

		// Current selection
		private var mode:String;
		private var padFragSource:String;
		private var extraRenders:int;

		/**
		 * Starts the test as soon as the stage is available. The stage is
		 * null when this SWF is loaded by another one, so initialization is
		 * deferred until ADDED_TO_STAGE in that case.
		 */
		public function Stage3DShaderTest()
		{
			if (stage)
			{
				init();
			}
			else
			{
				addEventListener(Event.ADDED_TO_STAGE, init);
			}
		}

		/**
		 * Configures the stage and requests the first rendering context.
		 *
		 * @param ev ADDED_TO_STAGE event, or null when called directly.
		 */
		private function init(ev:Event = null): void
		{
			removeEventListener(Event.ADDED_TO_STAGE, init);

			stage.align = StageAlign.TOP_LEFT;
			stage.scaleMode = StageScaleMode.NO_SCALE;
			stage.frameRate = 60;
			setupContext(Context3DRenderMode.AUTO);
		}

		/**
		 * Requests a rendering context; onContextCreated runs when it is ready.
		 *
		 * @param renderMode One of the Context3DRenderMode constants.
		 */
		private function setupContext(renderMode:String): void
		{
			driver.text = "Setting up context with render mode: " + renderMode;
			var stage3D:Stage3D = stage.stage3Ds[0];
			stage3D.addEventListener(Event.CONTEXT3D_CREATE, onContextCreated);
			stage3D.requestContext3D(renderMode);
		}

		/**
		 * Adopts the newly created context: configures the back buffer,
		 * builds the UI on the first call, and (re)creates the shader
		 * program, buffers and off-screen texture.
		 *
		 * @param ev The CONTEXT3D_CREATE event.
		 */
		protected function onContextCreated(ev:Event): void
		{
			var firstTime:Boolean = !uiReady;

			// Setup context
			var stage3D:Stage3D = stage.stage3Ds[0];
			stage3D.removeEventListener(Event.CONTEXT3D_CREATE, onContextCreated);
			disposeResources(); // nothing from an earlier context may be reused
			context3D = stage3D.context3D;
			context3D.configureBackBuffer(
				stage.stageWidth,
				stage.stageHeight,
				0,
				true
			);

			// Setup UI
			driver.text = "Driver: " + context3D.driverInfo;
			if (firstTime)
			{
				makeButtons(
					"mov", "add", "sub", "mul", "div", "rcp", "min", "max",
					"frc", "sqt", "rsq", "pow", "log", "exp", "nrm", "sin",
					"cos", "crs", "dp3", "dp4", "abs", "neg", "sat", "m33",
					"m44", "m34", "sge", "slt", "seq", "sne",
					TOGGLE_HARDWARE, EXTRA_RENDERS_MORE, EXTRA_RENDERS_LESS
				);

				fps.autoSize = TextFieldAutoSize.LEFT;
				fps.text = "Getting FPS...";
				addChild(fps);

				driver.autoSize = TextFieldAutoSize.LEFT;
				driver.y = fps.height;
				addChild(driver);

				extraRendersText.autoSize = TextFieldAutoSize.LEFT;
				extraRendersText.y = driver.y + driver.height;
				addChild(extraRendersText);

				setExtraRenders(extraRenders);
				uiReady = true;
			}

			// Builds the program for the current (or default) instruction
			setMode(mode || DEFAULT_MODE);

			// Setup buffers
			vertexBuffer = context3D.createVertexBuffer(4, 3);
			vertexBuffer.uploadFromVector(VERT_DATA, 0, 4);
			indexBuffer = context3D.createIndexBuffer(6);
			indexBuffer.uploadFromVector(TRIS, 0, 6);
			texture = context3D.createTexture(
				TEXTURE_SIZE,
				TEXTURE_SIZE,
				Context3DTextureFormat.BGRA,
				true
			);

			// Start a fresh measurement window so that time spent waiting
			// for the context is not counted against the first FPS reading.
			frameCount = 0;
			lastFPSUpdateTime = getTimer();

			// Begin rendering every frame
			if (firstTime)
			{
				addEventListener(Event.ENTER_FRAME, onEnterFrame);
			}
		}

		/**
		 * Disposes the program, buffers and texture (if any) and clears the
		 * references so that they cannot be used with another context.
		 */
		private function disposeResources(): void
		{
			if (program)
			{
				program.dispose();
				program = null;
			}
			if (vertexBuffer)
			{
				vertexBuffer.dispose();
				vertexBuffer = null;
			}
			if (indexBuffer)
			{
				indexBuffer.dispose();
				indexBuffer = null;
			}
			if (texture)
			{
				texture.dispose();
				texture = null;
			}
		}

		/**
		 * Assembles and uploads the shader program whose fragment shader is
		 * PAD_INSTRUCTION_COUNT copies of the given line followed by the
		 * final output instruction.
		 *
		 * When no context is available the source is only remembered; the
		 * program is built by onContextCreated once a context arrives.
		 *
		 * @param padFragSource One line of AGAL fragment shader source.
		 */
		private function makeProgram(padFragSource:String): void
		{
			this.padFragSource = padFragSource;
			if (!context3D)
			{
				return;
			}

			// Vertex shader
			var vertexAssembler:AGALMiniAssembler = new AGALMiniAssembler();
			vertexAssembler.assemble(Context3DProgramType.VERTEX, VERT_SOURCE);
			var vertexShaderAGAL:ByteArray = vertexAssembler.agalcode;

			// Fragment shader
			var fragLines:Vector.<String> = new Vector.<String>();
			for (var i:int = 0; i < PAD_INSTRUCTION_COUNT; ++i)
			{
				fragLines.push(padFragSource);
			}
			fragLines.push(FINAL_FRAG_SOURCE);
			var fragmentAssembler:AGALMiniAssembler = new AGALMiniAssembler();
			fragmentAssembler.assemble(
				Context3DProgramType.FRAGMENT,
				fragLines.join("\n")
			);
			var fragmentShaderAGAL:ByteArray = fragmentAssembler.agalcode;

			// Shader program
			if (program)
			{
				program.dispose();
			}
			program = context3D.createProgram();
			program.upload(vertexShaderAGAL, fragmentShaderAGAL);
		}

		/**
		 * Creates one button per label and lays the buttons out in rows,
		 * left to right, starting at the bottom of the stage and wrapping
		 * upwards when a row is full.
		 *
		 * @param labels Button captions (Strings).
		 */
		private function makeButtons(...labels): void
		{
			const PAD:Number = 5;

			var curX:Number = PAD;
			var curY:Number = stage.stageHeight - PAD;
			for each (var label:String in labels)
			{
				var tf:TextField = new TextField();
				tf.mouseEnabled = false;
				tf.selectable = false;
				tf.defaultTextFormat = new TextFormat("_sans", 16, 0x0071BB);
				tf.autoSize = TextFieldAutoSize.LEFT;
				tf.text = label;
				tf.name = LABEL_NAME;

				var button:Sprite = new Sprite();
				button.buttonMode = true;
				button.graphics.beginFill(0xF5F5F5);
				button.graphics.drawRect(0, 0, tf.width+PAD, tf.height+PAD);
				button.graphics.endFill();
				button.graphics.lineStyle(1);
				button.graphics.drawRect(0, 0, tf.width+PAD, tf.height+PAD);
				button.addChild(tf);
				button.addEventListener(MouseEvent.CLICK, onButton);
				if (curX + button.width > stage.stageWidth - PAD)
				{
					curX = PAD;
					curY -= button.height + PAD;
				}
				button.x = curX;
				button.y = curY - button.height;
				addChild(button);

				curX += button.width + PAD;
			}
		}

		/**
		 * Dispatches a button click based on the caption of the clicked button.
		 *
		 * @param ev The CLICK event; its currentTarget is the button sprite
		 *           the listener was registered on.
		 */
		private function onButton(ev:MouseEvent): void
		{
			var button:Sprite = ev.currentTarget as Sprite;
			var lbl:TextField = button
				? button.getChildByName(LABEL_NAME) as TextField
				: null;
			if (!lbl)
			{
				return;
			}

			switch (lbl.text)
			{
				case TOGGLE_HARDWARE:
					// Without a context a request is already pending.
					if (context3D)
					{
						toggleHardware();
					}
					break;
				case EXTRA_RENDERS_MORE:
					setExtraRenders(extraRenders+1);
					break;
				case EXTRA_RENDERS_LESS:
					setExtraRenders(extraRenders-1);
					break;
				default:
					setMode(lbl.text);
			}
		}

		/**
		 * Disposes the current context and requests one in the other render
		 * mode: AUTO when the current driver reports software rendering,
		 * SOFTWARE otherwise.
		 */
		private function toggleHardware(): void
		{
			var wasSoftware:Boolean =
				context3D.driverInfo.toLowerCase().indexOf("software") >= 0;

			disposeResources();
			context3D.dispose();
			// Clear the reference so that onEnterFrame and makeProgram do
			// not touch the disposed context while the new one is pending.
			context3D = null;

			setupContext(
				wasSoftware
					? Context3DRenderMode.AUTO
					: Context3DRenderMode.SOFTWARE
			);
		}

		/**
		 * Selects the instruction to benchmark: highlights its button and
		 * rebuilds the shader program. A mode without an entry in
		 * PAD_SOURCES is remembered but leaves the program untouched.
		 *
		 * @param mode Caption of the instruction button, e.g. "add".
		 */
		private function setMode(mode:String): void
		{
			this.mode = mode;

			for (var i:int = 0; i < numChildren; ++i)
			{
				var spr:Sprite = getChildAt(i) as Sprite;
				if (!spr)
				{
					continue;
				}
				var lbl:TextField = spr.getChildByName(LABEL_NAME) as TextField;
				if (!lbl)
				{
					continue; // not one of the buttons
				}
				spr.filters = lbl.text == mode
					? [new GlowFilter(0x261C13)]
					: [];
			}

			if (PAD_SOURCES.hasOwnProperty(mode))
			{
				makeProgram(PAD_SOURCES[mode]);
			}
		}

		/**
		 * Sets the number of additional off-screen draws per frame and
		 * updates the on-screen counter. Negative values are clamped to 0.
		 *
		 * @param extra Requested number of extra renders.
		 */
		private function setExtraRenders(extra:int): void
		{
			extraRenders = extra < 0 ? 0 : extra;
			extraRendersText.text = "Extra Renders: " + extraRenders;
		}

		/**
		 * Renders one frame (the extra off-screen draws followed by the
		 * back buffer draw) and refreshes the FPS display about once per
		 * second. Does nothing while no context or program is available.
		 *
		 * @param ev The ENTER_FRAME event.
		 */
		private function onEnterFrame(ev:Event): void
		{
			if (!context3D || !program)
			{
				return;
			}

			// Render scene
			context3D.setProgram(program);
			context3D.setVertexBufferAt(
				0,
				vertexBuffer,
				0,
				Context3DVertexBufferFormat.FLOAT_3
			);
			context3D.setProgramConstantsFromVector(
				Context3DProgramType.VERTEX,
				0,
				VERT_CONST
			);
			context3D.setProgramConstantsFromVector(
				Context3DProgramType.FRAGMENT,
				0,
				FRAG_CONST
			);

			if (extraRenders > 0)
			{
				context3D.setRenderToTexture(texture);
				context3D.clear(0.5, 0.5, 0.5);
				for (var i:int = 0; i < extraRenders; ++i)
				{
					context3D.drawTriangles(indexBuffer, 0, 2);
				}
			}
			context3D.setRenderToBackBuffer();
			context3D.clear(0.5, 0.5, 0.5);
			context3D.drawTriangles(indexBuffer, 0, 2);
			context3D.present();

			// Update frame rate display
			frameCount++;
			var now:int = getTimer();
			var elapsed:int = now - lastFPSUpdateTime;
			if (elapsed > 1000)
			{
				var framerateValue:Number = 1000 / (elapsed / frameCount);
				fps.text = "FPS: " + framerateValue.toFixed(4);
				lastFPSUpdateTime = now;
				frameCount = 0;
			}
		}
	}
}

Launch Test App

If you have a fast video card, you may be unable to lower the framerate below 60 FPS without adding a lot of extra renders. Try switching to software mode though and you’ll see an entirely different story, especially with instructions like m44. This can be very useful for getting a picture of what kind of performance your users will get if they are forced to run your app with software rendering. Don’t think it can’t happen to you: there are several situations that trigger a fallback to software, even including old drivers.

Spot a bug? Have a suggestion? Interesting results on your environment? Post a comment!