pmeerw's blog

20 Mar 2010

Sat, 20 Mar 2010

NVidia OpenCL toolkit fail

One might think that SDKs are supposed to facilitate programming, which is not the case for the NVidia OpenCL SDK. Here is how I got very basic OpenCL code working...
Disclaimer: Linux only, using Ubuntu 8.10 Hardy

First, it is not clear which versions of the graphics driver, CUDA and the OpenCL SDK you are supposed to install. The latest (stable) graphics drivers do not include OpenCL support; version 195.36.15 worked for me.

Second, what version of CUDA? cudatoolkit 3.0 worked for me, maybe you are supposed to use cudasdk 2.3? or maybe cudatoolkit 2.3?

Third, what version of OpenCL? The package is convincingly named gpucomputingsdk 2.3a, so maybe it is to be used with the 2.3 CUDA stuff? CUDA SDK 3.x also contains the relevant OpenCL header files, and there are no libs except for the ones shipped with the graphics driver; good documentation is available from the Khronos site.

In the end, the OpenCL samples fail to work as

// Create the OpenCL context on a GPU device
cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, 
NULL, &ciErr1);
returns -32 (CL_INVALID_PLATFORM); other samples just crash. It turns out that the first parameter must not be zero; the specification says the behaviour is implementation-defined if it is zero -- great idea to put such code in the SDK samples. Properly call clGetPlatformIDs() and you are good to go!

Here is some device query code which works for me:

#include <stdio.h>

#include "CL/cl.h"

int main() {
    cl_uint n;

    cl_platform_id plat_ids[3];
    cl_int ret = clGetPlatformIDs(3, plat_ids, &n);
    printf("ret = %d, platforms = %d\n", ret, n);
    for (cl_uint i = 0; i < n; i++) {
        char buf[1024];
        size_t bytes;

        ret = clGetPlatformInfo(plat_ids[i], CL_PLATFORM_VERSION, sizeof(buf), buf, &bytes);
        printf("ret = %d, bytes = %d, version %s\n", ret, bytes, buf);
        ret = clGetPlatformInfo(plat_ids[i], CL_PLATFORM_NAME, sizeof(buf), buf, &bytes);
        printf("ret = %d, bytes = %d, name %s\n", ret, bytes, buf);

    cl_device_id dev_ids[3];
    ret = clGetDeviceIDs(plat_ids[0], CL_DEVICE_TYPE_GPU, 3, dev_ids, &n);
    printf("ret = %d, devices = %d\n", ret, n);

    char buf[1024];
    size_t bytes;
    ret = clGetDeviceInfo(dev_ids[0], CL_DEVICE_AVAILABLE, sizeof(buf), buf, &bytes);
    printf("ret = %d, bytes = %d, avail %d\n", ret, bytes, *(cl_bool*) &buf);
    ret = clGetDeviceInfo(dev_ids[0], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf), buf, &bytes);
    printf("ret = %d, bytes = %d, global mem %lld\n", ret, bytes, *(cl_ulong*) &buf);
    ret = clGetDeviceInfo(dev_ids[0], CL_DEVICE_LOCAL_MEM_SIZE, sizeof(buf), buf, &bytes);
    printf("ret = %d, bytes = %d, local mem %lld\n", ret, bytes, *(cl_ulong*) &buf);
    ret = clGetDeviceInfo(dev_ids[0], CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(buf), buf, &bytes);
    printf("ret = %d, bytes = %d, global mem cache %lld\n", ret, bytes, *(cl_ulong*) &buf);
    ret = clGetDeviceInfo(dev_ids[0], CL_DEVICE_IMAGE_SUPPORT, sizeof(buf), buf, &bytes);
    printf("ret = %d, bytes = %d, image support %d\n", ret, bytes, *(cl_bool*) &buf);
    ret = clGetDeviceInfo(dev_ids[0], CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(buf), buf, &bytes);
    printf("ret = %d, bytes = %d, image 2d max height %d\n", ret, bytes, *(size_t*) &buf);
    ret = clGetDeviceInfo(dev_ids[0], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf), buf, &bytes);
    printf("ret = %d, bytes = %d, clock freq %d\n", ret, bytes, *(cl_uint*) &buf);
    ret = clGetDeviceInfo(dev_ids[0], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf), buf, &bytes);
    printf("ret = %d, bytes = %d, compute units %d\n", ret, bytes, *(cl_uint*) &buf);
    ret = clGetDeviceInfo(dev_ids[0], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(buf), buf, &bytes);
    printf("ret = %d, bytes = %d, work group size %d\n", ret, bytes, *(size_t*) &buf);
    ret = clGetDeviceInfo(dev_ids[0], CL_DEVICE_NAME, sizeof(buf), buf, &bytes);
    printf("ret = %d, bytes = %d, name %s\n", ret, bytes, buf);

    cl_context_properties props[3];
    props[0] = CL_CONTEXT_PLATFORM;
    props[1] = (cl_context_properties)plat_ids[0];
    props[2] = 0;

    cl_context ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_GPU, 0, 0, &ret);
    printf("ret = %d\n", ret);

    return 0;

posted at: 12:30 | path: /rant | permanent link

Made with PyBlosxom