We're building a gravity simulation on a GPU using CUDA, and last time we actually got a working simulator running for a 2-body system. That was exciting, but there were some drawbacks, namely the printout-copy-paste-to-spreadsheet-and-graph way of visualizing the simulation. In this post we're going to remedy that issue with a simulation display that shows the position of the bodies graphically while the simulation is running and that also runs on the graphics card alongside the simulation. We'll even make the position buffer multipurpose so that we can calculate the positions directly into this buffer, and then turn around and draw those positions into a window from the same buffer. No copying required, not even automated copying.
Making this display for the gravity simulator turned out to be more difficult than I thought because I haven't programmed in OpenGL since the graphics course I took in college, and I've certainly never done any OpenGL-CUDA interop before. I managed to pull something together by leaning heavily on the N-Body sample project that's part of the nVidia CUDA sample projects. This sample project is also a gravity simulator, but the underlying simulation engine is substantially different than the one we've built so far. Even so, I was able to use the renderer without any modifications. Let's see how it works.
Initialization
#include <helper_gl.h>
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#include <GL/wglew.h>
#endif
#include <GL/freeglut.h>
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
#include <helper_cuda.h>
#include "render_particles.h"
#include "kernel.h"
ParticleRenderer* _renderer;
unsigned int _pbo[2];
cudaGraphicsResource* _pGRes[2];
const float _scale = 3.0e11;
const float _point_size = _scale / 50.0;
const int _num_bodies = 2;
const float _dt = 900;int main(int argc, char** argv) {
    initGL(&argc, argv);
    const float m[_num_bodies] = { 1.989e30, 5.972e24 };
    const vec3d d0[_num_bodies] = { {0, 0, 0, 1}, {1.496e11, 0, 0, 1} };
    const vec3d v0[_num_bodies] = { {0, -8.9415e-2, 0, 1}, {0, 2.978e4, 0, 1} };
    init(m, v0, d0);
    glutDisplayFunc(display);
    glutReshapeFunc(reshape);
    glutIdleFunc(idle);
    glutMainLoop();
    finalize();
    return 0;
}typedef struct vec3d {
    float x;
    float y;
    float z;
    float a;
} vec3d;
cudaError_t initCuda(const float* m, const vec3d* v0, unsigned int size);
cudaError_t gravitySim(vec3d** d, const float dt, unsigned int size);
void finalizeCuda();void initGL(int* argc, char** argv) {
    // First initialize OpenGL context, so we can properly set the GL for CUDA.
    // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
    glutInit(argc, argv);
    glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE);
    glutInitWindowSize(720, 480);
    glutCreateWindow("CUDA n-body gravity simulator");
    if (!isGLVersionSupported(2, 0) ||
        !areGLExtensionsSupported(
            "GL_ARB_multitexture "
            "GL_ARB_vertex_buffer_object")) {
        fprintf(stderr, "Required OpenGL extensions missing.");
        exit(EXIT_FAILURE);
    } else {
#if   defined(WIN32)
        wglSwapIntervalEXT(0);
#elif defined(LINUX)
        glxSwapIntervalSGI(0);
#endif
    }
    glEnable(GL_DEPTH_TEST);
    glClearColor(0.0, 0.0, 0.0, 1.0);
    checkGLErrors("initGL");
}void init(const float* m, const vec3d* v0, const vec3d* d0) {
    // create the position pixel buffer objects for rendering
    // we will actually compute directly from this memory in CUDA too
    glGenBuffers(2, (GLuint*)_pbo);
    unsigned int memSize = sizeof(float) * 4 * _num_bodies;
    for (int i = 0; i < 2; ++i) {
        glBindBuffer(GL_ARRAY_BUFFER, _pbo[i]);
        glBufferData(GL_ARRAY_BUFFER, memSize, d0, GL_DYNAMIC_DRAW);
        int size = 0;
        glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, (GLint*)&size);
        if ((unsigned)size != memSize) {
            fprintf(stderr, "WARNING: Pixel Buffer Object allocation failed!n");
        }
        glBindBuffer(GL_ARRAY_BUFFER, 0);
        checkCudaErrors(cudaGraphicsGLRegisterBuffer(&_pGRes[i], _pbo[i], cudaGraphicsMapFlagsNone));
    }
    initCuda(m, v0, _num_bodies);
    _renderer = new ParticleRenderer;
    
    float color[4] = { 1.0f, 0.6f, 0.3f, 1.0f };
    _renderer->setBaseColor(color);
    float* hColor = new float[_num_bodies * 4];
    for (int i = 0; i < sizeof(hColor); i += 4) {
        hColor[i] = color[0];
        hColor[i + 1] = color[1];
        hColor[i + 2] = color[2];
        hColor[i + 3] = color[3];
    }
    _renderer->setColors(hColor, _num_bodies);
    _renderer->setSpriteSize(_point_size);
    _renderer->setPBO(_pbo[1], _num_bodies, false);
}cudaError_t initCuda(const float* m, const vec3d* v0, unsigned int size)
{
    cudaError_t cudaStatus;
    // Choose which GPU to run on, change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    // Allocate GPU buffers for 3 vectors (two input, one output)
    cudaStatus = cudaMalloc((void**)&dev_v, size * sizeof(vec3d));
    cudaStatus = cudaMalloc((void**)&dev_m, size * sizeof(float));
    cudaStatus = cudaMalloc((void**)&dev_v0, size * sizeof(vec3d));
    // Copy input vectors from host memory to GPU buffers.
    cudaStatus = cudaMemcpy(dev_m, m, size * sizeof(float), cudaMemcpyHostToDevice);
    cudaStatus = cudaMemcpy(dev_v0, v0, size * sizeof(vec3d), cudaMemcpyHostToDevice);
    return cudaStatus;
}Display
void display() {
    vec3d* dev_d[2];
    checkCudaErrors(cudaGraphicsMapResources(2, _pGRes, 0));
    size_t bytes;
    checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void**)&(dev_d[0]), &bytes, _pGRes[0]));
    checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void**)&(dev_d[1]), &bytes, _pGRes[1]));
    gravitySim(dev_d, _dt, _num_bodies);
    checkCudaErrors(cudaGraphicsUnmapResources(2, _pGRes, 0));
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();
    glTranslatef(0, 0, -_scale);
    _renderer->display(ParticleRenderer::PARTICLE_SPRITES_COLOR);
    glutSwapBuffers();
    glutReportErrors();
}cudaError_t gravitySim(vec3d** dev_d, const float dt, unsigned int size) {
    cudaError_t cudaStatus = cudaSuccess;
    vec3d *d = new vec3d[size];
    cudaStatus = gravityIter(d, dev_v, dev_d[1], dev_m, dev_v0, dev_d[0], dt, size);
    cudaStatus = gravityIter(d, dev_v0, dev_d[0], dev_m, dev_v, dev_d[1], dt, size);
    return cudaStatus;
}void reshape(int w, int h) {
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluPerspective(60.0, (float)w / (float)h, _scale * 2 / 1.0e6, _scale * 2);
    glMatrixMode(GL_MODELVIEW);
    glViewport(0, 0, w, h);
}void idle(void) {
    glutPostRedisplay();
}Cleanup
void finalize() {
    checkCudaErrors(cudaGraphicsUnregisterResource(_pGRes[0]));
    checkCudaErrors(cudaGraphicsUnregisterResource(_pGRes[1]));
    glDeleteBuffers(2, (const GLuint*)_pbo);
    finalizeCuda();
}void finalizeCuda() {
    cudaFree(dev_v);
    cudaFree(dev_m);
    cudaFree(dev_v0);
    // cudaDeviceReset must be called before exiting in order for profiling and
    // tracing tools such as Nsight and Visual Profiler to show complete traces.
    checkCudaErrors(cudaDeviceReset());
}
No comments:
Post a Comment