Warum scheint cudaFree keinen Speicher freizugeben?
Ich versuche, Gerätespeicher zuzuweisen, Daten dorthin zu kopieren, die Berechnungen auf der GPU durchzuführen, die Ergebnisse zurückzukopieren und dann den von mir zugewiesenen Gerätespeicher wieder freizugeben. Ich wollte sicherstellen, dass ich das Speicherlimit nicht überschreite, und ich wollte sehen, ob im Shared Memory genügend Platz vorhanden ist, um dort ein paar Arrays abzulegen.
Wenn ich Gerätespeicher zuweise, werden keine Fehler zurückgegeben. Wenn ich cudaMemGetInfo benutze,
um die Größe des zugewiesenen Speichers zu überprüfen, sieht es so aus, als hätte cudaMalloc
keinen Speicher zugewiesen. Auch wenn ich versuche, den Speicher freizugeben, sieht es so aus, als würde nur ein Zeiger freigegeben.
Ich benutze die mexFunction-Schnittstelle von Matlab,
um den GPU-Speicher einzurichten und den Kernel zu starten. Zu diesem Zeitpunkt rufe ich den Kernel noch nicht einmal auf und gebe nur eine Einheitsmatrix für die Ergebnisse zurück.
<code>/*
 * Fragment of a MEX function body: allocates six device buffers, copies the
 * input arrays to the device, copies the (not yet computed) results back and
 * frees everything again, printing the cudaMemGetInfo figures after each step.
 *
 * Names used but not declared in this fragment: pulseDelay, tarDistance,
 * scattDistance, scatterers (host inputs) and receivedReal, receivedImag
 * (host outputs).
 *
 * Failures are reported with mexPrintf and execution continues, so the
 * remaining diagnostics are still printed.
 *
 * NOTE(review): in the output recorded with this post the free-memory figure
 * moves in steps of 1048576 bytes, so the "Consumed"/"Free'd" columns do not
 * track individual small allocations one-to-one. Presumably the runtime
 * reserves and releases device memory in larger chunks - confirm against the
 * CUDA documentation for the driver in use.
 */

/* size_t is 64 bits wide on 64-bit Windows while %lu expects a 32-bit
 * unsigned long there, so byte counts are widened and printed with %llu. */
#define AS_ULL(x) ((unsigned long long)(x))

/* Print `what` and the CUDA error string if the last call stored a failure in cudaErr. */
#define REPORT_CUDA_ERROR(what)                                              \
    do {                                                                     \
        if (cudaErr != cudaSuccess) {                                        \
            mexPrintf(what "\n");                                            \
            mexPrintf("Error: %s\n", cudaGetErrorString(cudaErr));           \
        }                                                                    \
    } while (0)

/* Refresh `dst` (and totalMem) from cudaMemGetInfo, reporting a failed query. */
#define QUERY_FREE_MEM(dst)                                                  \
    do {                                                                     \
        cudaErr = cudaMemGetInfo(&(dst), &totalMem);                         \
        REPORT_CUDA_ERROR("could not query device memory");                  \
    } while (0)

/* Print current free/total memory and the amount consumed relative to freeMem. */
#define REPORT_CONSUMED(label)                                               \
    do {                                                                     \
        QUERY_FREE_MEM(allocMem);                                            \
        mexPrintf(label ": Memory avaliable: Free: %llu, Total: %llu, Consumed: %llu\n", \
                  AS_ULL(allocMem), AS_ULL(totalMem), AS_ULL(freeMem - allocMem)); \
    } while (0)

/* Print current free/total memory and the amount released relative to freeMem. */
#define REPORT_FREED(label)                                                  \
    do {                                                                     \
        QUERY_FREE_MEM(allocMem);                                            \
        mexPrintf(label ": Memory avaliable: Free: %llu, Total: %llu, Free'd: %llu\n", \
                  AS_ULL(allocMem), AS_ULL(totalMem), AS_ULL(allocMem - freeMem)); \
    } while (0)

/* Allocate `count` doubles on the device; on failure the pointer is reset to
 * NULL so that the later cudaMemcpy/cudaFree calls never see a stale value. */
#define DEVICE_ALLOC(ptr, count)                                             \
    do {                                                                     \
        cudaErr = cudaMalloc((void **) &(ptr), sizeof(double) * (count));    \
        REPORT_CUDA_ERROR("could not allocate memory to " #ptr);             \
        if (cudaErr != cudaSuccess) {                                        \
            (ptr) = NULL;                                                    \
        }                                                                    \
        REPORT_CONSUMED(#ptr);                                               \
    } while (0)

/* Copy `count` doubles from src to dst in direction `kind` and report memory usage. */
#define COPY_DOUBLES(dst, src, count, kind)                                  \
    do {                                                                     \
        cudaErr = cudaMemcpy((dst), (src), sizeof(double) * (count), (kind)); \
        REPORT_CUDA_ERROR("could not copy to " #dst);                        \
        REPORT_CONSUMED(#dst);                                               \
    } while (0)

/* Free a device buffer, clear the pointer and report memory usage. */
#define DEVICE_FREE(ptr)                                                     \
    do {                                                                     \
        cudaErr = cudaFree(ptr);                                             \
        REPORT_CUDA_ERROR("could not free " #ptr);                           \
        (ptr) = NULL;                                                        \
        REPORT_FREED(#ptr);                                                  \
    } while (0)

cudaError_t cudaErr;
size_t freeMem = 0;
size_t totalMem = 0;
size_t allocMem = 0;

/* Baseline: free memory before any allocation made by this fragment. */
QUERY_FREE_MEM(freeMem);
mexPrintf("Memory avaliable: Free: %llu, Total: %llu\n", AS_ULL(freeMem), AS_ULL(totalMem));

/* Pointers for the device memory; NULL until successfully allocated. */
double *devicePulseDelay = NULL, *deviceTarDistance = NULL;
double *deviceScattDistance = NULL, *deviceScatterers = NULL;
double *deviceReceivedReal = NULL, *deviceReceivedImag = NULL;

/* Allocate memory on the device for the arrays. */
mexPrintf("Allocating memory.\n");
DEVICE_ALLOC(devicePulseDelay, 512);
DEVICE_ALLOC(deviceTarDistance, 512);
DEVICE_ALLOC(deviceScattDistance, 999 * 512);
DEVICE_ALLOC(deviceScatterers, 999);
DEVICE_ALLOC(deviceReceivedReal, 999 * 512);
DEVICE_ALLOC(deviceReceivedImag, 999 * 512);

/* Copy the input arrays across to the device. */
mexPrintf("\nCopying memory.\n");
COPY_DOUBLES(devicePulseDelay, pulseDelay, 512, cudaMemcpyHostToDevice);
COPY_DOUBLES(deviceTarDistance, tarDistance, 512, cudaMemcpyHostToDevice);
COPY_DOUBLES(deviceScattDistance, scattDistance, 999 * 512, cudaMemcpyHostToDevice);
COPY_DOUBLES(deviceScatterers, scatterers, 999, cudaMemcpyHostToDevice);

/* Call the kernel (still disabled). */
// launchKernel<<<1,512>>>(........);

/* Retrieve the output.
 * NOTE(review): 512*512 doubles are read back from buffers allocated with
 * 999*512 doubles, and with the kernel disabled nothing has written to those
 * buffers yet - confirm the intended result size against the host arrays. */
COPY_DOUBLES(receivedReal, deviceReceivedReal, 512 * 512, cudaMemcpyDeviceToHost);
COPY_DOUBLES(receivedImag, deviceReceivedImag, 512 * 512, cudaMemcpyDeviceToHost);

/* Free the memory; freeMem becomes the new baseline for the "Free'd" column. */
mexPrintf("\nFree'ing memory.\n");
QUERY_FREE_MEM(freeMem);
mexPrintf("Before freeing: Free %llu, Total: %llu\n", AS_ULL(freeMem), AS_ULL(totalMem));
DEVICE_FREE(devicePulseDelay);
DEVICE_FREE(deviceTarDistance);
DEVICE_FREE(deviceScattDistance);
DEVICE_FREE(deviceScatterers);
DEVICE_FREE(deviceReceivedReal);
DEVICE_FREE(deviceReceivedImag);

/* Keep the helper macros local to this fragment. */
#undef DEVICE_FREE
#undef COPY_DOUBLES
#undef DEVICE_ALLOC
#undef REPORT_FREED
#undef REPORT_CONSUMED
#undef QUERY_FREE_MEM
#undef REPORT_CUDA_ERROR
#undef AS_ULL
</code>
Hier ist die Ausgabe davon:
Memory avaliable: Free: 2523959296, Total: 2818572288 Allocating memory. devicePulseDelay: Memory avaliable: Free: 2522910720, Total: 2818572288, Consumed: 1048576 deviceTarDistance: Memory avaliable: Free: 2522910720, Total: 2818572288, Consumed: 1048576 deviceScattDistance: Memory avaliable: Free: 2518716416, Total: 2818572288, Consumed: 5242880 deviceScatterers: Memory avaliable: Free: 2517667840, Total: 2818572288, Consumed: 6291456 deviceReceivedReal: Memory avaliable: Free: 2515570688, Total: 2818572288, Consumed: 8388608 deviceReceivedImag: Memory avaliable: Free: 2513473536, Total: 2818572288, Consumed: 10485760 Copying memory. devicePulseDelay: Memory avaliable: Free: 2513473536, Total: 2818572288, Consumed: 10485760 deviceTarDistance: Memory avaliable: Free: 2513473536, Total: 2818572288, Consumed: 10485760 deviceScattDistance: Memory avaliable: Free: 2513473536, Total: 2818572288, Consumed: 10485760 deviceScatterers: Memory avaliable: Free: 2513473536, Total: 2818572288, Consumed: 10485760 receivedReal: Memory avaliable: Free: 2513473536, Total: 2818572288, Consumed: 10485760 receivedImag: Memory avaliable: Free: 2513473536, Total: 2818572288, Consumed: 10485760 Free'ing memory. Before freeing: Free 2513473536, Total: 2818572288 devicePulseDelay: Memory avaliable: Free: 2513473536, Total: 2818572288, Free'd: 0 deviceTarDistance: Memory avaliable: Free: 2513473536, Total: 2818572288, Free'd: 0 deviceScattDistance: Memory avaliable: Free: 2513473536, Total: 2818572288, Free'd: 0 deviceScatterers: Memory avaliable: Free: 2514522112, Total: 2818572288, Free'd: 1048576 deviceReceivedReal: Memory avaliable: Free: 2514522112, Total: 2818572288, Free'd: 1048576 deviceReceivedImag: Memory avaliable: Free: 2514522112, Total: 2818572288, Free'd: 1048576
Ich habe das Gefühl, dass ich etwas Offensichtliches übersehe. Kann mir jemand erklären, was hier los ist?
BEARBEITEN: Plattform ist Windows 7 mit einer Tesla C2050 GPU-Karte.