/*
* edgeDetection.cu
*
* Prerelease License - for engineering feedback and testing purposes
* only. Not for sale.
*
* Code generation for model "edgeDetection".
*
* Model version : 1.8
* Simulink Coder version : 9.4 (R2020b) 19-May-2020
* C++ source code generated on : Tue Jun 16 11:46:16 2020
*
* Target selection: grt.tlc
* Note: GRT includes extra infrastructure and instrumentation for prototyping
* Embedded hardware selection: Intel->x86-64 (Windows64)
* Code generation objectives: Unspecified
* Validation result: Not run
*/
#include "edgeDetection.h"
#include "edgeDetection_private.h"
#include "math_constants.h"
/* Forward declaration for local functions */
static __global__ __launch_bounds__(512, 1) void edgeDete_eML_blk_kernel_kernel1
(const real32_T RGB[230400], real_T gray[76800]);
/* Forward declaration for local functions */
static __global__ __launch_bounds__(512, 1) void edgeDe_eML_blk_kernel_kernel1_l
(real32_T expanded[77924]);
static __global__ __launch_bounds__(512, 1) void edgeDete_eML_blk_kernel_kernel2
(const real_T grayImage[76800], real32_T expanded[77924]);
static __global__ __launch_bounds__(1024, 1) void
edgeDete_eML_blk_kernel_kernel3(const int8_T b[9], const real32_T expanded
[77924], real32_T H[76800]);
static __global__ __launch_bounds__(512, 1) void edgeDete_eML_blk_kernel_kernel4
(real32_T expanded[77924]);
static __global__ __launch_bounds__(512, 1) void edgeDete_eML_blk_kernel_kernel5
(const real_T grayImage[76800], real32_T expanded[77924]);
static __global__ __launch_bounds__(1024, 1) void
edgeDete_eML_blk_kernel_kernel6(const int8_T b_b[9], const real32_T expanded
[77924], real32_T V[76800]);
static __global__ __launch_bounds__(512, 1) void edgeDete_eML_blk_kernel_kernel7
(const real_T threshold, const real32_T V[76800], uint8_T edgeImage[76800],
real32_T H[76800]);
static __global__ __launch_bounds__(512, 1) void edgeDete_eML_blk_kernel_kernel1
(const real32_T RGB[230400], real_T gray[76800])
{
uint64_T threadId;
int32_T gray_tmp;
threadId = mwGetGlobalThreadIndex();
gray_tmp = static_cast<int32_T>(threadId % 240ULL);
threadId = (threadId - static_cast<uint64_T>(gray_tmp)) / 240ULL;
if ((static_cast<int32_T>(static_cast<int32_T>(threadId) < 320)) && (
static_cast<int32_T>(gray_tmp < 240))) {
gray_tmp += 240 * static_cast<int32_T>(threadId);
gray[gray_tmp] = (static_cast<real_T>(RGB[gray_tmp + 76800]) * 0.587 +
static_cast<real_T>(RGB[gray_tmp]) * 0.2989) +
static_cast<real_T>(RGB[gray_tmp + 153600]) * 0.114;
}
}
/* Function for MATLAB Function: '<Root>/RGB to Gray' */
void edgeDetectionModelClass::edgeDetection_eML_blk_kernel(const real32_T RGB
[230400], real_T gray[76800])
{
real_T (*gpu_gray)[76800];
real32_T (*gpu_RGB)[230400];
cudaMalloc(&gpu_gray, 614400ULL);
cudaMalloc(&gpu_RGB, 921600ULL);
cudaMemcpy(gpu_RGB, (void *)&RGB[0], 921600ULL, cudaMemcpyHostToDevice);
edgeDete_eML_blk_kernel_kernel1<<<dim3(150U, 1U, 1U), dim3(512U, 1U, 1U)>>>
(*gpu_RGB, *gpu_gray);
cudaMemcpy(&gray[0], gpu_gray, 614400ULL, cudaMemcpyDeviceToHost);
cudaFree(*gpu_RGB);
cudaFree(*gpu_gray);
}
/* Output and update for atomic system: '<Root>/RGB to Gray' */
void edgeDetectionModelClass::edgeDetection_RGBtoGray(const real32_T rtu_RGB
[230400], B_RGBtoGray_edgeDetection_T *localB)
{
edgeDetection_eML_blk_kernel(rtu_RGB, localB->dv);
std::memcpy(&localB->gray[0], &localB->dv[0], 76800U * sizeof(real_T));
}
static __global__ __launch_bounds__(512, 1) void edgeDe_eML_blk_kernel_kernel1_l
(real32_T expanded[77924])
{
uint64_T threadId;
threadId = mwGetGlobalThreadIndex();
if (static_cast<int32_T>(threadId) < 77924) {
expanded[static_cast<int32_T>(threadId)] = 0.0F;
}
}
static __global__ __launch_bounds__(512, 1) void edgeDete_eML_blk_kernel_kernel2
(const real_T grayImage[76800], real32_T expanded[77924])
{
uint64_T threadId;
int32_T n;
threadId = mwGetGlobalThreadIndex();
n = static_cast<int32_T>(threadId % 240ULL);
threadId = (threadId - static_cast<uint64_T>(n)) / 240ULL;
if ((static_cast<int32_T>(static_cast<int32_T>(threadId) < 320)) && (
static_cast<int32_T>(n < 240))) {
expanded[(n + 242 * (static_cast<int32_T>(threadId) + 1)) + 1] =
static_cast<real32_T>(grayImage[240 * static_cast<int32_T>(threadId) + n]);
}
}
static __global__ __launch_bounds__(1024, 1) void
edgeDete_eML_blk_kernel_kernel3(const int8_T b[9], const real32_T expanded
[77924], real32_T H[76800])
{
__shared__ real32_T expanded_shared[1156];
int32_T baseC;
int32_T baseR;
int32_T ocol;
int32_T orow;
int32_T scol;
int32_T srow;
int32_T strideCol;
int32_T strideRow;
int32_T x_idx;
int32_T y_idx;
real32_T cv;
ocol = mwGetGlobalThreadIndexInYDimension();
orow = mwGetGlobalThreadIndexInXDimension();
baseR = orow;
srow = static_cast<int32_T>(threadIdx.x);
strideRow = static_cast<int32_T>(blockDim.x);
scol = static_cast<int32_T>(threadIdx.y);
strideCol = static_cast<int32_T>(blockDim.y);
for (y_idx = srow; y_idx <= 33; y_idx += strideRow) {
baseC = ocol;
for (x_idx = scol; x_idx <= 33; x_idx += strideCol) {
if ((static_cast<int32_T>((static_cast<int32_T>(baseR >= 0)) && (
static_cast<int32_T>(baseR < 242)))) && (static_cast<int32_T>((
static_cast<int32_T>(baseC >= 0)) && (static_cast<int32_T>(baseC <
322))))) {
expanded_shared[y_idx + 34 * x_idx] = expanded[242 * baseC + baseR];
} else {
expanded_shared[y_idx + 34 * x_idx] = 0.0F;
}
baseC += strideCol;
}
baseR += strideRow;
}
__syncthreads();
if ((static_cast<int32_T>(ocol < 320)) && (static_cast<int32_T>(orow < 240)))
{
cv = 0.0F;
for (baseR = 0; baseR < 3; baseR++) {
strideRow = (baseR + ocol) * 242 + orow;
strideCol = (2 - baseR) * 3;
cv += expanded_shared[((srow + strideRow % 242) - orow) + 34 * ((scol +
strideRow / 242) - ocol)] * static_cast<real32_T>(b[strideCol + 2]);
cv += expanded_shared[((srow + (strideRow + 1) % 242) - orow) + 34 *
((scol + (strideRow + 1) / 242) - ocol)] * static_cast<real32_T>
(b[strideCol + 1]);
cv += expanded_shared[((srow + (strideRow + 2) % 242) - orow) + 34 *
((scol + (strideRow + 2) / 242) - ocol)] * static_cast<real32_T>
(b[strideCol]);
}
H[orow + 240 * ocol] = cv;
}
}
static __global__ __launch_bounds__(512, 1) void edgeDete_eML_blk_kernel_kernel4
(real32_T expanded[77924])
{
uint64_T threadId;
threadId = mwGetGlobalThreadIndex();
if (static_cast<int32_T>(threadId) < 77924) {
expanded[static_cast<int32_T>(threadId)] = 0.0F;
}
}
static __global__ __launch_bounds__(512, 1) void edgeDete_eML_blk_kernel_kernel5
(const real_T grayImage[76800], real32_T expanded[77924])
{
uint64_T threadId;
int32_T n;
threadId = mwGetGlobalThreadIndex();
n = static_cast<int32_T>(threadId % 240ULL);
threadId = (threadId - static_cast<uint64_T>(n)) / 240ULL;
if ((static_cast<int32_T>(static_cast<int32_T>(threadId) < 320)) && (
static_cast<int32_T>(n < 240))) {
expanded[(n + 242 * (static_cast<int32_T>(threadId) + 1)) + 1] =
static_cast<real32_T>(grayImage[240 * static_cast<int32_T>(threadId) + n]);
}
}
static __global__ __launch_bounds__(1024, 1) void
edgeDete_eML_blk_kernel_kernel6(const int8_T b_b[9], const real32_T expanded
[77924], real32_T V[76800])
{
__shared__ real32_T expanded_shared[1156];
int32_T baseC;
int32_T baseR;
int32_T ocol;
int32_T orow;
int32_T scol;
int32_T srow;
int32_T strideCol;
int32_T strideRow;
int32_T x_idx;
int32_T y_idx;
real32_T cv;
ocol = mwGetGlobalThreadIndexInYDimension();
orow = mwGetGlobalThreadIndexInXDimension();
baseR = orow;
srow = static_cast<int32_T>(threadIdx.x);
strideRow = static_cast<int32_T>(blockDim.x);
scol = static_cast<int32_T>(threadIdx.y);
strideCol = static_cast<int32_T>(blockDim.y);
for (y_idx = srow; y_idx <= 33; y_idx += strideRow) {
baseC = ocol;
for (x_idx = scol; x_idx <= 33; x_idx += strideCol) {
if ((static_cast<int32_T>((static_cast<int32_T>(baseR >= 0)) && (
static_cast<int32_T>(baseR < 242)))) && (static_cast<int32_T>((
static_cast<int32_T>(baseC >= 0)) && (static_cast<int32_T>(baseC <
322))))) {
expanded_shared[y_idx + 34 * x_idx] = expanded[242 * baseC + baseR];
} else {
expanded_shared[y_idx + 34 * x_idx] = 0.0F;
}
baseC += strideCol;
}
baseR += strideRow;
}
__syncthreads();
if ((static_cast<int32_T>(ocol < 320)) && (static_cast<int32_T>(orow < 240)))
{
cv = 0.0F;
for (baseR = 0; baseR < 3; baseR++) {
cv += expanded_shared[((srow + orow) - orow) + 34 * (((scol + baseR) +
ocol) - ocol)] * static_cast<real32_T>(b_b[(2 - baseR) * 3 + 2]);
cv += expanded_shared[(((srow + orow) - orow) + 34 * (((scol + baseR) +
ocol) - ocol)) + 1] * static_cast<real32_T>(b_b[(2 - baseR) * 3 + 1]);
cv += expanded_shared[(((srow + orow) - orow) + 34 * (((scol + baseR) +
ocol) - ocol)) + 2] * static_cast<real32_T>(b_b[(2 - baseR) * 3]);
}
V[orow + 240 * ocol] = cv;
}
}
static __global__ __launch_bounds__(512, 1) void edgeDete_eML_blk_kernel_kernel7
(const real_T threshold, const real32_T V[76800], uint8_T edgeImage[76800],
real32_T H[76800])
{
uint64_T threadId;
threadId = mwGetGlobalThreadIndex();
if (static_cast<int32_T>(threadId) < 76800) {
H[static_cast<int32_T>(threadId)] = H[static_cast<int32_T>(threadId)] * H[
static_cast<int32_T>(threadId)] + V[static_cast<int32_T>(threadId)] * V[
static_cast<int32_T>(threadId)];
H[static_cast<int32_T>(threadId)] = sqrtf(H[static_cast<int32_T>(threadId)]);
edgeImage[static_cast<int32_T>(threadId)] = static_cast<uint8_T>(
static_cast<uint32_T>(static_cast<int32_T>(static_cast<real_T>(H[
static_cast<int32_T>(threadId)]) > threshold)) * 255U);
}
}
/* Function for MATLAB Function: '<Root>/Sobel Edge' */
void edgeDetectionModelClass::edgeDetection_eML_blk_kernel_b(const real_T
grayImage[76800], real_T threshold, uint8_T edgeImage[76800])
{
static const int8_T b[9] = { 1, 0, -1, 2, 0, -2, 1, 0, -1 };
static const int8_T b_b[9] = { 1, 2, 1, 0, 0, 0, -1, -2, -1 };
real_T (*gpu_grayImage)[76800];
real32_T (*gpu_expanded)[77924];
real32_T (*gpu_H)[76800];
real32_T (*gpu_V)[76800];
int8_T (*gpu_b)[9];
int8_T (*gpu_b_b)[9];
uint8_T (*gpu_edgeImage)[76800];
cudaMalloc(&gpu_edgeImage, 76800ULL);
cudaMalloc(&gpu_V, 307200ULL);
cudaMalloc(&gpu_H, 307200ULL);
cudaMalloc(&gpu_b_b, 9ULL);
cudaMalloc(&gpu_b, 9ULL);
cudaMalloc(&gpu_grayImage, 614400ULL);
cudaMalloc(&gpu_expanded, 311696ULL);
edgeDe_eML_blk_kernel_kernel1_l<<<dim3(153U, 1U, 1U), dim3(512U, 1U, 1U)>>>
(*gpu_expanded);
cudaMemcpy(gpu_grayImage, (void *)&grayImage[0], 614400ULL,
cudaMemcpyHostToDevice);
edgeDete_eML_blk_kernel_kernel2<<<dim3(150U, 1U, 1U), dim3(512U, 1U, 1U)>>>
(*gpu_grayImage, *gpu_expanded);
cudaMemcpy(gpu_b, (void *)&b[0], 9ULL, cudaMemcpyHostToDevice);
edgeDete_eML_blk_kernel_kernel3<<<dim3(8U, 10U, 1U), dim3(32U, 32U, 1U)>>>
(*gpu_b, *gpu_expanded, *gpu_H);
edgeDete_eML_blk_kernel_kernel4<<<dim3(153U, 1U, 1U), dim3(512U, 1U, 1U)>>>
(*gpu_expanded);
edgeDete_eML_blk_kernel_kernel5<<<dim3(150U, 1U, 1U), dim3(512U, 1U, 1U)>>>
(*gpu_grayImage, *gpu_expanded);
cudaMemcpy(gpu_b_b, (void *)&b_b[0], 9ULL, cudaMemcpyHostToDevice);
edgeDete_eML_blk_kernel_kernel6<<<dim3(8U, 10U, 1U), dim3(32U, 32U, 1U)>>>
(*gpu_b_b, *gpu_expanded, *gpu_V);
edgeDete_eML_blk_kernel_kernel7<<<dim3(150U, 1U, 1U), dim3(512U, 1U, 1U)>>>
(threshold, *gpu_V, *gpu_edgeImage, *gpu_H);
cudaMemcpy(&edgeImage[0], gpu_edgeImage, 76800ULL, cudaMemcpyDeviceToHost);
cudaFree(*gpu_expanded);
cudaFree(*gpu_grayImage);
cudaFree(*gpu_b);
cudaFree(*gpu_b_b);
cudaFree(*gpu_H);
cudaFree(*gpu_V);
cudaFree(*gpu_edgeImage);
}
/* Output and update for atomic system: '<Root>/Sobel Edge' */
void edgeDetectionModelClass::edgeDetection_SobelEdge(const real_T
rtu_grayImage[76800], real_T rtu_threshold, B_SobelEdge_edgeDetection_T
*localB)
{
edgeDetection_eML_blk_kernel_b(rtu_grayImage, rtu_threshold, localB->uv);
std::memcpy(&localB->edgeImage[0], &localB->uv[0], 76800U * sizeof(uint8_T));
}
void edgeDetectionModelClass::edgeDetection_setupGpuResources(void)
{
}
void edgeDetectionModelClass::edgeDetecti_cleanupGpuResources(void)
{
}
/* Model step function */
void edgeDetectionModelClass::step()
{
char_T *sErr;
void *source_R;
/* S-Function (sdspwmmfi2): '<Root>/From Multimedia File' */
sErr = GetErrorBuffer(&edgeDetection_DW.FromMultimediaFile_HostLib[0U]);
source_R = (void *)&edgeDetection_B.FromMultimediaFile[0U];
LibOutputs_FromMMFile(&edgeDetection_DW.FromMultimediaFile_HostLib[0U],
GetNullPointer(), GetNullPointer(), source_R,
GetNullPointer(), GetNullPointer());
if (*sErr != 0) {
rtmSetErrorStatus((&edgeDetection_M), sErr);
rtmSetStopRequested((&edgeDetection_M), 1);
}
/* End of S-Function (sdspwmmfi2): '<Root>/From Multimedia File' */
/* MATLAB Function: '<Root>/RGB to Gray' */
edgeDetection_RGBtoGray(edgeDetection_B.FromMultimediaFile,
&edgeDetection_B.sf_RGBtoGray);
/* MATLAB Function: '<Root>/Sobel Edge' incorporates:
* Constant: '<Root>/Threshold'
*/
edgeDetection_SobelEdge(edgeDetection_B.sf_RGBtoGray.gray,
edgeDetection_P.Threshold_Value, &edgeDetection_B.sf_SobelEdge);
}
/* Model initialize function */
void edgeDetectionModelClass::initialize()
{
{
char_T *sErr;
edgeDetection_setupGpuResources();
/* Start for S-Function (sdspwmmfi2): '<Root>/From Multimedia File' */
sErr = GetErrorBuffer(&edgeDetection_DW.FromMultimediaFile_HostLib[0U]);
CreateHostLibrary("frommmfile.dll",
&edgeDetection_DW.FromMultimediaFile_HostLib[0U]);
createAudioInfo(&edgeDetection_DW.FromMultimediaFile_AudioInfo[0U], 0U, 0U,
0.0, 0, 0, 0, 0, GetNullPointer());
createVideoInfo(&edgeDetection_DW.FromMultimediaFile_VideoInfo[0U], 1U, 15.0,
15.000015000015, "RGB ", 1, 3, 320, 240, 0U, 1, 1,
GetNullPointer());
if (*sErr == 0) {
LibCreate_FromMMFile(&edgeDetection_DW.FromMultimediaFile_HostLib[0U], 0,
(void *)
"C:\\MATLAB\\R2020b\\matlab\\toolbox
\\images\\imdata\\rhinos.avi",
1, "", "",
&edgeDetection_DW.FromMultimediaFile_AudioInfo[0U],
&edgeDetection_DW.FromMultimediaFile_VideoInfo[0U],
0U, 1U, 1U, 0U, 0U, 1U, 1.0, 9.2233720368547758E+18);
}
if (*sErr == 0) {
LibStart(&edgeDetection_DW.FromMultimediaFile_HostLib[0U]);
}
if (*sErr != 0) {
DestroyHostLibrary(&edgeDetection_DW.FromMultimediaFile_HostLib[0U]);
if (*sErr != 0) {
rtmSetErrorStatus((&edgeDetection_M), sErr);
rtmSetStopRequested((&edgeDetection_M), 1);
}
}
/* End of Start for S-Function (sdspwmmfi2): '<Root>/From Multimedia File' */
}
/* InitializeConditions for S-Function (sdspwmmfi2): '<Root>/From Multimedia File' */
LibReset(&edgeDetection_DW.FromMultimediaFile_HostLib[0U]);
}
/* Model terminate function */
void edgeDetectionModelClass::terminate()
{
char_T *sErr;
/* Terminate for S-Function (sdspwmmfi2): '<Root>/From Multimedia File' */
sErr = GetErrorBuffer(&edgeDetection_DW.FromMultimediaFile_HostLib[0U]);
LibTerminate(&edgeDetection_DW.FromMultimediaFile_HostLib[0U]);
if (*sErr != 0) {
rtmSetErrorStatus((&edgeDetection_M), sErr);
rtmSetStopRequested((&edgeDetection_M), 1);
}
LibDestroy(&edgeDetection_DW.FromMultimediaFile_HostLib[0U], 0);
DestroyHostLibrary(&edgeDetection_DW.FromMultimediaFile_HostLib[0U]);
/* End of Terminate for S-Function (sdspwmmfi2): '<Root>/From Multimedia File' */
edgeDetecti_cleanupGpuResources();
}
/* Constructor */
edgeDetectionModelClass::edgeDetectionModelClass():
edgeDetection_B()
,edgeDetection_DW()
,edgeDetection_M()
{
/* Currently there is no constructor body generated.*/
}
/* Destructor */
edgeDetectionModelClass::~edgeDetectionModelClass()
{
/* Currently there is no destructor body generated.*/
}
/* Real-Time Model get method */
RT_MODEL_edgeDetection_T * edgeDetectionModelClass::getRTM()
{
return (&edgeDetection_M);
}