Description (build error reported by the compiler):
:441:12: error: expected '}'
:439:10: note: to match this '{'
{
^
Kernel file (FLOW.cl):
// TODO: Add OpenCL kernel code here.
/*
 * dpqc - per-node mismatch terms using the dense G/B matrices.
 *
 * Work-item i (get_global_id(0)) handles node i.  Row i of G and B is read
 * row-major with Ndim columns and two sums are accumulated:
 *     a = sum_k ( G[i][k]*e[k] - B[i][k]*f[k] )
 *     b = sum_k ( G[i][k]*f[k] + B[i][k]*e[k] )
 *
 * Outputs written, by node type:
 *   PQV_flag[i] == 1 (PQ node): P, dP, Q, dQ, temp1, temp2
 *   PQV_flag[i] == 0 (PV node): P, dP, dU2,   temp1, temp2
 * Any other flag value leaves every output untouched.
 *
 * temp1/temp2 receive the raw sums a and b so other kernels can reuse them.
 * Mdim is accepted but not read here.
 */
__kernel void dpqc(
    int Ndim,
    __constant float *G,
    __constant float *B,
    __global float *e,
    __global float *f,
    __global float *dP,
    __global float *dQ,
    __global float *temp1,
    __global float *temp2,
    __constant float *Pc,
    __constant float *Qc,
    __constant float *Uc,
    __global float *dU2,
    int Mdim,
    __global float *P,
    __global float *Q,
    __global int *PQV_flag)
{
    const int i = get_global_id(0);
    const int node_type = PQV_flag[i];

    /* Debug trace: one line per work-item. */
    printf("%d\n",node_type);

    /* Only PQ (1) and PV (0) nodes produce output. */
    if (node_type != 1 && node_type != 0) {
        return;
    }

    /* Accumulate the two row sums shared by both node types. */
    const int row_base = i*Ndim;
    float sum_a = 0.0f;
    float sum_b = 0.0f;
    #pragma unroll
    for (int col = 0; col < Ndim; col++) {
        sum_a += G[row_base+col]*e[col] - B[row_base+col]*f[col];
        sum_b += G[row_base+col]*f[col] + B[row_base+col]*e[col];
    }

    /* Active power and its mismatch are common to PQ and PV nodes. */
    P[i] = (e[i]*sum_a + f[i]*sum_b);
    dP[i] = Pc[i] - (e[i]*sum_a + f[i]*sum_b);

    if (node_type == 1) {
        /* PQ node: reactive power and its mismatch. */
        Q[i] = f[i]*sum_a - e[i]*sum_b;
        dQ[i] = Qc[i] - f[i]*sum_a + e[i]*sum_b;
    } else {
        /* PV node: squared-voltage mismatch instead of dQ. */
        dU2[i] = Uc[i]*Uc[i] - (e[i]*e[i] + f[i]*f[i]);
    }

    /* Intermediate sums, available to other kernels. */
    temp1[i] = sum_a;
    temp2[i] = sum_b;
}
/*
 * jmcc - fill the Jacobian rows of node i from the dense G/B matrices.
 *
 * Work-item i (get_global_id(0)) owns two consecutive rows of J, each of
 * 2*N-2 floats; node i's block therefore starts at i * (2*N-2)*2.
 * For every column node k in [0, N-1) a 2x2 block is written:
 *
 *   first row  : [2k]   = H   [2k+1] = N
 *   second row : [2k]   = J   [2k+1] = L      (PQV_flag[i] == 1, PQ node)
 *                [2k]   = R   [2k+1] = S      (PQV_flag[i] == 0, PV node)
 *
 * Off-diagonal (k != i):
 *   H = -(G*e[i] + B*f[i]),  N = B*e[i] - G*f[i],
 *   PQ: J = N, L = -H        PV: R = S = 0
 * Diagonal (k == i) additionally uses the full-row sums
 *   a = sum_j (G[i][j]*e[j] - B[i][j]*f[j]),
 *   b = sum_j (G[i][j]*f[j] + B[i][j]*e[j])
 *   PV: R = -2*e[i], S = -2*f[i]
 *
 * Any other flag value writes nothing.  M is accepted but not read here.
 */
__kernel void jmcc(
    int M,
    int N,
    __constant float *G,
    __constant float *B,
    __global float *e,
    __global float *f,
    __global float *temp1,
    __global float *temp2,
    __global float *J,
    __global int *PQV_flag)
{
    const int i = get_global_id(0);
    const int node_type = PQV_flag[i];

    if (node_type != 1 && node_type != 0) {
        return;
    }

    const int row_len = 2*N-2;          /* floats in one Jacobian row */
    const int Jdim = row_len*2;         /* floats owned by one node   */
    __global float *first_row = J + i*Jdim;
    __global float *second_row = first_row + row_len;

    for (int k = 0; k < N-1; k++) {
        const float g = G[i*N+k];
        const float b = B[i*N+k];

        if (i != k) {
            /* Off-diagonal block. */
            const float h_ij = -(g*e[i] + b*f[i]);
            const float n_ij = b*e[i] - g*f[i];

            first_row[2*k] = h_ij;
            first_row[2*k+1] = n_ij;
            if (node_type == 1) {
                second_row[2*k] = n_ij;     /* Jij = Nij  */
                second_row[2*k+1] = -h_ij;  /* Lij = -Hij */
            } else {
                second_row[2*k] = 0;        /* Rij */
                second_row[2*k+1] = 0;      /* Sij */
            }
            continue;
        }

        /* Diagonal block: needs the sums over the whole row i. */
        float sum_a = 0;
        float sum_b = 0;
        for (int j = 0; j < N; j++) {
            sum_a += G[i*N+j]*e[j] - B[i*N+j]*f[j];
            sum_b += G[i*N+j]*f[j] + B[i*N+j]*e[j];
        }

        first_row[2*k] = -sum_a - g*e[i] - b*f[i];       /* Hii */
        first_row[2*k+1] = -sum_b + b*e[i] - g*f[i];     /* Nii */
        if (node_type == 1) {
            second_row[2*k] = sum_b + b*e[i] - g*f[i];   /* Jii */
            second_row[2*k+1] = -sum_a + g*e[i] + b*f[i];/* Lii */
        } else {
            second_row[2*k] = -2*e[i];                   /* Rii */
            second_row[2*k+1] = -2*f[i];                 /* Sii */
        }
    }
}
/*
 * transpose - write the transpose of input_mat into output_mat.
 *
 * Work-item `row` (get_global_id(0)) produces one output row of `width`
 * elements:  output_mat[row][col] = input_mat[col][row].
 * output_mat is addressed with a row stride of `width`, input_mat with a
 * row stride of `height`.
 *
 * The posted text had lost its `*` characters (pointer declarators and the
 * index multiplications), which made the kernel uncompilable; they are
 * restored here.
 */
__kernel void transpose(
    __global float *input_mat,
    __global float *output_mat,
    int width,
    int height)
{
    int row = get_global_id(0);

    for (int col = 0; col < width; col++) {
        output_mat[row*width + col] = input_mat[col*height + row];
    }
}
/*
 * qr - reduces a_mat column by column with reflections of the form
 * I - 2*u*u^T/(u.u) and accumulates their product in q_mat.
 *
 * Each work-item handles one row (index `id`).  The row length num_cols is
 * taken from get_global_size(0) while `id` is get_local_id(0).
 * NOTE(review): this presumably requires a single work-group whose size
 * equals the matrix dimension (square matrix) - confirm with the host code.
 *
 *   u_vec    local scratch, holds the current column / reflection vector u
 *   a_mat    input matrix, overwritten in place (entries below the diagonal
 *            of each processed column are set to 0)
 *   q_mat    receives the first reflection, then q_mat * p_mat per column
 *   p_mat    reflection matrix of the current column (rows/cols >= col)
 *   prod_mat scratch for the q_mat * p_mat product
 */
__kernel void qr(
__local float *u_vec,
__global float *a_mat,
__global float *q_mat,
__global float *p_mat,
__global float *prod_mat) {
/* Scalars in local memory, shared by all work-items of the group. */
local float u_length_squared, dot;
float prod, vec_length = 0.0f;
int id = get_local_id(0);
int num_cols = get_global_size(0);
/* Load first column into local memory as u vector */
u_vec[id] = a_mat[id*num_cols];
//printf("%d %f\n",id,u_vec[id]);
barrier(CLK_LOCAL_MEM_FENCE);
/* Find length of first A column and u vector */
/* Work-item 0 computes the column norm, stores it in a_mat[0], subtracts it
   from u_vec[0] and publishes |u|^2; all other work-items zero their entry
   of the first column. */
if(id == 0) {
for(int i=1; i<num_cols; i++) {
vec_length += u_vec[i] * u_vec[i];
}
u_length_squared = vec_length;
vec_length = sqrt(vec_length + u_vec[0] * u_vec[0]);
a_mat[0] = vec_length;
u_vec[0] -= vec_length;
u_length_squared += u_vec[0] * u_vec[0];
}
else {
a_mat[id*num_cols] = 0.0f;
}
/* NOTE(review): u_vec[0] and u_length_squared live in local memory but this
   barrier only names the global fence - confirm this is sufficient. */
barrier(CLK_GLOBAL_MEM_FENCE);
/* Transform further columns of A */
for(int i=1; i<num_cols; i++) {
/* NOTE(review): every work-item stores 0 into the shared `dot` here while
   work-item 0 accumulates into it below, and there is no barrier between
   the read of `dot` at the end of this iteration and the next reset -
   looks like a data race; confirm. */
dot = 0.0f;
if(id == 0) {
for(int j=0; j<num_cols; j++) {
dot += a_mat[j*num_cols + i] * u_vec[j];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
a_mat[id*num_cols + i] -= 2 * u_vec[id] * dot / u_length_squared;
}
/* Update Q matrix */
/* Row id of q_mat becomes row id of I - 2*u*u^T/|u|^2. */
for(int i=0; i<num_cols; i++) {
q_mat[id*num_cols + i] = -2 * u_vec[i] *
u_vec[id] / u_length_squared;
}
q_mat[id*num_cols + id] += 1;
barrier(CLK_GLOBAL_MEM_FENCE);
/* Loop through other columns */
for(int col = 1; col < num_cols-1; col++) {
/* Load new column into memory */
u_vec[id] = a_mat[id * num_cols + col];
barrier(CLK_LOCAL_MEM_FENCE);
/* Find length of A column and u vector */
/* Same steps as for the first column, restricted to rows >= col and
   performed by the work-item on the diagonal (id == col). */
if(id == col) {
vec_length = 0.0f;
for(int i = col + 1; i < num_cols; i++) {
vec_length += u_vec[i] * u_vec[i];
}
u_length_squared = vec_length;
vec_length = sqrt(vec_length + u_vec[col] * u_vec[col]);
u_vec[col] -= vec_length;
u_length_squared += u_vec[col] * u_vec[col];
a_mat[col * num_cols + col] = vec_length;
}
else if(id > col) {
a_mat[id * num_cols + col] = 0.0f;
}
barrier(CLK_GLOBAL_MEM_FENCE);
/* Transform further columns of A */
for(int i = col+1; i < num_cols; i++) {
/* Work-item 0 alone computes the dot product of column i with u. */
if(id == 0) {
dot = 0.0f;
for(int j=col; j<num_cols; j++) {
dot += a_mat[j*num_cols + i] * u_vec[j];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(id >= col)
a_mat[id*num_cols + i] -= 2 * u_vec[id] *
dot / u_length_squared;
barrier(CLK_GLOBAL_MEM_FENCE);
}
/* Update P matrix */
/* Only rows and columns >= col of p_mat are written here. */
if(id >= col) {
for(int i=col; i<num_cols; i++) {
p_mat[id*num_cols + i] = -2 * u_vec[i] *
u_vec[id] / u_length_squared;
}
p_mat[id*num_cols + id] += 1;
}
barrier(CLK_GLOBAL_MEM_FENCE);
/* Multiply q_mat * p_mat = prod_mat */
/* Each work-item computes columns col..num_cols-1 of its own row, summing
   over j = col..num_cols-1 only. */
for(int i=col; i<num_cols; i++) {
prod = 0.0f;
for(int j=col; j<num_cols; j++) {
prod += q_mat[id*num_cols + j] * p_mat[j*num_cols + i];
}
prod_mat[id*num_cols + i] = prod;
}
barrier(CLK_GLOBAL_MEM_FENCE);
/* Place the content of prod_mat in q_mat */
for(int i=col; i<num_cols; i++) {
q_mat[id*num_cols + i] = prod_mat[id*num_cols + i];
}
barrier(CLK_GLOBAL_MEM_FENCE);
}
}
/*
 * sevc1 - solve R*x = Q*b by back-substitution.
 *
 * Step 1: work-item id computes b'[id] = sum_i Q[id*dim+i] * b[i] and
 *         stores it in the local buffer tmp.
 * Step 2: after the barrier, x is obtained from the upper-triangular
 *         system R*x = b', from the last row upwards.
 *
 *   dim  number of rows/columns; R and Q are read row-major with stride dim
 *   R    coefficient matrix; only entries on and above the diagonal are read
 *   Q    matrix applied to b
 *   b    right-hand side (dim floats)
 *   x    solution (dim floats, written)
 *   tmp  local scratch for b'
 *
 * Changes from the posted text:
 *   - the `*` characters lost from the pointer parameters and from the
 *     id*dim / i*dim / R*x products are restored;
 *   - b' is accumulated in a private variable, because tmp[id] was updated
 *     with += without ever being initialised;
 *   - the barrier names the local fence, since tmp lives in local memory.
 *
 * NOTE(review): tmp is indexed with get_global_id(0) and read at every
 * index below dim, so this presumably expects one work-group of dim
 * work-items - confirm.  Every work-item runs the whole back-substitution
 * and writes the same values to x, exactly as the posted code did.
 */
__kernel void sevc1(
    int dim,
    __global float *R,
    __global float *Q,
    __global float *b,
    __global float *x,
    __local float *tmp
    ){
    int id = get_global_id(0);

    /* b'[id] = (Q*b)[id] */
    float acc = 0.0f;
    for (int i = 0; i < dim; i++) {
        acc += Q[id*dim + i] * b[i];
    }
    tmp[id] = acc;
    barrier(CLK_LOCAL_MEM_FENCE);

    /* Back-substitution.  For the last row the inner loop is empty, so
       x[dim-1] = b'[dim-1] / R[dim-1][dim-1]. */
    for (int i = dim-1; i >= 0; i--) {
        float known = 0.0f;
        for (int j = i+1; j < dim; j++) {
            known += R[i*dim + j] * x[j];
        }
        x[i] = (tmp[i] - known) / R[i*dim + i];
    }
}
/*
 * dpqc_sparse - per-node mismatch terms using row-compressed G and B.
 *
 * Work-item i (get_global_id(0)) handles node i.  The stored entries of
 * row i are G_data[j] with column G_ICOL[j] for j in
 * [G_ICFR[i], G_ICFR[i+1]), and likewise for B.  Four sums are formed:
 *     Ge = sum G*e   Gf = sum G*f   Be = sum B*e   Bf = sum B*f
 * and with  a = Ge - Bf,  b = Gf + Be:
 *     p = e[i]*a + f[i]*b        q = f[i]*a - e[i]*b
 *
 * Outputs written, by PQV_flag[i]:
 *   1 or 2 : dP = Pc - p, dQ = Qc - q, P = p, Q = q
 *   0      : dP = Pc - p, dU2 = Uc^2 - (e^2 + f^2), P = p
 *   other  : nothing
 *
 * Ndim and Mdim are accepted but not read here.
 */
__kernel void dpqc_sparse(
    int Ndim,
    __constant float *G_data,
    __constant int *G_ICOL,
    __constant int *G_ICFR,
    __constant float *B_data,
    __constant int *B_ICOL,
    __constant int *B_ICFR,
    __global float *e,
    __global float *f,
    __global float *dP,
    __global float *dQ,
    __constant float *Pc,
    __constant float *Qc,
    __constant float *Uc,
    __global float *dU2,
    int Mdim,
    __global float *P,
    __global float *Q,
    __global int *PQV_flag)
{
    const int i = get_global_id(0);
    const int node_type = PQV_flag[i];

    if (node_type != 0 && node_type != 1 && node_type != 2) {
        return;
    }

    /* Row sums over the stored entries of G. */
    float sum_ge = 0, sum_gf = 0;
    for (int j = G_ICFR[i]; j < G_ICFR[i+1]; j++) {
        const int col = G_ICOL[j];
        sum_ge += G_data[j] * e[col];
        sum_gf += G_data[j] * f[col];
    }

    /* Row sums over the stored entries of B. */
    float sum_be = 0, sum_bf = 0;
    for (int j = B_ICFR[i]; j < B_ICFR[i+1]; j++) {
        const int col = B_ICOL[j];
        sum_be += B_data[j] * e[col];
        sum_bf += B_data[j] * f[col];
    }

    const float p_calc = (e[i]*(sum_ge-sum_bf) + f[i]*(sum_gf+sum_be));

    dP[i] = Pc[i] - p_calc;
    if (node_type == 0) {
        /* Squared-voltage mismatch replaces dQ for this node type. */
        dU2[i] = Uc[i]*Uc[i] - (e[i]*e[i] + f[i]*f[i]);
        P[i] = p_calc;
    } else {
        const float q_calc = (f[i]*(sum_ge-sum_bf) - e[i]*(sum_gf+sum_be));
        dQ[i] = Qc[i] - q_calc;
        P[i] = p_calc;
        Q[i] = q_calc;
    }
}
/*
 * jmcc_sparse - fill the Jacobian rows of node i from row-compressed G/B.
 *
 * Work-item i (get_global_id(0)) owns two consecutive rows of J, each of
 * 2*N-2 floats (same layout as the dense jmcc kernel).  The stored entries
 * of row i are j in [ICFR[i], ICFR[i+1]) with column COL[j]; G_data and
 * B_data are indexed with the same j, i.e. they share one sparsity pattern.
 * Only the 2x2 blocks of stored entries are written, so all other entries
 * of J keep whatever the host put there.
 *
 *   PQV_flag[i] == 1 (PQ node): H, N in the first row, J, L in the second
 *   PQV_flag[i] == 0 (PV node): H, N in the first row, R, S in the second
 *   any other value           : nothing is written
 *
 * Fixes relative to the posted version:
 *   - the debug printf printed float values with %d; it now uses %f;
 *   - the diagonal Lii term used "- B*f[i]"; the derivative (and the dense
 *     kernel) is  -a + G*e[i] + B*f[i];
 *   - for PV nodes the whole second row was filled with -2*e[i] / -2*f[i];
 *     only the diagonal block carries these values, stored off-diagonal
 *     blocks are set to 0 as in the dense kernel;
 *   - the column limit was the literal 7; it is now N-1, the number of
 *     column blocks in a row (identical for N == 8).
 *
 * NOTE(review): ICFR is declared float but used as integer row offsets
 * (dpqc_sparse declares its row offsets as int) - confirm the host buffer
 * really holds floats.  M, num and (apart from the debug print) ROW are
 * not used.
 */
__kernel void jmcc_sparse(
    int M,
    int N,
    __constant float *G_data,
    __constant float *B_data,
    __constant float *ICFR,
    __constant int *ROW,
    __constant int *COL,
    int num,
    __global int *PQV_flag,
    __global float *e,
    __global float *f,
    __global float *J){
    const int row_len = 2*N-2;      /* floats in one Jacobian row */
    const int Jdim = row_len*2;     /* floats owned by one node   */
    const int i = get_global_id(0);

    /* Debug trace; %f matches the float arguments. */
    printf("%2d %f %f \n",i,G_data[ROW[i]],G_data[COL[i]]);

    const int node_type = PQV_flag[i];
    if (node_type != 1 && node_type != 0) {
        return;
    }

    const int row_begin = ICFR[i];
    const int row_end = ICFR[i+1];

    for (int j = row_begin; j < row_end; j++) {
        const int col = COL[j];

        /* Columns beyond the N-1 Jacobian blocks have no slot in J. */
        if (col >= N-1) {
            continue;
        }

        const float g = G_data[j];
        const float b = B_data[j];
        __global float *first_row = J + i*Jdim + 2*col;
        __global float *second_row = first_row + row_len;

        if (col != i) {
            /* Off-diagonal block. */
            const float h_ij = -(g*e[i] + b*f[i]);
            const float n_ij = b*e[i] - g*f[i];

            first_row[0] = h_ij;
            first_row[1] = n_ij;
            if (node_type == 1) {
                second_row[0] = n_ij;    /* Jij = Nij  */
                second_row[1] = -h_ij;   /* Lij = -Hij */
            } else {
                second_row[0] = 0;       /* Rij */
                second_row[1] = 0;       /* Sij */
            }
            continue;
        }

        /* Diagonal block: sums over every stored entry of row i. */
        float sum_a = 0;    /* sum of Gik*ek - Bik*fk */
        float sum_b = 0;    /* sum of Gik*fk + Bik*ek */
        for (int k = row_begin; k < row_end; k++) {
            sum_a += G_data[k]*e[COL[k]] - B_data[k]*f[COL[k]];
            sum_b += G_data[k]*f[COL[k]] + B_data[k]*e[COL[k]];
        }

        first_row[0] = -sum_a - g*e[i] - b*f[i];        /* Hii */
        first_row[1] = -sum_b - g*f[i] + b*e[i];        /* Nii */
        if (node_type == 1) {
            second_row[0] = sum_b - g*f[i] + b*e[i];    /* Jii */
            second_row[1] = -sum_a + g*e[i] + b*f[i];   /* Lii */
        } else {
            second_row[0] = -2*e[i];                    /* Rii */
            second_row[1] = -2*f[i];                    /* Sii */
        }
    }
}
Java file (Flow1.java):
package Flow_OpenCL;
import java.io.File;
import java.io.IOException;
import com.nativelibs4java.*;
import com.nativelibs4java.opencl.CLBuildException;
import com.nativelibs4java.opencl.CLContext;
import com.nativelibs4java.opencl.CLDevice;
import com.nativelibs4java.opencl.CLException;
import com.nativelibs4java.opencl.CLKernel;
import com.nativelibs4java.opencl.CLPlatform;
import com.nativelibs4java.opencl.CLProgram;
import com.nativelibs4java.opencl.CLQueue;
import com.nativelibs4java.opencl.JavaCL;
import com.nativelibs4java.util.IOUtils;
/**
 * Minimal JavaCL driver: selects a device, reads FLOW.cl from disk, builds
 * it and creates the kernels it contains.
 */
public class Flow1 {

    /**
     * Builds the OpenCL program on the best device of the first platform.
     *
     * @param args unused
     * @throws Exception if reading the kernel file, building the program or
     *                   any JavaCL call fails
     */
    public static void main(String[] args) throws Exception {
        // Take the first platform JavaCL reports and let it pick the device.
        final CLPlatform firstPlatform = JavaCL.listPlatforms()[0];
        final CLDevice bestDevice = firstPlatform.getBestDevice();

        // Print the platform and device names.
        final String banner =
                "平台:" + firstPlatform.getName() + " 设备:" + bestDevice.getName();
        System.out.println(banner);

        // Context bound explicitly to the chosen device, plus a queue with
        // profiling enabled.
        final CLContext clContext = JavaCL.createContext(null, bestDevice);
        final CLQueue profilingQueue = clContext.createDefaultProfilingQueue();

        // FLOW.cl holds the kernels jmcc, dpqc_sparse, qr, transpose and sevc1.
        final File kernelFile =
                new File("E:\\LSH\\JavaCL\\src\\Flow_OpenCL\\FLOW.cl");
        final CLProgram flowProgram =
                clContext.createProgram(IOUtils.readText(kernelFile));

        // Compile the source, then create a kernel object for every
        // __kernel function in the program.
        flowProgram.build();
        final CLKernel[] builtKernels = flowProgram.createKernels();

        System.out.println("完成");
    }
}