Добавил:

Upload Опубликованный материал нарушает ваши авторские права? Сообщите нам.

Вуз:

Санкт-Петербургский государственный электротехнический университет "ЛЭТИ"

Предмет:

[НЕСОРТИРОВАННОЕ]

Файл:

CUBLAS and MAGMA by example.pdf

Скачиваний:

Добавлен:

22.03.2016

Размер:

2.45 Mб

Скачать

☆

<<< < Предыдущая 1 2 3 4 5 6 78 / 228 9 10 11 12 13 14 15 16 17 18 19 20 21 22 > Следующая >>>

3.2 CUBLAS Level-1. Scalar and vector based operations

[1/2

sqrt (3)/2]

[0 ,1 ,2 ,3 , 4, 5]

[- sqrt (3)/2

1/2

]

[0 ,1 ,4 ,9 ,16 ,25]

stat=cublasSrot(handle,n,d

x,1,d

y,1,&c,&s);

stat = cublasGetVector (n , sizeof ( float ),d_x ,1 ,x ,1); // cp

d_x ->x

printf ("x after

Srot :\ n" );

after

Srot

for (j =0;j <n;j ++)

printf (" %7.3 f ," ,x[j ]);

printf ("\n" );

stat = cublasGetVector (n , sizeof ( float ),d_y ,1 ,y ,1); // cp

d_y ->y

printf ("y after

Srot :\ n" );

after

Srot

for (j =0;j <n;j ++)

printf (" %7.3 f ," ,y[j ]);

printf ("\n" );

cudaFree ( d_x );

free

device

memory

cudaFree ( d_y );

free

device

memory

cublasDestroy ( handle );

destroy CUBLAS

context

free (x );

free

host

memory

free (y );

free

host

memory

return EXIT_SUCCESS ;

}

// x:
//   0,  1,  2,  3,  4,  5,
// y:
//   0,  1,  4,  9, 16, 25,
// x after Srot:
//   0.000,  1.367,  4.468,  9.302, 15.871, 24.173,
// y after Srot:
//   0.000, -0.367,  0.266,  1.899,  4.532,  8.165,
//
// [x]   [  0.5    0.867 ]   [ 0  1  2  3   4   5 ]
// [ ] = [               ] * [                    ]
// [y]   [ -0.867  0.5   ]   [ 0  1  4  9  16  25 ]

3.2.8 cublasSrotg - construct the Givens rotation matrix

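This function constructs the Givens rotation matrix
$G = \begin{pmatrix} c & s \\ -s & c \end{pmatrix}$
that zeros out the second entry of the $2 \times 1$ vector $\begin{pmatrix} a \\ b \end{pmatrix}$, i.e.
$\begin{pmatrix} c & s \\ -s & c \end{pmatrix} \begin{pmatrix} a \\ b \end{pmatrix} = \begin{pmatrix} r \\ 0 \end{pmatrix}$, where
$c^2 + s^2 = 1$, $r^2 = a^2 + b^2$.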
// nvcc	009 srotg .c - lcublas
// This function is provided for			completeness			and	runs
// exclusively on the host
# include < stdio .h >
# include < stdlib .h >
# include	< cuda_runtime .h >
# include	" cublas_v2 .h"

int main ( void ){

3.2 CUBLAS Level-1. Scalar and vector based operations								27
cublasStatus_t stat ;			// CUBLAS functions				status
cublasHandle_t handle ;					//	CUBLAS	context
int j;
float	a =1.0;
float	b =1.0;
printf	("a: %7.3 f\n" ,a );					//	print	a
printf	("b: %7.3 f\n" ,b );					//	print	b
stat =	cublasCreate (& handle ); //		initialize			CUBLAS	context
float	c;
float	s;
//			[	c	s	]
// find	the Givens	rotation matrix	G =[			]
//			[	-s	c	]
//	[a]	[r]

//such that G *[ ]=[ ]

//	[b] [0]
//
// c ^2+ s ^2=1 ,	r =\ sqrt {a ^2+ b ^2} ,	a is	replaced	by r
stat=cublasSrotg(handle,&a,&b,&c,&s);
printf (" After	Srotg :\ n" );
printf ("a: %7.5 f\n" ,a );				//	print	a
printf ("c: %7.5 f\n" ,c );				//	print	c
printf ("s: %7.5 f\n" ,s );				//	print	s
cublasDestroy ( handle );		//	destroy	CUBLAS	context
return EXIT_SUCCESS ;
}

// a: 1.000
// b: 1.000
// After Srotg:
// a: 1.41421    // \sqrt{1^2 + 1^2}
// c: 0.70711    // cos(pi/4)
// s: 0.70711    // sin(pi/4)
//
// [  0.70711  0.70711 ]   [1]   [ 1.41422 ]
// [                   ] * [ ] = [         ]
// [ -0.70711  0.70711 ]   [1]   [    0    ]

3.2.9 cublasSrotm - apply the modified Givens rotation

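This function multiplies the modified Givens $2 \times 2$ matrix
$H = \begin{pmatrix} h_{11} & h_{12} \\ h_{21} & h_{22} \end{pmatrix}$
with the $2 \times n$ matrix
$\begin{pmatrix} x_0 & \dots & x_{n-1} \\ y_0 & \dots & y_{n-1} \end{pmatrix}$.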
// nvcc 010 srotmVec .c		- lcublas

#include < stdio .h >

#include < stdlib .h >

#include < cuda_runtime .h >

3.2 CUBLAS Level-1. Scalar and vector based operations

# include

" cublas_v2 .h"

# define n

// length

of x ,y

int main ( void ){

cudaError_t cudaStat ;

// cudaMalloc status

cublasStatus_t stat ;

CUBLAS functions status

cublasHandle_t handle ;

// CUBLAS

context

int

index

elements

float *

// n - vector on the host

float *

// n - vector

the host

float *

param ;

x =( float *) malloc

(n* sizeof (* x )); //

host

memory

alloc

for

for (j =0;j <n;j ++)

x[j ]=( float )j;

// x ={0 ,1 ,2 ,3 ,4 ,5}

printf ("x :\ n" );

for (j =0;j <n;j ++)

printf (" %3.0 f ," ,x[j ]);

printf ("\n" );

y =( float *) malloc

(n* sizeof (* y )); //

host

memory

alloc

for

for (j =0;j <n;j ++)

y[j ]=( float )j*j;

// y ={0 ,1 ,4 ,9 ,16 ,25}

printf ("y :\ n" );

for (j =0;j <n;j ++)

printf (" %3.0 f ," ,y[j ]);

printf ("\n" );

param =( float *) malloc (5* sizeof (* param ));

param [0]=1.0 f;

// flag

param [1]=0.5 f;

// param [1] ,... , param [4]

param [2]=1.0 f;

// - entries of

the Givens

matrix

param [3]= -1.0 f;

h11 = param [1]

h12 = param [2]

param [4]=0.5 f;

h21 = param [3]

h22 = param [4]

// on

the

device

float * d_x ;

// d_x - x on the

device

float * d_y ;

// d_y - y on the device

cudaStat = cudaMalloc (( void **)& d_x ,n* sizeof (* x ));

// device

// memory alloc for x

cudaStat = cudaMalloc (( void **)& d_y ,n* sizeof (* y ));

// device

memory

alloc

for

stat

= cublasCreate (& handle );

initialize CUBLAS context

stat

= cublasSetVector (n , sizeof (* x),x ,1 , d_x ,1); // copy

x -> d_x

stat

= cublasSetVector (n , sizeof (* y),y ,1 , d_y ,1); // copy

y -> d_y

[0.5

1.0

]

// multiply the 2 x2 modified Givens

matrix

H =[

]

// by the 2 xn matrix with two rows

and y

[ -1.0

0.5

]

stat=cublasSrotm(handle,n,d

x,1,d

y,1,param);

stat = cublasGetVector (n , sizeof ( float ),d_x ,1 ,x ,1); // cp

d_x ->x

printf ("x after Srotm x :\ n" );

// print x

after

Srotm

for (j =0;j <n;j ++)

printf (" %7.3 f ," ,x[j ]);

printf ("\n" );

stat = cublasGetVector (n , sizeof ( float ),d_y ,1 ,y ,1); // cp

d_y ->y

3.2 CUBLAS Level-1. Scalar and vector based operations

printf ("y after Srotm y :\ n" );

after

Srotm

for (j =0;j <n;j ++)

printf (" %7.3 f ," ,y[j ]);

printf ("\n" );

cudaFree ( d_x );

free

device

memory

cudaFree ( d_y );

free

device

memory

cublasDestroy ( handle );

destroy

CUBLAS

context

free (x );

free

host

memory

free (y );

free

host

memory

free ( param );

free

host

memory

return EXIT_SUCCESS ;

}

// x:
//   0,  1,  2,  3,  4,  5,
// y:
//   0,  1,  4,  9, 16, 25,
// x after Srotm:
//   0.000,  1.500,  5.000, 10.500, 18.000, 27.500,
// y after Srotm:
//   0.000, -0.500,  0.000,  1.500,  4.000,  7.500,
//
// [x]   [  0.5  1.0 ]   [ 0  1  2  3   4   5 ]
// [ ] = [           ] * [                    ]
// [y]   [ -1.0  0.5 ]   [ 0  1  4  9  16  25 ]

3.2.10 cublasSrotmg - construct the modified Givens rotation matrix

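This function constructs the modified Givens transformation
$H = \begin{pmatrix} h_{11} & h_{12} \\ h_{21} & h_{22} \end{pmatrix}$
that zeros out the second entry of the vector
$\begin{pmatrix} \sqrt{d_1}\, x_1 \\ \sqrt{d_2}\, y_1 \end{pmatrix}$.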

//	nvcc	011 srotmg .c	- lcublas
//	this	function is	provided	for completeness
//	and	runs exclusively on the Host

#include < stdio .h >

#include < stdlib .h >

#include < cuda_runtime .h >

#include " cublas_v2 .h"

int main ( void ){
cublasStatus_t stat ;			// CUBLAS functions status
cublasHandle_t handle ;				// CUBLAS		context
float	d1 =5.0 f;				//	d1 =5.0
float	d2 =5.0 f;				//	d2 =5.0
float	param [5];	//	[ param [1]	param [2]]	[ h11		h12 ]
		//	[	] =	[		]
		//	[ param [3]	param [4]]	[ h21		h22 ]
param [0]=1.0 f;				// param [0]	is	a	flag
// if param [0]=1.0 , then		h12 =1= param [2] ,		h21 = -1= param [3]

<<< < Предыдущая 1 2 3 4 5 6 78 / 228 9 10 11 12 13 14 15 16 17 18 19 20 21 22 > Следующая >>>

Соседние файлы в предмете [НЕСОРТИРОВАННОЕ]

#
20.04.20192.55 Mб19Computer Simulation.doc
#
11.07.201923.01 Кб4Computer viruses.docx
#
24.11.2019177.15 Кб3Conf_2012_12_05_FEM_ПО СТРАНИЦАМ ДИССЕРТАЦИЙ 20...doc
#
09.02.201582.78 Кб70Course_project_ads_2.docx
#
09.02.2015101.19 Кб25Course_project_PR_2_pravki.docx
#
22.03.20162.45 Mб36CUBLAS and MAGMA by example.pdf
#
09.02.2015435.71 Кб23culture_anticue_world.doc
#
09.02.201549 Кб5cолнышкин отчет.docx
#
27.10.20186.46 Mб12diplom.docx
#
27.10.20186.46 Mб11diplom.docx
#
09.02.20151.48 Mб143Diplom3.pdf