Thursday, 27 December 2012

Matrix multiplication

Today I tested matrix multiplication using Aparapi and my 7770 card. The results are great: another 40X improvement over my quad-core 960T CPU.


The interesting part is that the shape of the graphs is the same. This means that using a GPU doesn't change the complexity of the algorithm; it only makes it run a lot faster.

The results 

and also the Java Code
 import java.util.Random;  
 import com.amd.aparapi.Kernel;  
 /**
  * Small benchmark that multiplies two dense integer matrices with an Aparapi kernel.
  *
  * <p>Matrices are stored row-major in flat {@code int} arrays: element (row, col) of a
  * matrix with {@code cols} columns lives at index {@code row * cols + col}.
  */
 public class MatrixMultiplication {
   /** How many times each multiplication is repeated; the fastest run is reported. */
   private static final int RUNS = 2;
   /** Exclusive upper bound of the random matrix elements. */
   private static final int ELEMENT_BOUND = 1000;

   public static void main ( String [] arg ){
     test( 500,500,500 );
     test( 1000,1000,1000 );
     test( 1500,1500,1500 );
     test( 2000,2000,2000 );
     test( 2500,2500,2500 );
   }

   /**
    * Fills an {@code a x b} and a {@code b x c} matrix with random values, multiplies them
    * {@link #RUNS} times and prints the fastest run in milliseconds.
    *
    * @param a number of rows of the first matrix
    * @param b number of columns of the first matrix and rows of the second
    * @param c number of columns of the second matrix
    */
   public static void test ( int a, int b, int c ){
     Random rnd = new Random();
     int [] A = new int[a*b];
     int [] B = new int[b*c];
     for ( int i=0; i<A.length; i++ ){
       A[i] = rnd.nextInt( ELEMENT_BOUND );
     }
     for ( int i=0; i<B.length; i++ ){
       B[i] = rnd.nextInt( ELEMENT_BOUND );
     }
     // The sentinel must match the variable's type, otherwise a very slow run could not lower it.
     long min = Long.MAX_VALUE;
     for ( int run = 0; run < RUNS; run++ ) {
       // nanoTime is monotonic, unlike the wall clock, so it is the safe choice for elapsed time.
       long start = System.nanoTime();
       mult( A, B, a, b, c );
       long elapsedMs = ( System.nanoTime() - start ) / 1000000L;
       min = Math.min( min, elapsedMs );
     }
     // For debugging with small sizes, printMatrix can dump A, B and the value returned by mult.
     System.out.println( "[" + a + "," + b +"]X[" + b + "," + c + "] " + min + " ms" );
   }

   /**
    * Prints a row-major matrix, one row per line with elements separated by spaces.
    *
    * @param m flat row-major matrix data
    * @param a number of rows
    * @param b number of columns
    */
   public static void printMatrix( int [] m, int a, int b ){
     for ( int i=0; i<a; i++ ){
       for ( int j=0; j<b; j++ ){
         System.out.print( m[i*b+j] + " " );
       }
       System.out.println();
     }
   }

   /**
    * Multiplies the {@code a x b} matrix {@code A} by the {@code b x c} matrix {@code B}.
    *
    * <p>One kernel work item computes one element of the result. Sums are accumulated in
    * {@code int}, so they wrap around silently if a dot product exceeds the {@code int} range.
    *
    * @param A first matrix, row-major, at least {@code a*b} elements
    * @param B second matrix, row-major, at least {@code b*c} elements
    * @param a number of rows of {@code A}
    * @param b number of columns of {@code A} and rows of {@code B}
    * @param c number of columns of {@code B}
    * @return the {@code a x c} product, row-major
    * @throws IllegalArgumentException if a dimension is negative or an array is too short
    */
   public static int [] mult( final int [] A, final int [] B, final int a, final int b, final int c ) {
     if ( a < 0 || b < 0 || c < 0 ) {
       throw new IllegalArgumentException( "Negative dimension: " + a + "," + b + "," + c );
     }
     // Compare in long so that a huge dimension product cannot overflow and slip through.
     if ( A.length < (long) a * b || B.length < (long) b * c ) {
       throw new IllegalArgumentException(
           "Array too short for [" + a + "," + b + "]X[" + b + "," + c + "]" );
     }
     final int [] rezultat = new int [ a * c ];
     if ( rezultat.length == 0 ) {
       // Nothing to compute; do not launch a kernel over an empty range.
       return rezultat;
     }
     Kernel kernel = new Kernel() {
       @Override
       public void run() {
         int k = getGlobalId();
         int linie = k/c;
         int coloana = k%c;
         // Accumulate locally and store once instead of updating the output array every step.
         int suma = 0;
         for ( int i=0; i<b; i++ ){
           suma += A[ linie*b + i ] * B[ i*c + coloana ];
         }
         rezultat[k] = suma;
       }
     };
     try {
       // CPU mode as posted; use Kernel.EXECUTION_MODE.GPU to run the same kernel on the GPU.
       kernel.setExecutionMode( Kernel.EXECUTION_MODE.CPU );
       kernel.execute( rezultat.length );
     } finally {
       // A new kernel is created per call, so release its resources here to avoid leaking them.
       kernel.dispose();
     }
     return rezultat;
   }
 }

No comments:

Post a Comment