Compiling Python Modules to Native Parallel Modules Using Pythran and OpenMP Annotations¶
Serge Guelton, École Normale Supérieure, Paris, France & Télécom Bretagne, Plouzané, France
Pierrick Brunet, Télécom Bretagne, Plouzané, France
Mehdi Amini, SILKAN INC., Los Altos, USA
Get it - PyHPC release!¶
[2]:
from IPython.display import IFrame
IFrame("http://pythonhosted.org/pythran/", 1000, 500)
[2]:
Introduction to Pythran¶
[3]:
%%sh
pythran --help
usage: pythran [-h] [-o OUTPUT_FILE] [-E] [-e] [-f flag] [-v] [-p pass] [-m machine] [-I include_dir] [-L ldflags] [-D macro_definition] [-O level] [-g] input_file
pythran: a python to C++ compiler
positional arguments:
input_file the pythran module to compile, either a .py or a .cpp file
optional arguments:
-h, --help show this help message and exit
-o OUTPUT_FILE path to generated file
-E only run the translator, do not compile
-e similar to -E, but does not generate python glue
-f flag any compiler switch relevant to the underlying C++ compiler
-v be verbose
-p pass any pythran optimization to apply before code generation
-m machine any machine flag relevant to the underlying C++ compiler
-I include_dir any include dir relevant to the underlying C++ compiler
-L ldflags any search dir relevant to the linker
-D macro_definition any macro definition relevant to the underlying C++ compiler
-O level any optimization level relevant to the underlying C++ compiler
-g any debug level relevant to the underlying C++ compiler
It's a megablast!
Step 1: Using Pythran to Turn Python Code into C++ Metaprograms¶
[4]:
%%file hello_world.py
def hello(s="world"):
    """Print the greeting 'hello' followed by s, which defaults to 'world'."""
    print "hello", s
Overwriting hello_world.py
[5]:
%%sh
pythran -e hello_world.py -o hello_world.hpp
head -n 20 hello_world.hpp
#include <pythran/pythran.h>
namespace __pythran_hello_world
{
;
;
struct hello
{
typedef void callable;
template <typename argument_type0 = core::string>
struct type
{
typedef typename assignable<typename std::remove_cv<typename std::remove_reference<decltype(__builtin__::None)>::type>::type>::type result_type;
}
;
template <typename argument_type0 = core::string>
typename type<argument_type0>::result_type operator()(argument_type0 const & s= core::string("world")) const
;
} ;
template <typename argument_type0 >
typename hello::type<argument_type0>::result_type hello::operator()(argument_type0 const & s) const
[6]:
%%file hello_world.cpp
#include "hello_world.hpp"
#include <iostream>
using namespace __pythran_hello_world;
// Entry point: greet the name given as the first command-line argument, or
// call hello with its default argument when no name is supplied.
int main(int argc, char *argv[])
{
    // `argc < 2` rather than `argc == 1`: it also covers argc == 0, in which
    // case argv[1] must not be read.
    if (argc < 2)
        hello()();
    else
    {
        std::string msg(argv[1]);
        hello()(msg);
    }
    return 0;
}
Overwriting hello_world.cpp
[7]:
%%sh
INCLUDES="-I $HOME/.local/lib/python2.7/site-packages -I $HOME/.local/lib/python2.7/site-packages/pythran -I $HOME/.local/lib/python2.7/site-packages/pythran/pythonic++"
clang++ -std=c++11 $INCLUDES hello_world.cpp -o hello_world
[8]:
%%sh
./hello_world
./hello_world donald
hello world
hello donald
Step 2: Using Pythran to Turn Python Code Into Native Modules¶
[9]:
%%sh
pythran hello_world.py
CRITICAL I am in trouble. Your input file does not seem to match Pythran's constraints...
E: Pythran spec error: no pythran specification
[10]:
%%sh
sed -i '1 i #pythran export hello(str)' hello_world.py
head -n 3 hello_world.py
#pythran export hello(str)
def hello(s="world"):
print "hello", s
[11]:
%%sh
pythran hello_world.py
ls hello_world.so
python -c "import hello_world as hw ; hw.hello('PyHPC')"
hello_world.so
hello PyHPC
Step 3: Running Numpy Code¶
[12]:
%%file arc_distance.py
#pythran export arc_distance(float [], float[], float[], float[])
import numpy as np
def arc_distance(theta_1, phi_1,theta_2, phi_2):
    """Return the element-wise arc distance between (theta_1, phi_1) and (theta_2, phi_2).

    All four arguments are combined element by element, so the result has
    the same shape as the (broadcast) inputs.
    """
    half_dtheta = (theta_2 - theta_1) / 2
    half_dphi = (phi_2 - phi_1) / 2
    sin_sq_dtheta = np.sin(half_dtheta) ** 2
    sin_sq_dphi = np.sin(half_dphi) ** 2
    chord = sin_sq_dtheta + np.cos(theta_1) * np.cos(theta_2) * sin_sq_dphi
    return 2 * (np.arctan2(np.sqrt(chord), np.sqrt(1 - chord)))
Overwriting arc_distance.py
[13]:
%pylab inline
import timeit
Populating the interactive namespace from numpy and matplotlib
[14]:
n = 10000000
t0, p0, t1, p1 = random.random(n), random.random(n), random.random(n), random.random(n),
[15]:
import arc_distance as ad
%timeit ad.arc_distance(t0, p0, t1, p1)
1 loops, best of 3: 2.15 s per loop
[16]:
%%sh
pythran arc_distance.py -O3 -o pythran_arc_distance.so
pythran arc_distance.py -O3 -fopenmp -o pythran_fast_arc_distance.so
[17]:
import pythran_arc_distance as pad
%timeit pad.arc_distance(t0, p0, t1, p1)
1 loops, best of 3: 1.72 s per loop
[18]:
import pythran_fast_arc_distance as pfad
%timeit pfad.arc_distance(t0, p0, t1, p1)
1 loops, best of 3: 304 ms per loop
An Introduction to OpenMP with Pythran¶
[19]:
%%file parallel_hello.py
#pythran export parallel_hello()
def parallel_hello():
    """Print 'hello'; the print statement carries an OpenMP parallel annotation."""
    #omp parallel
    print "hello"
Overwriting parallel_hello.py
[20]:
import parallel_hello as ph
ph.parallel_hello()
hello
[21]:
%%sh
pythran parallel_hello.py -o sequential_hello.so
pythran parallel_hello.py -fopenmp -o real_parallel_hello.so
[22]:
%%sh
python -c 'import sequential_hello as sh ; sh.parallel_hello()'
echo "*********"
python -c 'import real_parallel_hello as ph ; ph.parallel_hello()'
hello
*********
hello hellohello
hello
hello
hello
hello
hello
[23]:
%load_ext pythranmagic
[24]:
%%pythran
#pythran export nthreads()
def nthreads():
    """Return (m, n), two readings of omp.get_num_threads().

    m is read by a plain statement; n is read by the statement annotated
    with the `parallel` and `master` directives.
    """
    import omp
    # Reading taken outside of any OpenMP annotation.
    m = omp.get_num_threads()
    # Reading taken by the statement the two directives below are attached to.
    #omp parallel shared(n)
    #omp master
    n = omp.get_num_threads()
    return m, n
[25]:
nthreads()
[25]:
(1, 8)
Parallelizing Loops with Pythran and OpenMP¶
[26]:
%%pythran
#pythran export pi(int)
def pi(n):
    """Approximate pi by midpoint-rule integration of 4 / (1 + x**2) over [0, 1].

    n is the number of sub-intervals. Returns 0. when n is not positive, so
    that no division by zero occurs.
    """
    if n <= 0:
        return 0.
    # Float literals force true division: with integer operands, Python 2's
    # `/` truncates and the step would collapse to 0.
    step = 1. / n
    s = 0.
    for i in range(n):
        # Midpoint of the i-th sub-interval.
        x = (i + .5) * step
        s += 4. / (1 + x ** 2)
    return step * s
[27]:
n = 1000000
%timeit pi(n)
100 loops, best of 3: 10 ms per loop
[28]:
%%pythran
#pythran export pi(int)
def pi(n):
    """Approximate pi by midpoint-rule integration of 4 / (1 + x**2) over [0, 1].

    n is the number of sub-intervals. Returns 0. when n is not positive, so
    that no division by zero occurs. The accumulation loop is annotated as
    an OpenMP parallel loop with a sum reduction on s.
    """
    if n <= 0:
        return 0.
    # Float literals force true division: with integer operands, Python 2's
    # `/` truncates and the step would collapse to 0.
    step = 1. / n
    s = 0.
    #omp parallel for reduction(+:s)
    for i in range(n):
        # Midpoint of the i-th sub-interval.
        x = (i + .5) * step
        s += 4. / (1 + x ** 2)
    return step * s
[29]:
%timeit pi(n)
1000 loops, best of 3: 1.39 ms per loop
Parallelizing with Tasks¶
[30]:
import numpy as np
def nested_loop(a):
    """Build a square array whose strictly lower triangle holds cos(row * col).

    Only a.shape[0] is read from a; it gives the side length of the result.
    Entries on and above the diagonal stay at zero.
    """
    size = a.shape[0]
    result = np.zeros((size, size))
    for row in range(size):
        for col in range(row):
            result[row, col] = np.cos(row * col)
    return result
[31]:
n = 400
r = random.random(n)
%timeit nested_loop(r)
1 loops, best of 3: 206 ms per loop
[32]:
%%pythran
#pythran export nested_loop(float [])
import numpy as np
def nested_loop(a):
    """Build a square array whose strictly lower triangle holds cos(row * col).

    Only a.shape[0] is read from a; it gives the side length of the result.
    The outer loop is annotated as an OpenMP parallel loop and each element
    assignment as an OpenMP task.
    """
    size = a.shape[0]
    result = np.zeros((size, size))
    #omp parallel for
    for row in range(size):
        for col in range(row):
            #omp task
            result[row, col] = np.cos(row * col)
    return result
[33]:
%timeit nested_loop(r)
10 loops, best of 3: 26.9 ms per loop
[34]:
%%pythran
#pythran export nested_loop(float [])
import numpy as np
def nested_loop(a):
    """Build a square array whose strictly lower triangle holds cos(row * col).

    Only a.shape[0] is read from a; it gives the side length of the result.
    The loop nest is annotated with the OpenMP `parallel` and `single`
    directives, and each element assignment is annotated as a task.
    """
    size = a.shape[0]
    result = np.zeros((size, size))
    #omp parallel
    #omp single
    for row in range(size):
        for col in range(row):
            #omp task
            result[row, col] = np.cos(row * col)
    return result
[35]:
%timeit nested_loop(r)
10 loops, best of 3: 15.9 ms per loop
Parallelizing Complex Reductions¶
[36]:
import numpy as np
def minmax(arr):
    """Return (smallest, largest) over all elements of the 2-D array arr.

    The array is scanned element by element; an array with no elements
    yields (inf, -inf).
    """
    rows, cols = arr.shape
    lowest, highest = np.inf, -np.inf
    for r in range(rows):
        for c in range(cols):
            value = arr[r, c]
            lowest = min(lowest, value)
            highest = max(highest, value)
    return lowest, highest
[37]:
n = 1000
r = random.random((n,n))
minmax(r)
[37]:
(5.6717835938968619e-07, 0.99999986590219125)
[38]:
%timeit minmax(r)
1 loops, best of 3: 680 ms per loop
[39]:
%%pythran
import numpy as np
#pythran export minmax(float[][])
def minmax(arr):
    """Return (smallest, largest) over all elements of the 2-D array arr.

    Each pass over the rows accumulates into the local pair (lmini, lmaxi),
    which is then folded into the shared pair (mini, maxi).
    """
    n, m = arr.shape
    mini, maxi = np.inf, -np.inf
    # `if 1:` is always true; it appears to serve only as a compound
    # statement for the directive below to apply to.
    #omp parallel private(lmini, lmaxi)
    if 1:
        # Local accumulators, listed in the private(...) clause above.
        lmini, lmaxi = np.inf, -np.inf
        #omp for
        for i in range(n):
            for j in range(m):
                lmini = min(lmini, arr[i, j])
                lmaxi = max(lmaxi, arr[i, j])
        # Fold the local result into the shared result; the update is
        # annotated as a critical section.
        #omp critical
        if 1:
            mini = min(lmini, mini)
            maxi = max(lmaxi, maxi)
    return mini, maxi
[40]:
minmax(r)
[40]:
(5.671783593896862e-07, 0.9999998659021913)
[41]:
%timeit minmax(r)
1000 loops, best of 3: 224 µs per loop
Integration with Iterators¶
[43]:
%%pythran
#pythran export enum(str list)
def enum(l):
    """Count the strings in l whose length equals their index in the list.

    The loop is annotated as an OpenMP parallel loop and the counter
    increment as an atomic update.
    """
    matches = 0
    #omp parallel for
    for position, word in enumerate(l):
        if position == len(word):
            #omp atomic
            matches += 1
    return matches
[44]:
enum(['Say', 'Hello', 'to', 'PyHPC', 'from', 'Denver'])
[44]:
2
Validation¶
C. Wang, S. Chandrasekaran and B. M. Chapman, "An OpenMP 3.1 Validation Testsuite", in 8th International Workshop on OpenMP (IWOMP 2012).
[45]:
IFrame("http://numfocus.github.io/python-benchmarks/", 1000, 1000)
[45]:
[46]:
%%pythran bye.py
#pythran export bye(int)
import math
def bye(n=3):
    """Return a text picture drawn with '*', '.' and ' ' on a square grid.

    The grid spans -6*n..6*n on both axes. A disc of radius 6*n is filled
    with '.' where x > 0 and '*' elsewhere; discs of radius 3*n and then of
    radius n are stamped on top of it, shifted by +3*n and -3*n along y.
    Rows are joined with newlines, one row per y value, with x running from
    largest to smallest within a row.
    """
    shift = 3 * n
    radii = [n, shift, 6 * n]
    axes = [list(range(-radius, radius + 1)) for radius in radii]
    # For each radius: the grid points lying within that distance of the origin.
    discs = [
        [(x, y) for x in axis for y in axis if math.hypot(x, y) <= radius]
        for axis, radius in zip(axes, radii)
    ]
    canvas = {(x, y): ' ' for x in axes[2] for y in axes[2]}
    # Outer disc: '.' where x is positive, '*' elsewhere.
    for x, y in discs[2]:
        if x > 0:
            canvas[(x, y)] = '.'
        else:
            canvas[(x, y)] = '*'
    # Medium discs; the order of the two writes decides overlapping cells.
    for x, y in discs[1]:
        canvas[(x, y + shift)] = '*'
        canvas[(x, y - shift)] = '.'
    # Small discs, with the opposite characters.
    for x, y in discs[0]:
        canvas[(x, y + shift)] = '.'
        canvas[(x, y - shift)] = '*'
    columns = list(reversed(axes[2]))
    rows = [''.join([canvas[(x, y)] for x in columns]) for y in axes[2]]
    return '\n'.join(rows)
[47]:
print bye(5)
.
.............**
..................***
.......................****
.........................****
............................*****
...............................******
................................*******
..................................*******
...................................********
......................*..............********
....................*******...........*********
....................*********..........**********
....................*********..........**********
.....................*********..........***********
....................***********..........**********
......................*********..........************
.......................*********..........*************
.......................*********..........*************
........................*******...........*************
............................*..............**************
..........................................***************
..........................................***************
..........................................*****************
..........................................*****************
.........................................******************
........................................*******************
.......................................********************
.....................................**********************
...................................************************
...............................******************************
........................***********************************
......................*************************************
....................***************************************
...................****************************************
..................*****************************************
.................******************************************
.................******************************************
...............******************************************
...............******************************************
..............**************.****************************
.............***********.......************************
.............**********.........***********************
.............**********.........***********************
............**********.........**********************
..........**********...........********************
...........**********.........*********************
..........**********.........********************
..........**********.........********************
.........***********.......********************
........**************.**********************
........***********************************
.......**********************************
.......********************************
......*******************************
.....****************************
....*************************
....***********************
...******************
..*************
*
[ ]: