Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions compyle/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,22 @@ def get_backend(backend=None):

minmax_operator_tpl = """

__device__ ${dtype}()
{
}

__device__ ${dtype}(${dtype} const volatile &src)
{
% for prop in prop_names:
% if not only_max:
this->cur_min_${prop} = src.cur_min_${prop};
% endif
% if not only_min:
this->cur_max_${prop} = src.cur_max_${prop};
% endif
% endfor
}
Comment on lines +100 to +114

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot apply changes based on this feedback


__device__ ${dtype} volatile &operator=(
${dtype} const &src) volatile
{
Expand Down Expand Up @@ -287,7 +303,8 @@ def to_device(array, backend='cython'):
out = gpuarray.to_device(get_queue(), array)
elif backend == 'cuda':
import pycuda.gpuarray as gpuarray
out = gpuarray.to_gpu(array)
from .cuda import get_cuda_allocator
out = gpuarray.to_gpu(array, allocator=get_cuda_allocator())
return wrap_array(out, backend)


Expand Down Expand Up @@ -325,7 +342,8 @@ def empty(n, dtype, backend='cython'):
out = gpuarray.empty(get_queue(), n, dtype)
elif backend == 'cuda':
import pycuda.gpuarray as gpuarray
out = gpuarray.empty(n, dtype)
from .cuda import get_cuda_allocator
out = gpuarray.empty(n, dtype, allocator=get_cuda_allocator())
else:
out = np.empty(n, dtype=dtype)
return wrap_array(out, backend)
Expand All @@ -342,7 +360,8 @@ def zeros(n, dtype, backend='cython'):
out = gpuarray.zeros(get_queue(), n, dtype)
elif backend == 'cuda':
import pycuda.gpuarray as gpuarray
out = gpuarray.zeros(n, dtype)
from .cuda import get_cuda_allocator
out = gpuarray.zeros(n, dtype, allocator=get_cuda_allocator())
else:
out = np.zeros(n, dtype=dtype)
return wrap_array(out, backend)
Expand Down
12 changes: 12 additions & 0 deletions compyle/ast_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,18 @@
basestring = str if PY_VER > 2 else basestring


def get_string_value(node):
    """Return a string literal's value or None if *node* is not a string.

    Parameters
    ----------
    node : ast.AST
        The AST node to inspect.

    Returns
    -------
    The literal's string value when *node* is a string literal, else None.
    Non-string constants (numbers, bytes, ``None`` ...) yield None.
    """
    import sys

    ast_constant = getattr(ast, 'Constant', None)
    if ast_constant is not None and isinstance(node, ast_constant):
        value = node.value
        return value if isinstance(value, str) else None

    # Only parsers older than Python 3.8 emit ``ast.Str`` nodes.  On newer
    # interpreters the class is deprecated (touching it warns on 3.12/3.13,
    # which is fatal under ``-W error``) and it is removed in 3.14, so the
    # legacy lookup is skipped entirely there.
    if sys.version_info < (3, 8):
        ast_str = getattr(ast, 'Str', None)
        if ast_str is not None and isinstance(node, ast_str):
            return node.s
    return None


class NameLister(ast.NodeVisitor):
"""Utility class to collect the Names in an AST.
"""
Expand Down
13 changes: 12 additions & 1 deletion compyle/cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import numpy as np
import six
_cuda_ctx = False
_cuda_memory_pool = None


def set_context():
Expand All @@ -27,6 +28,15 @@ def set_context():
_cuda_ctx = True


def get_cuda_allocator():
    """Return the ``allocate`` method of the module-level CUDA memory pool.

    ``set_context()`` is called first.  The ``DeviceMemoryPool`` is created
    on the first call and stored in ``_cuda_memory_pool`` so that later calls
    reuse the same pool.
    """
    global _cuda_memory_pool
    set_context()
    pool = _cuda_memory_pool
    if pool is None:
        # Imported lazily so pycuda.tools is only loaded when a pool is
        # actually needed.
        from pycuda.tools import DeviceMemoryPool
        pool = _cuda_memory_pool = DeviceMemoryPool()
    return pool.allocate


# The following code is taken from pyopencl for struct mapping.
# It should be ported over to pycuda eventually.
import pycuda.gpuarray as gpuarray # noqa
Expand Down Expand Up @@ -1396,7 +1406,8 @@ def __init__(self, dtype,

generic_scan_kernel_cache = WriteOncePersistentDict(
"pycuda-generated-scan-kernel-cache-v1",
key_builder=_NumpyTypesKeyBuilder())
key_builder=_NumpyTypesKeyBuilder(),
safe_sync=False)


class GenericScanKernel(_GenericScanKernelBase):
Expand Down
9 changes: 5 additions & 4 deletions compyle/cython_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from .types import KnownType, Undefined, get_declare_info
from .config import get_config
from .ast_utils import get_assigned, has_return
from .ast_utils import get_assigned, get_string_value, has_return
from .utils import getsourcelines

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -247,11 +247,12 @@ def parse_declare(code):
if call.func.id != 'declare':
raise CodeGenerationError('Unknown declare statement: %s' % code)
arg0 = call.args[0]
if not isinstance(arg0, ast.Str):
err = 'Type should be a string, given :%r' % arg0.s
type_str = get_string_value(arg0)
if type_str is None:
err = 'Type should be a string, given :%r' % getattr(arg0, 'value', arg0)
raise CodeGenerationError(err)

return get_declare_info(arg0.s)
return get_declare_info(type_str)


class CythonGenerator(object):
Expand Down
42 changes: 26 additions & 16 deletions compyle/jit.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import time
from pytools import memoize
from .config import get_config
from .ast_utils import get_string_value
from .cython_generator import CythonGenerator
from .transpiler import Transpiler, BUILTINS
from .types import (dtype_to_ctype, get_declare_info,
Expand Down Expand Up @@ -198,15 +199,16 @@ def warn(self, message, node):
warnings.warn(msg)

def visit_declare(self, node):
if not isinstance(node.args[0], ast.Str):
type_str = get_string_value(node.args[0])
if type_str is None:
self.error("Argument to declare should be a string.", node)
type_str = node.args[0].s
return self.get_declare_type(type_str)

def visit_cast(self, node):
if not isinstance(node.args[1], ast.Str):
type_str = get_string_value(node.args[1])
if type_str is None:
self.error("Cast type should be a string.", node)
return node.args[1].s
return type_str

def visit_address(self, node):
base_type = self.visit(node.args[0])
Expand Down Expand Up @@ -294,6 +296,11 @@ def visit_BinOp(self, node):
def visit_Num(self, node):
    """Return ``get_ctype_from_arg`` applied to the numeric literal ``node.n``."""
    number = node.n
    return get_ctype_from_arg(number)

def visit_Constant(self, node):
    """Return the result of ``get_ctype_from_arg`` for a constant's value.

    String constants are not passed to ``get_ctype_from_arg``; None is
    returned for them instead.
    """
    value = node.value
    return None if isinstance(value, str) else get_ctype_from_arg(value)

def visit_UnaryOp(self, node):
    """Return whatever visiting the operand of the unary operation returns."""
    operand = node.operand
    return self.visit(operand)

Expand Down Expand Up @@ -363,11 +370,12 @@ def __call__(self, *args, **kw):
c_func(*c_args, **kw)
self.queue.finish()
elif self.backend == 'cuda':
import pycuda.driver as drv
event = drv.Event()
c_func(*c_args, **kw)
event.record()
event.synchronize()
if get_config().profile:
import pycuda.driver as drv
event = drv.Event()
event.record()
event.synchronize()


class ReductionJIT(parallel.ReductionBase):
Expand Down Expand Up @@ -442,11 +450,12 @@ def __call__(self, *args, **kw):
self.queue.finish()
return result.get()
elif self.backend == 'cuda':
import pycuda.driver as drv
event = drv.Event()
result = c_func(*c_args, **kw)
event.record()
event.synchronize()
if get_config().profile:
import pycuda.driver as drv
event = drv.Event()
event.record()
event.synchronize()
return result.get()


Expand Down Expand Up @@ -562,8 +571,9 @@ def __call__(self, **kwargs):
c_func(*[c_args_dict[k] for k in output_arg_keys])
self.queue.finish()
elif self.backend == 'cuda':
import pycuda.driver as drv
event = drv.Event()
c_func(*[c_args_dict[k] for k in output_arg_keys])
event.record()
event.synchronize()
if get_config().profile:
import pycuda.driver as drv
event = drv.Event()
event.record()
event.synchronize()
9 changes: 5 additions & 4 deletions compyle/low_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,15 +260,16 @@ def __call__(self, *args, **kw):
self.knl(*c_args)
self.queue.finish()
elif self.backend == 'cuda':
import pycuda.driver as drv
shared_mem_size = int(self._get_local_size(args, ls[0]))
num_blocks = int((n + ls[0] - 1) / ls[0])
num_tpb = int(ls[0])
event = drv.Event()
self.knl(*c_args, block=(num_tpb, 1, 1), grid=(num_blocks, 1),
shared=shared_mem_size)
event.record()
event.synchronize()
if get_config().profile:
import pycuda.driver as drv
event = drv.Event()
event.record()
event.synchronize()


class _prange(Extern):
Expand Down
27 changes: 15 additions & 12 deletions compyle/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,11 +547,12 @@ def __call__(self, *args, **kw):
self.c_func(*c_args, **kw)
self.queue.finish()
elif self.backend == 'cuda':
import pycuda.driver as drv
event = drv.Event()
self.c_func(*c_args, **kw)
event.record()
event.synchronize()
if get_config().profile:
import pycuda.driver as drv
event = drv.Event()
event.record()
event.synchronize()


class Elementwise(object):
Expand Down Expand Up @@ -809,11 +810,12 @@ def __call__(self, *args):
self.queue.finish()
return result.get()
elif self.backend == 'cuda':
import pycuda.driver as drv
event = drv.Event()
result = self.c_func(*c_args)
event.record()
event.synchronize()
if get_config().profile:
import pycuda.driver as drv
event = drv.Event()
event.record()
event.synchronize()
return result.get()


Expand Down Expand Up @@ -1229,11 +1231,12 @@ def __call__(self, **kwargs):
self.c_func(*[c_args_dict[k] for k in output_arg_keys])
self.queue.finish()
elif self.backend == 'cuda':
import pycuda.driver as drv
event = drv.Event()
self.c_func(*[c_args_dict[k] for k in output_arg_keys])
event.record()
event.synchronize()
if get_config().profile:
import pycuda.driver as drv
event = drv.Event()
event.record()
event.synchronize()


class Scan(object):
Expand Down
13 changes: 10 additions & 3 deletions compyle/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,16 @@ def _profile_knl(*args, **kwargs):
_record_profile(name, end - start)
return event
elif backend == 'cuda':
exec_time = kernel(*args, **kwargs, time_kernel=True)
_record_profile(name, exec_time)
return exec_time
from pycuda import driver as cuda
stream = kwargs.get('stream')
start = cuda.Event()
end = cuda.Event()
start.record(stream)
result = kernel(*args, **kwargs)
end.record(stream)
end.synchronize()
_record_profile(name, end.time_since(start) * 1e-3)
return result
else:
start = time.time()
kernel(*args, **kwargs)
Expand Down
11 changes: 9 additions & 2 deletions compyle/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@
getfullargspec = inspect.getfullargspec


def _string_value(node):
value = node.value
if isinstance(value, ast.Constant):
return value.value
return value.s


class Template(object):
def __init__(self, name):
self.name = name
Expand Down Expand Up @@ -45,8 +52,8 @@ def _get_code(self):
args += extra_args
arg_string = ', '.join(args)
body = m.body[0].body
template = body[-1].value.s
docstring = body[0].value.s if len(body) == 2 else ''
template = _string_value(body[-1])
docstring = _string_value(body[0]) if len(body) == 2 else ''
name = self.name
sig = 'def {name}({args}):\n """{docs}\n """'.format(
name=name, args=arg_string, docs=docstring
Expand Down
Loading