graph_framework-docs/jit_8hpp_source.html

//------------------------------------------------------------------------------

//------------------------------------------------------------------------------


#ifndef jit_h

#define jit_h


#include <algorithm>

#include <iterator>

#include <thread>


#ifdef USE_METAL

#include "metal_context.hpp"

#elif defined(USE_CUDA)

#include "cuda_context.hpp"

#endif

#include "cpu_context.hpp"


//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

//  START_GPU and END_GPU are meant to be used as a matched pair around a
//  section of code. When USE_METAL is defined, START_GPU opens an Objective-C
//  @autoreleasepool block and END_GPU supplies the closing brace. For every
//  other configuration both macros expand to nothing, so the bracketed code is
//  compiled unchanged.
#ifdef USE_METAL

#define START_GPU @autoreleasepool {

#define END_GPU }

#else

#define START_GPU

#define END_GPU

#endif


namespace jit {

//------------------------------------------------------------------------------
///  @brief Class for JIT compile of the GPU kernels.
///
///  Kernel source is accumulated in a string buffer. Compilation, kernel
///  launches and buffer transfers are all forwarded to a backend context object
///  whose type is selected at compile time.
///
///  @tparam T         Base type of the calculation.
///  @tparam SAFE_MATH Flag forwarded to the backend context and to the graph
///                    node types.
//------------------------------------------------------------------------------
    template<float_scalar T, bool SAFE_MATH=false>
    class context {
    private:
///  Stream that the generated kernel source is written to.
        std::ostringstream source_buffer;
///  Map of node pointers to register names.
        register_map registers;
///  Names of every kernel added through @ref add_kernel, in insertion order.
        std::vector<std::string> kernel_names;
///  1D textures collected for each kernel, keyed by kernel name.
        std::map<std::string, texture1d_list> kernel_1dtextures;
///  2D textures collected for each kernel, keyed by kernel name.
        std::map<std::string, texture2d_list> kernel_2dtextures;

///  Backend context type. When use_gpu<T> () is true this is the CUDA or Metal
///  context depending on which of USE_CUDA or USE_METAL is defined. In every
///  other case it is the cpu context.
        using gpu_context_type = typename std::conditional<use_gpu<T> (),
#ifdef USE_CUDA
                                                           gpu::cuda_context<T, SAFE_MATH>,
#elif defined(USE_METAL)
                                                           gpu::metal_context<SAFE_MATH>,
#else
                                                           gpu::cpu_context<T, SAFE_MATH>,
#endif
                                                           gpu::cpu_context<T, SAFE_MATH>>::type;

///  Backend context instance that all operations are forwarded to.
        gpu_context_type gpu_context;

    public:
///  Size of random state needed. Taken from the backend context type.
        constexpr static size_t random_state_size = gpu_context_type::random_state_size;

//------------------------------------------------------------------------------
///  @brief Get the maximum number of concurrent instances.
///
///  Queries the backend context type and prints the number of located devices
///  together with the backend device type to standard output.
///
///  @returns The value reported by the backend context type.
//------------------------------------------------------------------------------
        static size_t max_concurrency() {
            const size_t num = gpu_context_type::max_concurrency();
            std::cout << "Located " << num << " "
                      << gpu_context_type::device_type() << " device"
                      << (num == 1 ? "." : "s.")
                      << std::endl;
            return num;
        }

//------------------------------------------------------------------------------
///  @brief Construct a jit context object.
///
///  Sets the output precision of the source buffer to max_digits10<T> () and
///  lets the backend write its header into the source buffer.
///
///  @param[in] index Index forwarded to the backend context constructor.
///                   NOTE(review): presumably the device number, confirm
///                   against the backend contexts.
//------------------------------------------------------------------------------
        context(const size_t index) : gpu_context(index) {
            source_buffer << std::setprecision(max_digits10<T> ());
            gpu_context.create_header(source_buffer);
        }

//------------------------------------------------------------------------------
///  @brief Add a kernel.
///
///  Writes the source for one kernel into the source buffer. The preamble of
///  every setter and output node is emitted first, then the backend kernel
///  prefix, then the body of every setter and output node and finally the
///  backend kernel postfix.
///
///  @param[in] name    Name of the kernel. Recorded in the kernel name list and
///                     used as the key for this kernel's texture lists.
///  @param[in] inputs  Input nodes of the kernel.
///  @param[in] outputs Output nodes of the kernel.
///  @param[in] setters Pairs of a node to compile and the input it maps back
///                     to. An input that appears in a pair is flagged as not
///                     constant.
///  @param[in] state   Random state forwarded to the backend.
///  @param[in] size    Size forwarded to the backend kernel prefix.
///                     NOTE(review): presumably the number of elements, confirm
///                     against the backend contexts.
//------------------------------------------------------------------------------
        void add_kernel(const std::string name,
                        graph::input_nodes<T, SAFE_MATH> inputs,
                        graph::output_nodes<T, SAFE_MATH> outputs,
                        graph::map_nodes<T, SAFE_MATH> setters,
                        graph::shared_random_state<T, SAFE_MATH> state,
                        const size_t size) {
            kernel_names.push_back(name);

            std::vector<bool> is_constant(inputs.size(), true);
            visiter_map visited;
            register_usage usage;

//  Reset the texture lists for this kernel once and keep references so the
//  maps are not searched again for every node below.
            auto &textures1d = kernel_1dtextures[name];
            auto &textures2d = kernel_2dtextures[name];
            textures1d = texture1d_list();
            textures2d = texture2d_list();

            for (auto &[out, in] : setters) {
//  An input that a setter writes back to can not be treated as constant.
//  Comparing against the end iterator avoids mixing the signed result of
//  std::distance with the unsigned container size.
                const auto position = std::find(inputs.begin(),
                                                inputs.end(), in);
                if (position != inputs.end()) {
                    is_constant[static_cast<size_t> (std::distance(inputs.begin(),
                                                                   position))] = false;
                }
                out->compile_preamble(source_buffer, registers,
                                      visited, usage,
                                      textures1d, textures2d,
                                      gpu_context.remaining_const_memory);
            }
            for (auto &out : outputs) {
                out->compile_preamble(source_buffer, registers,
                                      visited, usage,
                                      textures1d, textures2d,
                                      gpu_context.remaining_const_memory);
            }

//  Make sure every input has a usage entry. Inputs not encountered while
//  building the preamble get a count of zero, existing counts are kept.
            for (auto &in : inputs) {
                usage.try_emplace(in.get(), 0);
            }

            gpu_context.create_kernel_prefix(source_buffer,
                                             name, inputs, outputs, state,
                                             size, is_constant,
                                             registers, usage,
                                             textures1d, textures2d);

            register_map indices;
            for (auto &[out, in] : setters) {
                out->compile(source_buffer, registers, indices, usage);
            }
            for (auto &out : outputs) {
                out->compile(source_buffer, registers, indices, usage);
            }

            gpu_context.create_kernel_postfix(source_buffer, outputs,
                                              setters, state,
                                              registers, indices, usage);

//  Delete the registers so that they can be used again in other kernels. Only
//  entries whose name starts with 'r' are removed. std::map::erase returns the
//  iterator following the erased element so the map can be pruned in one pass.
            for (auto entry = registers.begin(); entry != registers.end();) {
                if (entry->second[0] == 'r') {
                    entry = registers.erase(entry);
                } else {
                    ++entry;
                }
            }
        }

//------------------------------------------------------------------------------
///  @brief Add max reduction kernel.
///
///  @param[in] size Size forwarded to the backend reduction generator.
//------------------------------------------------------------------------------
        void add_max_reduction(const size_t size) {
            gpu_context.create_reduction(source_buffer, size);
        }

//------------------------------------------------------------------------------
///  @brief Print the kernel source.
///
///  Writes the accumulated source buffer to standard output.
//------------------------------------------------------------------------------
        void print_source() const {
            std::cout << std::endl << source_buffer.str() << std::endl;
        }

//------------------------------------------------------------------------------
///  @brief Save the kernel source code.
///
///  The file name is the hash of the source followed by the hash of the
///  calling thread id, with a .cu, .metal or .cpp extension depending on the
///  backend. The path is relative, so the file lands in the current working
///  directory. A warning is printed to standard error if the file can not be
///  opened.
//------------------------------------------------------------------------------
        void save_source() const {
            const std::string source = source_buffer.str();

            std::ostringstream filename;
            filename << std::hash<std::string> {} (source)
                     << std::hash<std::thread::id>{}(std::this_thread::get_id());
            if constexpr (use_cuda()) {
                filename << ".cu";
            } else if constexpr (use_metal<T> ()) {
                filename << ".metal";
            } else {
                filename << ".cpp";
            }

            std::ofstream outFile(filename.str());
            if (!outFile) {
//  Saving the source is a debugging aid, so report the failure and carry on
//  instead of aborting the compile.
                std::cerr << "Failed to open " << filename.str()
                          << " to save the kernel source." << std::endl;
                return;
            }
            outFile << source;
        }

//------------------------------------------------------------------------------
///  @brief Compile the kernel.
///
///  When SAVE_KERNEL_SOURCE is defined the source is written to disk first.
///
///  @param[in] add_reduction Flag forwarded to the backend compile call.
//------------------------------------------------------------------------------
        void compile(const bool add_reduction=false) {
#ifdef SAVE_KERNEL_SOURCE
            save_source();
#endif
            gpu_context.compile(source_buffer.str(),
                                kernel_names,
                                add_reduction);
        }

//------------------------------------------------------------------------------
///  @brief Create a kernel calling function.
///
///  @param[in] kernel_name Name of the kernel. Also selects the texture lists
///                         handed to the backend.
///  @param[in] inputs      Input nodes of the kernel.
///  @param[in] outputs     Output nodes of the kernel.
///  @param[in] state       Random state forwarded to the backend.
///  @param[in] num_rays    Count forwarded to the backend.
///  @returns The callable produced by the backend context.
//------------------------------------------------------------------------------
        std::function<void(void)> create_kernel_call(const std::string kernel_name,
                                                     graph::input_nodes<T, SAFE_MATH> inputs,
                                                     graph::output_nodes<T, SAFE_MATH> outputs,
                                                     graph::shared_random_state<T, SAFE_MATH> state,
                                                     const size_t num_rays) {
            return gpu_context.create_kernel_call(kernel_name, inputs, outputs, state, num_rays,
                                                  kernel_1dtextures[kernel_name],
                                                  kernel_2dtextures[kernel_name]);
        }

//------------------------------------------------------------------------------
///  @brief Create a max compute kernel calling function.
///
///  @param[in] argument Node forwarded to the backend.
///  @param[in] run      Callable forwarded to the backend.
///  @returns The callable produced by the backend context.
//------------------------------------------------------------------------------
        std::function<T(void)> create_max_call(graph::shared_leaf<T, SAFE_MATH> &argument,
                                               std::function<void(void)> run) {
            return gpu_context.create_max_call(argument, run);
        }

//------------------------------------------------------------------------------
///  @brief Print output.
///
///  @param[in] index Index forwarded to the backend.
///  @param[in] nodes Nodes to print the results of.
//------------------------------------------------------------------------------
        void print(const size_t index,
                   const graph::output_nodes<T, SAFE_MATH> &nodes) {
            gpu_context.print_results(index, nodes);
        }

//------------------------------------------------------------------------------
///  @brief Check the value.
///
///  @param[in] index Index forwarded to the backend.
///  @param[in] node  Node to check the value of.
///  @returns The value reported by the backend context.
//------------------------------------------------------------------------------
        T check_value(const size_t index,
                      const graph::shared_leaf<T, SAFE_MATH> &node) {
            return gpu_context.check_value(index, node);
        }

//------------------------------------------------------------------------------
///  @brief Wait for kernel to finish.
//------------------------------------------------------------------------------
        void wait() {
            gpu_context.wait();
        }

//------------------------------------------------------------------------------
///  @brief Copy contents of buffer to device.
///
///  @param[in] node   Node forwarded to the backend.
///  @param[in] source Host side buffer forwarded to the backend.
//------------------------------------------------------------------------------
        void copy_to_device(graph::shared_leaf<T, SAFE_MATH> &node,
                            T *source) {
            gpu_context.copy_to_device(node, source);
        }

//------------------------------------------------------------------------------
///  @brief Copy contents of buffer to host.
///
///  @param[in]     node        Node forwarded to the backend.
///  @param[in,out] destination Host side buffer forwarded to the backend.
//------------------------------------------------------------------------------
        void copy_to_host(graph::shared_leaf<T, SAFE_MATH> &node,
                          T *destination) {
            gpu_context.copy_to_host(node, destination);
        }

//------------------------------------------------------------------------------
///  @brief Get buffer from the gpu_context.
///
///  @param[in] node Node forwarded to the backend.
///  @returns The pointer returned by the backend context.
//------------------------------------------------------------------------------
        T *get_buffer(graph::shared_leaf<T, SAFE_MATH> &node) {
            return gpu_context.get_buffer(node);
        }
    };


}


#endif /* jit_h */

gpu::cpu_context
Class representing a cpu context.
Definition cpu_context.hpp:82

gpu::cuda_context
Class representing a cuda gpu context.
Definition cuda_context.hpp:73

gpu::metal_context
Class representing a metal gpu context.
Definition metal_context.hpp:25

jit::context
Class for JIT compile of the GPU kernels.
Definition jit.hpp:49

jit::context::compile
void compile(const bool add_reduction=false)
Compile the kernel.
Definition jit.hpp:230

jit::context::random_state_size
static constexpr size_t random_state_size
Size of random state needed.
Definition jit.hpp:78

jit::context::max_concurrency
static size_t max_concurrency()
Get the maximum number of concurrent instances.
Definition jit.hpp:85

jit::context::wait
void wait()
Wait for kernel to finish.
Definition jit.hpp:297

jit::context::context
context(const size_t index)
Construct a jit context object.
Definition jit.hpp:99

jit::context::create_max_call
std::function< T(void)> create_max_call(graph::shared_leaf< T, SAFE_MATH > &argument, std::function< void(void)> run)
Create a max compute kernel calling function.
Definition jit.hpp:266

jit::context::add_max_reduction
void add_max_reduction(const size_t size)
Add max reduction kernel.
Definition jit.hpp:193

jit::context::save_source
void save_source()
Save the kernel source code.
Definition jit.hpp:207

jit::context::create_kernel_call
std::function< void(void)> create_kernel_call(const std::string kernel_name, graph::input_nodes< T, SAFE_MATH > inputs, graph::output_nodes< T, SAFE_MATH > outputs, graph::shared_random_state< T, SAFE_MATH > state, const size_t num_rays)
Create a kernel calling function.
Definition jit.hpp:249

jit::context::print
void print(const size_t index, const graph::output_nodes< T, SAFE_MATH > &nodes)
Print output.
Definition jit.hpp:277

jit::context::copy_to_device
void copy_to_device(graph::shared_leaf< T, SAFE_MATH > &node, T *source)
Copy contents of buffer to device.
Definition jit.hpp:307

jit::context::get_buffer
T * get_buffer(graph::shared_leaf< T, SAFE_MATH > &node)
Get buffer from the gpu_context.
Definition jit.hpp:328

jit::context::check_value
T check_value(const size_t index, const graph::shared_leaf< T, SAFE_MATH > &node)
Check the value.
Definition jit.hpp:289

jit::context::add_kernel
void add_kernel(const std::string name, graph::input_nodes< T, SAFE_MATH > inputs, graph::output_nodes< T, SAFE_MATH > outputs, graph::map_nodes< T, SAFE_MATH > setters, graph::shared_random_state< T, SAFE_MATH > state, const size_t size)
Add a kernel.
Definition jit.hpp:116

jit::context::print_source
void print_source()
Print the kernel source.
Definition jit.hpp:200

jit::context::copy_to_host
void copy_to_host(graph::shared_leaf< T, SAFE_MATH > &node, T *destination)
Copy contents of buffer to host.
Definition jit.hpp:318

cpu_context.hpp
Cpu context for cpus.

cuda_context.hpp
Cuda context for cuda based gpus.

metal_context.hpp
Metal context for metal based gpus.

graph::shared_random_state
std::shared_ptr< random_state_node< T, SAFE_MATH > > shared_random_state
Convenience type alias for shared random state nodes.
Definition random.hpp:272

graph::input_nodes
std::vector< shared_variable< T, SAFE_MATH > > input_nodes
Convenience type alias for a vector of inputs.
Definition node.hpp:1730

graph::shared_leaf
std::shared_ptr< leaf_node< T, SAFE_MATH > > shared_leaf
Convenience type alias for shared leaf nodes.
Definition node.hpp:673

graph::map_nodes
std::vector< std::pair< shared_leaf< T, SAFE_MATH >, shared_variable< T, SAFE_MATH > > > map_nodes
Convenience type alias for mapping end codes back to inputs.
Definition node.hpp:1734

graph::output_nodes
std::vector< shared_leaf< T, SAFE_MATH > > output_nodes
Convenience type alias for a vector of output nodes.
Definition node.hpp:688

jit
Name space for JIT functions.
Definition jit.hpp:41

jit::texture1d_list
std::map< void *, size_t > texture1d_list
Type alias for indexing 1D textures.
Definition register.hpp:262

jit::texture2d_list
std::map< void *, std::array< size_t, 2 > > texture2d_list
Type alias for indexing 2D textures.
Definition register.hpp:264

jit::register_usage
std::map< void *, size_t > register_usage
Type alias for counting register usage.
Definition register.hpp:258

jit::register_map
std::map< void *, std::string > register_map
Type alias for mapping node pointers to register names.
Definition register.hpp:256

jit::use_cuda
constexpr bool use_cuda()
Test to use Cuda.
Definition register.hpp:67

jit::visiter_map
std::set< void * > visiter_map
Type alias for listing visited nodes.
Definition register.hpp:260