Graph Framework
Loading...
Searching...
No Matches
jit.hpp
Go to the documentation of this file.
1//------------------------------------------------------------------------------
6//------------------------------------------------------------------------------
7
8#ifndef jit_h
9#define jit_h
10
11#include <algorithm>
12#include <iterator>
13#include <thread>
14
15#ifdef USE_METAL
16#include "metal_context.hpp"
17#elif defined(USE_CUDA)
18#include "cuda_context.hpp"
19#endif
20#include "cpu_context.hpp"
21
22//------------------------------------------------------------------------------
26//------------------------------------------------------------------------------
27//------------------------------------------------------------------------------
31//------------------------------------------------------------------------------
// START_GPU/END_GPU bracket GPU work. On Metal they expand to an Objective-C
// @autoreleasepool block so temporary Metal objects are released promptly;
// on every other backend they expand to nothing.
32#ifdef USE_METAL
33#define START_GPU @autoreleasepool {
34#define END_GPU }
35#else
36#define START_GPU
37#define END_GPU
38#endif
39
41namespace jit {
42//------------------------------------------------------------------------------
//  Class for JIT compilation of GPU/CPU kernels generated from expression
//  graphs. T is the floating point scalar type; SAFE_MATH toggles safe-math
//  code generation.
47//------------------------------------------------------------------------------
48 template<float_scalar T, bool SAFE_MATH=false>
49 class context {
50 private:
//  Buffer the kernel source code is streamed into before compilation.
52 std::ostringstream source_buffer;
//  Map from node pointers to the register names assigned to them.
54 register_map registers;
//  Names of the kernels added so far, in insertion order.
56 std::vector<std::string> kernel_names;
//  Per-kernel lists of 1D textures, keyed by kernel name.
58 std::map<std::string, texture1d_list> kernel_1dtextures;
//  Per-kernel lists of 2D textures, keyed by kernel name.
60 std::map<std::string, texture2d_list> kernel_2dtextures;
61
//  Backend context type selected at compile time via use_gpu<T>.
//  NOTE(review): the branch types of this std::conditional were lost in
//  extraction -- presumably the cuda/metal context when USE_CUDA/USE_METAL
//  is defined and the cpu context otherwise; confirm against the original
//  header.
63 using gpu_context_type = typename std::conditional<use_gpu<T> (),
64#ifdef USE_CUDA
66#elif defined(USE_METAL)
68#else
70#endif
72
//  The backend context instance all work is delegated to.
74 gpu_context_type gpu_context;
75
76 public:
//  Size of the random state needed by the selected backend.
78 constexpr static size_t random_state_size = gpu_context_type::random_state_size;
79
80//------------------------------------------------------------------------------
84//------------------------------------------------------------------------------
85 static size_t max_concurrency() {
86 const size_t num = gpu_context_type::max_concurrency();
87 std::cout << "Located " << num << " "
88 << gpu_context_type::device_type() << " device"
89 << (num == 1 ? "." : "s.")
90 << std::endl;
91 return num;
92 }
93
94//------------------------------------------------------------------------------
98//------------------------------------------------------------------------------
//  Construct a jit context for device number `index`.
//  Sets the source stream precision to max_digits10 for T (so floating point
//  constants emitted into kernel source round-trip exactly), then lets the
//  backend write its standard header into the source buffer.
99 context(const size_t index) : gpu_context(index) {
100 source_buffer << std::setprecision(max_digits10<T> ());
101 gpu_context.create_header(source_buffer);
102 }
103
104//------------------------------------------------------------------------------
115//------------------------------------------------------------------------------
//  Add a kernel to the source buffer.
//  NOTE(review): the parameter lines between `name` and `size` were lost in
//  extraction; per the generated documentation they are inputs, outputs,
//  setters and state. `name` is the kernel name and `size` the problem size.
116 void add_kernel(const std::string name,
121 const size_t size) {
122 kernel_names.push_back(name);
123
//  Inputs start as constant and are demoted when a setter writes them back.
124 std::vector<bool> is_constant(inputs.size(), true);
125 visiter_map visited;
126 register_usage usage;
//  Start fresh texture lists for this kernel.
127 kernel_1dtextures[name] = texture1d_list();
128 kernel_2dtextures[name] = texture2d_list();
129 for (auto &[out, in] : setters) {
//  Index of the input this setter maps back to. std::distance yields
//  inputs.size() when the input is not found, which the bound check
//  below filters out.
130 auto found = std::distance(inputs.begin(),
131 std::find(inputs.begin(),
132 inputs.end(), in));
133 if (found < is_constant.size()) {
134 is_constant[found] = false;
135 }
136 out->compile_preamble(source_buffer, registers,
137 visited, usage,
138 kernel_1dtextures[name],
139 kernel_2dtextures[name],
140 gpu_context.remaining_const_memory);
141 }
142 for (auto &out : outputs) {
143 out->compile_preamble(source_buffer, registers,
144 visited, usage,
145 kernel_1dtextures[name],
146 kernel_2dtextures[name],
147 gpu_context.remaining_const_memory);
148 }
149
//  Make sure every input has a usage entry even if no node referenced it.
150 for (auto &in : inputs) {
151 if (usage.find(in.get()) == usage.end()) {
152 usage[in.get()] = 0;
153 }
154 }
155
156 gpu_context.create_kernel_prefix(source_buffer,
157 name, inputs, outputs, state,
158 size, is_constant,
159 registers, usage,
160 kernel_1dtextures[name],
161 kernel_2dtextures[name]);
162
//  Compile the kernel body: setters first, then the requested outputs.
163 register_map indices;
164 for (auto &[out, in] : setters) {
165 out->compile(source_buffer, registers, indices, usage);
166 }
167 for (auto &out : outputs) {
168 out->compile(source_buffer, registers, indices, usage);
169 }
170
171 gpu_context.create_kernel_postfix(source_buffer, outputs,
172 setters, state,
173 registers, indices, usage);
174
175// Delete the registers so that they can be used again in other kernels.
//  Presumably names starting with 'r' are kernel-local registers while other
//  entries persist across kernels -- confirm against register.hpp.
176 std::vector<void *> removed_elements;
177 for (auto &[key, value] : registers) {
178 if (value[0] == 'r') {
179 removed_elements.push_back(key);
180 }
181 }
182
//  Erase after the scan so map iterators are never invalidated mid-loop.
183 for (auto &key : removed_elements) {
184 registers.erase(key);
185 }
186 }
187
188//------------------------------------------------------------------------------
192//------------------------------------------------------------------------------
//  Add a max reduction kernel for buffers of `size` elements. Delegates the
//  source generation to the backend context.
193 void add_max_reduction(const size_t size) {
194 gpu_context.create_reduction(source_buffer, size);
195 }
196
197//------------------------------------------------------------------------------
199//------------------------------------------------------------------------------
//  Print the accumulated kernel source to standard output, framed by blank
//  lines. NOTE(review): the `void print_source() {` signature line was lost
//  in extraction -- confirm against the original header.
201 std::cout << std::endl << source_buffer.str() << std::endl;
202 }
203
204//------------------------------------------------------------------------------
206//------------------------------------------------------------------------------
207 void save_source() {
208 std::string source = source_buffer.str();
209 std::ostringstream filename;
210 filename << std::hash<std::string> {} (source)
211 << std::hash<std::thread::id>{}(std::this_thread::get_id());
212 if constexpr (use_cuda()) {
213 filename << ".cu";
214 } else if constexpr (use_metal<T> ()) {
215 filename << ".metal";
216 } else {
217 filename << ".cpp";
218 }
219
220 std::ofstream outFile(filename.str());
221 outFile << source;
222 }
223
224//------------------------------------------------------------------------------
229//------------------------------------------------------------------------------
230 void compile(const bool add_reduction=false) {
231#ifdef SAVE_KERNEL_SOURCE
232 save_source();
233#endif
234 gpu_context.compile(source_buffer.str(),
235 kernel_names,
236 add_reduction);
237 }
238
239//------------------------------------------------------------------------------
248//------------------------------------------------------------------------------
//  Create a callable that launches the named kernel with this kernel's
//  texture lists. NOTE(review): the parameter lines between `kernel_name`
//  and `num_rays` were lost in extraction; per the generated documentation
//  they are inputs, outputs and state.
249 std::function<void(void)> create_kernel_call(const std::string kernel_name,
253 const size_t num_rays) {
254 return gpu_context.create_kernel_call(kernel_name, inputs, outputs, state, num_rays,
255 kernel_1dtextures[kernel_name],
256 kernel_2dtextures[kernel_name]);
257 }
258
259//------------------------------------------------------------------------------
265//------------------------------------------------------------------------------
//  Create a callable computing the max of `argument`; `run` is the kernel
//  calling function to execute first. Delegates to the backend context.
266 std::function<T(void)> create_max_call(graph::shared_leaf<T, SAFE_MATH> &argument,
267 std::function<void(void)> run) {
268 return gpu_context.create_max_call(argument, run);
269 }
270
271//------------------------------------------------------------------------------
276//------------------------------------------------------------------------------
//  Print the results of the given nodes at `index`. NOTE(review): the
//  `nodes` parameter line was lost in extraction; per the generated
//  documentation it is a const graph output-node vector reference.
277 void print(const size_t index,
279 gpu_context.print_results(index, nodes);
280 }
281
282//------------------------------------------------------------------------------
288//------------------------------------------------------------------------------
//  Return the value of `node` at `index` for checking. NOTE(review): the
//  `node` parameter line was lost in extraction; per the generated
//  documentation it is a const shared leaf reference.
289 T check_value(const size_t index,
291 return gpu_context.check_value(index, node);
292 }
293
294//------------------------------------------------------------------------------
296//------------------------------------------------------------------------------
//  Block until the backend context reports the kernel has finished.
297 void wait() {
298 gpu_context.wait();
299 }
300
301//------------------------------------------------------------------------------
306//------------------------------------------------------------------------------
//  Copy the contents of `source` into the device buffer backing `node`.
//  NOTE(review): the first signature line (void copy_to_device(...node,) was
//  lost in extraction -- confirm against the original header.
308 T *source) {
309 gpu_context.copy_to_device(node, source);
310 }
311
312//------------------------------------------------------------------------------
317//------------------------------------------------------------------------------
//  Copy the contents of the device buffer backing `node` into `destination`.
//  NOTE(review): the first signature line (void copy_to_host(...node,) was
//  lost in extraction -- confirm against the original header.
319 T *destination) {
320 gpu_context.copy_to_host(node, destination);
321 }
322
323//------------------------------------------------------------------------------
327//------------------------------------------------------------------------------
//  Return the backend buffer pointer for `node`. NOTE(review): the
//  `T *get_buffer(...)` signature line was lost in extraction -- confirm
//  against the original header.
329 return gpu_context.get_buffer(node);
330 }
331 };
332}
333
334#endif /* jit_h */
Class representing a cpu context.
Definition cpu_context.hpp:82
Class representing a cuda gpu context.
Definition cuda_context.hpp:73
Class representing a metal gpu context.
Definition metal_context.hpp:25
Class for JIT compile of the GPU kernels.
Definition jit.hpp:49
void compile(const bool add_reduction=false)
Compile the kernel.
Definition jit.hpp:230
static constexpr size_t random_state_size
Size of random state needed.
Definition jit.hpp:78
static size_t max_concurrency()
Get the maximum number of concurrent instances.
Definition jit.hpp:85
void wait()
Wait for kernel to finish.
Definition jit.hpp:297
context(const size_t index)
Construct a jit context object.
Definition jit.hpp:99
std::function< T(void)> create_max_call(graph::shared_leaf< T, SAFE_MATH > &argument, std::function< void(void)> run)
Create a max compute kernel calling function.
Definition jit.hpp:266
void add_max_reduction(const size_t size)
Add max reduction kernel.
Definition jit.hpp:193
void save_source()
Save the kernel source code.
Definition jit.hpp:207
std::function< void(void)> create_kernel_call(const std::string kernel_name, graph::input_nodes< T, SAFE_MATH > inputs, graph::output_nodes< T, SAFE_MATH > outputs, graph::shared_random_state< T, SAFE_MATH > state, const size_t num_rays)
Create a kernel calling function.
Definition jit.hpp:249
void print(const size_t index, const graph::output_nodes< T, SAFE_MATH > &nodes)
Print output.
Definition jit.hpp:277
void copy_to_device(graph::shared_leaf< T, SAFE_MATH > &node, T *source)
Copy contexts of buffer to device.
Definition jit.hpp:307
T * get_buffer(graph::shared_leaf< T, SAFE_MATH > &node)
Get buffer from the gpu_context.
Definition jit.hpp:328
T check_value(const size_t index, const graph::shared_leaf< T, SAFE_MATH > &node)
Check the value.
Definition jit.hpp:289
void add_kernel(const std::string name, graph::input_nodes< T, SAFE_MATH > inputs, graph::output_nodes< T, SAFE_MATH > outputs, graph::map_nodes< T, SAFE_MATH > setters, graph::shared_random_state< T, SAFE_MATH > state, const size_t size)
Add a kernel.
Definition jit.hpp:116
void print_source()
Print the kernel source.
Definition jit.hpp:200
void copy_to_host(graph::shared_leaf< T, SAFE_MATH > &node, T *destination)
Copy contexts of buffer to host.
Definition jit.hpp:318
Cpu context for cpus.
Cuda context for cuda based gpus.
Metal context for metal based gpus.
std::shared_ptr< random_state_node< T, SAFE_MATH > > shared_random_state
Convenience type alias for shared random state nodes.
Definition random.hpp:272
std::vector< shared_variable< T, SAFE_MATH > > input_nodes
Convenience type alias for a vector of inputs.
Definition node.hpp:1730
std::shared_ptr< leaf_node< T, SAFE_MATH > > shared_leaf
Convenience type alias for shared leaf nodes.
Definition node.hpp:673
std::vector< std::pair< shared_leaf< T, SAFE_MATH >, shared_variable< T, SAFE_MATH > > > map_nodes
Convenience type alias for mapping end nodes back to inputs.
Definition node.hpp:1734
std::vector< shared_leaf< T, SAFE_MATH > > output_nodes
Convenience type alias for a vector of output nodes.
Definition node.hpp:688
Name space for JIT functions.
Definition jit.hpp:41
std::map< void *, size_t > texture1d_list
Type alias for indexing 1D textures.
Definition register.hpp:262
std::map< void *, std::array< size_t, 2 > > texture2d_list
Type alias for indexing 2D textures.
Definition register.hpp:264
std::map< void *, size_t > register_usage
Type alias for counting register usage.
Definition register.hpp:258
std::map< void *, std::string > register_map
Type alias for mapping node pointers to register names.
Definition register.hpp:256
constexpr bool use_cuda()
Test to use Cuda.
Definition register.hpp:67
std::set< void * > visiter_map
Type alias for listing visited nodes.
Definition register.hpp:260