Graph Framework
Loading...
Searching...
No Matches
cpu_context.hpp
Go to the documentation of this file.
1//------------------------------------------------------------------------------
6//------------------------------------------------------------------------------
7
8#ifndef cpu_context_h
9#define cpu_context_h
10
11#include <fstream>
12#include <cstdlib>
13#include <cstring>
14#include <thread>
15#include <unordered_set>
16
17// Clang headers will define IBAction and IBOutlet, so undefine them here.
18#undef IBAction
19#undef IBOutlet
20#include "llvm/Support/VirtualFileSystem.h"
21#include "clang/Frontend/TextDiagnosticPrinter.h"
22#include "clang/Frontend/CompilerInvocation.h"
23#include "clang/Frontend/CompilerInstance.h"
24#include "clang/Basic/TargetInfo.h"
25#include "clang/CodeGen/CodeGenAction.h"
26#include "clang/Lex/PreprocessorOptions.h"
27#include "llvm/Support/TargetSelect.h"
28#include "llvm/ExecutionEngine/Orc/LLJIT.h"
29#ifndef NDEBUG
30#include "llvm/ExecutionEngine/Orc/Debugging/DebuggerSupport.h"
31#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h"
32#endif
33#include "llvm/Support/raw_ostream.h"
34#include "llvm/ADT/IntrusiveRefCntPtr.h"
35#include "llvm/ADT/SmallVector.h"
36#include "llvm/TargetParser/Host.h"
37#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
38
39#include "random.hpp"
40
41#ifndef NDEBUG
42//------------------------------------------------------------------------------
44//------------------------------------------------------------------------------
45LLVM_ATTRIBUTE_USED void linkComponents() {
46 llvm::errs() << (void *)&llvm_orc_registerJITLoaderGDBWrapper
47 << (void *)&llvm_orc_registerJITLoaderGDBAllocAction;
48}
49#endif
50
51namespace gpu {
52//------------------------------------------------------------------------------
61//------------------------------------------------------------------------------
62 llvm::SmallVector<const char *, 8> split_string(char *string) {
63 llvm::SmallVector<const char *, 8> args = {string};
64
65 while (*(++string) != '\0') {
66 if (*string == ' ') {
67 *string = '\0';
68 args.push_back(++string);
69 }
70 }
71
72 return args;
73 }
74
75//------------------------------------------------------------------------------
80//------------------------------------------------------------------------------
81 template<jit::float_scalar T, bool SAFE_MATH=false>
83 private:
85 std::unique_ptr<llvm::orc::LLJIT> jit;
87 std::map<graph::leaf_node<T, SAFE_MATH> *, std::vector<T>> kernel_arguments;
89 std::map<graph::leaf_node<T, SAFE_MATH> *, std::vector<T>> host_buffers;
91 std::map<graph::leaf_node<T, SAFE_MATH> *, size_t> arg_index;
92
93 public:
95 constexpr static size_t random_state_size = 1;
96
99
100//------------------------------------------------------------------------------
104//------------------------------------------------------------------------------
/// @brief Get the maximum number of concurrent instances.
///
/// std::thread::hardware_concurrency() is permitted to return 0 when the
/// value cannot be determined, so the result is clamped to at least one.
///
/// @returns The detected hardware concurrency, never less than 1.
static size_t max_concurrency() {
    const size_t detected = std::thread::hardware_concurrency();
    return detected > 0 ? detected : 1;
}
108
109//------------------------------------------------------------------------------
111//------------------------------------------------------------------------------
/// @brief Device description.
///
/// @returns The string "CPU".
static std::string device_type() {
    const std::string description("CPU");
    return description;
}
115
116//------------------------------------------------------------------------------
120//------------------------------------------------------------------------------
121 cpu_context(const size_t index) {
122 llvm::InitializeNativeTarget();
123 llvm::InitializeNativeTargetAsmPrinter();
124 }
125
126//------------------------------------------------------------------------------
132//------------------------------------------------------------------------------
133 void compile(const std::string kernel_source,
134 std::vector<std::string> names,
135 const bool add_reduction=false) {
136 std::ostringstream temp_stream;
137 temp_stream << reinterpret_cast<size_t> (this);
138 const std::string thread_id = temp_stream.str();
139
140 temp_stream.str(std::string());
141 temp_stream.clear();
142
143 temp_stream << "temp_" << thread_id << ".cpp";
144
145 const std::string filename = temp_stream.str();
146
147 if (jit::verbose) {
148 std::cout << "CPU info." << std::endl;
149 std::cout << " Command Line : " << std::endl;
150 }
151
152 char arg_string[] = CXX_ARGS;
153 llvm::SmallVector<const char *, 8> args = split_string(arg_string);
154 args.push_back(filename.c_str());
155#ifdef NDEBUG
156 args.push_back("-ffast-math");
157 args.push_back("-O3");
158#else
159 args.push_back("-debug-info-kind=standalone");
160#endif
161 if (jit::verbose) {
162 for (auto &arg : args) {
163 std::cout << " " << arg << std::endl;
164 }
165 }
166
167 clang::DiagnosticOptions diagnostic_options;
168 auto diagnostic_printer = std::make_unique<clang::TextDiagnosticPrinter> (llvm::errs(),
169 diagnostic_options);
170
171 auto diagnostic_ids = llvm::makeIntrusiveRefCnt<clang::DiagnosticIDs> ();
172 clang::DiagnosticsEngine diagnostic_engine(diagnostic_ids,
173 diagnostic_options,
174 diagnostic_printer.release());
175
176 auto invocation = std::make_shared<clang::CompilerInvocation> ();
177 clang::CompilerInvocation::CreateFromArgs(*(invocation.get()), args,
178 diagnostic_engine);
179
180 llvm::StringRef source_code_data(kernel_source);
181 auto buffer = llvm::MemoryBuffer::getMemBuffer(source_code_data);
182 invocation->getPreprocessorOpts().addRemappedFile(filename.c_str(),
183 buffer.release());
184
185 clang::CompilerInstance clang(invocation);
186 std::shared_ptr<llvm::vfs::FileSystem> VFS =
187 std::make_shared<llvm::vfs::InMemoryFileSystem> ();
188 clang.createDiagnostics(*VFS.get());
189
190 clang::TargetOptions target_options;
191 target_options.Triple = llvm::sys::getProcessTriple();
192 auto *target_info = clang::TargetInfo::CreateTargetInfo(diagnostic_engine,
193 target_options);
194 clang.setTarget(target_info);
195
196 clang::EmitLLVMOnlyAction action;
197 clang.ExecuteAction(action);
198
199 auto ir_module = action.takeModule();
200 auto context = std::unique_ptr<llvm::LLVMContext> (action.takeLLVMContext());
201
202 auto jit_try = llvm::orc::LLJITBuilder()
203#ifndef NDEBUG
204 .setPrePlatformSetup([](llvm::orc::LLJIT &J) {
205 return llvm::orc::enableDebuggerSupport(J);
206 })
207#endif
208 .create();
209 if (auto jiterror = jit_try.takeError()) {
210 std::cerr << "Failed to build JIT : " << toString(std::move(jiterror)) << std::endl;
211 exit(-1);
212 }
213 jit = std::move(jit_try.get());
214
215 auto error = jit->addIRModule(llvm::orc::ThreadSafeModule(std::move(ir_module),
216 llvm::orc::ThreadSafeContext(std::move(context))));
217
218#ifdef MACOS_LIB_RT
219 error = jit->linkStaticLibraryInto(jit->getMainJITDylib(), MACOS_LIB_RT);
220#endif
221 }
222
223//------------------------------------------------------------------------------
234//------------------------------------------------------------------------------
235 std::function<void(void)> create_kernel_call(const std::string kernel_name,
239 const size_t num_rays,
240 const jit::texture1d_list &tex1d_list,
241 const jit::texture2d_list &tex2d_list) {
242 auto entry = std::move(jit->lookup(kernel_name)).get();
243
244 std::map<size_t, T *> buffers;
245
246 for (auto &input : inputs) {
247 if (!kernel_arguments.contains(input.get())) {
248 backend::buffer<T> buffer = input->evaluate();
249 std::vector<T> arg(buffer.size());
250 memcpy(arg.data(), buffer.data(), buffer.size()*sizeof(T));
251 kernel_arguments[input.get()] = arg;
252 }
253 buffers[reinterpret_cast<size_t> (input.get())] = kernel_arguments[input.get()].data();
254 }
255 for (auto &output : outputs) {
256 if (!kernel_arguments.contains(output.get())) {
257 std::vector<T> arg(num_rays);
258 kernel_arguments[output.get()] = arg;
259 }
260 buffers[reinterpret_cast<size_t> (output.get())] = kernel_arguments[output.get()].data();
261 }
262
263 if (state.get()) {
264 auto kernel = entry.toPtr<void(*)(std::map<size_t, T *> &, typename graph::random_state_node<T, SAFE_MATH>::mt_state *)> ();
265
266 if (!kernel) {
267 std::cerr << "Failed to load function. " << kernel_name
268 << std::endl;
269 exit(-1);
270 }
271
272 if (jit::verbose) {
273 std::cout << " Function pointer: "
274 << reinterpret_cast<size_t> (kernel)
275 << std::endl;
276 }
277
278 return [kernel, buffers, state] () mutable {
279 kernel(buffers, state->data());
280 };
281 } else {
282 auto kernel = entry.toPtr<void(*)(std::map<size_t, T *> &)> ();
283
284 if (!kernel) {
285 std::cerr << "Failed to load function. " << kernel_name
286 << std::endl;
287 exit(-1);
288 }
289
290 if (jit::verbose) {
291 std::cout << " Function pointer: "
292 << reinterpret_cast<size_t> (kernel)
293 << std::endl;
294 }
295
296 return [kernel, buffers] () mutable {
297 kernel(buffers);
298 };
299 }
300 }
301
302//------------------------------------------------------------------------------
307//------------------------------------------------------------------------------
308 std::function<T(void)> create_max_call(graph::shared_leaf<T, SAFE_MATH> &argument,
309 std::function<void(void)> run) {
310 auto begin = kernel_arguments[argument.get()].cbegin();
311 auto end = kernel_arguments[argument.get()].cend();
312
313 return [run, begin, end] () mutable {
314 run();
315 if constexpr (jit::complex_scalar<T>) {
316 return *std::max_element(begin, end,
317 [] (const T a, const T b) {
318 return std::abs(a) < std::abs(b);
319 });
320 } else {
321 return *std::max_element(begin, end);
322 }
323 };
324 }
325
326//------------------------------------------------------------------------------
331//------------------------------------------------------------------------------
332 void wait() {
333 for (auto &item : host_buffers) {
334 memcpy(item.second.data(),
335 kernel_arguments[item.first].data(),
336 sizeof(T)*kernel_arguments[item.first].size());
337 }
338 }
339
340//------------------------------------------------------------------------------
345//------------------------------------------------------------------------------
346 void print_results(const size_t index,
348 for (auto &out : nodes) {
349 const T temp = kernel_arguments[out.get()][index];
350 if constexpr (jit::complex_scalar<T>) {
351 std::cout << std::real(temp) << " " << std::imag(temp) << " ";
352 } else {
353 std::cout << temp << " ";
354 }
355 }
356 std::cout << std::endl;
357 }
358
359//------------------------------------------------------------------------------
365//------------------------------------------------------------------------------
366 T check_value(const size_t index,
368 return kernel_arguments[node.get()][index];
369 }
370
371//------------------------------------------------------------------------------
376//------------------------------------------------------------------------------
378 T *source) {
379 memcpy(kernel_arguments[node.get()].data(),
380 source,
381 sizeof(T)*kernel_arguments[node.get()].size());
382 }
383
384//------------------------------------------------------------------------------
389//------------------------------------------------------------------------------
391 T *destination) {
392 memcpy(destination,
393 kernel_arguments[node.get()].data(),
394 sizeof(T)*kernel_arguments[node.get()].size());
395 }
396
397//------------------------------------------------------------------------------
401//------------------------------------------------------------------------------
402 void create_header(std::ostringstream &source_buffer) {
403 source_buffer << "#include <map>" << std::endl
404 << "#include <array>" << std::endl;
406 source_buffer << "#include <complex>" << std::endl;
407 source_buffer << "#include <special_functions.hpp>" << std::endl;
408 } else {
409 source_buffer << "#include <cmath>" << std::endl;
410 }
411 source_buffer << "using namespace std;" << std::endl;
412 }
413
414//------------------------------------------------------------------------------
428//------------------------------------------------------------------------------
429 void create_kernel_prefix(std::ostringstream &source_buffer,
430 const std::string name,
434 const size_t size,
435 const std::vector<bool> &is_constant,
436 jit::register_map &registers,
437 const jit::register_usage &usage,
438 jit::texture1d_list &textures1d,
439 jit::texture2d_list &textures2d) {
440 source_buffer << std::endl;
441 source_buffer << "extern \"C\" void " << name << "(" << std::endl;
442
443 source_buffer << " map<size_t, ";
444 jit::add_type<T> (source_buffer);
445 source_buffer << " *> &args";
446 if (state.get()) {
447 source_buffer << "," << std::endl
448 << " mt_state *" << jit::to_string('s', state.get());
449 }
450 source_buffer << ") {" << std::endl;
451
452 std::unordered_set<void *> used_args;
453 for (size_t i = 0, ie = inputs.size(); i < ie; i++) {
454 if (!used_args.contains(inputs[i].get())) {
455 source_buffer << " ";
456 if (is_constant[i]) {
457 source_buffer << "const ";
458 }
459 jit::add_type<T> (source_buffer);
460 source_buffer << " *" << jit::to_string('v', inputs[i].get())
461 << " = args["
462 << reinterpret_cast<size_t> (inputs[i].get())
463 << "];" << std::endl;
464 used_args.insert(inputs[i].get());
465 }
466 }
467 for (auto &output : outputs) {
468 if (!used_args.contains(output.get())) {
469 source_buffer << " ";
470 jit::add_type<T> (source_buffer);
471 source_buffer << " *" << jit::to_string('o', output.get())
472 << " = args["
473 << reinterpret_cast<size_t> (output.get())
474 << "];" << std::endl;
475 used_args.insert(output.get());
476 }
477 }
478 if (state.get()) {
479 registers[state.get()] = jit::to_string('r', state.get());
480 source_buffer << " mt_state &"
481 << registers[state.get()] << " = "
482 << jit::to_string('s', state.get()) << "[0];"
483#ifdef SHOW_USE_COUNT
484 << " // used " << usage.at(state.get())
485#endif
486 << std::endl;
487 }
488 source_buffer << " for (size_t i = 0; i < " << size << "; i++) {" << std::endl;
489
490 for (auto &input : inputs) {
491 registers[input.get()] = jit::to_string('r', input.get());
492 source_buffer << " const ";
493 jit::add_type<T> (source_buffer);
494 source_buffer << " " << registers[input.get()]
495 << " = " << jit::to_string('v', input.get())
496 << "[i]; // " << input->get_symbol()
497#ifdef SHOW_USE_COUNT
498 << " used " << usage.at(input.get())
499#endif
500 << std::endl;
501 }
502 }
503
504//------------------------------------------------------------------------------
514//------------------------------------------------------------------------------
515 void create_kernel_postfix(std::ostringstream &source_buffer,
519 jit::register_map &registers,
520 jit::register_map &indices,
521 const jit::register_usage &usage) {
522 std::unordered_set<void *> out_registers;
523 for (auto &[out, in] : setters) {
524 if (!out->is_match(in) &&
525 !out_registers.contains(out.get())) {
526 graph::shared_leaf<T, SAFE_MATH> a = out->compile(source_buffer,
527 registers,
528 indices,
529 usage);
530 source_buffer << " " << jit::to_string('v', in.get());
531 source_buffer << "[i] = ";
532 if constexpr (SAFE_MATH) {
533 if constexpr (jit::complex_scalar<T>) {
534 jit::add_type<T> (source_buffer);
535 source_buffer << " (";
536 source_buffer << "isnan(real(" << registers[a.get()]
537 << ")) ? 0.0 : real(" << registers[a.get()]
538 << "), ";
539 source_buffer << "isnan(imag(" << registers[a.get()]
540 << ")) ? 0.0 : imag(" << registers[a.get()]
541 << "));" << std::endl;
542 } else {
543 source_buffer << "isnan(" << registers[a.get()]
544 << ") ? 0.0 : " << registers[a.get()]
545 << ";" << std::endl;
546 }
547 } else {
548 source_buffer << registers[a.get()] << ";" << std::endl;
549 }
550 out_registers.insert(out.get());
551 }
552 }
553 for (auto &out : outputs) {
554 if (!graph::variable_cast(out).get() &&
555 !out_registers.contains(out.get())) {
556 graph::shared_leaf<T, SAFE_MATH> a = out->compile(source_buffer,
557 registers,
558 indices,
559 usage);
560 source_buffer << " " << jit::to_string('o', out.get());
561 source_buffer << "[i] = ";
562 if constexpr (SAFE_MATH) {
563 if constexpr (jit::complex_scalar<T>) {
564 jit::add_type<T> (source_buffer);
565 source_buffer << " (";
566 source_buffer << "isnan(real(" << registers[a.get()]
567 << ")) ? 0.0 : real(" << registers[a.get()]
568 << "), ";
569 source_buffer << "isnan(imag(" << registers[a.get()]
570 << ")) ? 0.0 : imag(" << registers[a.get()]
571 << "));" << std::endl;
572 } else {
573 source_buffer << "isnan(" << registers[a.get()]
574 << ") ? 0.0 : " << registers[a.get()]
575 << ";" << std::endl;
576 }
577 } else {
578 source_buffer << registers[a.get()] << ";" << std::endl;
579 }
580 out_registers.insert(out.get());
581 }
582 }
583
584 source_buffer << " }" << std::endl;
585 source_buffer << "}" << std::endl;
586 }
587
588//------------------------------------------------------------------------------
593//------------------------------------------------------------------------------
/// @brief Create a reduction kernel.
///
/// This implementation has an empty body and writes nothing to the buffer.
///
/// @param[in,out] source_buffer Source buffer stream. Left untouched.
/// @param[in]     size          Size of the input buffer. Not read.
void create_reduction(std::ostringstream &source_buffer,
                      const size_t size) {
}
596
597//------------------------------------------------------------------------------
606//------------------------------------------------------------------------------
608 if (!host_buffers.contains(node.get())) {
609 host_buffers[node.get()] = kernel_arguments[node.get()];
610 }
611 return host_buffers[node.get()].data();
612 }
613 };
614}
615
616#endif /* cpu_context_h */
Class representing a generic buffer.
Definition backend.hpp:29
size_t size() const
Get size of the buffer.
Definition backend.hpp:116
T * data()
Get a pointer to the basic memory buffer.
Definition backend.hpp:270
Class representing a cpu context.
Definition cpu_context.hpp:82
void print_results(const size_t index, const graph::output_nodes< T, SAFE_MATH > &nodes)
Print out the results.
Definition cpu_context.hpp:346
void copy_to_device(graph::shared_leaf< T, SAFE_MATH > node, T *source)
Copy buffer contents to the device.
Definition cpu_context.hpp:377
void create_kernel_prefix(std::ostringstream &source_buffer, const std::string name, graph::input_nodes< T, SAFE_MATH > &inputs, graph::output_nodes< T, SAFE_MATH > &outputs, graph::shared_random_state< T, SAFE_MATH > state, const size_t size, const std::vector< bool > &is_constant, jit::register_map &registers, const jit::register_usage &usage, jit::texture1d_list &textures1d, jit::texture2d_list &textures2d)
Create kernel prefix.
Definition cpu_context.hpp:429
T check_value(const size_t index, const graph::shared_leaf< T, SAFE_MATH > &node)
Check the value.
Definition cpu_context.hpp:366
void create_reduction(std::ostringstream &source_buffer, const size_t size)
Create a reduction kernel.
Definition cpu_context.hpp:594
cpu_context(const size_t index)
Construct a cpu context.
Definition cpu_context.hpp:121
void wait()
Hold the current thread until the command buffer has completed.
Definition cpu_context.hpp:332
static constexpr size_t random_state_size
Size of random state needed.
Definition cpu_context.hpp:95
std::function< void(void)> create_kernel_call(const std::string kernel_name, graph::input_nodes< T, SAFE_MATH > inputs, graph::output_nodes< T, SAFE_MATH > outputs, graph::shared_random_state< T, SAFE_MATH > state, const size_t num_rays, const jit::texture1d_list &tex1d_list, const jit::texture2d_list &tex2d_list)
Create a kernel calling function.
Definition cpu_context.hpp:235
void copy_to_host(const graph::shared_leaf< T, SAFE_MATH > node, T *destination)
Copy buffer contents to host.
Definition cpu_context.hpp:390
static std::string device_type()
Device description.
Definition cpu_context.hpp:112
T * get_buffer(graph::shared_leaf< T, SAFE_MATH > &node)
Get the buffer for a node.
Definition cpu_context.hpp:607
void compile(const std::string kernel_source, std::vector< std::string > names, const bool add_reduction=false)
Compile the kernels.
Definition cpu_context.hpp:133
int remaining_const_memory
Remaining constant memory in bytes. NOT USED.
Definition cpu_context.hpp:98
std::function< T(void)> create_max_call(graph::shared_leaf< T, SAFE_MATH > &argument, std::function< void(void)> run)
Create a max compute pipeline.
Definition cpu_context.hpp:308
void create_header(std::ostringstream &source_buffer)
Create the source header.
Definition cpu_context.hpp:402
static size_t max_concurrency()
Get the maximum number of concurrent instances.
Definition cpu_context.hpp:105
void create_kernel_postfix(std::ostringstream &source_buffer, graph::output_nodes< T, SAFE_MATH > &outputs, graph::map_nodes< T, SAFE_MATH > &setters, graph::shared_random_state< T, SAFE_MATH > state, jit::register_map &registers, jit::register_map &indices, const jit::register_usage &usage)
Create kernel postfix.
Definition cpu_context.hpp:515
Complex scalar concept.
Definition register.hpp:24
LLVM_ATTRIBUTE_USED void linkComponents()
This just exposes the functions so the debugger links.
Definition cpu_context.hpp:45
Name space for GPU backends.
Definition cpu_context.hpp:51
llvm::SmallVector< const char *, 8 > split_string(char *string)
Split a string by the space delimiter.
Definition cpu_context.hpp:62
std::shared_ptr< random_state_node< T, SAFE_MATH > > shared_random_state
Convenience type alias for shared random state nodes.
Definition random.hpp:272
std::vector< shared_variable< T, SAFE_MATH > > input_nodes
Convenience type alias for a vector of inputs.
Definition node.hpp:1730
shared_variable< T, SAFE_MATH > variable_cast(shared_leaf< T, SAFE_MATH > x)
Cast to a variable node.
Definition node.hpp:1746
std::shared_ptr< leaf_node< T, SAFE_MATH > > shared_leaf
Convenience type alias for shared leaf nodes.
Definition node.hpp:673
std::vector< std::pair< shared_leaf< T, SAFE_MATH >, shared_variable< T, SAFE_MATH > > > map_nodes
Convenience type alias for mapping end codes back to inputs.
Definition node.hpp:1734
std::vector< shared_leaf< T, SAFE_MATH > > output_nodes
Convenience type alias for a vector of output nodes.
Definition node.hpp:688
Name space for JIT functions.
Definition jit.hpp:41
std::map< void *, size_t > texture1d_list
Type alias for indexing 1D textures.
Definition register.hpp:262
std::map< void *, std::array< size_t, 2 > > texture2d_list
Type alias for indexing 2D textures.
Definition register.hpp:264
std::map< void *, size_t > register_usage
Type alias for counting register usage.
Definition register.hpp:258
std::map< void *, std::string > register_map
Type alias for mapping node pointers to register names.
Definition register.hpp:256
std::string to_string(const char prefix, const NODE *pointer)
Convert a graph::leaf_node pointer to a string.
Definition register.hpp:245
Name space for output files.
Definition output.hpp:16
Random constants and distributions.
Random state structure.
Definition random.hpp:29