15#include <unordered_set>
20#include "llvm/Support/VirtualFileSystem.h"
21#include "clang/Frontend/TextDiagnosticPrinter.h"
22#include "clang/Frontend/CompilerInvocation.h"
23#include "clang/Frontend/CompilerInstance.h"
24#include "clang/Basic/TargetInfo.h"
25#include "clang/CodeGen/CodeGenAction.h"
26#include "clang/Lex/PreprocessorOptions.h"
27#include "llvm/Support/TargetSelect.h"
28#include "llvm/ExecutionEngine/Orc/LLJIT.h"
30#include "llvm/ExecutionEngine/Orc/Debugging/DebuggerSupport.h"
31#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h"
33#include "llvm/Support/raw_ostream.h"
34#include "llvm/ADT/IntrusiveRefCntPtr.h"
35#include "llvm/ADT/SmallVector.h"
36#include "llvm/TargetParser/Host.h"
37#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
46 llvm::errs() << (
void *)&llvm_orc_registerJITLoaderGDBWrapper
47 << (
void *)&llvm_orc_registerJITLoaderGDBAllocAction;
63 llvm::SmallVector<const char *, 8> args = {
string};
65 while (*(++
string) !=
'\0') {
68 args.push_back(++
string);
81 template<jit::
float_scalar T,
bool SAFE_MATH=false>
85 std::unique_ptr<llvm::orc::LLJIT>
jit;
87 std::map<graph::leaf_node<T, SAFE_MATH> *, std::vector<T>> kernel_arguments;
89 std::map<graph::leaf_node<T, SAFE_MATH> *, std::vector<T>> host_buffers;
91 std::map<graph::leaf_node<T, SAFE_MATH> *,
size_t> arg_index;
106 return std::thread::hardware_concurrency();
122 llvm::InitializeNativeTarget();
123 llvm::InitializeNativeTargetAsmPrinter();
134 std::vector<std::string> names,
135 const bool add_reduction=
false) {
136 std::ostringstream temp_stream;
137 temp_stream << reinterpret_cast<size_t> (
this);
138 const std::string thread_id = temp_stream.str();
140 temp_stream.str(std::string());
143 temp_stream <<
"temp_" << thread_id <<
".cpp";
145 const std::string filename = temp_stream.str();
148 std::cout <<
"CPU info." << std::endl;
149 std::cout <<
" Command Line : " << std::endl;
152 char arg_string[] = CXX_ARGS;
153 llvm::SmallVector<const char *, 8> args =
split_string(arg_string);
154 args.push_back(filename.c_str());
156 args.push_back(
"-ffast-math");
157 args.push_back(
"-O3");
159 args.push_back(
"-debug-info-kind=standalone");
162 for (
auto &arg : args) {
163 std::cout <<
" " << arg << std::endl;
167 clang::DiagnosticOptions diagnostic_options;
168 auto diagnostic_printer = std::make_unique<clang::TextDiagnosticPrinter> (llvm::errs(),
171 auto diagnostic_ids = llvm::makeIntrusiveRefCnt<clang::DiagnosticIDs> ();
172 clang::DiagnosticsEngine diagnostic_engine(diagnostic_ids,
174 diagnostic_printer.release());
176 auto invocation = std::make_shared<clang::CompilerInvocation> ();
177 clang::CompilerInvocation::CreateFromArgs(*(invocation.get()), args,
180 llvm::StringRef source_code_data(kernel_source);
181 auto buffer = llvm::MemoryBuffer::getMemBuffer(source_code_data);
182 invocation->getPreprocessorOpts().addRemappedFile(filename.c_str(),
185 clang::CompilerInstance clang(invocation);
186 std::shared_ptr<llvm::vfs::FileSystem> VFS =
187 std::make_shared<llvm::vfs::InMemoryFileSystem> ();
188 clang.createDiagnostics(*VFS.get());
190 clang::TargetOptions target_options;
191 target_options.Triple = llvm::sys::getProcessTriple();
192 auto *target_info = clang::TargetInfo::CreateTargetInfo(diagnostic_engine,
194 clang.setTarget(target_info);
196 clang::EmitLLVMOnlyAction action;
197 clang.ExecuteAction(action);
199 auto ir_module = action.takeModule();
200 auto context = std::unique_ptr<llvm::LLVMContext> (action.takeLLVMContext());
202 auto jit_try = llvm::orc::LLJITBuilder()
204 .setPrePlatformSetup([](llvm::orc::LLJIT &J) {
205 return llvm::orc::enableDebuggerSupport(J);
209 if (
auto jiterror = jit_try.takeError()) {
210 std::cerr <<
"Failed to build JIT : " << toString(std::move(jiterror)) << std::endl;
213 jit = std::move(jit_try.get());
215 auto error =
jit->addIRModule(llvm::orc::ThreadSafeModule(std::move(ir_module),
216 llvm::orc::ThreadSafeContext(std::move(context))));
219 error =
jit->linkStaticLibraryInto(
jit->getMainJITDylib(), MACOS_LIB_RT);
239 const size_t num_rays,
242 auto entry = std::move(
jit->lookup(kernel_name)).get();
244 std::map<size_t, T *> buffers;
246 for (
auto &input : inputs) {
247 if (!kernel_arguments.contains(input.get())) {
249 std::vector<T> arg(buffer.
size());
250 memcpy(arg.data(), buffer.
data(), buffer.
size()*
sizeof(T));
251 kernel_arguments[input.get()] = arg;
253 buffers[
reinterpret_cast<size_t> (input.get())] = kernel_arguments[input.get()].data();
255 for (
auto &
output : outputs) {
256 if (!kernel_arguments.contains(
output.get())) {
257 std::vector<T> arg(num_rays);
258 kernel_arguments[
output.get()] = arg;
260 buffers[
reinterpret_cast<size_t> (
output.get())] = kernel_arguments[
output.get()].data();
267 std::cerr <<
"Failed to load function. " << kernel_name
273 std::cout <<
" Function pointer: "
274 <<
reinterpret_cast<size_t> (kernel)
278 return [kernel, buffers, state] ()
mutable {
279 kernel(buffers, state->data());
282 auto kernel = entry.toPtr<void(*)(std::map<size_t, T *> &)> ();
285 std::cerr <<
"Failed to load function. " << kernel_name
291 std::cout <<
" Function pointer: "
292 <<
reinterpret_cast<size_t> (kernel)
296 return [kernel, buffers] ()
mutable {
309 std::function<
void(
void)> run) {
310 auto begin = kernel_arguments[argument.get()].cbegin();
311 auto end = kernel_arguments[argument.get()].cend();
313 return [run, begin, end] ()
mutable {
316 return *std::max_element(begin, end,
317 [] (
const T a,
const T b) {
318 return std::abs(a) < std::abs(b);
321 return *std::max_element(begin, end);
333 for (
auto &item : host_buffers) {
334 memcpy(item.second.data(),
335 kernel_arguments[item.first].data(),
336 sizeof(T)*kernel_arguments[item.first].size());
348 for (
auto &out : nodes) {
349 const T temp = kernel_arguments[out.get()][index];
351 std::cout << std::real(temp) <<
" " << std::imag(temp) <<
" ";
353 std::cout << temp <<
" ";
356 std::cout << std::endl;
368 return kernel_arguments[node.get()][index];
379 memcpy(kernel_arguments[node.get()].data(),
381 sizeof(T)*kernel_arguments[node.get()].size());
393 kernel_arguments[node.get()].data(),
394 sizeof(T)*kernel_arguments[node.get()].size());
403 source_buffer <<
"#include <map>" << std::endl
404 <<
"#include <array>" << std::endl;
406 source_buffer <<
"#include <complex>" << std::endl;
407 source_buffer <<
"#include <special_functions.hpp>" << std::endl;
409 source_buffer <<
"#include <cmath>" << std::endl;
411 source_buffer <<
"using namespace std;" << std::endl;
430 const std::string name,
435 const std::vector<bool> &is_constant,
440 source_buffer << std::endl;
441 source_buffer <<
"extern \"C\" void " << name <<
"(" << std::endl;
443 source_buffer <<
" map<size_t, ";
444 jit::add_type<T> (source_buffer);
445 source_buffer <<
" *> &args";
447 source_buffer <<
"," << std::endl
450 source_buffer <<
") {" << std::endl;
452 std::unordered_set<void *> used_args;
453 for (
size_t i = 0, ie = inputs.size(); i < ie; i++) {
454 if (!used_args.contains(inputs[i].get())) {
455 source_buffer <<
" ";
456 if (is_constant[i]) {
457 source_buffer <<
"const ";
459 jit::add_type<T> (source_buffer);
462 <<
reinterpret_cast<size_t> (inputs[i].get())
463 <<
"];" << std::endl;
464 used_args.insert(inputs[i].get());
467 for (
auto &
output : outputs) {
468 if (!used_args.contains(
output.get())) {
469 source_buffer <<
" ";
470 jit::add_type<T> (source_buffer);
473 <<
reinterpret_cast<size_t> (
output.get())
474 <<
"];" << std::endl;
475 used_args.insert(
output.get());
480 source_buffer <<
" mt_state &"
481 << registers[state.get()] <<
" = "
484 <<
" // used " << usage.at(state.get())
488 source_buffer <<
" for (size_t i = 0; i < " << size <<
"; i++) {" << std::endl;
490 for (
auto &input : inputs) {
492 source_buffer <<
" const ";
493 jit::add_type<T> (source_buffer);
494 source_buffer <<
" " << registers[input.get()]
496 <<
"[i]; // " << input->get_symbol()
498 <<
" used " << usage.at(input.get())
522 std::unordered_set<void *> out_registers;
523 for (
auto &[out, in] : setters) {
524 if (!out->is_match(in) &&
525 !out_registers.contains(out.get())) {
531 source_buffer <<
"[i] = ";
532 if constexpr (SAFE_MATH) {
534 jit::add_type<T> (source_buffer);
535 source_buffer <<
" (";
536 source_buffer <<
"isnan(real(" << registers[a.get()]
537 <<
")) ? 0.0 : real(" << registers[a.get()]
539 source_buffer <<
"isnan(imag(" << registers[a.get()]
540 <<
")) ? 0.0 : imag(" << registers[a.get()]
541 <<
"));" << std::endl;
543 source_buffer <<
"isnan(" << registers[a.get()]
544 <<
") ? 0.0 : " << registers[a.get()]
548 source_buffer << registers[a.get()] <<
";" << std::endl;
550 out_registers.insert(out.get());
553 for (
auto &out : outputs) {
555 !out_registers.contains(out.get())) {
561 source_buffer <<
"[i] = ";
562 if constexpr (SAFE_MATH) {
564 jit::add_type<T> (source_buffer);
565 source_buffer <<
" (";
566 source_buffer <<
"isnan(real(" << registers[a.get()]
567 <<
")) ? 0.0 : real(" << registers[a.get()]
569 source_buffer <<
"isnan(imag(" << registers[a.get()]
570 <<
")) ? 0.0 : imag(" << registers[a.get()]
571 <<
"));" << std::endl;
573 source_buffer <<
"isnan(" << registers[a.get()]
574 <<
") ? 0.0 : " << registers[a.get()]
578 source_buffer << registers[a.get()] <<
";" << std::endl;
580 out_registers.insert(out.get());
584 source_buffer <<
" }" << std::endl;
585 source_buffer <<
"}" << std::endl;
595 const size_t size) {}
608 if (!host_buffers.contains(node.get())) {
609 host_buffers[node.get()] = kernel_arguments[node.get()];
611 return host_buffers[node.get()].data();
Class representing a generic buffer.
Definition backend.hpp:29
size_t size() const
Get size of the buffer.
Definition backend.hpp:116
T * data()
Get a pointer to the basic memory buffer.
Definition backend.hpp:270
Class representing a cpu context.
Definition cpu_context.hpp:82
void print_results(const size_t index, const graph::output_nodes< T, SAFE_MATH > &nodes)
Print out the results.
Definition cpu_context.hpp:346
void copy_to_device(graph::shared_leaf< T, SAFE_MATH > node, T *source)
Copy buffer contents to the device.
Definition cpu_context.hpp:377
void create_kernel_prefix(std::ostringstream &source_buffer, const std::string name, graph::input_nodes< T, SAFE_MATH > &inputs, graph::output_nodes< T, SAFE_MATH > &outputs, graph::shared_random_state< T, SAFE_MATH > state, const size_t size, const std::vector< bool > &is_constant, jit::register_map &registers, const jit::register_usage &usage, jit::texture1d_list &textures1d, jit::texture2d_list &textures2d)
Create kernel prefix.
Definition cpu_context.hpp:429
T check_value(const size_t index, const graph::shared_leaf< T, SAFE_MATH > &node)
Check the value.
Definition cpu_context.hpp:366
void create_reduction(std::ostringstream &source_buffer, const size_t size)
Create a reduction kernel.
Definition cpu_context.hpp:594
cpu_context(const size_t index)
Construct a cpu context.
Definition cpu_context.hpp:121
void wait()
Hold the current thread until the command buffer has completed.
Definition cpu_context.hpp:332
static constexpr size_t random_state_size
Size of random state needed.
Definition cpu_context.hpp:95
std::function< void(void)> create_kernel_call(const std::string kernel_name, graph::input_nodes< T, SAFE_MATH > inputs, graph::output_nodes< T, SAFE_MATH > outputs, graph::shared_random_state< T, SAFE_MATH > state, const size_t num_rays, const jit::texture1d_list &tex1d_list, const jit::texture2d_list &tex2d_list)
Create a kernel calling function.
Definition cpu_context.hpp:235
void copy_to_host(const graph::shared_leaf< T, SAFE_MATH > node, T *destination)
Copy buffer contents to host.
Definition cpu_context.hpp:390
static std::string device_type()
Device description.
Definition cpu_context.hpp:112
T * get_buffer(graph::shared_leaf< T, SAFE_MATH > &node)
Get the buffer for a node.
Definition cpu_context.hpp:607
void compile(const std::string kernel_source, std::vector< std::string > names, const bool add_reduction=false)
Compile the kernels.
Definition cpu_context.hpp:133
int remaining_const_memory
Remaining constant memory in bytes. NOT USED.
Definition cpu_context.hpp:98
std::function< T(void)> create_max_call(graph::shared_leaf< T, SAFE_MATH > &argument, std::function< void(void)> run)
Create a max compute pipeline.
Definition cpu_context.hpp:308
void create_header(std::ostringstream &source_buffer)
Create the source header.
Definition cpu_context.hpp:402
static size_t max_concurrency()
Get the maximum number of concurrent instances.
Definition cpu_context.hpp:105
void create_kernel_postfix(std::ostringstream &source_buffer, graph::output_nodes< T, SAFE_MATH > &outputs, graph::map_nodes< T, SAFE_MATH > &setters, graph::shared_random_state< T, SAFE_MATH > state, jit::register_map &registers, jit::register_map &indices, const jit::register_usage &usage)
Create kernel postfix.
Definition cpu_context.hpp:515
Complex scalar concept.
Definition register.hpp:24
LLVM_ATTRIBUTE_USED void linkComponents()
This just exposes the functions so the debugger links.
Definition cpu_context.hpp:45
Namespace for GPU backends.
Definition cpu_context.hpp:51
llvm::SmallVector< const char *, 8 > split_string(char *string)
Split a string by the space delimiter.
Definition cpu_context.hpp:62
std::shared_ptr< random_state_node< T, SAFE_MATH > > shared_random_state
Convenience type alias for shared random state nodes.
Definition random.hpp:272
std::vector< shared_variable< T, SAFE_MATH > > input_nodes
Convenience type alias for a vector of inputs.
Definition node.hpp:1730
shared_variable< T, SAFE_MATH > variable_cast(shared_leaf< T, SAFE_MATH > x)
Cast to a variable node.
Definition node.hpp:1746
std::shared_ptr< leaf_node< T, SAFE_MATH > > shared_leaf
Convenience type alias for shared leaf nodes.
Definition node.hpp:673
std::vector< std::pair< shared_leaf< T, SAFE_MATH >, shared_variable< T, SAFE_MATH > > > map_nodes
Convenience type alias for mapping end codes back to inputs.
Definition node.hpp:1734
std::vector< shared_leaf< T, SAFE_MATH > > output_nodes
Convenience type alias for a vector of output nodes.
Definition node.hpp:688
Namespace for JIT functions.
Definition jit.hpp:41
std::map< void *, size_t > texture1d_list
Type alias for indexing 1D textures.
Definition register.hpp:262
std::map< void *, std::array< size_t, 2 > > texture2d_list
Type alias for indexing 2D textures.
Definition register.hpp:264
std::map< void *, size_t > register_usage
Type alias for counting register usage.
Definition register.hpp:258
std::map< void *, std::string > register_map
Type alias for mapping node pointers to register names.
Definition register.hpp:256
std::string to_string(const char prefix, const NODE *pointer)
Convert a graph::leaf_node pointer to a string.
Definition register.hpp:245
Namespace for output files.
Definition output.hpp:16
Random constants and distributions.
Random state structure.
Definition random.hpp:29