intern/cycles/app/cycles_standalone.cpp | 412 ++++++++++++++++++++++++++++-- intern/cycles/app/cycles_xml.cpp | 116 ++++++++- intern/cycles/app/cycles_xml.h | 13 + intern/cycles/blender/blender_session.cpp | 2 + intern/cycles/device/device_cpu.cpp | 30 ++- intern/cycles/device/device_cuda.cpp | 6 +- intern/cycles/kernel/kernel_path.h | 45 ++-- intern/cycles/render/buffers.cpp | 20 ++ intern/cycles/render/buffers.h | 4 + intern/cycles/render/session.cpp | 188 ++++++++++++++ intern/cycles/render/session.h | 7 + intern/cycles/render/tile.cpp | 68 ++++- intern/cycles/render/tile.h | 22 +- intern/cycles/util/util_progress.h | 3 + 14 files changed, 871 insertions(+), 65 deletions(-) diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp index 90333eb..a147ccc 100644 --- a/intern/cycles/app/cycles_standalone.cpp +++ b/intern/cycles/app/cycles_standalone.cpp @@ -22,6 +22,9 @@ #include "scene.h" #include "session.h" +#include "mesh.h" +#include "object.h" + #include "util_args.h" #include "util_foreach.h" #include "util_function.h" @@ -30,6 +33,7 @@ #include "util_string.h" #include "util_time.h" #include "util_transform.h" +#include "util_xml.h" #ifdef WITH_CYCLES_STANDALONE_GUI #include "util_view.h" @@ -171,6 +180,7 @@ static void display_info(Progress& progress) sample = progress.get_sample(); progress.get_tile(tile, total_time, sample_time); progress.get_status(status, substatus); + int nopped=progress.nopped_tiles; /* number of tiles currently flagged as NOP */ if(substatus != "") status += ": " + substatus; @@ -183,8 +193,9 @@ static void display_info(Progress& progress) " Latency: %.4f" " Sample: %d" " Average: %.4f" - " Interactive: %s", - status.c_str(), total_time, latency, sample, sample_time, interactive.c_str()); + " Nopped: %i/%i" + " Interactive: %s PRESS S TO SAVE THE IMAGE", + status.c_str(), total_time, latency, sample, sample_time, nopped, /* size() yields size_t; cast so the argument matches %i */ (int)options.session->tile_manager.state.tiles.size(), interactive.c_str()); view_display_info(str.c_str()); diff --git 
a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index 13d4041..3dedbd6 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -822,6 +822,8 @@ void BlenderSession::update_status_progress() BLI_timestr(remaining_time, time_str, sizeof(time_str)); timestatus += "Remaining:" + string(time_str) + " | "; } + + /* nopped tiles / total tiles; size() yields size_t, cast so the argument matches %i */ timestatus += string_printf("Nopped:%i/%i | ", session->progress.nopped_tiles, (int)session->tile_manager.state.tiles.size() ); timestatus += string_printf("Mem:%.2fM, Peak:%.2fM", mem_used, mem_peak); diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index b0739dd..37cc3ee 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -175,10 +175,14 @@ public: break; } + int _sample=sample; + if ( tile.flags&RenderTile::TILE_FLAG_NOP ) + _sample = -sample-1+tile.num_samples; + for(int y = tile.y; y < tile.y + tile.h; y++) { for(int x = tile.x; x < tile.x + tile.w; x++) { kernel_cpu_avx_path_trace(&kg, render_buffer, rng_state, - sample, x, y, tile.offset, tile.stride); + _sample, x, y, tile.offset, tile.stride); } } @@ -197,10 +201,14 @@ public: break; } + int _sample=sample; + if ( tile.flags&RenderTile::TILE_FLAG_NOP ) + _sample = -sample-1+tile.num_samples; + for(int y = tile.y; y < tile.y + tile.h; y++) { for(int x = tile.x; x < tile.x + tile.w; x++) { kernel_cpu_sse41_path_trace(&kg, render_buffer, rng_state, - sample, x, y, tile.offset, tile.stride); + _sample, x, y, tile.offset, tile.stride); } } @@ -219,10 +227,14 @@ public: break; } + int _sample=sample; + if ( tile.flags&RenderTile::TILE_FLAG_NOP ) + _sample = -sample-1+tile.num_samples; + for(int y = tile.y; y < tile.y + tile.h; y++) { for(int x = tile.x; x < tile.x + tile.w; x++) { kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state, - sample, x, y, tile.offset, tile.stride); + _sample, x, y, tile.offset, tile.stride); } } @@ -241,10 +253,14 @@ 
public: break; } + int _sample=sample; + if ( tile.flags&RenderTile::TILE_FLAG_NOP ) + _sample = -sample-1+tile.num_samples; + for(int y = tile.y; y < tile.y + tile.h; y++) { for(int x = tile.x; x < tile.x + tile.w; x++) { kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state, - sample, x, y, tile.offset, tile.stride); + _sample, x, y, tile.offset, tile.stride); } } @@ -262,10 +278,14 @@ public: break; } + int _sample=sample; + if ( tile.flags&RenderTile::TILE_FLAG_NOP ) + _sample = -sample-1+tile.num_samples; + for(int y = tile.y; y < tile.y + tile.h; y++) { for(int x = tile.x; x < tile.x + tile.w; x++) { kernel_cpu_path_trace(&kg, render_buffer, rng_state, - sample, x, y, tile.offset, tile.stride); + _sample, x, y, tile.offset, tile.stride); } } diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 48d1c18..d2f5178 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1019,7 +1019,11 @@ public: break; } - path_trace(tile, sample, branched); + int _sample = sample; + if ( tile.flags&RenderTile::TILE_FLAG_NOP ) + _sample = -sample-1+tile.num_samples; + + path_trace(tile, _sample, branched); tile.sample = sample + 1; diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index d760af1..d511b33 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -1425,20 +1425,26 @@ ccl_device void kernel_path_trace(KernelGlobals *kg, RNG rng; Ray ray; - kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray); - /* integrate */ float4 L; - if(ray.t != 0.0f) - L = kernel_path_integrate(kg, &rng, sample, ray, buffer); - else - L = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + if ( sample >= 0 ) { + kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray); + + if(ray.t != 0.0f) + L = kernel_path_integrate(kg, &rng, sample, ray, buffer); + else + L = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + path_rng_end(kg, rng_state, rng); 
+ } else { + // take average value + ccl_global float4 *buf = (ccl_global float4*)buffer; + L = (*buf) * (1.0f / (-1.0f*sample)); + } /* accumulate result in output buffer */ kernel_write_pass_float4(buffer, sample, L); - - path_rng_end(kg, rng_state, rng); } #ifdef __BRANCHED_PATH__ @@ -1457,20 +1463,27 @@ ccl_device void kernel_branched_path_trace(KernelGlobals *kg, RNG rng; Ray ray; - kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray); - /* integrate */ float4 L; - if(ray.t != 0.0f) - L = kernel_branched_path_integrate(kg, &rng, sample, ray, buffer); - else - L = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + if ( sample >= 0 ) + { + kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray); + + if(ray.t != 0.0f) + L = kernel_branched_path_integrate(kg, &rng, sample, ray, buffer); + else + L = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + path_rng_end(kg, rng_state, rng); + } else { + // take average value + ccl_global float4 *buf = (ccl_global float4*)buffer; + L = (*buf) * (1.0f / (-1.0f*sample)); + } /* accumulate result in output buffer */ kernel_write_pass_float4(buffer, sample, L); - - path_rng_end(kg, rng_state, rng); } #endif diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index fc65922f..cc8ea26 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -93,6 +93,8 @@ RenderTile::RenderTile() rng_state = 0; buffers = NULL; + + flags = 0; } /* Render Buffers */ diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 27ab20b..fbb25ac 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -139,6 +140,9 @@ public: int offset; int stride; + int flags; + enum flags_enum { TILE_FLAG_NOP=1, }; + device_ptr buffer; device_ptr rng_state; diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 9fcd9fa..cb85e70 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -77,6 +77,16 
@@ Session::Session(const SessionParams& params_) gpu_need_tonemap = false; pause = false; kernels_loaded = false; + +#if CYCLES_DLL + params.background=false; + if ( !buffers ) { + buffers = new RenderBuffers(device); + display = new DisplayBuffer(device, params.display_buffer_linear); + } +#endif + + last_buffers_num_samples = 0; /* nop_tiles() reads this member before it first writes it, so give it a defined start value */ } Session::~Session() @@ -266,6 +297,9 @@ void Session::run_gpu() /* update status and timing */ update_status_time(); + /* determine tiles to refine */ + nop_tiles(); + /* path trace */ path_trace(); @@ -370,6 +404,7 @@ bool Session::acquire_tile(Device *tile_device, RenderTile& rtile) rtile.start_sample = tile_manager.state.sample; rtile.num_samples = tile_manager.state.num_samples; rtile.resolution = tile_manager.state.resolution_divider; + rtile.flags = tile.flags; tile_lock.unlock(); @@ -465,6 +500,156 @@ void Session::release_tile(RenderTile& rtile) update_status_time(); } +void Session::nop_tiles() +{ + // parameters + int min_num_samples=5; // run at least N samples, take first snapshot + int nth_sample=10; // then run N samples and check progress + float eps = .001f; // tiles with progress below this threshold are NOPPED + + if ( tile_manager.state.resolution_divider!=1 ) { + last_buffers.clear(); + for(int i=0; ibuffer.size() != last_buffers.size() ) + copy_to_last_buffers=true; + } else { + // use "tile_buffers" + if ( last_tile_buffers.size() != tile_buffers.size() ) + copy_to_last_buffers=true; + else + if ( last_tile_buffers.size() && tile_buffers[0]->buffer.size() != last_tile_buffers[0]->size() ) + copy_to_last_buffers=true; + } + + int pass_stride = tile_manager.params.get_passes_size(); + + if ( buffers_num_samples-last_buffers_num_samples>=nth_sample && + ( + ( buffers && buffers->buffer.size() == last_buffers.size() ) || + ( last_tile_buffers.size() && tile_buffers[0]->buffer.size() == last_tile_buffers[0]->size() ) + ) + ) { + // retrieve buffers + if ( !buffers_from_device ) { + if ( buffers ) + 
buffers->copy_from_device(); + else + for(int i=0; icopy_from_device(); + buffers_from_device=true; + } + // compare to last_buffer, tilewise + int noppedtiles=0; + int offset,stride; + tile_manager.params.get_offset_stride(offset, stride); + Tile* tile=0; + float* _A=0; + float* _B=0; + if ( buffers ) { + _A = (float*)last_buffers.data_pointer; + _B = (float*)buffers->buffer.data_pointer; + } + float avg_scale_A = 1.0f / (1.0f*last_buffers_num_samples); + float avg_scale_B = 1.0f / (1.0f*buffers_num_samples); + for(int d=0; dflags & Tile::TILE_FLAGS_NOP ) { + noppedtiles++; + continue; + } + if ( !buffers ) { + _A = (float*)last_tile_buffers[tile->index]->data_pointer; + _B = (float*)tile_buffers[tile->index]->buffer.data_pointer; + } + // evaluate progress on this tile + float maxVariation=0; + for(int x=0; xw; x++) { + for(int y=0; yh; y++) { + int index = 0; + if ( buffers ) + index += tile->x + x + (tile->y + y) * buffers->params.full_width; + else + index += x + y * tile_buffers[tile->index]->params.width; + index *= pass_stride; + + float4* A = (float4*)(_A + index); + float4* B = (float4*)(_B + index); + + float4 avgA = (*A) * avg_scale_A; + float4 avgB = (*B) * avg_scale_B; + float4 progress = avgA-avgB; + maxVariation = max( maxVariation, max( fabsf(progress.x), max(fabsf(progress.y), max(fabsf(progress.z), fabsf(progress.w))))); + if ( maxVariation > eps ) + break; + } + if ( maxVariation > eps ) + break; + } + if ( maxVariation > eps ) { + tile->flags &= ~Tile::TILE_FLAGS_NOP; + } else { + tile->flags |= Tile::TILE_FLAGS_NOP; + noppedtiles++; + } + } + } + progress.nopped_tiles = noppedtiles++; + // mark them as "not rendering" so device can process them + tile_manager.mark_tiles_rendering(false); + + copy_to_last_buffers=true; + } + + if ( copy_to_last_buffers ) { + // retrieve buffers + if ( !buffers_from_device ) { + if ( buffers ) + buffers->copy_from_device(); + else + for(int i=0; icopy_from_device(); + buffers_from_device=true; + } + // copy to 
last buffer + if ( buffers ) { + last_buffers.resize( buffers->buffer.data_size ); + memcpy( (void*)last_buffers.data_pointer, (void*)buffers->buffer.data_pointer, buffers->buffer.data_size * sizeof(float) ); + } else { + for(int i=0; i(); + last_tile_buffers[i]->resize( tile_buffers[i]->buffer.data_size ); + memcpy( (void*)last_tile_buffers[i]->data_pointer, (void*)tile_buffers[i]->buffer.data_pointer, tile_buffers[i]->buffer.data_size * sizeof(float) ); + } + } + last_buffers_num_samples = buffers_num_samples; + } +} + void Session::run_cpu() { bool tiles_written = false; @@ -548,6 +733,9 @@ void Session::run_cpu() /* update status and timing */ update_status_time(); + /* determine tiles to refine */ + nop_tiles(); + /* path trace */ path_trace(); diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index 9da7a0aa..d04c839 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -158,6 +160,11 @@ protected: void path_trace(); void reset_(BufferParams& params, int samples); + device_vector last_buffers; + int last_buffers_num_samples; + vector *> last_tile_buffers; + void nop_tiles(); + void run_cpu(); bool draw_cpu(BufferParams& params, DeviceDrawParams& draw_params); void reset_cpu(BufferParams& params, int samples); diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index 72bcdf9..f56b890 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -63,6 +63,7 @@ void TileManager::reset(BufferParams& params_, int num_samples_) state.num_tiles = 0; state.num_rendered_tiles = 0; state.num_samples = 0; + state.last_num_samples = 0; state.resolution_divider = divider; state.tiles.clear(); } @@ -78,13 +79,20 @@ void TileManager::gen_tiles_global() int resolution = state.resolution_divider; int image_w = max(1, params.width/resolution); int image_h = max(1, params.height/resolution); + int num_logical_devices = preserve_tile_device? 
num_devices: 1; + + tiles_generation_params params( resolution, image_w, image_h, num_logical_devices ); + if ( params.equals( gen_tiles_last_params ) ) { + mark_tiles_rendering(false); + return; + } + gen_tiles_last_params = params; state.tiles.clear(); int tile_w = (tile_size.x >= image_w)? 1: (image_w + tile_size.x - 1)/tile_size.x; int tile_h = (tile_size.y >= image_h)? 1: (image_h + tile_size.y - 1)/tile_size.y; - int num_logical_devices = preserve_tile_device? num_devices: 1; int num = min(image_h, num_logical_devices); int tile_index = 0; @@ -98,7 +106,7 @@ void TileManager::gen_tiles_global() int w = (tile_x == tile_w-1)? image_w - x: tile_size.x; int h = (tile_y == tile_h-1)? image_h - y: tile_size.y; - state.tiles.push_back(Tile(tile_index, x, y, w, h, cur_device)); + state.tiles.push_back(Tile(tile_index, x, y, w, h, cur_device, 0)); cur_tiles++; if(cur_tiles == tiles_per_device) { @@ -115,11 +123,21 @@ void TileManager::gen_tiles_sliced() int resolution = state.resolution_divider; int image_w = max(1, params.width/resolution); int image_h = max(1, params.height/resolution); + int num_logical_devices = preserve_tile_device? num_devices: 1; + + tiles_generation_params params( resolution, image_w, image_h, num_logical_devices ); + if ( params.equals( gen_tiles_last_params ) ) { + mark_tiles_rendering(false); + return; + } + gen_tiles_last_params = params; + + int tile_w = (tile_size.x >= image_w)? 1: (image_w + tile_size.x - 1)/tile_size.x; + int tile_h = (tile_size.y >= image_h)? 1: (image_h + tile_size.y - 1)/tile_size.y; state.tiles.clear(); - int num_logical_devices = preserve_tile_device? num_devices: 1; - int num = min(image_h, num_logical_devices); + int num = min(params.image_h, params.num_logical_devices); int tile_index = 0; for(int device = 0; device < num; device++) { @@ -136,7 +154,7 @@ void TileManager::gen_tiles_sliced() int w = (tile_x == tile_w-1)? image_w - x: tile_size.x; int h = (tile_y == tile_h-1)? 
device_h - y: tile_size.y; - state.tiles.push_back(Tile(tile_index, x, y + device_y, w, h, device)); + state.tiles.push_back(Tile(tile_index, x, y + device_y, w, h, device, 0)); } } } @@ -230,6 +248,14 @@ list::iterator TileManager::next_background_tile(int device, TileOrder til return best; } +void TileManager::mark_tiles_rendering(bool rendering) +{ + list::iterator iter; + for(iter = state.tiles.begin(); iter != state.tiles.end(); iter++) + iter->rendering = rendering; + state.num_rendered_tiles=0; +} + bool TileManager::next_tile(Tile& tile, int device) { list::iterator tile_it; @@ -250,6 +276,26 @@ bool TileManager::next_tile(Tile& tile, int device) return false; } +bool TileManager::next_tile(Tile** tile, int device) +{ + list::iterator tile_it; + + if (background) + tile_it = next_background_tile(device, tile_order); + else + tile_it = next_viewport_tile(device); + + if(tile_it != state.tiles.end()) { + tile_it->rendering = true; + *tile = &(*tile_it); + state.num_rendered_tiles++; + + return true; + } + + return false; +} + bool TileManager::done() { return (state.sample+state.num_samples >= num_samples && state.resolution_divider == 1); @@ -260,6 +306,8 @@ bool TileManager::next() if(done()) return false; + state.last_num_samples = state.num_samples; + if(progressive && state.resolution_divider > 1) { state.sample = 0; state.resolution_divider /= 2; @@ -267,12 +315,16 @@ bool TileManager::next() set_tiles(); } else { - state.sample++; - if(progressive) - state.num_samples = 1; + { + state.num_samples = 1;//2;//num_progressive_samples + state.sample += state.num_samples; + } else + { + state.sample++; state.num_samples = num_samples; + } state.resolution_divider = 1; set_tiles(); diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h index 7796518..9efd6c1 100644 --- a/intern/cycles/render/tile.h +++ b/intern/cycles/render/tile.h @@ -32,12 +32,14 @@ public: int x, y, w, h; int device; bool rendering; + int flags; + enum TILE_FLAGS { 
TILE_FLAGS_NOP=1, }; Tile() {} - Tile(int index_, int x_, int y_, int w_, int h_, int device_) - : index(index_), x(x_), y(y_), w(w_), h(h_), device(device_), rendering(false) {} + Tile(int index_, int x_, int y_, int w_, int h_, int device_, int flags_) + : index(index_), x(x_), y(y_), w(w_), h(h_), device(device_), rendering(false), flags(flags_) {} }; /* Tile order */ @@ -61,6 +63,7 @@ public: BufferParams buffer; int sample; int num_samples; + int last_num_samples; int resolution_divider; int num_tiles; int num_rendered_tiles; @@ -77,8 +80,12 @@ public: void set_samples(int num_samples); bool next(); bool next_tile(Tile& tile, int device = 0); + bool next_tile(Tile** tile, int device=0); bool done(); + void mark_tiles_rendering(bool rendering=false); + int get_num_devices() { return num_devices; } + void set_tile_order(TileOrder tile_order_) { tile_order = tile_order_; } protected: @@ -113,6 +120,17 @@ protected: void gen_tiles_global(); /* slices image into as much pieces as how many devices are rendering this image */ + typedef struct tiles_generation_params + { + tiles_generation_params() {resolution=0;image_w=0;image_h=0;num_logical_devices=0;}; + /* r: resolution divider, w/h: image size, n: number of logical devices */ tiles_generation_params(int r,int w, int h, int n) {resolution=r;image_w=w;image_h=h;num_logical_devices=n;}; + bool equals(tiles_generation_params& p) { return resolution==p.resolution && image_w==p.image_w && image_h==p.image_h && num_logical_devices==p.num_logical_devices; }; + int resolution; + int image_w; + int image_h; + int num_logical_devices; + } tiles_generation_params; + tiles_generation_params gen_tiles_last_params; void gen_tiles_sliced(); /* returns tiles for background render */ diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h index 5d1219b..a2acbc3 100644 --- a/intern/cycles/util/util_progress.h +++ b/intern/cycles/util/util_progress.h @@ -47,6 +47,7 @@ public: cancel = false; cancel_message = ""; cancel_cb = NULL; + nopped_tiles=0; } Progress(Progress& 
progress) @@ -232,6 +233,8 @@ public: update_cb = function; } + int nopped_tiles; + protected: thread_mutex progress_mutex; thread_mutex update_mutex;