intern/cycles/app/cycles_standalone.cpp | 412 ++++++++++++++++++++++++++++-- intern/cycles/app/cycles_xml.cpp | 116 ++++++++- intern/cycles/app/cycles_xml.h | 13 + intern/cycles/blender/blender_session.cpp | 5 + intern/cycles/device/device_cpu.cpp | 5 + intern/cycles/device/device_cuda.cpp | 5 + intern/cycles/kernel/kernel_film.h | 14 +- intern/cycles/kernel/kernel_path.h | 12 + intern/cycles/kernel/kernel_types.h | 3 + intern/cycles/render/buffers.cpp | 84 +++++- intern/cycles/render/buffers.h | 4 + intern/cycles/render/film.cpp | 10 + intern/cycles/render/session.cpp | 262 +++++++++++++++++++ intern/cycles/render/session.h | 7 + intern/cycles/render/tile.cpp | 68 ++++- intern/cycles/render/tile.h | 24 +- intern/cycles/util/util_progress.h | 3 + 17 files changed, 993 insertions(+), 54 deletions(-) diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp index 915ef96..1617f5c 100644 --- a/intern/cycles/app/cycles_xml.cpp +++ b/intern/cycles/app/cycles_xml.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -45,17 +46,7 @@ CCL_NAMESPACE_BEGIN -/* XML reading state */ -struct XMLReadState { - Scene *scene; /* scene pointer */ - Transform tfm; /* current transform state */ - bool smooth; /* smooth normal state */ - int shader; /* current shader */ - string base; /* base path to current file*/ - float dicing_rate; /* current dicing rate */ - Mesh::DisplacementMethod displacement_method; -}; /* Attribute Reading */ @@ -248,6 +239,54 @@ static ShaderSocketType xml_read_socket_type(pugi::xml_node node, const char *na return SHADER_SOCKET_UNDEFINED; } +/* Embedded File */ +static void xml_read_embedded_file(const XMLReadState& state, pugi::xml_node node) +{ + std::string filename; + xml_read_string(&filename, node, "dst"); + filename = path_join(state.base, filename); + + std::string data; + xml_read_string(&data, node, "data"); + + bool fileexists=false; + { + FILE* test = fopen(filename.c_str(),"rb"); + if ( test ) + { + 
fileexists=true; + fclose(test); + } + } + + if ( !fileexists ) + { + FILE* dump = fopen(filename.c_str(),"wb"); + + unsigned char buffer[1000]; + int buffptr=0; + for(int i=0; idirection, node, "direction"); snode = normal; } + else if(string_iequals(node.name(), "bump")) { + BumpNode *bump = new BumpNode(); + xml_read_bool(&bump->invert, node, "invert"); + snode = bump; + } else if(string_iequals(node.name(), "mapping")) { snode = new MappingNode(); } @@ -849,9 +893,11 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node) /* read vertices and polygons, RIB style */ vector P; + vector N; vector verts, nverts; xml_read_float3_array(P, node, "P"); + xml_read_float3_array(N, node, "N"); xml_read_int_array(verts, node, "verts"); xml_read_int_array(nverts, node, "nverts"); @@ -897,6 +943,8 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node) DiagSplit dsplit(sdparams); sdmesh.tessellate(&dsplit); + + mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL); } else { /* create vertices */ @@ -920,10 +968,18 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node) index_offset += nverts[i]; } + + if ( P.size() == N.size() && + N.size() ) + { + Attribute* attr_N = mesh->attributes.add(ATTR_STD_VERTEX_NORMAL); + float3 *_N = attr_N->data_float3(); + memcpy( _N, &N[0], N.size()*sizeof(float3) ); + } } /* temporary for test compatibility */ - mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL); + //mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL); } /* Patch */ @@ -1094,7 +1150,7 @@ static void xml_read_state(XMLReadState& state, pugi::xml_node node) static void xml_read_include(const XMLReadState& state, const string& src); -static void xml_read_scene(const XMLReadState& state, pugi::xml_node scene_node) +void xml_read_scene(const XMLReadState& state, pugi::xml_node scene_node) { for(pugi::xml_node node = scene_node.first_child(); node; node = node.next_sibling()) { if(string_iequals(node.name(), "film")) { @@ -1139,6 
+1195,9 @@ static void xml_read_scene(const XMLReadState& state, pugi::xml_node scene_node) if(xml_read_string(&src, node, "src")) xml_read_include(state, src); } + else if(string_iequals(node.name(), "embedded_file")) { + xml_read_embedded_file(state, node); + } else fprintf(stderr, "Unknown node \"%s\".\n", node.name()); } @@ -1185,5 +1244,38 @@ void xml_read_file(Scene *scene, const char *filepath) scene->params.bvh_type = SceneParams::BVH_STATIC; } +void xml_read_buffer(Scene *scene, char *buffer) +{ + XMLReadState state; + + state.scene = scene; + state.tfm = transform_identity(); + state.shader = scene->default_surface; + state.smooth = false; + state.dicing_rate = 0.1f; + /* state.base is left empty: an in-memory buffer has no file path */ + + /* buffer must be NUL-terminated XML text; its length is taken with strlen() */ + /* open XML document */ + pugi::xml_document doc; + pugi::xml_parse_result parse_result; + + //string path = path_join(state.base, src); + parse_result = doc.load_buffer(buffer, strlen(buffer));//.load_file(path.c_str()); + + if(parse_result) { + XMLReadState substate = state; + //substate.base = path_dirname(path); + + xml_read_scene(substate, doc); + } + else { + fprintf(stderr, "XML buffer read error: %s\n", parse_result.description()); /* report why parsing failed before exiting */ + exit(EXIT_FAILURE); + } + + scene->params.bvh_type = SceneParams::BVH_STATIC; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/app/cycles_xml.h b/intern/cycles/app/cycles_xml.h index 96bc79c..b2e44e0 100644 --- a/intern/cycles/app/cycles_xml.h +++ b/intern/cycles/app/cycles_xml.h @@ -22,11 +22,24 @@ CCL_NAMESPACE_BEGIN class Scene; void xml_read_file(Scene *scene, const char *filepath); +void xml_read_buffer(Scene* scene, char *buffer ); /* macros for importing */ #define RAD2DEGF(_rad) ((_rad) * (float)(180.0 / M_PI)) #define DEG2RADF(_deg) ((_deg) * (float)(M_PI / 180.0)) +/* XML reading state */ + +struct XMLReadState { + Scene *scene; /* scene pointer */ + Transform tfm; /* current transform state */ + bool smooth; /* smooth normal state */ + int 
shader; /* current shader */ + string base; /* base path to current file*/ + float dicing_rate; /* current dicing rate */ + Mesh::DisplacementMethod displacement_method; +}; + CCL_NAMESPACE_END #endif /* __CYCLES_XML_H__ */ diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index 0f31e55..a2d698c 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -421,6 +421,9 @@ void BlenderSession::render() vector passes; Pass::add(PASS_COMBINED, passes); + // pixelwise sample count + Pass::add(PASS_SAMPLE_COUNT, passes); + if(session_params.device.advanced_shading) { /* loop over passes */ @@ -802,6 +805,8 @@ void BlenderSession::update_status_progress() BLI_timestr(remaining_time, time_str, sizeof(time_str)); timestatus += "Remaining:" + string(time_str) + " | "; } + + timestatus += string_printf("Nopped:%i/%i | ", session->progress.nopped_tiles, (int)session->tile_manager.state.tiles.size() ); /* %i expects int; size() returns size_t */ timestatus += string_printf("Mem:%.2fM, Peak:%.2fM", mem_used, mem_peak); diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 7308d03..94cdf37 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -163,6 +163,11 @@ public: RenderTile tile; while(task.acquire_tile(this, tile)) { + if ( tile.flags&RenderTile::TILE_FLAG_NOP) { + task.release_tile(tile); + continue; + } + float *render_buffer = (float*)tile.buffer; uint *rng_state = (uint*)tile.rng_state; int start_sample = tile.start_sample; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 0aa09ac..1444c81 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1019,6 +1019,11 @@ public: /* keep rendering tiles until done */ while(task->acquire_tile(this, tile)) { + if ( tile.flags&RenderTile::TILE_FLAG_NOP) { + task->release_tile(tile); + continue; + } + int start_sample = 
tile.start_sample; int end_sample = tile.start_sample + tile.num_samples; diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h index dc5f6e7..f61bef6 100644 --- a/intern/cycles/kernel/kernel_film.h +++ b/intern/cycles/kernel/kernel_film.h @@ -55,6 +55,11 @@ ccl_device void kernel_film_convert_to_byte(KernelGlobals *kg, rgba += index; buffer += index*kernel_data.film.pass_stride; + if ( kernel_data.film.pass_sample_count ) { + //sample_scale = 1.0f/(*((int*)(buffer+kernel_data.film.pass_sample_count))); + sample_scale = *(buffer+kernel_data.film.pass_sample_count+1); + } + /* map colors */ float4 irradiance = *((ccl_global float4*)buffer); float4 float_result = film_map(kg, irradiance, sample_scale); @@ -70,7 +75,14 @@ ccl_device void kernel_film_convert_to_half_float(KernelGlobals *kg, /* buffer offset */ int index = offset + x + y*stride; - ccl_global float4 *in = (ccl_global float4*)(buffer + index*kernel_data.film.pass_stride); + buffer += index*kernel_data.film.pass_stride; + + if ( kernel_data.film.pass_sample_count ) { + //sample_scale = 1.0f/(*((int*)(buffer+kernel_data.film.pass_sample_count))); + sample_scale = *(buffer+kernel_data.film.pass_sample_count+1); + } + + ccl_global float4 *in = (ccl_global float4*)buffer; ccl_global half *out = (ccl_global half*)rgba + index*4; float exposure = kernel_data.film.exposure; diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 83bceed..ae27670 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -1112,6 +1112,12 @@ ccl_device void kernel_path_trace(KernelGlobals *kg, kernel_write_pass_float4(buffer, sample, L); path_rng_end(kg, rng_state, rng); + + if ( kernel_data.film.pass_sample_count ) { + int count = (sample==0) ? 
1 : (*((int*)(buffer+kernel_data.film.pass_sample_count)))+1; + (*((int*)(buffer+kernel_data.film.pass_sample_count))) = count; + *(buffer+kernel_data.film.pass_sample_count+1) = 1.0f/count; + } } #ifdef __BRANCHED_PATH__ @@ -1144,6 +1150,12 @@ ccl_device void kernel_branched_path_trace(KernelGlobals *kg, kernel_write_pass_float4(buffer, sample, L); path_rng_end(kg, rng_state, rng); + + if ( kernel_data.film.pass_sample_count ) { + int count = (sample==0) ? 1 : (*((int*)(buffer+kernel_data.film.pass_sample_count)))+1; + (*((int*)(buffer+kernel_data.film.pass_sample_count))) = count; + *(buffer+kernel_data.film.pass_sample_count+1) = 1.0f/count; + } } #endif diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index d81909a..7c0c98b 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -311,6 +311,7 @@ typedef enum PassType { PASS_SUBSURFACE_INDIRECT = 8388608, PASS_SUBSURFACE_COLOR = 16777216, PASS_LIGHT = 33554432, /* no real pass, used to force use_light_pass */ + PASS_SAMPLE_COUNT = 67108864, } PassType; #define PASS_ALL (~0) @@ -825,6 +826,8 @@ typedef struct KernelFilm { float mist_start; float mist_inv_depth; float mist_falloff; + + int pass_sample_count; } KernelFilm; typedef struct KernelBackground { diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index fc65922f..731e517 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -43,6 +43,9 @@ BufferParams::BufferParams() full_height = 0; Pass::add(PASS_COMBINED, passes); + + // pixelwise sample count + Pass::add(PASS_SAMPLE_COUNT, passes); } void BufferParams::get_offset_stride(int& offset, int& stride) @@ -93,6 +96,8 @@ RenderTile::RenderTile() rng_state = 0; buffers = NULL; + + flags = 0; } /* Render Buffers */ @@ -158,6 +163,16 @@ bool RenderBuffers::copy_from_device() bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float 
*pixels) { + bool pass_sample_count_found=false; + int pass_sample_count=0; + foreach(Pass& pass, params.passes) { + if(pass.type == PASS_SAMPLE_COUNT) { + pass_sample_count_found=true; + break; + } + pass_sample_count += pass.components; + } + int pass_offset = 0; foreach(Pass& pass, params.passes) { @@ -166,6 +181,9 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int continue; } + pass_sample_count -= pass_offset; + float scale_correction=1.0f; + float *in = (float*)buffer.data_pointer + pass_offset; int pass_stride = params.get_passes_size(); @@ -181,19 +199,37 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int if(type == PASS_DEPTH) { for(int i = 0; i < size; i++, in += pass_stride, pixels++) { float f = *in; - pixels[0] = (f == 0.0f)? 1e10f: f*scale_exposure; + + if ( pass.filter && pass_sample_count_found ) { + //scale_correction = (float)sample / (*((int*)in+pass_sample_count)); + scale_correction = (float)sample * (*(in+pass_sample_count+1)); + } + + pixels[0] = (f == 0.0f)? 
1e10f: f*scale_exposure *scale_correction; } } else if(type == PASS_MIST) { for(int i = 0; i < size; i++, in += pass_stride, pixels++) { float f = *in; - pixels[0] = clamp(f*scale_exposure, 0.0f, 1.0f); + + if ( pass.filter && pass_sample_count_found ) { + //scale_correction = (float)sample / (*((int*)in+pass_sample_count)); + scale_correction = (float)sample * (*(in+pass_sample_count+1)); + } + + pixels[0] = clamp(f*scale_exposure *scale_correction, 0.0f, 1.0f); } } else { for(int i = 0; i < size; i++, in += pass_stride, pixels++) { float f = *in; - pixels[0] = f*scale_exposure; + + if ( pass.filter && pass_sample_count_found ) { + //scale_correction = (float)sample / (*((int*)in+pass_sample_count)); + scale_correction = (float)sample * (*(in+pass_sample_count+1)); + } + + pixels[0] = f*scale_exposure *scale_correction; } } } @@ -238,9 +274,14 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int for(int i = 0; i < size; i++, in += pass_stride, pixels += 3) { float3 f = make_float3(in[0], in[1], in[2]); - pixels[0] = f.x*scale_exposure; - pixels[1] = f.y*scale_exposure; - pixels[2] = f.z*scale_exposure; + if ( pass.filter && pass_sample_count_found ) { + //scale_correction = (float)sample / (*((int*)in+pass_sample_count)); + scale_correction = (float)sample * (*(in+pass_sample_count+1)); + } + + pixels[0] = f.x*scale_exposure *scale_correction; + pixels[1] = f.y*scale_exposure *scale_correction; + pixels[2] = f.z*scale_exposure *scale_correction; } } } @@ -285,12 +326,17 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int for(int i = 0; i < size; i++, in += pass_stride, pixels += 4) { float4 f = make_float4(in[0], in[1], in[2], in[3]); - pixels[0] = f.x*scale_exposure; - pixels[1] = f.y*scale_exposure; - pixels[2] = f.z*scale_exposure; + if ( pass.filter && pass_sample_count_found ) { + //scale_correction = (float)sample / (*((int*)in+pass_sample_count)); + scale_correction = (float)sample * 
(*(in+pass_sample_count+1)); + } + + pixels[0] = f.x*scale_exposure *scale_correction; + pixels[1] = f.y*scale_exposure *scale_correction; + pixels[2] = f.z*scale_exposure *scale_correction; /* clamp since alpha might be > 1.0 due to russian roulette */ - pixels[3] = clamp(f.w*scale, 0.0f, 1.0f); + pixels[3] = clamp(f.w*scale *scale_correction, 0.0f, 1.0f); } } } @@ -376,6 +422,24 @@ bool DisplayBuffer::draw_ready() return (draw_width != 0 && draw_height != 0); } +void* DisplayBuffer::rgba_ptr() +{ + int w = draw_width; + int h = draw_height; + + if(w == 0 || h == 0) + return 0; + + if(half_float) + return 0; + + /* read buffer from device */ + device_memory& rgba = rgba_data(); + device->pixels_copy_from(rgba, 0, w, h); + + return (void*)rgba.data_pointer; +} + void DisplayBuffer::write(Device *device, const string& filename) { int w = draw_width; diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 27ab20b..fbb25ac 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -113,6 +113,7 @@ public: void reset(Device *device, BufferParams& params); void write(Device *device, const string& filename); + void* rgba_ptr(); void draw_set(int width, int height); void draw(Device *device, const DeviceDrawParams& draw_params); @@ -139,6 +140,9 @@ public: int offset; int stride; + int flags; + enum flags_enum { TILE_FLAG_NOP=1, }; + device_ptr buffer; device_ptr rng_state; diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp index c1aefbc..204055b 100644 --- a/intern/cycles/render/film.cpp +++ b/intern/cycles/render/film.cpp @@ -158,6 +158,10 @@ void Pass::add(PassType type, vector& passes) case PASS_LIGHT: /* ignores */ break; + case PASS_SAMPLE_COUNT: + pass.components = 2; + pass.exposure = false; + break; } passes.push_back(pass); @@ -264,6 +268,8 @@ Film::Film() { exposure = 0.8f; Pass::add(PASS_COMBINED, passes); + // pixelwise sample count + Pass::add(PASS_SAMPLE_COUNT, passes); 
pass_alpha_threshold = 0.5f; filter_type = FILTER_BOX; @@ -298,6 +304,7 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->pass_flag = 0; kfilm->pass_stride = 0; kfilm->use_light_pass = use_light_visibility || use_sample_clamp; + kfilm->pass_sample_count = 0; foreach(Pass& pass, passes) { kfilm->pass_flag |= pass.type; @@ -402,6 +409,9 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) break; case PASS_NONE: break; + case PASS_SAMPLE_COUNT: + kfilm->pass_sample_count = kfilm->pass_stride; + break; } kfilm->pass_stride += pass.components; diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 9fcd9fa..c1b006a 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -77,6 +77,16 @@ Session::Session(const SessionParams& params_) gpu_need_tonemap = false; pause = false; kernels_loaded = false; + +#if CYCLES_DLL + params.background=false; + if ( !buffers ) { + buffers = new RenderBuffers(device); + display = new DisplayBuffer(device, params.display_buffer_linear); + } +#endif + + last_buffers_num_samples = 0; /* nop_tiles() compares against this before its first snapshot */ } Session::~Session() @@ -113,6 +123,10 @@ Session::~Session() foreach(RenderBuffers *buffers, tile_buffers) delete buffers; + foreach(device_vector *buffer, last_tile_buffers) + delete buffer; + last_tile_buffers.clear(); + delete buffers; delete display; delete scene; @@ -121,6 +135,27 @@ Session::~Session() TaskScheduler::exit(); } +void* Session::get_rgba(int* width, int* height) +{ + if ( !display || !buffers ) + { + delete buffers; + delete display; + + buffers = new RenderBuffers(device); + display = new DisplayBuffer(device, params.display_buffer_linear); + display->reset(device, buffers->params); + } + + *width = display->draw_width; + *height = display->draw_height; + + device_memory& rgba = display->rgba_data(); + device->pixels_copy_from(rgba,0,display->draw_width,display->draw_height); + + return (void*) rgba.data_pointer; +} + void 
Session::start() { session_thread = new thread(function_bind(&Session::run, this)); @@ -266,6 +301,9 @@ void Session::run_gpu() /* update status and timing */ update_status_time(); + /* determine tiles to refine */ + nop_tiles(); + /* path trace */ path_trace(); @@ -370,6 +408,7 @@ bool Session::acquire_tile(Device *tile_device, RenderTile& rtile) rtile.start_sample = tile_manager.state.sample; rtile.num_samples = tile_manager.state.num_samples; rtile.resolution = tile_manager.state.resolution_divider; + rtile.flags = tile.flags; tile_lock.unlock(); @@ -465,6 +504,226 @@ void Session::release_tile(RenderTile& rtile) update_status_time(); } +//#define BLENDER_APP + +void Session::nop_tiles() +{ +#ifdef BLENDER_APP + // parameters + int min_num_samples=10; // run at least N samples, take first snapshot + int nth_sample=10; // run N samples and check progress + float eps_reset=0.002f; // tiles with progress below this threshold are NOPPED + float max_NOP = 0.3f; // maximum 30% nopped +#else + int min_num_samples=2; // run at least N samples, take first snapshot + int nth_sample_max=10; // run N samples and check progress + float eps_reset=0.01f; // tiles with progress below this threshold are NOPPED + float max_NOP = 0.9f; // maximum 90% nopped + static int nth_sample=1; +#endif + + // start value + static float eps = 0.1f; + + if ( tile_manager.state.resolution_divider!=1 ) { + last_buffers_num_samples=0; + last_buffers.clear(); + for(int i=0; ifilm->passes) { + if ( pass.type == PASS_SAMPLE_COUNT ) { + pass_count_samples_found=true; + break; + } + pass_count_samples += pass.components; + } + if ( !pass_count_samples_found ) + return; + + bool buffers_from_device=false; + bool copy_to_last_buffers=false; + if ( buffers ) { + // use "buffers" + if ( buffers->buffer.size() != last_buffers.size() ) + copy_to_last_buffers=true; + } else { + // use "tile_buffers" + if ( last_tile_buffers.size() != tile_buffers.size() ) + copy_to_last_buffers=true; + else + if ( 
last_tile_buffers.size() && tile_buffers[0]->buffer.size() != last_tile_buffers[0]->size() ) + copy_to_last_buffers=true; + } + + int pass_stride = tile_manager.params.get_passes_size(); + + if ( buffers_num_samples-last_buffers_num_samples>=nth_sample && + ( + ( buffers && buffers->buffer.size() == last_buffers.size() ) || + ( last_tile_buffers.size() && tile_buffers[0]->buffer.size() == last_tile_buffers[0]->size() ) + ) + ) { +#ifndef BLENDER_APP + nth_sample = min( nth_sample*2, nth_sample_max ); +#endif + // retrieve buffers + if ( !buffers_from_device ) { + if ( buffers ) + buffers->copy_from_device(); + //else + // for(int i=0; icopy_from_device(); + buffers_from_device=true; + } + // compare to last_buffer, tilewise + int offset,stride; + tile_manager.params.get_offset_stride(offset, stride); + Tile* tile=0; + float* _A=0; + float* _B=0; + if ( buffers ) { + _A = (float*)last_buffers.data_pointer; + _B = (float*)buffers->buffer.data_pointer; + } + int noppedtiles=0; + for(int d=0; dflags&Tile::TILE_FLAGS_NOP ) { + if ( tile->priority > eps ) { + tile->flags &= ~Tile::TILE_FLAGS_NOP; + } else { + noppedtiles++; + } + continue; + } + // priority 0 means terminal death + if ( tile->priority==0 ) { + noppedtiles++; + tile->flags |= Tile::TILE_FLAGS_NOP; + continue; + } + if ( !buffers ) { + _A = (float*)last_tile_buffers[tile->index]->data_pointer; + _B = (float*)tile_buffers[tile->index]->buffer.data_pointer; + } + // evaluate progress on this tile + float maxVariation=0; + for(int x=0; xw; x++) { + for(int y=0; yh; y++) { + int index = 0; + if ( buffers ) + index += tile->x + x + (tile->y + y) * buffers->params.full_width; + else + index += x + y * tile_buffers[tile->index]->params.width; + index *= pass_stride; + + float4* A = (float4*)(_A + index); + float4* B = (float4*)(_B + index); + + //float avg_scale_A = 1.0f / (*((int*)(_A + index + pass_count_samples))); + //float avg_scale_B = 1.0f / (*((int*)(_B + index + pass_count_samples))); + float avg_scale_A 
= *(_A + index + pass_count_samples + 1); + float avg_scale_B = *(_B + index + pass_count_samples + 1); + + float4 avgA = (*A) * avg_scale_A; + float4 avgB = (*B) * avg_scale_B; + float4 progress = avgA-avgB; + + float thisVariation = max( fabsf(progress.x), max(fabsf(progress.y), max(fabsf(progress.z), fabsf(progress.w)))); + maxVariation = max( maxVariation, thisVariation ); + + if ( maxVariation > eps ) + break; + } + if ( maxVariation > eps ) + break; + } + tile->priority = maxVariation; + if ( maxVariation > eps ) { + tile->flags &= ~Tile::TILE_FLAGS_NOP; + } else { + noppedtiles++; + tile->flags |= Tile::TILE_FLAGS_NOP; + } + } + } + + // maybe too many tiles nopped... + while( noppedtiles > max_NOP*tile_manager.state.tiles.size() && eps>1e-8) { + // lower the barrier + eps *= 0.5f; + // unnop some tiles that break the new barrier + foreach( Tile& t, tile_manager.state.tiles ) + if ( t.flags&Tile::TILE_FLAGS_NOP ) + if ( t.priority > eps ) { + t.flags &= ~Tile::TILE_FLAGS_NOP; + noppedtiles--; + } + } + progress.nopped_tiles = noppedtiles; + + // mark them as "not rendering" so device can process them + tile_manager.mark_tiles_rendering(false); + + copy_to_last_buffers=true; + } + + if ( copy_to_last_buffers ) { + // retrieve buffers + if ( !buffers_from_device ) { + if ( buffers ) + buffers->copy_from_device(); + //else + // for(int i=0; icopy_from_device(); + buffers_from_device=true; + } + // copy to last buffer + if ( buffers ) { + last_buffers.resize( buffers->buffer.data_size ); + memcpy( (void*)last_buffers.data_pointer, (void*)buffers->buffer.data_pointer, buffers->buffer.data_size * sizeof(float) ); + } else { + for(int i=0; i(); + last_tile_buffers[i]->resize( tile_buffers[i]->buffer.data_size ); + memcpy( (void*)last_tile_buffers[i]->data_pointer, (void*)tile_buffers[i]->buffer.data_pointer, tile_buffers[i]->buffer.data_size * sizeof(float) ); + } + } + last_buffers_num_samples = buffers_num_samples; + } +} + void Session::run_cpu() { bool 
tiles_written = false; @@ -548,6 +807,9 @@ void Session::run_cpu() /* update status and timing */ update_status_time(); + /* determine tiles to refine */ + nop_tiles(); + /* path trace */ path_trace(); diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index 9da7a0aa..d04c839 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -142,6 +142,8 @@ public: void device_free(); + void* get_rgba(int* width, int* height); + protected: struct DelayedReset { thread_mutex mutex; @@ -158,6 +160,11 @@ protected: void path_trace(); void reset_(BufferParams& params, int samples); + device_vector last_buffers; + int last_buffers_num_samples; + vector *> last_tile_buffers; + void nop_tiles(); + void run_cpu(); bool draw_cpu(BufferParams& params, DeviceDrawParams& draw_params); void reset_cpu(BufferParams& params, int samples); diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index d6094a4..b496a41 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -63,6 +63,7 @@ void TileManager::reset(BufferParams& params_, int num_samples_) state.num_tiles = 0; state.num_rendered_tiles = 0; state.num_samples = 0; + state.last_num_samples = 0; state.resolution_divider = divider; state.tiles.clear(); } @@ -78,13 +79,20 @@ void TileManager::gen_tiles_global() int resolution = state.resolution_divider; int image_w = max(1, params.width/resolution); int image_h = max(1, params.height/resolution); + int num_logical_devices = preserve_tile_device? num_devices: 1; + + tiles_generation_params params( resolution, image_w, image_h, num_logical_devices ); + if ( params.equals( gen_tiles_last_params ) ) { + mark_tiles_rendering(false); + return; + } + gen_tiles_last_params = params; state.tiles.clear(); int tile_w = (tile_size.x >= image_w)? 1: (image_w + tile_size.x - 1)/tile_size.x; int tile_h = (tile_size.y >= image_h)? 
1: (image_h + tile_size.y - 1)/tile_size.y; - int num_logical_devices = preserve_tile_device? num_devices: 1; int num = min(image_h, num_logical_devices); int tile_index = 0; @@ -98,7 +106,7 @@ void TileManager::gen_tiles_global() int w = (tile_x == tile_w-1)? image_w - x: tile_size.x; int h = (tile_y == tile_h-1)? image_h - y: tile_size.y; - state.tiles.push_back(Tile(tile_index, x, y, w, h, cur_device)); + state.tiles.push_back(Tile(tile_index, x, y, w, h, cur_device, 0)); cur_tiles++; if(cur_tiles == tiles_per_device) { @@ -115,11 +123,21 @@ void TileManager::gen_tiles_sliced() int resolution = state.resolution_divider; int image_w = max(1, params.width/resolution); int image_h = max(1, params.height/resolution); + int num_logical_devices = preserve_tile_device? num_devices: 1; + + tiles_generation_params params( resolution, image_w, image_h, num_logical_devices ); + if ( params.equals( gen_tiles_last_params ) ) { + mark_tiles_rendering(false); + return; + } + gen_tiles_last_params = params; + + int tile_w = (tile_size.x >= image_w)? 1: (image_w + tile_size.x - 1)/tile_size.x; + int tile_h = (tile_size.y >= image_h)? 1: (image_h + tile_size.y - 1)/tile_size.y; state.tiles.clear(); - int num_logical_devices = preserve_tile_device? num_devices: 1; - int num = min(image_h, num_logical_devices); + int num = min(params.image_h, params.num_logical_devices); int tile_index = 0; for(int device = 0; device < num; device++) { @@ -136,7 +154,7 @@ void TileManager::gen_tiles_sliced() int w = (tile_x == tile_w-1)? image_w - x: tile_size.x; int h = (tile_y == tile_h-1)? 
device_h - y: tile_size.y; - state.tiles.push_back(Tile(tile_index, x, y + device_y, w, h, device)); + state.tiles.push_back(Tile(tile_index, x, y + device_y, w, h, device, 0)); } } } @@ -230,6 +248,14 @@ list::iterator TileManager::next_background_tile(int device, TileOrder til return best; } +void TileManager::mark_tiles_rendering(bool rendering) +{ + list::iterator iter; + for(iter = state.tiles.begin(); iter != state.tiles.end(); iter++) + iter->rendering = rendering; + state.num_rendered_tiles=0; +} + bool TileManager::next_tile(Tile& tile, int device) { list::iterator tile_it; @@ -250,6 +276,26 @@ bool TileManager::next_tile(Tile& tile, int device) return false; } +bool TileManager::next_tile(Tile** tile, int device) +{ + list::iterator tile_it; + + if (background) + tile_it = next_background_tile(device, tile_order); + else + tile_it = next_viewport_tile(device); + + if(tile_it != state.tiles.end()) { + tile_it->rendering = true; + *tile = &(*tile_it); + state.num_rendered_tiles++; + + return true; + } + + return false; +} + bool TileManager::done() { return (state.sample+state.num_samples >= num_samples && state.resolution_divider == 1); @@ -260,6 +306,8 @@ bool TileManager::next() if(done()) return false; + state.last_num_samples = state.num_samples; + if(progressive && state.resolution_divider > 1) { state.sample = 0; state.resolution_divider /= 2; @@ -267,12 +315,16 @@ bool TileManager::next() set_tiles(); } else { - state.sample++; - if(progressive) - state.num_samples = 1; + { + state.num_samples = 1;//2;//num_progressive_samples + state.sample += state.num_samples; + } else + { + state.sample++; state.num_samples = num_samples; + } state.resolution_divider = 1; set_tiles(); diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h index 7796518..0e6f9ce 100644 --- a/intern/cycles/render/tile.h +++ b/intern/cycles/render/tile.h @@ -33,11 +33,15 @@ public: int device; bool rendering; + int flags; + enum TILE_FLAGS { TILE_FLAGS_NOP=1, }; + 
float priority; + Tile() {} - Tile(int index_, int x_, int y_, int w_, int h_, int device_) - : index(index_), x(x_), y(y_), w(w_), h(h_), device(device_), rendering(false) {} + Tile(int index_, int x_, int y_, int w_, int h_, int device_, int flags_) + : index(index_), x(x_), y(y_), w(w_), h(h_), device(device_), rendering(false), flags(flags_), priority(1) {} }; /* Tile order */ @@ -61,6 +65,7 @@ public: BufferParams buffer; int sample; int num_samples; + int last_num_samples; int resolution_divider; int num_tiles; int num_rendered_tiles; @@ -77,8 +82,12 @@ public: void set_samples(int num_samples); bool next(); bool next_tile(Tile& tile, int device = 0); + bool next_tile(Tile** tile, int device=0); bool done(); + void mark_tiles_rendering(bool rendering=false); + int get_num_devices() { return num_devices; } + void set_tile_order(TileOrder tile_order_) { tile_order = tile_order_; } protected: @@ -113,6 +122,17 @@ protected: void gen_tiles_global(); /* slices image into as much pieces as how many devices are rendering this image */ + typedef struct tiles_generation_params /* inputs of the last tile generation; gen_tiles_*() compares them to skip regenerating an identical layout */ + { + tiles_generation_params() {resolution=0;image_w=0;image_h=0;num_logical_devices=0;}; + tiles_generation_params(int r,int w, int h, int n) {resolution=r;image_w=w;image_h=h;num_logical_devices=n;}; /* r: resolution divider, w/h: image size, n: logical device count */ + bool equals(tiles_generation_params& p) { return resolution==p.resolution && image_w==p.image_w && image_h==p.image_h && num_logical_devices==p.num_logical_devices; }; + int resolution; + int image_w; + int image_h; + int num_logical_devices; + } tiles_generation_params; + tiles_generation_params gen_tiles_last_params; void gen_tiles_sliced(); /* returns tiles for background render */ diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h index 5d1219b..a2acbc3 100644 --- a/intern/cycles/util/util_progress.h +++ b/intern/cycles/util/util_progress.h @@ -47,6 +47,7 @@ public: cancel = false; cancel_message = ""; cancel_cb = NULL; + nopped_tiles=0; } 
Progress(Progress& progress) @@ -232,6 +233,8 @@ public: update_cb = function; } + int nopped_tiles; + protected: thread_mutex progress_mutex; thread_mutex update_mutex;