intern/cycles/app/cycles_standalone.cpp | 529 ++++++++++++++++++++++++++++-- intern/cycles/app/cycles_xml.cpp | 116 ++++++- intern/cycles/app/cycles_xml.h | 13 + intern/cycles/blender/addon/properties.py | 26 ++ intern/cycles/blender/addon/ui.py | 13 + intern/cycles/blender/blender_session.cpp | 5 + intern/cycles/blender/blender_sync.cpp | 6 + intern/cycles/kernel/kernel_film.h | 14 +- intern/cycles/kernel/kernel_path.h | 76 ++++- intern/cycles/kernel/kernel_types.h | 6 + intern/cycles/render/buffers.cpp | 84 ++++- intern/cycles/render/buffers.h | 4 + intern/cycles/render/film.cpp | 30 +- intern/cycles/render/integrator.cpp | 6 + intern/cycles/render/integrator.h | 3 + intern/cycles/render/session.cpp | 283 ++++++++++++++++ intern/cycles/render/session.h | 21 +- intern/cycles/render/tile.cpp | 46 ++- intern/cycles/render/tile.h | 19 +- intern/cycles/util/util_progress.h | 5 + 20 files changed, 1243 insertions(+), 62 deletions(-) diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp index 915ef96..1617f5c 100644 --- a/intern/cycles/app/cycles_xml.cpp +++ b/intern/cycles/app/cycles_xml.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -45,17 +46,7 @@ CCL_NAMESPACE_BEGIN -/* XML reading state */ -struct XMLReadState { - Scene *scene; /* scene pointer */ - Transform tfm; /* current transform state */ - bool smooth; /* smooth normal state */ - int shader; /* current shader */ - string base; /* base path to current file*/ - float dicing_rate; /* current dicing rate */ - Mesh::DisplacementMethod displacement_method; -}; /* Attribute Reading */ @@ -248,6 +239,54 @@ static ShaderSocketType xml_read_socket_type(pugi::xml_node node, const char *na return SHADER_SOCKET_UNDEFINED; } +/* Embedded File */ +static void xml_read_embedded_file(const XMLReadState& state, pugi::xml_node node) +{ + std::string filename; + xml_read_string(&filename, node, "dst"); + filename = path_join(state.base, filename); + + std::string data; + 
xml_read_string(&data, node, "data"); + + bool fileexists=false; + { + FILE* test = fopen(filename.c_str(),"rb"); + if ( test ) + { + fileexists=true; + fclose(test); + } + } + + if ( !fileexists ) + { + FILE* dump = fopen(filename.c_str(),"wb"); + + unsigned char buffer[1000]; + int buffptr=0; + for(int i=0; idirection, node, "direction"); snode = normal; } + else if(string_iequals(node.name(), "bump")) { + BumpNode *bump = new BumpNode(); + xml_read_bool(&bump->invert, node, "invert"); + snode = bump; + } else if(string_iequals(node.name(), "mapping")) { snode = new MappingNode(); } @@ -849,9 +893,11 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node) /* read vertices and polygons, RIB style */ vector P; + vector N; vector verts, nverts; xml_read_float3_array(P, node, "P"); + xml_read_float3_array(N, node, "N"); xml_read_int_array(verts, node, "verts"); xml_read_int_array(nverts, node, "nverts"); @@ -897,6 +943,8 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node) DiagSplit dsplit(sdparams); sdmesh.tessellate(&dsplit); + + mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL); } else { /* create vertices */ @@ -920,10 +968,18 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node) index_offset += nverts[i]; } + + if ( P.size() == N.size() && + N.size() ) + { + Attribute* attr_N = mesh->attributes.add(ATTR_STD_VERTEX_NORMAL); + float3 *_N = attr_N->data_float3(); + memcpy( _N, &N[0], N.size()*sizeof(float3) ); + } } /* temporary for test compatibility */ - mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL); + //mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL); } /* Patch */ @@ -1094,7 +1150,7 @@ static void xml_read_state(XMLReadState& state, pugi::xml_node node) static void xml_read_include(const XMLReadState& state, const string& src); -static void xml_read_scene(const XMLReadState& state, pugi::xml_node scene_node) +void xml_read_scene(const XMLReadState& state, pugi::xml_node scene_node) { 
for(pugi::xml_node node = scene_node.first_child(); node; node = node.next_sibling()) { if(string_iequals(node.name(), "film")) { @@ -1139,6 +1195,9 @@ static void xml_read_scene(const XMLReadState& state, pugi::xml_node scene_node) if(xml_read_string(&src, node, "src")) xml_read_include(state, src); } + else if(string_iequals(node.name(), "embedded_file")) { + xml_read_embedded_file(state, node); + } else fprintf(stderr, "Unknown node \"%s\".\n", node.name()); } @@ -1185,5 +1244,38 @@ void xml_read_file(Scene *scene, const char *filepath) scene->params.bvh_type = SceneParams::BVH_STATIC; } +void xml_read_buffer(Scene *scene, char *buffer) +{ + XMLReadState state; + + state.scene = scene; + state.tfm = transform_identity(); + state.shader = scene->default_surface; + state.smooth = false; + state.dicing_rate = 0.1f; + //state.base = path_dirname(filepath); + + //xml_read_include(state, path_filename(filepath)); + /* open XML document */ + pugi::xml_document doc; + pugi::xml_parse_result parse_result; + + //string path = path_join(state.base, src); + parse_result = doc.load_buffer(buffer, strlen(buffer));//.load_file(path.c_str()); + + if(parse_result) { + XMLReadState substate = state; + //substate.base = path_dirname(path); + + xml_read_scene(substate, doc); + } + else { + //fprintf(stderr, "%s read error: %s\n", src.c_str(), parse_result.description()); + exit(EXIT_FAILURE); + } + + scene->params.bvh_type = SceneParams::BVH_STATIC; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/app/cycles_xml.h b/intern/cycles/app/cycles_xml.h index 96bc79c..b2e44e0 100644 --- a/intern/cycles/app/cycles_xml.h +++ b/intern/cycles/app/cycles_xml.h @@ -22,11 +22,24 @@ CCL_NAMESPACE_BEGIN class Scene; void xml_read_file(Scene *scene, const char *filepath); +void xml_read_buffer(Scene* scene, char *buffer ); /* macros for importing */ #define RAD2DEGF(_rad) ((_rad) * (float)(180.0 / M_PI)) #define DEG2RADF(_deg) ((_deg) * (float)(M_PI / 180.0)) +/* XML reading state */ + +struct 
XMLReadState { + Scene *scene; /* scene pointer */ + Transform tfm; /* current transform state */ + bool smooth; /* smooth normal state */ + int shader; /* current shader */ + string base; /* base path to current file*/ + float dicing_rate; /* current dicing rate */ + Mesh::DisplacementMethod displacement_method; +}; + CCL_NAMESPACE_END #endif /* __CYCLES_XML_H__ */ diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index b4a1b10..8021e48 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -465,6 +465,32 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): default=False, ) + + cls.nop_min_samples = IntProperty( + name="NOP Min Samples", + description="Number of samples until progress evaluation", + min=1, max=2147483647, + default=10, + ) + cls.nop_nth_sample = IntProperty( + name="NOP Nth Sample", + description="Number of samples between progress evaluations", + min=1, max=2147483647, + default=10, + ) + cls.nop_eps = FloatProperty( + name="NOP Threshold", + description="Tiles with progress below threshold will be nopped", + min=0.0, max=1.0, + default=0.002, + ) + cls.nop_maxnop = FloatProperty( + name="NOP Max Nopped", + description="Maximum number of tiles to be nopped [%]", + min=0, max=1, + default=0.5, + ) + cls.bake_type = EnumProperty( name="Bake Type", default='COMBINED', diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 9dd5a4e..af99dda 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -302,6 +302,12 @@ class CyclesRender_PT_performance(CyclesButtonsPanel, Panel): subsub.enabled = not rd.use_border subsub.prop(rd, "use_save_buffers") + col.label(text="NOP Min Samples:") + col.prop(cscene, "nop_min_samples", text="") + col.label(text="NOP Nth Sample:") + col.prop(cscene, "nop_nth_sample", text="") + + col.separator() col = split.column(align=True) col.label(text="Viewport:") 
@@ -320,6 +326,13 @@ class CyclesRender_PT_performance(CyclesButtonsPanel, Panel): col.label(text="Acceleration structure:") col.prop(cscene, "debug_use_spatial_splits") + col.separator() + col.label(text="NOP Threshold:") + col.prop(cscene, "nop_eps", text="") + col.label(text="NOP Max Nopped:") + col.prop(cscene, "nop_maxnop", text="") + + class CyclesRender_PT_layer_options(CyclesButtonsPanel, Panel): bl_label = "Layer" diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index 0f31e55..37ab1e5 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -421,6 +421,9 @@ void BlenderSession::render() vector passes; Pass::add(PASS_COMBINED, passes); + // pixelwise sample count + Pass::add(PASS_SAMPLE_COUNT, passes); + if(session_params.device.advanced_shading) { /* loop over passes */ @@ -802,6 +805,8 @@ void BlenderSession::update_status_progress() BLI_timestr(remaining_time, time_str, sizeof(time_str)); timestatus += "Remaining:" + string(time_str) + " | "; } + + timestatus += string_printf("Nopped:%i/%i Eps=%0.2e | ", session->progress.nopped_tiles, session->tile_manager.state.tiles.size(), session->progress.nopped_eps ); timestatus += string_printf("Mem:%.2fM, Peak:%.2fM", mem_used, mem_peak); diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 042bbca..c51e1d0 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -517,6 +517,12 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine b_engine, BL::Use /* color managagement */ params.display_buffer_linear = GLEW_ARB_half_float_pixel && b_engine.support_display_space_shader(b_scene); + // nopping + params.nop_min_samples = get_int(cscene, "nop_min_samples"); + params.nop_nth_sample = get_int(cscene, "nop_nth_sample"); + params.nop_eps = get_float(cscene, "nop_eps"); + params.nop_maxnop = get_float(cscene, "nop_maxnop"); 
+ return params; } diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h index dc5f6e7..f61bef6 100644 --- a/intern/cycles/kernel/kernel_film.h +++ b/intern/cycles/kernel/kernel_film.h @@ -55,6 +55,11 @@ ccl_device void kernel_film_convert_to_byte(KernelGlobals *kg, rgba += index; buffer += index*kernel_data.film.pass_stride; + if ( kernel_data.film.pass_sample_count ) { + //sample_scale = 1.0f/(*((int*)(buffer+kernel_data.film.pass_sample_count))); + sample_scale = *(buffer+kernel_data.film.pass_sample_count+1); + } + /* map colors */ float4 irradiance = *((ccl_global float4*)buffer); float4 float_result = film_map(kg, irradiance, sample_scale); @@ -70,7 +75,14 @@ ccl_device void kernel_film_convert_to_half_float(KernelGlobals *kg, /* buffer offset */ int index = offset + x + y*stride; - ccl_global float4 *in = (ccl_global float4*)(buffer + index*kernel_data.film.pass_stride); + buffer += index*kernel_data.film.pass_stride; + + if ( kernel_data.film.pass_sample_count ) { + //sample_scale = 1.0f/(*((int*)(buffer+kernel_data.film.pass_sample_count))); + sample_scale = *(buffer+kernel_data.film.pass_sample_count+1); + } + + ccl_global float4 *in = (ccl_global float4*)buffer; ccl_global half *out = (ccl_global half*)rgba + index*4; float exposure = kernel_data.film.exposure; diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 83bceed..1e13e10 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -1094,6 +1094,13 @@ ccl_device void kernel_path_trace(KernelGlobals *kg, rng_state += index; buffer += index*pass_stride; + // skip this? 
+ if ( kernel_data.film.pass_sample_count && (sample>kernel_data.integrator.converge_minsamples) ) { + float epsSQR = *(buffer+kernel_data.film.pass_sample_count+3); + if ( epsSQR < kernel_data.integrator.converge_abort ) + return; + } + /* initialize random numbers and ray */ RNG rng; Ray ray; @@ -1108,8 +1115,35 @@ ccl_device void kernel_path_trace(KernelGlobals *kg, else L = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - /* accumulate result in output buffer */ - kernel_write_pass_float4(buffer, sample, L); + // calculate variance + if ( kernel_data.film.pass_sample_count ) { + int N = (sample==0) ? 1 : (*((int*)(buffer+kernel_data.film.pass_sample_count)))+1; + float Nm1inv = (sample==0) ? 1.0f : *(buffer+kernel_data.film.pass_sample_count+1); + float4 M2 = (sample==0) ? make_float4(0.0f, 0.0f, 0.0f, 0.0f) : *((ccl_global float4*)(buffer+kernel_data.film.pass_sample_count+4)); + + float4 meanNm1 = (sample==0) ? make_float4(0.0f, 0.0f, 0.0f, 0.0f) : *((ccl_global float4*)buffer) * Nm1inv; + float4 delta1 = L - meanNm1; + + /* accumulate result in output buffer */ + kernel_write_pass_float4(buffer, sample, L); + + float Ninv = 1.0f/N; + float4 meanN = (sample==0) ? L : *((ccl_global float4*)buffer) * Ninv; + float4 delta2 = L - meanN; + M2 = M2 + delta1*delta2; + + float epsSQR = max( M2.x, max( M2.y, max( M2.z, M2.w ) ) ); + epsSQR *= Nm1inv * Ninv * Ninv; + + (*((int*)(buffer+kernel_data.film.pass_sample_count))) = N; + *(buffer+kernel_data.film.pass_sample_count+1) = Ninv; + *(buffer+kernel_data.film.pass_sample_count+2) = Nm1inv; + *(buffer+kernel_data.film.pass_sample_count+3) = epsSQR; + *((ccl_global float4*)(buffer+kernel_data.film.pass_sample_count+4)) = M2; + } else { + /* accumulate result in output buffer */ + kernel_write_pass_float4(buffer, sample, L); + } path_rng_end(kg, rng_state, rng); } @@ -1126,6 +1160,13 @@ ccl_device void kernel_branched_path_trace(KernelGlobals *kg, rng_state += index; buffer += index*pass_stride; + // skip this? 
+ if ( kernel_data.film.pass_sample_count && (sample>kernel_data.integrator.converge_minsamples) ) { + float epsSQR = *(buffer+kernel_data.film.pass_sample_count+3); + if ( epsSQR < kernel_data.integrator.converge_abort ) + return; + } + /* initialize random numbers and ray */ RNG rng; Ray ray; @@ -1140,8 +1181,35 @@ ccl_device void kernel_branched_path_trace(KernelGlobals *kg, else L = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - /* accumulate result in output buffer */ - kernel_write_pass_float4(buffer, sample, L); + // calculate variance + if ( kernel_data.film.pass_sample_count ) { + int N = (sample==0) ? 1 : (*((int*)(buffer+kernel_data.film.pass_sample_count)))+1; + float Nm1inv = (sample==0) ? 1.0f : *(buffer+kernel_data.film.pass_sample_count+1); + float4 M2 = (sample==0) ? make_float4(0.0f, 0.0f, 0.0f, 0.0f) : *((ccl_global float4*)(buffer+kernel_data.film.pass_sample_count+4)); + + float4 meanNm1 = (sample==0) ? make_float4(0.0f, 0.0f, 0.0f, 0.0f) : *((ccl_global float4*)buffer) * Nm1inv; + float4 delta1 = L - meanNm1; + + /* accumulate result in output buffer */ + kernel_write_pass_float4(buffer, sample, L); + + float Ninv = 1.0f/N; + float4 meanN = (sample==0) ? 
L : *((ccl_global float4*)buffer) * Ninv; + float4 delta2 = L - meanN; + M2 = M2 + delta1*delta2; + + float epsSQR = max( M2.x, max( M2.y, max( M2.z, M2.w ) ) ); + epsSQR *= Nm1inv * Ninv * Ninv; + + (*((int*)(buffer+kernel_data.film.pass_sample_count))) = N; + *(buffer+kernel_data.film.pass_sample_count+1) = Ninv; + *(buffer+kernel_data.film.pass_sample_count+2) = Nm1inv; + *(buffer+kernel_data.film.pass_sample_count+3) = epsSQR; + *((ccl_global float4*)(buffer+kernel_data.film.pass_sample_count+4)) = M2; + } else { + /* accumulate result in output buffer */ + kernel_write_pass_float4(buffer, sample, L); + } path_rng_end(kg, rng_state, rng); } diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index d81909a..4592722 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -311,6 +311,7 @@ typedef enum PassType { PASS_SUBSURFACE_INDIRECT = 8388608, PASS_SUBSURFACE_COLOR = 16777216, PASS_LIGHT = 33554432, /* no real pass, used to force use_light_pass */ + PASS_SAMPLE_COUNT = 67108864, } PassType; #define PASS_ALL (~0) @@ -825,6 +826,8 @@ typedef struct KernelFilm { float mist_start; float mist_inv_depth; float mist_falloff; + + int pass_sample_count; } KernelFilm; typedef struct KernelBackground { @@ -902,6 +905,9 @@ typedef struct KernelIntegrator { int volume_max_steps; float volume_step_size; int volume_samples; + + float converge_abort; + int converge_minsamples; } KernelIntegrator; typedef struct KernelBVH { diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index fc65922f..731e517 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -43,6 +43,9 @@ BufferParams::BufferParams() full_height = 0; Pass::add(PASS_COMBINED, passes); + + // pixelwise sample count + Pass::add(PASS_SAMPLE_COUNT, passes); } void BufferParams::get_offset_stride(int& offset, int& stride) @@ -93,6 +96,8 @@ RenderTile::RenderTile() rng_state = 0; buffers = 
NULL; + + flags = 0; } /* Render Buffers */ @@ -158,6 +163,16 @@ bool RenderBuffers::copy_from_device() bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels) { + bool pass_sample_count_found=false; + int pass_sample_count=0; + foreach(Pass& pass, params.passes) { + if(pass.type == PASS_SAMPLE_COUNT) { + pass_sample_count_found=true; + break; + } + pass_sample_count += pass.components; + } + int pass_offset = 0; foreach(Pass& pass, params.passes) { @@ -166,6 +181,9 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int continue; } + pass_sample_count -= pass_offset; + float scale_correction=1.0f; + float *in = (float*)buffer.data_pointer + pass_offset; int pass_stride = params.get_passes_size(); @@ -181,19 +199,37 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int if(type == PASS_DEPTH) { for(int i = 0; i < size; i++, in += pass_stride, pixels++) { float f = *in; - pixels[0] = (f == 0.0f)? 1e10f: f*scale_exposure; + + if ( pass.filter && pass_sample_count_found ) { + //scale_correction = (float)sample / (*((int*)in+pass_sample_count)); + scale_correction = (float)sample * (*(in+pass_sample_count+1)); + } + + pixels[0] = (f == 0.0f)? 
1e10f: f*scale_exposure *scale_correction; } } else if(type == PASS_MIST) { for(int i = 0; i < size; i++, in += pass_stride, pixels++) { float f = *in; - pixels[0] = clamp(f*scale_exposure, 0.0f, 1.0f); + + if ( pass.filter && pass_sample_count_found ) { + //scale_correction = (float)sample / (*((int*)in+pass_sample_count)); + scale_correction = (float)sample * (*(in+pass_sample_count+1)); + } + + pixels[0] = clamp(f*scale_exposure *scale_correction, 0.0f, 1.0f); } } else { for(int i = 0; i < size; i++, in += pass_stride, pixels++) { float f = *in; - pixels[0] = f*scale_exposure; + + if ( pass.filter && pass_sample_count_found ) { + //scale_correction = (float)sample / (*((int*)in+pass_sample_count)); + scale_correction = (float)sample * (*(in+pass_sample_count+1)); + } + + pixels[0] = f*scale_exposure *scale_correction; } } } @@ -238,9 +274,14 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int for(int i = 0; i < size; i++, in += pass_stride, pixels += 3) { float3 f = make_float3(in[0], in[1], in[2]); - pixels[0] = f.x*scale_exposure; - pixels[1] = f.y*scale_exposure; - pixels[2] = f.z*scale_exposure; + if ( pass.filter && pass_sample_count_found ) { + //scale_correction = (float)sample / (*((int*)in+pass_sample_count)); + scale_correction = (float)sample * (*(in+pass_sample_count+1)); + } + + pixels[0] = f.x*scale_exposure *scale_correction; + pixels[1] = f.y*scale_exposure *scale_correction; + pixels[2] = f.z*scale_exposure *scale_correction; } } } @@ -285,12 +326,17 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int for(int i = 0; i < size; i++, in += pass_stride, pixels += 4) { float4 f = make_float4(in[0], in[1], in[2], in[3]); - pixels[0] = f.x*scale_exposure; - pixels[1] = f.y*scale_exposure; - pixels[2] = f.z*scale_exposure; + if ( pass.filter && pass_sample_count_found ) { + //scale_correction = (float)sample / (*((int*)in+pass_sample_count)); + scale_correction = (float)sample * 
(*(in+pass_sample_count+1)); + } + + pixels[0] = f.x*scale_exposure *scale_correction; + pixels[1] = f.y*scale_exposure *scale_correction; + pixels[2] = f.z*scale_exposure *scale_correction; /* clamp since alpha might be > 1.0 due to russian roulette */ - pixels[3] = clamp(f.w*scale, 0.0f, 1.0f); + pixels[3] = clamp(f.w*scale *scale_correction, 0.0f, 1.0f); } } } @@ -376,6 +422,24 @@ bool DisplayBuffer::draw_ready() return (draw_width != 0 && draw_height != 0); } +void* DisplayBuffer::rgba_ptr() +{ + int w = draw_width; + int h = draw_height; + + if(w == 0 || h == 0) + return 0; + + if(half_float) + return 0; + + /* read buffer from device */ + device_memory& rgba = rgba_data(); + device->pixels_copy_from(rgba, 0, w, h); + + return (void*)rgba.data_pointer; +} + void DisplayBuffer::write(Device *device, const string& filename) { int w = draw_width; diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 27ab20b..fbb25ac 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -113,6 +113,7 @@ public: void reset(Device *device, BufferParams& params); void write(Device *device, const string& filename); + void* rgba_ptr(); void draw_set(int width, int height); void draw(Device *device, const DeviceDrawParams& draw_params); @@ -139,6 +140,9 @@ public: int offset; int stride; + int flags; + enum flags_enum { TILE_FLAG_NOP=1, }; + device_ptr buffer; device_ptr rng_state; diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp index c1aefbc..09d3fb5 100644 --- a/intern/cycles/render/film.cpp +++ b/intern/cycles/render/film.cpp @@ -33,9 +33,24 @@ CCL_NAMESPACE_BEGIN static bool compare_pass_order(const Pass& a, const Pass& b) { - if(a.components == b.components) + //if(a.components == b.components) + // return (a.type < b.type); + //return (a.components > b.components); + int na = a.components; + int nb = b.components; + if ( na>4 ) + if ( na%4 ) + na=1; + else + na=4; + if ( nb>4 ) + if ( nb%4 ) + 
nb=1; + else + nb=4; + if(na == nb) return (a.type < b.type); - return (a.components > b.components); + return (na > nb); } void Pass::add(PassType type, vector& passes) @@ -158,6 +173,11 @@ void Pass::add(PassType type, vector& passes) case PASS_LIGHT: /* ignores */ break; + case PASS_SAMPLE_COUNT: + pass.components = 8; + pass.filter = false; + pass.exposure = false; + break; } passes.push_back(pass); @@ -264,6 +284,8 @@ Film::Film() { exposure = 0.8f; Pass::add(PASS_COMBINED, passes); + // pixelwise sample count + Pass::add(PASS_SAMPLE_COUNT, passes); pass_alpha_threshold = 0.5f; filter_type = FILTER_BOX; @@ -298,6 +320,7 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->pass_flag = 0; kfilm->pass_stride = 0; kfilm->use_light_pass = use_light_visibility || use_sample_clamp; + kfilm->pass_sample_count = 0; foreach(Pass& pass, passes) { kfilm->pass_flag |= pass.type; @@ -402,6 +425,9 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) break; case PASS_NONE: break; + case PASS_SAMPLE_COUNT: + kfilm->pass_sample_count = kfilm->pass_stride; + break; } kfilm->pass_stride += pass.components; diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp index 4a8b490..1c5655b 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -63,6 +63,9 @@ Integrator::Integrator() sampling_pattern = SAMPLING_PATTERN_SOBOL; + converge_abort = 0; + converge_minsamples = 65535; + need_update = true; } @@ -125,6 +128,9 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->subsurface_samples = subsurface_samples; kintegrator->volume_samples = volume_samples; + kintegrator->converge_abort = converge_abort*converge_abort; + kintegrator->converge_minsamples = converge_minsamples; + if(method == BRANCHED_PATH) { kintegrator->sample_all_lights_direct = sample_all_lights_direct; kintegrator->sample_all_lights_indirect = 
sample_all_lights_indirect; diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h index 380c1a6..7991b75 100644 --- a/intern/cycles/render/integrator.h +++ b/intern/cycles/render/integrator.h @@ -64,6 +64,9 @@ public: bool sample_all_lights_direct; bool sample_all_lights_indirect; + int converge_minsamples; + float converge_abort; + enum Method { BRANCHED_PATH = 0, PATH = 1 diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 9fcd9fa..786fe17 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -77,6 +77,14 @@ Session::Session(const SessionParams& params_) gpu_need_tonemap = false; pause = false; kernels_loaded = false; + +#if CYCLES_DLL + params.background=false; + if ( !buffers ) { + buffers = new RenderBuffers(device); + display = new DisplayBuffer(device, params.display_buffer_linear); + } +#endif } Session::~Session() @@ -113,6 +121,10 @@ Session::~Session() foreach(RenderBuffers *buffers, tile_buffers) delete buffers; + foreach(device_vector *buffer, last_tile_buffers) + delete buffer; + last_tile_buffers.clear(); + delete buffers; delete display; delete scene; @@ -121,6 +133,27 @@ Session::~Session() TaskScheduler::exit(); } +void* Session::get_rgba(int* width, int* height) +{ + if ( !display || !buffers ) + { + delete buffers; + delete display; + + buffers = new RenderBuffers(device); + display = new DisplayBuffer(device, params.display_buffer_linear); + display->reset(device, buffers->params); + } + + *width = display->draw_width; + *height = display->draw_height; + + device_memory& rgba = display->rgba_data(); + device->pixels_copy_from(rgba,0,display->draw_width,display->draw_height); + + return (void*) rgba.data_pointer; +} + void Session::start() { session_thread = new thread(function_bind(&Session::run, this)); @@ -209,6 +242,7 @@ void Session::run_gpu() if(params.background) { /* if no work left and in background mode, we can stop immediately */ 
if(no_tiles) { + notile_cond.notify_all(); progress.set_status("Finished"); break; } @@ -227,6 +261,9 @@ void Session::run_gpu() update_status_time(pause, no_tiles); while(1) { + if ( no_tiles ) + notile_cond.notify_all(); + double pause_start = time_dt(); pause_cond.wait(pause_lock); paused_time += time_dt() - pause_start; @@ -269,6 +306,9 @@ void Session::run_gpu() /* path trace */ path_trace(); + /* determine tiles to refine on the next run */ + nop_tiles(); + device->task_wait(); if(!device->error_message().empty()) @@ -370,6 +410,7 @@ bool Session::acquire_tile(Device *tile_device, RenderTile& rtile) rtile.start_sample = tile_manager.state.sample; rtile.num_samples = tile_manager.state.num_samples; rtile.resolution = tile_manager.state.resolution_divider; + rtile.flags = tile.flags; tile_lock.unlock(); @@ -465,6 +506,213 @@ void Session::release_tile(RenderTile& rtile) update_status_time(); } +void Session::nop_tiles() +{ + if ( !params.progressive ) + return; + + double mineps = 1e-8; +#ifdef CYCLES_DLL + int maxprogressive = 2; +#else + int maxprogressive = 100; +#endif + +#ifdef BLENDER_APP + int nth_sample = params.nop_nth_sample; +#else + static int nth_sample=1; +#endif + + // start value + static float eps = 0.1f; + + if ( tile_manager.state.resolution_divider!=1 ) { + last_buffers_num_samples=0; + last_buffers.clear(); + for(int i=0; iintegrator->need_update |= (scene->integrator->converge_abort != eps) ) + scene->integrator->converge_abort = eps; + if ( scene->integrator->need_update |= (scene->integrator->converge_minsamples != params.nop_min_samples) ) + scene->integrator->converge_minsamples = params.nop_min_samples; + + return; + } + + // current number of samples in buffers + int buffers_num_samples = tile_manager.state.sample+tile_manager.state.num_samples; + if ( buffers_num_samplesintegrator->need_update |= (scene->integrator->converge_abort != eps) ) + scene->integrator->converge_abort = eps; + if ( scene->integrator->need_update |= 
(scene->integrator->converge_minsamples != params.nop_min_samples) ) + scene->integrator->converge_minsamples = params.nop_min_samples; + + return; + } + + int pass_count_samples = 0; + bool pass_count_samples_found=false; + foreach(Pass& pass, scene->film->passes) { + if ( pass.type == PASS_SAMPLE_COUNT ) { + pass_count_samples_found=true; + break; + } + pass_count_samples += pass.components; + } + if ( !pass_count_samples_found ) + return; + + bool buffers_from_device=false; + bool copy_to_last_buffers=false; + + int pass_stride = tile_manager.params.get_passes_size(); + + if ( ((last_buffers_num_samples==0) || (buffers_num_samples-last_buffers_num_samples>=nth_sample)) && eps>mineps ) { +#ifndef BLENDER_APP + nth_sample = min( nth_sample*2, params.nop_nth_sample ); +#endif + // retrieve buffers + if ( !buffers_from_device ) { + if ( buffers ) + buffers->copy_from_device(); + else + for(int i=0; icopy_from_device(); + buffers_from_device=true; + } + // compare to last_buffer, tilewise + int offset,stride; + tile_manager.params.get_offset_stride(offset, stride); + float* _B=0; + if ( buffers ) { + _B = (float*)buffers->buffer.data_pointer; + } + int noppedtiles=0; + foreach(Tile& _tile, tile_manager.state.tiles){ + Tile* tile = &_tile; + // check priority of NOPPED tiles + if ( tile->flags&Tile::TILE_FLAGS_NOP ) { + if ( tile->priority >= eps ) { + tile->flags &= ~Tile::TILE_FLAGS_NOP; + } else { + noppedtiles++; + } + continue; + } + if ( !buffers ) { + _B = (float*)tile_buffers[tile->index]->buffer.data_pointer; + } + // evaluate progress on this tile + float maxVariation=0; + for(int x=0; xw; x++) { + for(int y=0; yh; y++) { + int index = 0; + if ( buffers ) + index += tile->x + x + (tile->y + y) * buffers->params.full_width; + else + index += x + y * tile_buffers[tile->index]->params.width; + index *= pass_stride; + float thisVariation = *(_B + index + pass_count_samples + 3); + + // attempt to blur + if ( x>0 ) { + int xm1=x-1; + int indexm1 = 0; + if ( 
buffers ) + indexm1 += tile->x + xm1 + (tile->y + y) * buffers->params.full_width; + else + indexm1 += xm1 + y * tile_buffers[tile->index]->params.width; + indexm1 *= pass_stride; + float xm1Variation = *(_B + indexm1 + pass_count_samples + 3); + + thisVariation = max( thisVariation, 0.5f*thisVariation + 0.5f*xm1Variation ); + *(_B + index + pass_count_samples + 3) = thisVariation; + + if ( xm1==0 ){ + xm1Variation = max( xm1Variation, 0.5f*thisVariation + 0.5f*xm1Variation ); + *(_B + indexm1 + pass_count_samples + 3) = xm1Variation; + } + } + + maxVariation = max( maxVariation, thisVariation ); + if ( maxVariation >= eps ) + break; + } + if ( maxVariation >= eps ) + break; + } + tile->priority = maxVariation; + if ( maxVariation >= eps ) { + tile->flags &= ~Tile::TILE_FLAGS_NOP; + } else { + noppedtiles++; +#ifndef BLENDER_APP + tile->flags |= Tile::TILE_FLAGS_NOP; +#endif + } + } + + // maybe too many tiles nopped... + if( noppedtiles > params.nop_maxnop*tile_manager.state.tiles.size() && eps>mineps) { + // lower the barrier +#ifdef BLENDER_APP + eps *= 0.1f; +#else + eps *= 0.5f; +#endif + // unnop some tiles that break the new barrier + foreach( Tile& t, tile_manager.state.tiles ) + //if ( t.flags&Tile::TILE_FLAGS_NOP ) + //if ( t.priority >= eps ) { + t.flags &= ~Tile::TILE_FLAGS_NOP; + //noppedtiles--; + //} + } + if ( eps<=mineps ) { + eps=0; + noppedtiles=0; + foreach( Tile& t, tile_manager.state.tiles ) + t.flags &= ~Tile::TILE_FLAGS_NOP; + } + progress.nopped_tiles = noppedtiles; + progress.nopped_eps = eps; + + copy_to_last_buffers=true; + } + + if ( this->scene->integrator->need_update |= (this->scene->integrator->converge_abort != eps) ) + this->scene->integrator->converge_abort = eps; + + if ( copy_to_last_buffers ) { + last_buffers_num_samples = buffers_num_samples; + + tile_manager.num_progressive_samples = min(nth_sample,maxprogressive); + } +} + void Session::run_cpu() { bool tiles_written = false; @@ -481,6 +729,9 @@ void Session::run_cpu() 
/* Stop the session thread and release it.
 *
 * For a non-background session this first requests cancellation through
 * progress (message "Waiting") and wakes every waiter on pause_cond;
 * run_cpu() blocks on pause_cond.wait() while paused or out of tiles, so
 * this is presumably what lets that loop wake up and exit - TODO confirm
 * that the loop re-checks the cancel flag after waking.
 * Afterwards the thread is joined, deleted and the pointer cleared. */
void Session::wait()
{
	if(!params.background) {
		progress.set_cancel("Waiting");
		/* NOTE(review): the notification is sent without taking pause_mutex
		 * in this function; verify the waiter cannot miss it between its
		 * own state check and its call to pause_cond.wait(). */
		pause_cond.notify_all();
	}

	session_thread->join();
	delete session_thread;

	session_thread = NULL;
}

/* Block the calling thread until notile_cond is notified.
 *
 * run_cpu() calls notile_cond.notify_all() when tile_manager.next() reports
 * that no tiles are left: once on the background "Finished" path and once
 * before it blocks on pause_cond in the interactive loop. */
void Session::wait_notile()
{
	/* the condition variable is waited on under tile_mutex */
	thread_scoped_lock notile_lock(tile_mutex);
	/* NOTE(review): this wait has no predicate. A notify_all() issued before
	 * this call is not remembered, and condition variables may wake
	 * spuriously, so the caller can block indefinitely or return early.
	 * A guarded flag checked in a loop would make this robust - confirm
	 * against callers. */
	notile_cond.wait(notile_lock);
}
b/intern/cycles/render/session.h @@ -62,6 +62,11 @@ public: ShadingSystem shadingsystem; + int nop_min_samples; + int nop_nth_sample; + float nop_maxnop; + float nop_eps; + SessionParams() { background = false; @@ -83,6 +88,11 @@ public: shadingsystem = SHADINGSYSTEM_SVM; tile_order = TILE_CENTER; + + nop_min_samples = 10; + nop_nth_sample = 10; + nop_maxnop = 0.3f; + nop_eps = 0.002f; } bool modified(const SessionParams& params) @@ -131,6 +141,7 @@ public: void start(); bool draw(BufferParams& params, DeviceDrawParams& draw_params); void wait(); + void wait_notile(); bool ready_to_reset(); void reset(BufferParams& params, int samples); @@ -142,6 +153,8 @@ public: void device_free(); + void* get_rgba(int* width, int* height); + protected: struct DelayedReset { thread_mutex mutex; @@ -158,6 +171,11 @@ protected: void path_trace(); void reset_(BufferParams& params, int samples); + device_vector last_buffers; + int last_buffers_num_samples; + vector *> last_tile_buffers; + void nop_tiles(); + void run_cpu(); bool draw_cpu(BufferParams& params, DeviceDrawParams& draw_params); void reset_cpu(BufferParams& params, int samples); @@ -182,12 +200,13 @@ protected: volatile bool gpu_need_tonemap; thread_condition_variable gpu_need_tonemap_cond; - bool pause; + volatile bool pause; thread_condition_variable pause_cond; thread_mutex pause_mutex; thread_mutex tile_mutex; thread_mutex buffers_mutex; thread_mutex display_mutex; + thread_condition_variable notile_cond; bool kernels_loaded; diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index d6094a4..af1f075 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -17,6 +17,7 @@ #include "tile.h" #include "util_algorithm.h" +#include "util_foreach.h" #include "util_types.h" CCL_NAMESPACE_BEGIN @@ -34,6 +35,8 @@ TileManager::TileManager(bool progressive_, int num_samples_, int2 tile_size_, i BufferParams buffer_params; reset(buffer_params, 0); + + num_progressive_samples=1; } 
TileManager::~TileManager() @@ -63,8 +66,11 @@ void TileManager::reset(BufferParams& params_, int num_samples_) state.num_tiles = 0; state.num_rendered_tiles = 0; state.num_samples = 0; + state.last_num_samples = 0; state.resolution_divider = divider; state.tiles.clear(); + + num_progressive_samples=1; } void TileManager::set_samples(int num_samples_) @@ -78,13 +84,21 @@ void TileManager::gen_tiles_global() int resolution = state.resolution_divider; int image_w = max(1, params.width/resolution); int image_h = max(1, params.height/resolution); + int num_logical_devices = preserve_tile_device? num_devices: 1; + + tiles_generation_params params( resolution, image_w, image_h, num_logical_devices ); + if ( params.equals( gen_tiles_last_params ) && state.tiles.size() ) { + foreach(Tile& tile, state.tiles) + tile.rendering=false; + return; + } + gen_tiles_last_params = params; state.tiles.clear(); int tile_w = (tile_size.x >= image_w)? 1: (image_w + tile_size.x - 1)/tile_size.x; int tile_h = (tile_size.y >= image_h)? 1: (image_h + tile_size.y - 1)/tile_size.y; - int num_logical_devices = preserve_tile_device? num_devices: 1; int num = min(image_h, num_logical_devices); int tile_index = 0; @@ -115,11 +129,22 @@ void TileManager::gen_tiles_sliced() int resolution = state.resolution_divider; int image_w = max(1, params.width/resolution); int image_h = max(1, params.height/resolution); + int num_logical_devices = preserve_tile_device? num_devices: 1; + + tiles_generation_params params( resolution, image_w, image_h, num_logical_devices ); + if ( params.equals( gen_tiles_last_params ) && state.tiles.size() ) { + foreach(Tile& tile, state.tiles) + tile.rendering=false; + return; + } + gen_tiles_last_params = params; + + int tile_w = (tile_size.x >= image_w)? 1: (image_w + tile_size.x - 1)/tile_size.x; + int tile_h = (tile_size.y >= image_h)? 1: (image_h + tile_size.y - 1)/tile_size.y; state.tiles.clear(); - int num_logical_devices = preserve_tile_device? 
num_devices: 1; - int num = min(image_h, num_logical_devices); + int num = min(params.image_h, params.num_logical_devices); int tile_index = 0; for(int device = 0; device < num; device++) { @@ -171,7 +196,7 @@ list::iterator TileManager::next_viewport_tile(int device) int logical_device = preserve_tile_device? device: 0; for(iter = state.tiles.begin(); iter != state.tiles.end(); iter++) { - if(iter->device == logical_device && iter->rendering == false) + if(iter->device == logical_device && iter->rendering == false && !(iter->flags&Tile::TILE_FLAGS_NOP)) return iter; } @@ -192,7 +217,7 @@ list::iterator TileManager::next_background_tile(int device, TileOrder til int64_t centx = cordx / 2, centy = cordy / 2; for(iter = state.tiles.begin(); iter != state.tiles.end(); iter++) { - if(iter->device == logical_device && iter->rendering == false) { + if(iter->device == logical_device && iter->rendering == false && !(iter->flags&Tile::TILE_FLAGS_NOP)) { Tile &cur_tile = *iter; int64_t distx = cordx; @@ -267,12 +292,15 @@ bool TileManager::next() set_tiles(); } else { - state.sample++; - if(progressive) - state.num_samples = 1; - else + if(progressive) { + state.sample += 1 + state.last_num_samples; + state.num_samples = min( num_progressive_samples, num_samples-state.sample); + state.last_num_samples = state.num_samples-1; + } else { + state.sample++; state.num_samples = num_samples; + } state.resolution_divider = 1; set_tiles(); diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h index 7796518..135e1bf 100644 --- a/intern/cycles/render/tile.h +++ b/intern/cycles/render/tile.h @@ -33,11 +33,15 @@ public: int device; bool rendering; + int flags; + enum TILE_FLAGS { TILE_FLAGS_NOP=1, }; + float priority; + Tile() {} Tile(int index_, int x_, int y_, int w_, int h_, int device_) - : index(index_), x(x_), y(y_), w(w_), h(h_), device(device_), rendering(false) {} + : index(index_), x(x_), y(y_), w(w_), h(h_), device(device_), rendering(false), flags(0), 
/* Snapshot of the values the tile layout is generated from.
 *
 * The tile generators build one of these from the current resolution
 * divider, the image size at that resolution and the number of logical
 * devices, and compare it against the snapshot of the previous run to decide
 * whether the existing tile list can be reused. */
struct tiles_generation_params
{
	/* All-zero snapshot, used before any tiles have been generated. */
	tiles_generation_params()
	: resolution(0), image_w(0), image_h(0), num_logical_devices(0) {}

	/* r: resolution divider, w/h: image width/height in pixels,
	 * n: number of logical devices.
	 *
	 * The device count used to be initialized from r, so a change in the
	 * number of devices went unnoticed by equals() and readers of
	 * num_logical_devices got the resolution divider instead. */
	tiles_generation_params(int r, int w, int h, int n)
	: resolution(r), image_w(w), image_h(h), num_logical_devices(n) {}

	/* Returns true when all four fields match those of p. */
	bool equals(const tiles_generation_params& p) const
	{
		return resolution == p.resolution &&
		       image_w == p.image_w &&
		       image_h == p.image_h &&
		       num_logical_devices == p.num_logical_devices;
	}

	int resolution;
	int image_w;
	int image_h;
	int num_logical_devices;
};