>> Кстати, на блендернэшнл что-то про мех или волосы говорили для Циклов, вроде дополнение (аддон), что ли?.. Правда, что ли... http://blender-3d.ru/forum/index.php/topic,433.msg4969/topicseen.html#msg4969
# CUDA module
#
# Pre-compiles kernel.cu with nvcc into one .cubin per architecture listed in
# CYCLES_CUDA_BINARIES_ARCH, hands each cubin to delayed_install(), and exposes
# the whole set through the always-built `cycles_kernel_cuda` target.

if(WITH_CYCLES_CUDA_BINARIES)
  # Pointer size of the host build selects nvcc's -m32 / -m64 switch.
  if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
    set(CUDA_BITS 64)
  else()
    set(CUDA_BITS 32)
  endif()

  # Every file listed here re-triggers the cubin build when it changes.
  set(cuda_sources
    kernel.cu
    ${SRC_HEADERS}
    ${SRC_SVM_HEADERS}
    ${SRC_CLOSURE_HEADERS}
    ${SRC_UTIL_HEADERS}
  )
  set(cuda_cubins)

  foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
    set(cuda_cubin kernel_${arch}.cubin)

    # Per-architecture nvcc flags. These are CMake lists (not one
    # space-separated string) so that every flag reaches nvcc as its own
    # argument when the command below is emitted with VERBATIM.
    # "${arch}" is quoted so an empty entry cannot turn the condition into a
    # syntax error.
    if("${arch}" MATCHES "sm_1[0-9]")
      # sm_1x
      set(cuda_arch_flags --maxrregcount=24 --opencc-options -OPT:Olimit=0)
    elseif("${arch}" MATCHES "sm_2[0-9]")
      # sm_2x
      set(cuda_arch_flags --maxrregcount=24)
    else()
      # sm_3x
      set(cuda_arch_flags --maxrregcount=32)
    endif()

    add_custom_command(
      OUTPUT ${cuda_cubin}
      COMMAND ${CUDA_NVCC_EXECUTABLE}
              -arch=${arch}
              -m${CUDA_BITS}
              --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu
              -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
              --ptxas-options=-v
              ${cuda_arch_flags}
              -I${CMAKE_CURRENT_SOURCE_DIR}/../util
              -I${CMAKE_CURRENT_SOURCE_DIR}/svm
              -DCCL_NAMESPACE_BEGIN=
              -DCCL_NAMESPACE_END=
              -DNVCC
      DEPENDS ${cuda_sources}
      VERBATIM
    )

    # delayed_install() is a project helper defined outside this section;
    # presumably it registers the built cubin for installation under
    # ${CYCLES_INSTALL_PATH}/lib -- confirm against its definition.
    delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
    list(APPEND cuda_cubins ${cuda_cubin})
  endforeach()

  # Building this target (part of ALL) builds every cubin collected above.
  add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
endif()
I did some benchmarking with 3 versions of the CUDA toolkit on my box. My interpretation of the results is that it could be good to move to CUDA 5.0. But more importantly, a maximum of 24 registers seems to be suboptimal in the scenes I have tested with. It might be worth checking whether the default for sm_20 and sm_21 cards could be raised to 32, as it currently is for sm_3x.
# Expose the toolkit version to the kernel as __KERNEL_CUDA_VERSION__.
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")

# Build flags depending on CUDA version and arch.
# NOTE(review): the `LESS 50` comparison assumes CUDA_VERSION is an integer of
# the form <major><minor> (e.g. 42, 50), not a dotted "5.0" -- confirm where
# CUDA_VERSION is set.
# "${arch}" is quoted in every condition so that an empty or unset value
# evaluates as a plain (non-matching) string instead of breaking the if().
if(CUDA_VERSION LESS 50)
  # CUDA 4.x
  if("${arch}" MATCHES "sm_1[0-9]")
    # sm_1x
    set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0")
  elseif("${arch}" MATCHES "sm_2[0-9]")
    # sm_2x
    set(cuda_arch_flags "--maxrregcount=24")
  else()
    # sm_3x
    set(cuda_arch_flags "--maxrregcount=32")
  endif()

  set(cuda_math_flags "")
else()
  # CUDA 5.x
  if("${arch}" MATCHES "sm_1[0-9]")
    # sm_1x
    set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0")
  elseif("${arch}" MATCHES "sm_2[0-9]")
    # sm_2x: register cap raised from 24 to 32 for this toolkit generation
    set(cuda_arch_flags "--maxrregcount=32")
  else()
    # sm_3x
    set(cuda_arch_flags "--maxrregcount=32")
  endif()

  # Fast-math is only enabled on the CUDA 5.x path.
  set(cuda_math_flags "--use_fast_math")
endif()
Глупо заменять один рендерер другим, бросая его на полпути...