Summary:
The C++ and CUDA implementations of lerp are not numerically stable. This is discussed on Wikipedia [here](https://en.wikipedia.org/wiki/Linear_interpolation#Programming_language_support). I checked the GPU SASS output and there is no overhead from using the more precise implementation, from Kepler all the way to Turing. I haven't looked at the CPU assembly, though.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/18871
Differential Revision:
D14793438
Pulled By: ezyang
fbshipit-source-id:
2ddc2e026c5285466cae7d1b4101174253100445
const scalar_t& self_val,
const scalar_t& end_val,
const scalar_t& weight_val) {
- ret_val = self_val + weight_val * (end_val - self_val);
+ ret_val = (weight_val < 0.5) ?
+ self_val + weight_val * (end_val - self_val) : end_val - (end_val - self_val) * (1 - weight_val);
});
}
[=](scalar_t& ret_val,
const scalar_t& self_val,
const scalar_t& end_val) {
- ret_val = self_val + weight_val * (end_val - self_val);
+ ret_val = (weight_val < 0.5) ?
+ self_val + weight_val * (end_val - self_val) : end_val - (end_val - self_val) * (1 - weight_val);
});
}
const scalar_t& self_val,
const scalar_t& end_val,
const scalar_t& weight_val) {
- ret_val = self_val + weight_val * (end_val - self_val);
+ ret_val = (weight_val < 0.5) ?
+ self_val + weight_val * (end_val - self_val) : end_val - (end_val - self_val) * (1 - weight_val);
});
}
scalar_t& ret_val,
const scalar_t& self_val,
const scalar_t& end_val) {
- ret_val = self_val + weight_val * (end_val - self_val);
+ ret_val = (weight_val < 0.5) ?
+ self_val + weight_val * (end_val - self_val) : end_val - (end_val - self_val) * (1 - weight_val);
});
}
} // namespace