torch.bool,
]
self.fp_dtypes = [
- # TODO: Add back when https://github.com/pytorch/pytorch/issues/55905 is closed
- # torch.float16,
+ torch.float16,
torch.float32,
torch.float64,
]
dtypes = [
torch.bool,
torch.int,
- # TODO: Add back when https://github.com/pytorch/pytorch/issues/55905 is closed
- # torch.float16,
+ torch.float16,
torch.float32,
torch.float64,
]
bad_dtypes = []
for dtype, output_dtype, device, size in product(dtypes, dtypes, self.devices, sizes):
+ # TODO: Add back when https://github.com/pytorch/pytorch/issues/55905 is closed
+ if dtype == torch.float16 and device == "cpu":
+ continue
if dtype == output_dtype:
continue
torch.int16,
torch.int32,
torch.int64,
- # TODO: Add back when https://github.com/pytorch/pytorch/issues/55905 is closed
- # torch.float16,
+ torch.float16,
torch.float32,
torch.float64,
torch.bool,
]
for inp, device, dtype in product(inputs, self.devices, dtypes):
- # TODO
- if dtype == torch.float16 and not LLVM_ENABLED:
+ # TODO: Add back when https://github.com/pytorch/pytorch/issues/55905 is closed
+ if dtype == torch.float16 and device == "cpu":
continue
-
inp = inp.to(device=device, dtype=dtype)
try:
f = torch.jit.trace(lambda x: x.isnan(), (inp,))
gpu_only = {torch.erf, torch.erfc}
sizes = [(1,), (2,), (4, 4)]
for dtype, op, device, size in product(self.dtypes, unary_ops, self.devices, sizes):
+ # TODO: Add back when https://github.com/pytorch/pytorch/issues/55905 is closed
+ if dtype == torch.float16 and device == "cpu":
+ continue
if op in gpu_only and device == "cpu":
continue
try:
]
devices = self.devices
for dtype, op, device in product(self.dtypes, binary_ops, devices):
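+ # TODO: Add back when https://github.com/pytorch/pytorch/issues/55905 is closed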
+ if dtype == torch.float16 and device == "cpu":
+ continue
try:
x = self.data_for(dtype, device)
y = self.data_for(dtype, device)
"[[10, 3, 4], [4, 5]]",
]
for dtype, size, device in product(self.dtypes, sizes, devices):
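+ # TODO: Add back when https://github.com/pytorch/pytorch/issues/55905 is closed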
+ if dtype == torch.float16 and device == "cpu":
+ continue
try:
size_x, size_y = size
x = self.data_for(dtype, device, size=size_x)
# only using scalar values relevant to particular ops
scalars = [1.5, 3, 0, -2.0, -1]
for dtype, op, device, scalar in product(self.dtypes, binary_ops, devices, scalars):
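+ # TODO: Add back when https://github.com/pytorch/pytorch/issues/55905 is closed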
+ if dtype == torch.float16 and device == "cpu":
+ continue
try:
x = self.data_for(dtype, device)
fn = apply_with_scalar(op, scalar)
# only using scalar values relevant to particular ops
scalars = [1.5, 3, -2.0, -1] # skip 0
for dtype, op, device, scalar in product(self.dtypes, binary_ops, devices, scalars):
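+ # TODO: Add back when https://github.com/pytorch/pytorch/issues/55905 is closed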
+ if dtype == torch.float16 and device == "cpu":
+ continue
try:
x = self.data_for(dtype, device)
fn = apply_with_scalar(op, scalar)
# only using scalar values relevant to particular ops
scalars = [1.5, 3, 0, -2.0, -1]
for dtype, op, device, scalar in product(dtypes, binary_ops, self.devices, scalars):
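+ # TODO: Add back when https://github.com/pytorch/pytorch/issues/55905 is closed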
+ if dtype == torch.float16 and device == "cpu":
+ continue
try:
x = self.data_for(dtype, device)
fn = apply_with_scalar(op, scalar)
]
devices = self.devices
for dtype, op, device in product(self.dtypes, ternary_ops, devices):
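+ # TODO: Add back when https://github.com/pytorch/pytorch/issues/55905 is closed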
+ if dtype == torch.float16 and device == "cpu":
+ continue
try:
x = self.data_for(dtype, device)
y = self.data_for(dtype, device)
]
devices = self.devices
for dtype, op, device in product(self.dtypes, ternary_ops, devices):
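+ # TODO: Add back when https://github.com/pytorch/pytorch/issues/55905 is closed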
+ if dtype == torch.float16 and device == "cpu":
+ continue
try:
x = self.data_for(dtype, device, size=[5, 3, 128, 128])
y = self.data_for(dtype, device, size=[3])
torch.cat,
]
for dtype, op, device in product(self.dtypes, list_ops, devices):
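+ # TODO: Add back when https://github.com/pytorch/pytorch/issues/55905 is closed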
+ if dtype == torch.float16 and device == "cpu":
+ continue
try:
x = self.data_for(dtype, device, size=[5, 4, 1, 7])
y = self.data_for(dtype, device, size=[5, 4, 1, 7])
]
devices = self.devices
for dtype, op, device in product(self.dtypes, ops, devices):
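+ # TODO: Add back when https://github.com/pytorch/pytorch/issues/55905 is closed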
+ if dtype == torch.float16 and device == "cpu":
+ continue
try:
cond = self.data_for(torch.bool, device)
x = self.data_for(dtype, device)
with inline_fusion_groups():
def eager(x, y):
return torch.cat((x, y.type_as(x)), dim=1)
- for dtype1, dtype2 in product(self.dtypes, self.dtypes):
+ dtypes = self.dtypes.copy()
+ # CPU fuser doesn't support float16 yet (see https://github.com/pytorch/pytorch/issues/55905).
+ dtypes.remove(torch.float16)
+ for dtype1, dtype2 in product(dtypes, dtypes):
x = torch.randint(2, (1, 13,)).to(dtype1)
zero = torch.tensor([[0]]).to(dtype2)
one = torch.tensor([[1]]).to(dtype2)
for fn in [bn, bn_no_weight, bn_no_bias, bn_neither]:
test(fn, (i, x))
-
works_list = [
'__radd__',
'__rdiv__',
return v;
}
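+ // Arithmetic on Half values is computed in float: loads and stores are
+ // wrapped in half<->float casts (tracked in inserted_half_casts_), so any
+ // expression still typed as kHalf after its operands have been mutated is
+ // promoted to kFloat here.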
+ template <typename T>
+ ExprPtr mutateArithmetic(T v) {
+ IRMutator::mutate(v);
+ if (v->dtype().scalar_type() == c10::kHalf) {
+ v->set_dtype(v->dtype().cloneWithScalarType(c10::kFloat));
+ }
+ return v;
+ }
+
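+ // Every arithmetic-like node type funnels through mutateArithmetic above,
+ // keeping the Half->Float promotion logic in one place.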
+ ExprPtr mutate(AddPtr v) override {
+ return mutateArithmetic(v);
+ }
+ ExprPtr mutate(SubPtr v) override {
+ return mutateArithmetic(v);
+ }
+ ExprPtr mutate(MulPtr v) override {
+ return mutateArithmetic(v);
+ }
+ ExprPtr mutate(DivPtr v) override {
+ return mutateArithmetic(v);
+ }
+ ExprPtr mutate(MaxPtr v) override {
+ return mutateArithmetic(v);
+ }
+ ExprPtr mutate(MinPtr v) override {
+ return mutateArithmetic(v);
+ }
+ ExprPtr mutate(CompareSelectPtr v) override {
+ return mutateArithmetic(v);
+ }
+ ExprPtr mutate(BroadcastPtr v) override {
+ return mutateArithmetic(v);
+ }
+ ExprPtr mutate(IfThenElsePtr v) override {
+ return mutateArithmetic(v);
+ }
+ ExprPtr mutate(IntrinsicsPtr v) override {
+ return mutateArithmetic(v);
+ }
+
private:
std::unordered_set<ExprPtr> inserted_half_casts_;
std::unordered_map<VarPtr, VarPtr> var_map;