Operator: aten._log_softmax.default
cnt: 1, ((T([64, 1000], f16), 1, False), {})
Operator: aten._log_softmax_backward_data.default
cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {})
Operator: aten._softmax.default
cnt: 3, ((T([64, 4, 962, 962], f16), -1, False), {})
cnt: 6, ((T([64, 8, 257, 257], f16), -1, False), {})
cnt: 4, ((T([64, 16, 65, 65], f16), -1, False), {})
Operator: aten._softmax_backward_data.default
cnt: 4, ((T([64, 16, 65, 65], f16), T([64, 16, 65, 65], f16), -1, f16), {})
cnt: 6, ((T([64, 8, 257, 257], f16), T([64, 8, 257, 257], f16), -1, f16), {})
cnt: 3, ((T([64, 4, 962, 962], f16), T([64, 4, 962, 962], f16), -1, f16), {})
Operator: aten._unsafe_view.default
cnt: 9, ((T([64, 4, 962, 64], f16), [256, 962, 64]), {})
cnt: 3, ((T([64, 4, 64, 962], f16), [256, 64, 962]), {})
cnt: 3, ((T([256, 962, 962], f16), [64, 4, 962, 962]), {})
cnt: 3, ((T([256, 962, 64], f16), [64, 4, 962, 64]), {})
cnt: 3, ((T([64, 962, 4, 64], f16), [64, 962, 256]), {})
cnt: 1, ((T([64, 512], f16), [64, 1, 512]), {})
cnt: 18, ((T([64, 8, 257, 64], f16), [512, 257, 64]), {})
cnt: 6, ((T([64, 8, 64, 257], f16), [512, 64, 257]), {})
cnt: 6, ((T([512, 257, 257], f16), [64, 8, 257, 257]), {})
cnt: 6, ((T([512, 257, 64], f16), [64, 8, 257, 64]), {})
cnt: 6, ((T([64, 257, 8, 64], f16), [64, 257, 512]), {})
cnt: 1, ((T([64, 1024], f16), [64, 1, 1024]), {})
cnt: 12, ((T([64, 16, 65, 64], f16), [1024, 65, 64]), {})
cnt: 4, ((T([64, 16, 64, 65], f16), [1024, 64, 65]), {})
cnt: 4, ((T([1024, 65, 65], f16), [64, 16, 65, 65]), {})
cnt: 4, ((T([1024, 65, 64], f16), [64, 16, 65, 64]), {})
cnt: 4, ((T([64, 65, 16, 64], f16), [64, 65, 1024]), {})
cnt: 4, ((T([64, 65, 3, 16, 64], f16), [64, 65, 3072]), {})
cnt: 6, ((T([64, 257, 3, 8, 64], f16), [64, 257, 1536]), {})
cnt: 3, ((T([64, 962, 3, 4, 64], f16), [64, 962, 768]), {})
Operator: aten.add.Tensor
cnt: 1, ((T([64, 256, 31, 31], f16), T([1, 256, 31, 31], f16)), {})
cnt: 13, ((T([64, 962, 256], f16), T([64, 962, 256], f16)), {})
cnt: 1, ((T([64, 1, 512], f16), T([512], f16)), {})
cnt: 25, ((T([64, 257, 512], f16), T([64, 257, 512], f16)), {})
cnt: 1, ((T([64, 1, 1024], f16), T([1024], f16)), {})
cnt: 16, ((T([64, 65, 1024], f16), T([64, 65, 1024], f16)), {})
Operator: aten.addmm.default
cnt: 3, ((T([768], f16), T([61568, 256], f16), T([256, 768], f16, stride=(1, 256))), {})
cnt: 3, ((T([256], f16), T([61568, 256], f16), T([256, 256], f16, stride=(1, 256))), {})
cnt: 3, ((T([1024], f16), T([61568, 256], f16), T([256, 1024], f16, stride=(1, 256))), {})
cnt: 3, ((T([256], f16), T([61568, 1024], f16), T([1024, 256], f16, stride=(1, 1024))), {})
cnt: 6, ((T([1536], f16), T([16448, 512], f16), T([512, 1536], f16, stride=(1, 512))), {})
cnt: 6, ((T([512], f16), T([16448, 512], f16), T([512, 512], f16, stride=(1, 512))), {})
cnt: 6, ((T([2048], f16), T([16448, 512], f16), T([512, 2048], f16, stride=(1, 512))), {})
cnt: 6, ((T([512], f16), T([16448, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {})
cnt: 4, ((T([3072], f16), T([4160, 1024], f16), T([1024, 3072], f16, stride=(1, 1024))), {})
cnt: 4, ((T([1024], f16), T([4160, 1024], f16), T([1024, 1024], f16, stride=(1, 1024))), {})
cnt: 4, ((T([4096], f16), T([4160, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {})
cnt: 4, ((T([1024], f16), T([4160, 4096], f16), T([4096, 1024], f16, stride=(1, 4096))), {})
cnt: 1, ((T([1000], f16), T([64, 1024], f16), T([1024, 1000], f16, stride=(1, 1024))), {})
Operator: aten.bmm.default
cnt: 3, ((T([256, 962, 64], f16), T([256, 64, 962], f16)), {})
cnt: 3, ((T([256, 962, 962], f16), T([256, 962, 64], f16)), {})
cnt: 6, ((T([512, 257, 64], f16), T([512, 64, 257], f16)), {})
cnt: 6, ((T([512, 257, 257], f16), T([512, 257, 64], f16)), {})
cnt: 4, ((T([1024, 65, 64], f16), T([1024, 64, 65], f16)), {})
cnt: 4, ((T([1024, 65, 65], f16), T([1024, 65, 64], f16)), {})
cnt: 4, ((T([1024, 65, 65], f16, stride=(4225, 1, 65)), T([1024, 65, 64], f16)), {})
cnt: 4, ((T([1024, 65, 64], f16), T([1024, 64, 65], f16, stride=(4160, 1, 64))), {})
cnt: 4, ((T([1024, 64, 65], f16, stride=(4160, 1, 64)), T([1024, 65, 65], f16)), {})
cnt: 4, ((T([1024, 65, 65], f16), T([1024, 65, 64], f16, stride=(4160, 1, 65))), {})
cnt: 6, ((T([512, 257, 257], f16, stride=(66049, 1, 257)), T([512, 257, 64], f16)), {})
cnt: 6, ((T([512, 257, 64], f16), T([512, 64, 257], f16, stride=(16448, 1, 64))), {})
cnt: 6, ((T([512, 64, 257], f16, stride=(16448, 1, 64)), T([512, 257, 257], f16)), {})
cnt: 6, ((T([512, 257, 257], f16), T([512, 257, 64], f16, stride=(16448, 1, 257))), {})
cnt: 3, ((T([256, 962, 962], f16, stride=(925444, 1, 962)), T([256, 962, 64], f16)), {})
cnt: 3, ((T([256, 962, 64], f16), T([256, 64, 962], f16, stride=(61568, 1, 64))), {})
cnt: 3, ((T([256, 64, 962], f16, stride=(61568, 1, 64)), T([256, 962, 962], f16)), {})
cnt: 3, ((T([256, 962, 962], f16), T([256, 962, 64], f16, stride=(61568, 1, 962))), {})
Operator: aten.cat.default
cnt: 1, (([T([64, 1, 256], f16, stride=(0, 256, 1)), T([64, 961, 256], f16, stride=(246016, 1, 961))], 1), {})
cnt: 1, (([T([64, 1, 512], f16), T([64, 256, 512], f16, stride=(131072, 1, 256))], 1), {})
cnt: 1, (([T([64, 1, 1024], f16), T([64, 64, 1024], f16, stride=(65536, 1, 64))], 1), {})
Operator: aten.clone.default
cnt: 1, ((T([64, 3, 224, 224], f16),), {})
Operator: aten.convolution.default
cnt: 1, ((T([64, 3, 224, 224], f16), T([256, 3, 14, 14], f16), T([256], f16), [7, 7], [0, 0], [1, 1], False, [0, 0], 1), {})
cnt: 1, ((T([64, 256, 31, 31], f16, stride=(246272, 1, 7936, 256)), T([512, 1, 3, 3], f16), T([512], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 256), {})
cnt: 1, ((T([64, 512, 16, 16], f16, stride=(131584, 1, 8192, 512)), T([1024, 1, 3, 3], f16), T([1024], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 512), {})
Operator: aten.convolution_backward.default
cnt: 1, ((T([64, 1024, 8, 8], f16, stride=(66560, 1, 8192, 1024)), T([64, 512, 16, 16], f16, stride=(131584, 1, 8192, 512)), T([1024, 1, 3, 3], f16), [1024], [2, 2], [1, 1], [1, 1], False, [0, 0], 512, [True, True, True]), {})
cnt: 1, ((T([64, 512, 16, 16], f16, stride=(131584, 1, 8192, 512)), T([64, 256, 31, 31], f16, stride=(246272, 1, 7936, 256)), T([512, 1, 3, 3], f16), [512], [2, 2], [1, 1], [1, 1], False, [0, 0], 256, [True, True, True]), {})
cnt: 1, ((T([64, 256, 31, 31], f16, stride=(246272, 1, 7936, 256)), T([64, 3, 224, 224], f16), T([256, 3, 14, 14], f16), [256], [7, 7], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {})
Operator: aten.copy_.default
cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {})
Operator: aten.gelu.default
cnt: 3, ((T([64, 962, 1024], f16),), {})
cnt: 6, ((T([64, 257, 2048], f16),), {})
cnt: 4, ((T([64, 65, 4096], f16),), {})
Operator: aten.gelu_backward.default
cnt: 4, ((T([64, 65, 4096], f16), T([64, 65, 4096], f16)), {})
cnt: 6, ((T([64, 257, 2048], f16), T([64, 257, 2048], f16)), {})
cnt: 3, ((T([64, 962, 1024], f16), T([64, 962, 1024], f16)), {})
Operator: aten.lift_fresh_copy.default
cnt: 1, ((T([64], i64),), {})
Operator: aten.mm.default
cnt: 1, ((T([64, 256], f16, stride=(246272, 1)), T([256, 512], f16, stride=(1, 256))), {})
cnt: 1, ((T([64, 512], f16, stride=(131584, 1)), T([512, 1024], f16, stride=(1, 512))), {})
cnt: 1, ((T([64, 1000], f16), T([1000, 1024], f16)), {})
cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 1024], f16)), {})
cnt: 4, ((T([4160, 1024], f16), T([1024, 4096], f16)), {})
cnt: 4, ((T([1024, 4160], f16, stride=(1, 1024)), T([4160, 4096], f16)), {})
cnt: 4, ((T([4160, 4096], f16), T([4096, 1024], f16)), {})
cnt: 4, ((T([4096, 4160], f16, stride=(1, 4096)), T([4160, 1024], f16)), {})
cnt: 4, ((T([4160, 1024], f16), T([1024, 1024], f16)), {})
cnt: 4, ((T([1024, 4160], f16, stride=(1, 1024)), T([4160, 1024], f16)), {})
cnt: 4, ((T([4160, 3072], f16), T([3072, 1024], f16)), {})
cnt: 4, ((T([3072, 4160], f16, stride=(1, 3072)), T([4160, 1024], f16)), {})
cnt: 1, ((T([1024, 64], f16, stride=(1, 66560)), T([64, 512], f16, stride=(131584, 1))), {})
cnt: 1, ((T([64, 1024], f16, stride=(66560, 1)), T([1024, 512], f16)), {})
cnt: 6, ((T([16448, 512], f16), T([512, 2048], f16)), {})
cnt: 6, ((T([512, 16448], f16, stride=(1, 512)), T([16448, 2048], f16)), {})
cnt: 6, ((T([16448, 2048], f16), T([2048, 512], f16)), {})
cnt: 6, ((T([2048, 16448], f16, stride=(1, 2048)), T([16448, 512], f16)), {})
cnt: 6, ((T([16448, 512], f16), T([512, 512], f16)), {})
cnt: 6, ((T([512, 16448], f16, stride=(1, 512)), T([16448, 512], f16)), {})
cnt: 6, ((T([16448, 1536], f16), T([1536, 512], f16)), {})
cnt: 6, ((T([1536, 16448], f16, stride=(1, 1536)), T([16448, 512], f16)), {})
cnt: 1, ((T([512, 64], f16, stride=(1, 131584)), T([64, 256], f16, stride=(246272, 1))), {})
cnt: 1, ((T([64, 512], f16, stride=(131584, 1)), T([512, 256], f16)), {})
cnt: 3, ((T([61568, 256], f16), T([256, 1024], f16)), {})
cnt: 3, ((T([256, 61568], f16, stride=(1, 256)), T([61568, 1024], f16)), {})
cnt: 3, ((T([61568, 1024], f16), T([1024, 256], f16)), {})
cnt: 3, ((T([1024, 61568], f16, stride=(1, 1024)), T([61568, 256], f16)), {})
cnt: 3, ((T([61568, 256], f16), T([256, 256], f16)), {})
cnt: 3, ((T([256, 61568], f16, stride=(1, 256)), T([61568, 256], f16)), {})
cnt: 3, ((T([61568, 768], f16), T([768, 256], f16)), {})
cnt: 3, ((T([768, 61568], f16, stride=(1, 768)), T([61568, 256], f16)), {})
Operator: aten.mul.Tensor
cnt: 6, ((T([64, 4, 962, 962], f16), 0.125), {})
cnt: 12, ((T([64, 8, 257, 257], f16), 0.125), {})
cnt: 8, ((T([64, 16, 65, 65], f16), 0.125), {})
Operator: aten.native_layer_norm.default
cnt: 6, ((T([64, 962, 256], f16), [256], T([256], f16), T([256], f16), 1e-06), {})
cnt: 12, ((T([64, 257, 512], f16), [512], T([512], f16), T([512], f16), 1e-06), {})
cnt: 8, ((T([64, 65, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-06), {})
cnt: 1, ((T([64, 1, 1024], f16, stride=(66560, 1024, 1)), [1024], T([1024], f16), T([1024], f16), 1e-06), {})
Operator: aten.native_layer_norm_backward.default
cnt: 1, ((T([64, 1, 1024], f16), T([64, 1, 1024], f16, stride=(66560, 1024, 1)), [1024], T([64, 1, 1], f32), T([64, 1, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {})
cnt: 8, ((T([64, 65, 1024], f16), T([64, 65, 1024], f16), [1024], T([64, 65, 1], f32), T([64, 65, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {})
cnt: 12, ((T([64, 257, 512], f16), T([64, 257, 512], f16), [512], T([64, 257, 1], f32), T([64, 257, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {})
cnt: 6, ((T([64, 962, 256], f16), T([64, 962, 256], f16), [256], T([64, 962, 1], f32), T([64, 962, 1], f32), T([256], f16), T([256], f16), [True, True, True]), {})
Operator: aten.nll_loss_backward.default
cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {})
Operator: aten.nll_loss_forward.default
cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {})
Operator: aten.select_backward.default
cnt: 1, ((T([64, 1024], f16), [64, 1, 1024], 1, 0), {})
Operator: aten.slice_backward.default
cnt: 1, ((T([64, 1, 1024], f16), [64, 1, 1024], 0, 0, 9223372036854775807, 1), {})
cnt: 1, ((T([64, 1, 1024], f16), [64, 65, 1024], 1, 0, 1, 1), {})
cnt: 1, ((T([64, 65, 1024], f16), [64, 65, 1024], 0, 0, 9223372036854775807, 1), {})
cnt: 1, ((T([64, 256, 512], f16), [64, 257, 512], 1, 1, 9223372036854775807, 1), {})
cnt: 2, ((T([64, 257, 512], f16), [64, 257, 512], 0, 0, 9223372036854775807, 1), {})
cnt: 1, ((T([64, 1, 512], f16), [64, 257, 512], 1, 0, 1, 1), {})
cnt: 1, ((T([64, 961, 256], f16), [64, 962, 256], 1, 1, 9223372036854775807, 1), {})
cnt: 2, ((T([64, 962, 256], f16), [64, 962, 256], 0, 0, 9223372036854775807, 1), {})
cnt: 1, ((T([64, 1, 256], f16), [64, 962, 256], 1, 0, 1, 1), {})
Operator: aten.stack.default
cnt: 4, (([T([64, 16, 65, 64], f16), T([64, 16, 65, 64], f16, stride=(66560, 4160, 1, 65)), T([64, 16, 65, 64], f16)],), {})
cnt: 6, (([T([64, 8, 257, 64], f16), T([64, 8, 257, 64], f16, stride=(131584, 16448, 1, 257)), T([64, 8, 257, 64], f16)],), {})
cnt: 3, (([T([64, 4, 962, 64], f16), T([64, 4, 962, 64], f16, stride=(246272, 61568, 1, 962)), T([64, 4, 962, 64], f16)],), {})
Operator: aten.sum.SymInt
cnt: 1, ((T([64, 1000], f16), [0], True), {})
cnt: 8, ((T([4160, 1024], f16), [0], True), {})
cnt: 4, ((T([4160, 4096], f16), [0], True), {})
cnt: 4, ((T([4160, 3072], f16), [0], True), {})
cnt: 1, ((T([64, 1, 1024], f16, stride=(66560, 1024, 1)), [0, 1], True), {})
cnt: 12, ((T([16448, 512], f16), [0], True), {})
cnt: 6, ((T([16448, 2048], f16), [0], True), {})
cnt: 6, ((T([16448, 1536], f16), [0], True), {})
cnt: 1, ((T([64, 1, 512], f16, stride=(131584, 512, 1)), [0, 1], True), {})
cnt: 6, ((T([61568, 256], f16), [0], True), {})
cnt: 3, ((T([61568, 1024], f16), [0], True), {})
cnt: 3, ((T([61568, 768], f16), [0], True), {})
cnt: 1, ((T([64, 1, 256], f16, stride=(246272, 256, 1)), [0], True), {})
cnt: 1, ((T([64, 256, 31, 31], f16, stride=(246272, 1, 7936, 256)), [0], True), {})
Operator: aten.unbind.int
cnt: 3, ((T([3, 64, 4, 962, 64], f16, stride=(256, 738816, 64, 768, 1)),), {})
cnt: 6, ((T([3, 64, 8, 257, 64], f16, stride=(512, 394752, 64, 1536, 1)),), {})
cnt: 4, ((T([3, 64, 16, 65, 64], f16, stride=(1024, 199680, 64, 3072, 1)),), {})
