.type	${alg}${bits}_t4_ctr32_encrypt,#function
.size	${alg}${bits}_t4_ctr32_encrypt,.-${alg}${bits}_t4_ctr32_encrypt
___
}

sub alg_xts_implement {
my ($alg,$bits,$dir) = @_;
my ($inp,$out,$len,$key1,$key2,$ivec)=map("%i$_",(0..5));
my $rem=$ivec;

$::code.=<<___;
.globl	${alg}${bits}_t4_xts_${dir}crypt
.align	32
${alg}${bits}_t4_xts_${dir}crypt:
	save		%sp, -$::frame-16, %sp

	mov		$ivec, %o0
	add		%fp, $::bias-16, %o1
	call		${alg}_t4_encrypt
	mov		$key2, %o2

	add		%fp, $::bias-16, %l7
	ldxa		[%l7]0x88, %g2
	add		%fp, $::bias-8, %l7
	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak

	sethi		%hi(0x76543210), %l7
	or		%l7, %lo(0x76543210), %l7
	bmask		%l7, %g0, %g0		! byte swap mask
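	! the tweak is the IV encrypted with the second key (the call to
	! ${alg}_t4_encrypt above); it is kept in %g3:%g2 so it can be
	! updated with plain integer arithmetic, and the GSR mask set up
	! here lets bshuffle byte-swap each half on its way to the FPU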
|
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	call		_${alg}${bits}_load_${dir}ckey
	and		$len, 15, $rem
	and		$len, -16, $len
___
$code.=<<___ if ($dir eq "de");
	mov		0, %l7
	movrnz		$rem, 16, %l7
	sub		$len, %l7, $len
___
$code.=<<___;

	sub		$inp, $out, $blk_init	! $inp!=$out
	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		64, $iright
	mov		0xff, $omask
	sub		$iright, $ileft, $iright
	and		$out, 7, $ooff
	cmp		$len, 255
	movrnz		$ooff, 0, $blk_init		! if ($out&7 ||
	movleu		$::size_t_cc, 0, $blk_init	!     $len<256 ||
	brnz,pn		$blk_init, .L${bits}_xts_${dir}blk !   $inp==$out)
	srl		$omask, $ooff, $omask

	andcc		$len, 16, %g0		! is number of blocks even?
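	! when the block count is odd, one block is peeled off through the
	! 1x loop below before the code settles into the two-blocks-per-
	! iteration loop2x path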
|
___
$code.=<<___ if ($dir eq "de");
	brz,pn		$len, .L${bits}_xts_${dir}steal
___
$code.=<<___;
	alignaddrl	$out, %g0, $out
	bz		%icc, .L${bits}_xts_${dir}loop2x
	srlx		$len, 4, $len
.L${bits}_xts_${dir}loop:
	ldx		[$inp + 0], %o0
	brz,pt		$ileft, 4f
	ldx		[$inp + 8], %o1

	ldx		[$inp + 16], %o2
	sllx		%o0, $ileft, %o0
	srlx		%o1, $iright, %g1
	sllx		%o1, $ileft, %o1
	or		%g1, %o0, %o0
	srlx		%o2, $iright, %o2
	or		%o2, %o1, %o1
4:
	movxtod		%g2, %f12
	movxtod		%g3, %f14
	bshuffle	%f12, %f12, %f12
	bshuffle	%f14, %f14, %f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	movxtod		%o0, %f0
	movxtod		%o1, %f2

	fxor		%f12, %f0, %f0		! ^= tweak[0]
	fxor		%f14, %f2, %f2

	prefetch	[$out + 63], 22
	prefetch	[$inp + 16+63], 20
	call		_${alg}${bits}_${dir}crypt_1x
	add		$inp, 16, $inp

	fxor		%f12, %f0, %f0		! ^= tweak[0]
	fxor		%f14, %f2, %f2

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	addxc		%g3, %g3, %g3
	xor		%l7, %g2, %g2
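	! the five instructions above double the 128-bit tweak held in
	! %g3:%g2 and, when a bit falls off the top, fold 0x87 back into
	! the bottom byte, i.e. multiply by x modulo x^128+x^7+x^2+x+1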
|
	brnz,pn		$ooff, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	std		%f2, [$out + 8]
	brnz,pt		$len, .L${bits}_xts_${dir}loop2x
	add		$out, 16, $out

	brnz,pn		$rem, .L${bits}_xts_${dir}steal
	nop

	ret
	restore

.align	16
2:	ldxa		[$inp]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f4		! handle unaligned output
	faligndata	%f0, %f2, %f6
	faligndata	%f2, %f2, %f8
	stda		%f4, [$out + $omask]0xc0	! partial store
	std		%f6, [$out + 8]
	add		$out, 16, $out
	orn		%g0, $omask, $omask
	stda		%f8, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .L${bits}_xts_${dir}loop2x+4
	orn		%g0, $omask, $omask

	brnz,pn		$rem, .L${bits}_xts_${dir}steal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L${bits}_xts_${dir}loop2x:
	ldx		[$inp + 0], %o0
	ldx		[$inp + 8], %o1
	ldx		[$inp + 16], %o2
	brz,pt		$ileft, 4f
	ldx		[$inp + 24], %o3

	ldx		[$inp + 32], %o4
	sllx		%o0, $ileft, %o0
	srlx		%o1, $iright, %g1
	or		%g1, %o0, %o0
	sllx		%o1, $ileft, %o1
	srlx		%o2, $iright, %g1
	or		%g1, %o1, %o1
	sllx		%o2, $ileft, %o2
	srlx		%o3, $iright, %g1
	or		%g1, %o2, %o2
	sllx		%o3, $ileft, %o3
	srlx		%o4, $iright, %o4
	or		%o4, %o3, %o3
4:
	movxtod		%g2, %f12
	movxtod		%g3, %f14
	bshuffle	%f12, %f12, %f12
	bshuffle	%f14, %f14, %f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	addxc		%g3, %g3, %g3
	xor		%l7, %g2, %g2

	movxtod		%g2, %f8
	movxtod		%g3, %f10
	bshuffle	%f8,  %f8,  %f8
	bshuffle	%f10, %f10, %f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	movxtod		%o0, %f0
	movxtod		%o1, %f2
	movxtod		%o2, %f4
	movxtod		%o3, %f6

	fxor		%f12, %f0, %f0		! ^= tweak[0]
	fxor		%f14, %f2, %f2
	fxor		%f8,  %f4, %f4		! ^= tweak[0]
	fxor		%f10, %f6, %f6

	prefetch	[$out + 63], 22
	prefetch	[$inp + 32+63], 20
	call		_${alg}${bits}_${dir}crypt_2x
	add		$inp, 32, $inp

	movxtod		%g2, %f8
	movxtod		%g3, %f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	addxc		%g3, %g3, %g3
	xor		%l7, %g2, %g2
|
	bshuffle	%f8,  %f8,  %f8
	bshuffle	%f10, %f10, %f10

	fxor		%f12, %f0, %f0		! ^= tweak[0]
	fxor		%f14, %f2, %f2
	fxor		%f8,  %f4, %f4
	fxor		%f10, %f6, %f6

	brnz,pn		$ooff, 2f
	sub		$len, 2, $len

	std		%f0, [$out + 0]
	std		%f2, [$out + 8]
	std		%f4, [$out + 16]
	std		%f6, [$out + 24]
	brnz,pt		$len, .L${bits}_xts_${dir}loop2x
	add		$out, 32, $out

	fsrc2		%f4, %f0
	fsrc2		%f6, %f2
	brnz,pn		$rem, .L${bits}_xts_${dir}steal
	nop

	ret
	restore

.align	16
2:	ldxa		[$inp]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f8		! handle unaligned output
	faligndata	%f0, %f2, %f10
	faligndata	%f2, %f4, %f12
	faligndata	%f4, %f6, %f14
	faligndata	%f6, %f6, %f0

	stda		%f8, [$out + $omask]0xc0	! partial store
	std		%f10, [$out + 8]
	std		%f12, [$out + 16]
	std		%f14, [$out + 24]
	add		$out, 32, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .L${bits}_xts_${dir}loop2x+4
	orn		%g0, $omask, $omask

	fsrc2		%f4, %f0
	fsrc2		%f6, %f2
	brnz,pn		$rem, .L${bits}_xts_${dir}steal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L${bits}_xts_${dir}blk:
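	! bulk path: taken only when the output is 8-byte aligned, the
	! length is at least 256 bytes and $inp!=$out (see the three
	! conditional moves above)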
|
	add		$out, $len, $blk_init
	and		$blk_init, 63, $blk_init	! tail
	sub		$len, $blk_init, $len
	add		$blk_init, 15, $blk_init	! round up to 16n
	srlx		$len, 4, $len
	srl		$blk_init, 4, $blk_init
	sub		$len, 1, $len
	add		$blk_init, 1, $blk_init

.L${bits}_xts_${dir}blk2x:
	ldx		[$inp + 0], %o0
	ldx		[$inp + 8], %o1
	ldx		[$inp + 16], %o2
	brz,pt		$ileft, 5f
	ldx		[$inp + 24], %o3

	ldx		[$inp + 32], %o4
	sllx		%o0, $ileft, %o0
	srlx		%o1, $iright, %g1
	or		%g1, %o0, %o0
	sllx		%o1, $ileft, %o1
	srlx		%o2, $iright, %g1
	or		%g1, %o1, %o1
	sllx		%o2, $ileft, %o2
	srlx		%o3, $iright, %g1
	or		%g1, %o2, %o2
	sllx		%o3, $ileft, %o3
	srlx		%o4, $iright, %o4
	or		%o4, %o3, %o3
5:
	movxtod		%g2, %f12
	movxtod		%g3, %f14
	bshuffle	%f12, %f12, %f12
	bshuffle	%f14, %f14, %f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	addxc		%g3, %g3, %g3
	xor		%l7, %g2, %g2

	movxtod		%g2, %f8
	movxtod		%g3, %f10
	bshuffle	%f8,  %f8,  %f8
	bshuffle	%f10, %f10, %f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	movxtod		%o0, %f0
	movxtod		%o1, %f2
	movxtod		%o2, %f4
	movxtod		%o3, %f6

	fxor		%f12, %f0, %f0		! ^= tweak[0]
	fxor		%f14, %f2, %f2
	fxor		%f8,  %f4, %f4		! ^= tweak[0]
	fxor		%f10, %f6, %f6

	prefetch	[$inp + 32+63], 20
	call		_${alg}${bits}_${dir}crypt_2x
	add		$inp, 32, $inp

	movxtod		%g2, %f8
	movxtod		%g3, %f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	addxc		%g3, %g3, %g3
	xor		%l7, %g2, %g2

	bshuffle	%f8,  %f8,  %f8
	bshuffle	%f10, %f10, %f10

	fxor		%f12, %f0, %f0		! ^= tweak[0]
	fxor		%f14, %f2, %f2
	fxor		%f8,  %f4, %f4
	fxor		%f10, %f6, %f6
|
	subcc		$len, 2, $len
	stda		%f0, [$out]0xe2		! ASI_BLK_INIT, T4-specific
	add		$out, 8, $out
	stda		%f2, [$out]0xe2		! ASI_BLK_INIT, T4-specific
	add		$out, 8, $out
	stda		%f4, [$out]0xe2		! ASI_BLK_INIT, T4-specific
	add		$out, 8, $out
	stda		%f6, [$out]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		$::size_t_cc, .L${bits}_xts_${dir}blk2x
	add		$out, 8, $out

	add		$blk_init, $len, $len
	andcc		$len, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
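	! the membar orders the block-initializing stores above ahead of
	! any loads and stores issued once we fall back into the ordinary
	! code paths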
|
	bnz,pt		%icc, .L${bits}_xts_${dir}loop
	srl		$len, 0, $len
	brnz,pn		$len, .L${bits}_xts_${dir}loop2x
	nop

	fsrc2		%f4, %f0
	fsrc2		%f6, %f2
	brnz,pn		$rem, .L${bits}_xts_${dir}steal
	nop

	ret
	restore
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
___
$code.=<<___ if ($dir eq "en");
.align	32
.L${bits}_xts_${dir}steal:
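	! ciphertext stealing: keep a copy of the last full ciphertext
	! block, hand its first $rem bytes out as the final partial block,
	! replace them with the trailing input bytes and run the 1x loop
	! once more on the stitched block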
|
	std		%f0, [%fp + $::bias-16]	! copy of output
	std		%f2, [%fp + $::bias-8]

	srl		$ileft, 3, $ileft
	add		%fp, $::bias-16, %l7
	add		$inp, $ileft, $inp	! original $inp+$len&-15
	add		$out, $ooff, $out	! original $out+$len&-15
	mov		0, $ileft
	nop					! align

.L${bits}_xts_${dir}stealing:
	ldub		[$inp + $ileft], %o0
	ldub		[%l7 + $ileft], %o1
	dec		$rem
	stb		%o0, [%l7 + $ileft]
	stb		%o1, [$out + $ileft]
	brnz		$rem, .L${bits}_xts_${dir}stealing
	inc		$ileft

	mov		%l7, $inp
	sub		$out, 16, $out
	mov		0, $ileft
	sub		$out, $ooff, $out
	ba		.L${bits}_xts_${dir}loop	! one more time
	mov		1, $len				! $rem is 0
___
$code.=<<___ if ($dir eq "de");
.align	32
.L${bits}_xts_${dir}steal:
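	! ciphertext stealing, decrypt flavour: the last full input block
	! pairs with the *next* tweak, so it is decrypted with that tweak
	! first and stashed; the trailing input bytes are then spliced in
	! and the 1x loop is rerun on the stitched block with the current
	! tweak still in %g3:%g2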
|
	ldx		[$inp + 0], %o0
	brz,pt		$ileft, 8f
	ldx		[$inp + 8], %o1

	ldx		[$inp + 16], %o2
	sllx		%o0, $ileft, %o0
	srlx		%o1, $iright, %g1
	sllx		%o1, $ileft, %o1
	or		%g1, %o0, %o0
	srlx		%o2, $iright, %o2
	or		%o2, %o1, %o1
8:
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %o2
	and		%l7, 0x87, %l7
	addxc		%g3, %g3, %o3
	xor		%l7, %o2, %o2

	movxtod		%o2, %f12
	movxtod		%o3, %f14
	bshuffle	%f12, %f12, %f12
	bshuffle	%f14, %f14, %f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	movxtod		%o0, %f0
	movxtod		%o1, %f2

	fxor		%f12, %f0, %f0		! ^= tweak[0]
	fxor		%f14, %f2, %f2

	call		_${alg}${bits}_${dir}crypt_1x
	add		$inp, 16, $inp

	fxor		%f12, %f0, %f0		! ^= tweak[0]
	fxor		%f14, %f2, %f2

	std		%f0, [%fp + $::bias-16]
	std		%f2, [%fp + $::bias-8]

	srl		$ileft, 3, $ileft
	add		%fp, $::bias-16, %l7
	add		$inp, $ileft, $inp	! original $inp+$len&-15
	add		$out, $ooff, $out	! original $out+$len&-15
	mov		0, $ileft
	add		$out, 16, $out
	nop					! align

.L${bits}_xts_${dir}stealing:
	ldub		[$inp + $ileft], %o0
	ldub		[%l7 + $ileft], %o1
	dec		$rem
	stb		%o0, [%l7 + $ileft]
	stb		%o1, [$out + $ileft]
	brnz		$rem, .L${bits}_xts_${dir}stealing
	inc		$ileft

	mov		%l7, $inp
	sub		$out, 16, $out
	mov		0, $ileft
	sub		$out, $ooff, $out
	ba		.L${bits}_xts_${dir}loop	! one more time
	mov		1, $len				! $rem is 0
___
$code.=<<___;
	ret
	restore
.type	${alg}${bits}_t4_xts_${dir}crypt,#function
.size	${alg}${bits}_t4_xts_${dir}crypt,.-${alg}${bits}_t4_xts_${dir}crypt
___
}

# The purpose of these subroutines is to explicitly encode VIS instructions,
# so that one can compile the module without having to specify VIS
# extensions on the compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
# The idea is to reserve the option of producing a "universal" binary and
# letting the programmer detect at run-time whether the current CPU is VIS
# capable.
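# As a rough illustration (the exact byte values are produced by the code
# below, not by this comment): emit_assembler() at the end of this file
# spots a mnemonic such as "faligndata %f0,%f2,%f4" in $::code, asks the
# matching un* helper to encode it, and prints something of the form
#	.word	0x........ !faligndata %f0,%f2,%f4
# so the assembler only ever sees a pre-encoded opcode while the original
# text survives as a comment.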
sub unvis {
my ($mnemonic,$rs1,$rs2,$rd)=@_;
my ($ref,$opf);
my %visopf =	(	"faligndata"	=> 0x048,
			"bshuffle"	=> 0x04c,
			"fnot2"		=> 0x066,
			"fxor"		=> 0x06c,
			"fsrc2"		=> 0x078	);

    $ref = "$mnemonic\t$rs1,$rs2,$rd";
    # ... (encoding body elided) ...
    } else {
	return $ref;
    }
}
|
sub undes {
my ($mnemonic)=shift;
my @args=@_;
my ($ref,$opf);
my %desopf =	(	"des_round"	=> 0b1001,
			"des_ip"	=> 0b100110100,
			"des_iip"	=> 0b100110101,
			"des_kexpand"	=> 0b100110110	);

    $ref = "$mnemonic\t".join(",",@_);

    if (defined($opf=$desopf{$mnemonic})) {	# 4-arg
	if ($mnemonic eq "des_round") {
	    foreach (@args[0..3]) {
		return $ref if (!/%f([0-9]{1,2})/);
		$_=$1;
		if ($1>=32) {
		    return $ref if ($1&1);
		    # re-encode for upper double register addressing
		    $_=($1|$1>>5)&31;
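		    # (bit 5 of the register number is folded into bit 0,
		    # the SPARC V9 encoding for double registers %f32-%f62:
		    # %f32 encodes as 1, %f34 as 3, and so on)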
|
		}
	    }
	    return  sprintf ".word\t0x%08x !%s",
			    2<<30|0b011001<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<9|$args[3]<<25,
			    $ref;
	} elsif ($mnemonic eq "des_kexpand") {	# 3-arg
	    foreach (@args[0..2]) {
		return $ref if (!/(%f)?([0-9]{1,2})/);
		$_=$2;
		if ($2>=32) {
		    return $ref if ($2&1);
		    # re-encode for upper double register addressing
		    $_=($2|$2>>5)&31;
		}
	    }
	    return  sprintf ".word\t0x%08x !%s",
			    2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<25,
			    $ref;
	} else {				# 2-arg
	    foreach (@args[0..1]) {
		return $ref if (!/%f([0-9]{1,2})/);
		$_=$1;
		if ($1>=32) {
|
		    return $ref if ($1&1);
|
		    # re-encode for upper double register addressing
		    $_=($1|$1>>5)&31;
		}
	    }
	    return  sprintf ".word\t0x%08x !%s",
			    2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]<<25,
			    $ref;
	}
    } else {
	return $ref;
    }
}
|
sub emit_assembler {
    foreach (split("\n",$::code)) {
	s/\`([^\`]*)\`/eval $1/ge;

	s/\b(f[a-z]+2[sd]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})\s*$/$1\t%f0,$2,$3/go;

	s/\b(aes_[edk][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/
		&unaes_round($1,$2,$3,$4,$5)
	 /geo or
	s/\b(aes_kexpand[02])\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
		&unaes_kexpand($1,$2,$3,$4)
	 /geo or
	s/\b(camellia_f)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/
		&uncamellia_f($1,$2,$3,$4,$5)
	 /geo or
	s/\b(camellia_[^s]+)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
		&uncamellia3($1,$2,$3,$4)
	 /geo or
	s/\b(des_\w+)\s+(?<rs1>%f[0-9]{1,2}),\s*(?<rs2>[%fx0-9]+)(,\s*(?<rs3>%f[0-9]{1,2})(,\s*(?<rs4>%f[0-9]{1,2}))?)?/
		&undes($1,$+{rs1},$+{rs2},$+{rs3},$+{rs4})
	 /geo or
	s/\b(mov[ds]to\w+)\s+(%f[0-9]{1,2}),\s*(%[goli][0-7])/
		&unmovxtox($1,$2,$3)
	 /geo or
	s/\b(mov[xw]to[ds])\s+(%[goli][0-7]),\s*(%f[0-9]{1,2})/
		&unmovxtox($1,$2,$3)
	 /geo or
	s/\b([fb][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
		&unvis($1,$2,$3,$4)
	 /geo or
	s/\b(umulxhi|bmask|addxc[c]{0,2}|alignaddr[l]*)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
		&unvis3($1,$2,$3,$4)
	 /geo;

	print $_,"\n";
    }
}
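# Typical driver shape (illustrative only; the script name, algorithm and
# key sizes shown here are assumptions, not taken from this file):
#
#	# set $::frame, $::bias, $::size_t_cc, $::evp for the target ABI
#	require "sparcv9_modes.pl";		# this file (filename assumed)
#	&alg_xts_implement("aes", 128, "en");	# appends to $::code
#	&alg_xts_implement("aes", 128, "de");
#	&emit_assembler();			# post-processes and prints $::code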