浮動小数点数(float)の計算を自前でやってみた

投稿記事 by **みけCAT** » 10年前

普段はfloatやdoubleなどの浮動小数点数の加減乗除は演算子1個や数個の機械語命令で勝手にやってくれますが、
8086run（執筆時点)などの一部の恵まれていない環境では浮動小数点数演算を行う命令がありません。
そこで、整数の演算でfloat型相当の浮動小数点数演算を行うプログラムを組んでみました。
int = unsigned int = float = 32ビット、long long = unsigned long long = 64ビットを想定しています。

CODE:

#include 
#include 
#include 

const unsigned int inf_value = 0x7f800000;
const unsigned int nan_value = 0x7fffffff;
const unsigned int ind_value = 0xffc00000;

void bunkai(unsigned int *fugou, unsigned int *sisu, unsigned int *kasu, unsigned int f) {
	*fugou = (f >> 31) & 0x01;
	*sisu = (f >> 23) & 0xff;
	*kasu = f & 0x7fffff;
}

/* 引数にNaNがあるかをチェックして、あったらそれを返す(INDもここではNaNと判定される) */
unsigned int check_nan(unsigned int a, unsigned int b) {
	if ((a & 0x7f800000) == 0x7f800000 && (a & 0x7fffff)) return a; /* a = NaN */
	if ((b & 0x7f800000) == 0x7f800000 && (b & 0x7fffff)) return b; /* b = NaN */
	return 0;
}

/* 正規化前の計算結果、指数、符号から、答えを構築して返す */
unsigned int get_result(int ret_fugou, int ret_sisu, long long ret_calc) {
	unsigned int ret = 0;
	if (ret_calc == 0) {
		/* 結果がゼロ */
		ret = 0;
	} else {
		/* 丸め */
		long long next_mask, sita_mask, ue_mask;
		int first_pos;
		for (first_pos = 63; (ret_calc >> first_pos) == 0; first_pos--);
		if (ret_sisu + (first_pos - 23) >= 1) {
			/* 指数の調整で正規化数にできる場合 */
			if (first_pos >= 24) {
				next_mask = 1LL = -62) ret_calc >>= -ret_sisu + 1; else ret_calc = 0;
			ret_sisu = 1;
		}
		if (ret_calc >= 0x800000) {
			while (ret_calc >= 0x1000000) {
				ret_calc >>= 1;
				ret_sisu++;
			}
		} else {
			while (ret_sisu > 1 && ret_calc = 255) {
			/* inf */
			ret = inf_value;
		} else {
			ret = ((unsigned long long)(ret_sisu & 0xff) = 0) {
			if (sisu_diff >= 46) b_calc = 0; else b_calc >>= sisu_diff;
			ret_sisu = a_sisu;
		} else {
			sisu_diff = -sisu_diff;
			if (sisu_diff >= 46) a_calc = 0; else a_calc >>= sisu_diff;
			ret_sisu = b_sisu;
		}
		/* 符号の反映 */
		if (a_fugou) a_calc = -a_calc;
		if (b_fugou) b_calc = -b_calc;
		/* 計算 */
		ret_calc = a_calc + b_calc;
		if (ret_calc < 0) {
			ret_calc = -ret_calc;
			ret_fugou = 1;
		} else {
			if (a_fugou && b_fugou) ret_fugou = 1; else ret_fugou = 0;
		}
		/* 結果の構築 */
		ret_sisu -= 23;
		ret = get_result(ret_fugou, ret_sisu, ret_calc);
	}

	return *((float*)&ret);
}

float float_sub(float a_in, float b_in) {
	unsigned int a = *((unsigned int*)&a_in);
	unsigned int b = *((unsigned int*)&b_in);
	unsigned int ret = check_nan(a, b);
	if (ret != 0) return *((float*)&ret);
	b ^= 0x80000000; /* 符号を反転 */
	return float_add(a_in, *((float*)&b)); /* 加算に丸投げ */
}

float float_mul(float a_in, float b_in) {
	unsigned int a = *((unsigned int*)&a_in);
	unsigned int b = *((unsigned int*)&b_in);
	unsigned int a_fugou, b_fugou;
	unsigned int a_kasu, b_kasu;
	unsigned int a_sisu, b_sisu;
	unsigned int ret;
	ret = check_nan(a, b);
	if (ret != 0) return *((float*)&ret);

	bunkai(&a_fugou, &a_sisu, &a_kasu, a);
	bunkai(&b_fugou, &b_sisu, &b_kasu, b);

	if (a_sisu == 255) {
		/* a = +-inf */
		if (b_sisu == 255) {
			/* b = +-inf */
			if (a_fugou == b_fugou) {
				ret = inf_value;
			} else {
				ret = inf_value | 0x80000000;
			}
		} else {
			/* finite b */
			if ((b & 0x7fffffff) == 0) {
				ret = ind_value;
			} else {
				ret = inf_value;
				if (a_fugou != b_fugou) ret |= 0x80000000;
			}
		}
	} else if (b_sisu == 255) {
		/* finite a and b = +-inf */
		if ((a & 0x7fffffff) == 0) {
			ret = ind_value;
		} else {
			ret = inf_value;
			if (a_fugou != b_fugou) ret |= 0x80000000;
		}
	} else {
		/* finite a, b */
		long long a_calc, b_calc, ret_calc;
		int ret_sisu, ret_fugou = 0;
		a_calc = a_kasu;
		b_calc = b_kasu;
		/* けち表現と非正規化数の処理 */
		if (a_sisu != 0) a_calc |= 0x800000; else a_sisu = 1;
		if (b_sisu != 0) b_calc |= 0x800000; else b_sisu = 1;
		/* 指数の計算 */
		ret_sisu = a_sisu + b_sisu - 127;
		/* 符号の反映 */
		if (a_fugou) ret_fugou = 1 - ret_fugou;
		if (b_fugou) ret_fugou = 1 - ret_fugou;
		/* 計算 */
		ret_calc = a_calc * b_calc;
		/* 結果の構築 */
		ret_sisu -= 23;
		ret = get_result(ret_fugou, ret_sisu, ret_calc);
	}

	return *((float*)&ret);
}

float float_div(float a_in, float b_in) {
	unsigned int a = *((unsigned int*)&a_in);
	unsigned int b = *((unsigned int*)&b_in);
	unsigned int a_fugou, b_fugou;
	unsigned int a_kasu, b_kasu;
	unsigned int a_sisu, b_sisu;
	unsigned int ret;
	ret = check_nan(a, b);
	if (ret != 0) return *((float*)&ret);

	bunkai(&a_fugou, &a_sisu, &a_kasu, a);
	bunkai(&b_fugou, &b_sisu, &b_kasu, b);

	if (a_sisu == 255) {
		/* a = +-inf */
		if (b_sisu == 255) {
			/* b = +-inf */
			ret = ind_value;
		} else {
			/* finite b */
			ret = inf_value;
			if (a_fugou != b_fugou) ret |= 0x80000000;
		}
	} else if (b_sisu == 255) {
		/* finite a and b = +-inf */
		ret = 0;
		if (a_fugou != b_fugou) ret |= 0x80000000;
	} else if ((b & 0x7fffffff) == 0) {
		if ((a & 0x7fffffff) == 0) {
			ret = ind_value;
		} else {
			ret = inf_value;
			if (a_fugou != b_fugou) ret |= 0x80000000;
		}
	} else {
		/* finite a, b */
		long long a_calc, b_calc, ret_calc;
		int ret_sisu, ret_fugou = 0;
		a_calc = a_kasu;
		b_calc = b_kasu;
		/* けち表現と非正規化数の処理 */
		if (a_sisu != 0) a_calc |= 0x800000; else a_sisu = 1;
		if (b_sisu != 0) b_calc |= 0x800000; else b_sisu = 1;
		/* 指数の計算 */
		ret_sisu = a_sisu - b_sisu + 127;
		/* 符号の反映 */
		if (a_fugou) ret_fugou = 1 - ret_fugou;
		if (b_fugou) ret_fugou = 1 - ret_fugou;
		/* 計算 */
		if (a_calc != 0) {
			a_calc <<= 23;
			while ((a_calc & 0x4000000000000000LL) == 0) {
				a_calc <<= 1;
				ret_sisu--;
			}
		}
		ret_calc = a_calc / b_calc;
		/* 結果の構築 */
		ret = get_result(ret_fugou, ret_sisu, ret_calc);
	}

	return *((float*)&ret);
}

float add_native(float a, float b) {
	return a + b;
}

float sub_native(float a, float b) {
	return a - b;
}

float mul_native(float a, float b) {
	return a * b;
}

float div_native(float a, float b) {
	return a / b;
}

int test_binary_operation(const char *op, float a, float b, float (*func_a)(float, float), float (*func_b)(float, float)) {
	float result_a = func_a(a, b);
	float result_b = func_b(a, b);
	unsigned int data_a = *((unsigned int*)&result_a);
	unsigned int data_b = *((unsigned int*)&result_b);
	int is_ok = (data_a == data_b);
	printf("%s IN = %g (%08X) %s %g (%08X) A = %g (%08X), B = %g (%08X)\n",
		is_ok ? "Accepted" : "Wrong Answer",
		a, *((unsigned int*)&a), op, b, *((unsigned int*)&b),
		result_a, data_a, result_b, data_b);
	return is_ok;
}

float make_float(unsigned int n) {
	return *((float*)&n);
}

int main(void) {
	float test_operands[][2] = {
		{1.0f, 2.0f},
		{0.1f, 0.1f},
		{3.14f, 2.71f},
		{-3.14f, 2.71f},
		{3.14f, -2.71f},
		{-3.14f, -2.71f},
		{10e+20f, 5e+23f},

		{1.23f, -1.23f},
		{-1.23f, 1.23f},

		{make_float(0x7f000000), make_float(0x7f000000)},
		{make_float(0x00000001), make_float(0x00800000)},
		{make_float(0x00000001), make_float(0x00800001)},

		{1.0f, make_float(0x00000001)},
		{0.5f, make_float(0x0000beef)},
		{0.01f,make_float(0x00123456)},

		{make_float(0x3f800000), make_float(0x3f800001)},
		{make_float(0x3f800002), make_float(0x3f800001)},

		{0.25f, make_float(0x000000ff)},
		{0.25f, make_float(0x000000fe)},
		{0.25f, make_float(0x000000fd)},
		{0.25f, make_float(0x000000fc)},
		{0.25f, make_float(0x000000fb)},
		{0.25f, make_float(0x000000fa)},
		{0.25f, make_float(0x000000f9)},
		{0.25f, make_float(0x000000f8)},

		{make_float(0x7f7fffff), make_float(0x73800000)},
		{make_float(0x7f7ffffe), make_float(0x73800000)},
		{make_float(0xff7fffff), make_float(0xf3800000)},
		{make_float(0xff7ffffe), make_float(0xf3800000)},

		{*((float*)&inf_value), *((float*)&inf_value)},
		{*((float*)&inf_value), -*((float*)&inf_value)},
		{-*((float*)&inf_value), *((float*)&inf_value)},
		{-*((float*)&inf_value), -*((float*)&inf_value)},

		{*((float*)&nan_value), 1.23f},
		{1.23f, *((float*)&nan_value)},
		{*((float*)&nan_value), *((float*)&nan_value)},
		/* 答えが環境依存になるっぽい */
		/* {*((float*)&nan_value), *((float*)&ind_value)}, */
		/* {*((float*)&ind_value), *((float*)&nan_value)}, */

		{0.0f, 0.0f},
		{-0.0f, 0.0f},
		{0.0f, -0.0f},
		{-0.0f, -0.0f},

		{1.0f, 0.0f},
		{0.0f, 1.0f},
		{1.0f, -0.0f},
		{0.0f, -1.0f},
		{-1.0f, 0.0f},
		{-0.0f, 1.0f},
		{-1.0f, -0.0f},
		{-0.0f, -1.0f},
		{*((float*)&inf_value), 0.0f},
		{0.0f, *((float*)&inf_value)},
		{-*((float*)&inf_value), 0.0f},
		{0.0f, -*((float*)&inf_value)},
		{*((float*)&inf_value), -0.0f},
		{-0.0f, *((float*)&inf_value)},
		{-*((float*)&inf_value), -0.0f},
		{-0.0f, -*((float*)&inf_value)},
		{*((float*)&inf_value), 1.0f},
		{1.0f, *((float*)&inf_value)},
		{-*((float*)&inf_value), 1.0f},
		{1.0f, -*((float*)&inf_value)},
		{*((float*)&inf_value), -1.0f},
		{-1.0f, *((float*)&inf_value)},
		{-*((float*)&inf_value), -1.0f},
		{-1.0f, -*((float*)&inf_value)},

		{*((float*)&ind_value), 1.0f},
		{1.0f, *((float*)&ind_value)},
		{*((float*)&ind_value), 0.0f},
		{0.0f, *((float*)&ind_value)},
		{*((float*)&ind_value), *((float*)&ind_value)},
		{*((float*)&inf_value), *((float*)&ind_value)},
		{*((float*)&ind_value), *((float*)&inf_value)},
		{-*((float*)&inf_value), *((float*)&ind_value)},
		{*((float*)&ind_value), -*((float*)&inf_value)}
	};
	float (*test_functions[][2])(float, float) = {
		{float_add, add_native},
		{float_sub, sub_native},
		{float_mul, mul_native},
		{float_div, div_native},
	};
	const char *test_names[] = {
		"+",
		"-",
		"*",
		"/"
	};
	unsigned int i, j;
	int all_ok = 1;

	puts("----------- fixed case test -----------\n");

	for (i = 0; i < sizeof(test_functions) / sizeof(test_functions[0]); i++) {
		const char *function_name = "";
		if (i < sizeof(test_names) / sizeof(test_names[0])) function_name = test_names[i];
		for (j = 0; j < sizeof(test_operands) / sizeof(test_operands[0]); j++) {
			int current_ok = test_binary_operation(function_name,
				test_operands[j][0], test_operands[j][1], test_functions[i][0], test_functions[i][1]);
			if (!current_ok) all_ok = 0;
		}
	}

	puts("\n----------- random case test -----------\n");

	srand((unsigned int)time(NULL));
	for (i = 0; i < 10; i++) {
		unsigned int a = (rand() & 0xff) | ((rand() & 0xff) << 8) | ((rand() & 0xff) << 16) | ((rand() & 0xff) << 24);
		unsigned int b = (rand() & 0xff) | ((rand() & 0xff) << 8) | ((rand() & 0xff) << 16) | ((rand() & 0xff) << 24);
		for (j = 0; j < sizeof(test_functions) / sizeof(test_functions[0]); j++) {
			const char *function_name = "";
			if (j < sizeof(test_names) / sizeof(test_names[0])) function_name = test_names[j];
			int current_ok = test_binary_operation(function_name,
				*((float*)&a), *((float*)&b), test_functions[j][0], test_functions[j][1]);
			if (!current_ok) all_ok = 0;
		}
	}

	printf("\n%s\n", all_ok ? "Accepted" : "Wrong Answer");

	return 0;
}

実戦投入にはまだunsigned intやlong longなどの計算を多倍長演算に置き換えるなどの課題がありますが、とりあえずわりといい感じに実装できたようです。
長く見えますが、やっていることは単純で、どれも「NaNや無限大などのコーナーケースの処理→指数の調整→計算→丸めと正規化」です。

投稿記事 by **h2so5** » 10年前

プロ

投稿記事 by **みけCAT** » 10年前

h2so5 さんが書きました:プロ

キシ趣味

		7月 2025
日	月	火	水	木	金	土
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30	31

浮動小数点数(float)の計算を自前でやってみた

浮動小数点数(float)の計算を自前でやってみた

Re: 浮動小数点数(float)の計算を自前でやってみた

Re: 浮動小数点数(float)の計算を自前でやってみた