ランダムな文字列の分布

http://d.hatena.ne.jp/uunfo/20080516/1210957825 で生成された文字列が本当にランダムなのかを確かめてみた。

数字

preg_replace("/[^[:digit:]]/", "", hash("whirlpool", mt_rand()));

[ 1214388779.7213 sec / 12503 : 1000008 letters ] [/[^[:digit:]]/]

アルファベット

preg_replace("/[^[:alpha:]]/", "", base64_encode(hash("sha384", mt_rand(),true)))

[ 1.0966 sec / 19225 : 1000047 letters ] [/[^[:alpha:]]/]

英数字

preg_replace("/[^[:alnum:]]/", "", base64_encode(hash("sha384", mt_rand(),true)))

[ 1.0179 sec / 16133 : 1000044 letters ] [/[^[:alnum:]]/]

ソース

// HTML line break plus newline, appended after each block of output.
define('BR', "<br>\n");
// Character threshold; each eval_count_time_* function reads it via `global`.
$limit = 1000000;
// Run the three measurements in order: digits, letters, letters + digits.
eval_count_time_digit();
eval_count_time_alpha();
eval_count_time_alnum();
// Stop here so nothing after this point is executed.
exit;

/**
 * Times the generation of decimal digits taken from whirlpool hashes of
 * mt_rand() values and charts how often each digit appears.
 *
 * Reads the global $limit and keeps hashing until strictly more than $limit
 * digits have been collected. Echoes one summary line (elapsed seconds,
 * number of hashes, number of digits, the filter pattern) followed by the
 * charts emitted by google_graph().
 *
 * @return void
 */
function eval_count_time_digit(){
	global $limit;
	$distribution = array();
	$count = 0;
	$time=microtime(true);
	for($i=0;$count <= $limit;$i++){
		// Hex digest with everything that is not a decimal digit removed.
		$buffer = preg_replace("/[^[:digit:]]/", "", hash("whirlpool", (string)mt_rand()));
		$length = strlen($buffer);
		for($j=0;$j<$length;$j++){
			$char = $buffer[$j];
			// Seed the counter on first sight so incrementing never touches an unset key.
			if(!isset($distribution[$char])) $distribution[$char] = 0;
			$distribution[$char]++;
		}
		$count += $length;
	}
	$end_time=microtime(true);
	// Report the elapsed time, not the raw end timestamp.
	echo '[ '.sprintf("%01.04f", $end_time-$time).' sec / '.$i.' : '.$count.' letters ] ['."/[^[:digit:]]/".']'.BR;
	// Sorting happens after the timing stops, as in the sibling functions.
	ksort($distribution, SORT_STRING);
	$distribution = frequency2probability($distribution);
	google_graph($distribution);
	echo BR;
}
/**
 * Times the generation of alphabetic characters taken from base64-encoded
 * raw sha384 hashes of mt_rand() values and charts how often each letter
 * appears.
 *
 * Reads the global $limit and keeps hashing until strictly more than $limit
 * letters have been collected. Echoes one summary line (elapsed seconds,
 * number of hashes, number of letters, the filter pattern) followed by the
 * charts emitted by google_graph().
 *
 * @return void
 */
function eval_count_time_alpha(){
	global $limit;
	$distribution = array();
	$count = 0;
	$time=microtime(true);
	for($i=0;$count <= $limit;$i++){
		// Base64 text of the raw digest with every non-letter removed.
		$buffer = preg_replace("/[^[:alpha:]]/", "", base64_encode(hash("sha384", (string)mt_rand(),true)));
		$length = strlen($buffer);
		for($j=0;$j<$length;$j++){
			$char = $buffer[$j];
			// Seed the counter on first sight so incrementing never touches an unset key.
			if(!isset($distribution[$char])) $distribution[$char] = 0;
			$distribution[$char]++;
		}
		$count += $length;
	}
	$end_time=microtime(true);
	echo '[ '.sprintf("%01.04f", $end_time-$time).' sec / '.$i.' : '.$count.' letters ] ['."/[^[:alpha:]]/".']'.BR;
	// Byte-wise key order: upper-case letters first, then lower-case.
	ksort($distribution, SORT_STRING);
	$distribution = frequency2probability($distribution);
	google_graph($distribution);
	echo BR;
}
/**
 * Times the generation of alphanumeric characters taken from base64-encoded
 * raw sha384 hashes of mt_rand() values and charts how often each character
 * appears.
 *
 * Reads the global $limit and keeps hashing until strictly more than $limit
 * characters have been collected. Echoes one summary line (elapsed seconds,
 * number of hashes, number of characters, the filter pattern) followed by
 * the charts emitted by google_graph().
 *
 * @return void
 */
function eval_count_time_alnum(){
	global $limit;
	$distribution = array();
	$count = 0;
	$time=microtime(true);
	for($i=0;$count <= $limit;$i++){
		// Base64 text of the raw digest with every non-alphanumeric character removed.
		$buffer = preg_replace("/[^[:alnum:]]/", "", base64_encode(hash("sha384", (string)mt_rand(),true)));
		$length = strlen($buffer);
		for($j=0;$j<$length;$j++){
			$char = $buffer[$j];
			// Seed the counter on first sight so incrementing never touches an unset key.
			if(!isset($distribution[$char])) $distribution[$char] = 0;
			$distribution[$char]++;
		}
		$count += $length;
	}
	$end_time=microtime(true);
	echo '[ '.sprintf("%01.04f", $end_time-$time).' sec / '.$i.' : '.$count.' letters ] ['."/[^[:alnum:]]/".']'.BR;
	// Digit characters become integer keys while letters stay strings; compare
	// every key as a string so the order is digits, upper-case, lower-case
	// regardless of how the PHP version compares ints with non-numeric strings.
	ksort($distribution, SORT_STRING);
	$distribution = frequency2probability($distribution);
	google_graph($distribution);
	echo BR;
}

/**
 * Echoes two Google Chart line-chart <img> tags for the given distribution.
 *
 * Both charts plot the same values with the array keys as x-axis labels.
 * The first scales the y axis to the smallest and largest value, the second
 * uses a fixed 0..1 scale.
 *
 * @param array $distribution Values to plot, keyed by their x-axis label.
 * @return void
 */
function google_graph($distribution){
	$lowest = min($distribution);
	$highest = max($distribution);
	$series = implode(",", $distribution);
	$labels = implode("|", array_keys($distribution));

	// Pieces shared by both image tags.
	$head = '<img src="http://chart.apis.google.com/chart?cht=lc&amp;chs=600x200&amp;chd=t:'.$series;
	$x_axis = '&amp;chxt=x,y&amp;chxl=0:|'.$labels.'|1:|';
	$tail = '" width="600" height="200">';

	// Chart scaled to the observed minimum and maximum.
	echo $head.'&amp;chds='.$lowest.','.$highest.$x_axis.$lowest."|".$highest.$tail;
	// Chart on the fixed 0..1 scale.
	echo $head.'&amp;chds=0,1'.$x_axis.'0|1'.$tail;
}

/**
 * Converts occurrence counts into relative frequencies.
 *
 * Every value is divided by the sum of all values; keys and their order are
 * kept as they are.
 *
 * @param array $distribution Occurrence counts keyed by symbol.
 * @return array The same keys mapped to count / total.
 */
function frequency2probability($distribution){
	$total = array_sum($distribution);
	$probabilities = array();
	foreach(array_keys($distribution) as $symbol){
		$probabilities[$symbol] = $distribution[$symbol] / $total;
	}
	return $probabilities;
}

md5を使った場合

preg_replace("/[^[:alnum:]]/", "", base64_encode(hash("md5", mt_rand(),true)))

「注意としてはbase64では変換対象のビット数が6の倍数になるように後ろに0のビットを付加するので、変換対象が6の倍数になるようにしないとバランスがおかしくなる」

の実例。md5の出力は128ビットで6の倍数ではないため、base64の最後の1文字には2ビット分の情報しか入らず、その文字の出現に偏りが出る。
[ 1.1743 sec / 46871 : 1000006 letters ] [/[^[:alnum:]]/] md5


/**
 * Times the generation of alphanumeric characters taken from base64-encoded
 * raw md5 hashes of mt_rand() values and charts how often each character
 * appears.
 *
 * Reads the global $limit and keeps hashing until strictly more than $limit
 * characters have been collected. Echoes one summary line (elapsed seconds,
 * number of hashes, number of characters, the filter pattern, "md5")
 * followed by the charts emitted by google_graph().
 *
 * @return void
 */
function eval_count_time_alnum_md5(){
	global $limit;
	$distribution = array();
	$count = 0;
	$time=microtime(true);
	for($i=0;$count <= $limit;$i++){
		// Base64 text of the raw digest with every non-alphanumeric character removed.
		$buffer = preg_replace("/[^[:alnum:]]/", "", base64_encode(hash("md5", (string)mt_rand(),true)));
		$length = strlen($buffer);
		for($j=0;$j<$length;$j++){
			$char = $buffer[$j];
			// Seed the counter on first sight so incrementing never touches an unset key.
			if(!isset($distribution[$char])) $distribution[$char] = 0;
			$distribution[$char]++;
		}
		$count += $length;
	}
	$end_time=microtime(true);
	echo '[ '.sprintf("%01.04f", $end_time-$time).' sec / '.$i.' : '.$count.' letters ] ['."/[^[:alnum:]]/".'] md5'.BR;
	// Digit characters become integer keys while letters stay strings; compare
	// every key as a string so the order is digits, upper-case, lower-case
	// regardless of how the PHP version compares ints with non-numeric strings.
	ksort($distribution, SORT_STRING);
	$distribution = frequency2probability($distribution);
	google_graph($distribution);
	echo BR;
}