ランダムな文字列の分布
http://d.hatena.ne.jp/uunfo/20080516/1210957825 の方法で生成した文字列が、本当にランダムに分布しているのかを確かめてみた。
数字
preg_replace("/[^[:digit:]]/", "", hash("whirlpool", mt_rand()));
[ 1214388779.7213 sec / 12503 : 1000008 letters ] [/[^[:digit:]]/]
アルファベット
preg_replace("/[^[:alpha:]]/", "", base64_encode(hash("sha384", mt_rand(),true)))
[ 1.0966 sec / 19225 : 1000047 letters ] [/[^[:alpha:]]/]
英数字
preg_replace("/[^[:alnum:]]/", "", base64_encode(hash("sha384", mt_rand(),true)))
[ 1.0179 sec / 16133 : 1000044 letters ] [/[^[:alnum:]]/]
ソース
// Line break used between the pieces of HTML output.
define('BR', "<br>\n");

// Each experiment keeps generating strings until more than this many letters
// have been collected.
$limit = 1000000;

// The functions below are declared unconditionally at the top level, so PHP
// makes them available before these calls run even though they appear later.
eval_count_time_digit();
eval_count_time_alpha();
eval_count_time_alnum();
exit;

/**
 * Measures the distribution of the digits left after stripping every
 * non-digit from the hexadecimal whirlpool hash of mt_rand().
 *
 * Reads the global $limit and prints the timing line and the charts.
 */
function eval_count_time_digit()
{
    global $limit;
    measure_letter_distribution("/[^[:digit:]]/", "whirlpool", false, $limit);
}

/**
 * Measures the distribution of the alphabetic letters left after stripping
 * every other character from the base64-encoded raw sha384 hash of mt_rand().
 *
 * Reads the global $limit and prints the timing line and the charts.
 */
function eval_count_time_alpha()
{
    global $limit;
    measure_letter_distribution("/[^[:alpha:]]/", "sha384", true, $limit);
}

/**
 * Measures the distribution of the alphanumeric letters left after stripping
 * every other character from the base64-encoded raw sha384 hash of mt_rand().
 *
 * Reads the global $limit and prints the timing line and the charts.
 */
function eval_count_time_alnum()
{
    global $limit;
    measure_letter_distribution("/[^[:alnum:]]/", "sha384", true, $limit);
}

/**
 * Shared implementation of the eval_count_time_* experiments.
 *
 * Repeatedly hashes mt_rand(), removes everything matching $pattern, and
 * tallies the remaining letters until more than $limit letters were counted.
 * Then prints "[ <elapsed> sec / <iterations> : <letters> letters ] [<pattern>]"
 * followed by the charts of the relative letter frequencies.
 *
 * @param string $pattern    PCRE pattern matching the characters to remove.
 * @param string $algorithm  Algorithm name passed to hash().
 * @param bool   $use_base64 true: base64-encode the raw binary digest;
 *                           false: use the hexadecimal digest as is.
 * @param int    $limit      Loop runs while the letter count is <= $limit.
 * @return void
 */
function measure_letter_distribution($pattern, $algorithm, $use_base64, $limit)
{
    $distribution = array();
    $count = 0;
    $time = microtime(true);

    for ($i = 0; $count <= $limit; $i++) {
        // hash() expects a string; make the int-to-string conversion explicit.
        $seed = (string) mt_rand();
        if ($use_base64) {
            $digest = base64_encode(hash($algorithm, $seed, true));
        } else {
            $digest = hash($algorithm, $seed);
        }

        $buffer = preg_replace($pattern, "", $digest);
        $length = strlen($buffer);
        for ($j = 0; $j < $length; $j++) {
            $letter = $buffer[$j];
            // Initialise the counter on first sight so that the increment
            // does not raise an "undefined index/array key" diagnostic.
            if (!isset($distribution[$letter])) {
                $distribution[$letter] = 0;
            }
            $distribution[$letter]++;
        }
        $count += $length;
    }

    // Report the elapsed time, not the raw timestamp; sorting and rendering
    // are deliberately excluded from the measurement.
    $end_time = microtime(true);
    echo '[ ' . sprintf("%01.04f", $end_time - $time) . ' sec / ' . $i . ' : ' . $count
        . ' letters ] [' . $pattern . ']' . BR;

    // Digit letters become integer keys; SORT_STRING keeps the order of mixed
    // integer/string keys well defined (digits, upper case, lower case).
    ksort($distribution, SORT_STRING);
    $distribution = frequency2probability($distribution);
    google_graph($distribution);
    echo BR;
}

/**
 * Prints two Google Chart line-chart <img> tags for a distribution: the
 * first with the data range set to the observed min..max, the second with
 * the fixed range 0..1.
 *
 * @param array $distribution Map of letter => value; keys become x labels.
 * @return void
 */
function google_graph($distribution)
{
    // min()/max() cannot handle an empty array and there is nothing to draw.
    if (count($distribution) === 0) {
        return;
    }

    $min = min($distribution);
    $max = max($distribution);
    $chart_data_string = implode(",", $distribution);
    $axis_x = implode("|", array_keys($distribution));
    $axis_y = $min . "|" . $max;

    $base_url = 'http://chart.apis.google.com/chart?cht=lc&chs=600x200&chd=t:' . $chart_data_string;
    $urls = array(
        $base_url . '&chds=' . $min . ',' . $max . '&chxt=x,y&chxl=0:|' . $axis_x . '|1:|' . $axis_y,
        $base_url . '&chds=0,1&chxt=x,y&chxl=0:|' . $axis_x . '|1:|0|1',
    );

    foreach ($urls as $url) {
        // Escape the URL for the attribute context ("&" becomes "&amp;").
        echo '<img src="' . htmlspecialchars($url, ENT_QUOTES, 'UTF-8') . '" width="600" height="200">';
    }
}

/**
 * Converts absolute counts into relative frequencies by dividing every
 * value by the sum of all values.
 *
 * @param array $distribution Map of key => count.
 * @return array Same keys with value / sum; returned unchanged when the sum
 *               is zero, to avoid a division by zero.
 */
function frequency2probability($distribution)
{
    $count = array_sum($distribution);
    if ($count == 0) {
        return $distribution;
    }

    foreach ($distribution as $key => $value) {
        $distribution[$key] = $value / $count;
    }

    return $distribution;
}
md5を使った場合
preg_replace("/[^[:digit:]]/", "", hash("md5", mt_rand()))
注意点として、base64 は変換対象のビット数が 6 の倍数になるように末尾へ 0 のビットを付加するので、変換対象のビット数が 6 の倍数になっていないと、文字の出現バランスがおかしくなる。
以下はその実例。
[ 1.1743 sec / 46871 : 1000006 letters ] [/[^[:alnum:]]/] md5
/**
 * Measures the distribution of the alphanumeric letters left after stripping
 * every other character from the base64-encoded raw md5 hash of mt_rand().
 *
 * The surrounding article uses this as the example of an input whose bit
 * length is not a multiple of 6, so base64 has to pad the final group.
 *
 * Reads the global $limit, prints
 * "[ <elapsed> sec / <iterations> : <letters> letters ] [/[^[:alnum:]]/] md5"
 * and then the charts of the relative letter frequencies.
 */
function eval_count_time_alnum_md5()
{
    global $limit;

    $distribution = array();
    $count = 0;
    $time = microtime(true);

    for ($i = 0; $count <= $limit; $i++) {
        // hash() expects a string; make the int-to-string conversion explicit.
        $digest = base64_encode(hash("md5", (string) mt_rand(), true));
        $buffer = preg_replace("/[^[:alnum:]]/", "", $digest);

        $length = strlen($buffer);
        for ($j = 0; $j < $length; $j++) {
            $letter = $buffer[$j];
            // Initialise the counter on first sight so that the increment
            // does not raise an "undefined index/array key" diagnostic.
            if (!isset($distribution[$letter])) {
                $distribution[$letter] = 0;
            }
            $distribution[$letter]++;
        }
        $count += $length;
    }

    $end_time = microtime(true);
    echo '[ ' . sprintf("%01.04f", $end_time - $time) . ' sec / ' . $i . ' : ' . $count
        . ' letters ] [' . "/[^[:alnum:]]/" . '] md5' . BR;

    // Digit letters become integer keys; SORT_STRING keeps the order of mixed
    // integer/string keys well defined (digits, upper case, lower case).
    ksort($distribution, SORT_STRING);
    $distribution = frequency2probability($distribution);
    google_graph($distribution);
    echo BR;
}