データ収集：公示地価
土地総合情報ライブラリーの公示地価をざっくり取ってくるスクリプトです。
使いかた
perl tochi.pl 都道府県コード
を実行すると、
tochi_都道府県コード.csv
というファイルが出来ると思います(コードは2桁にゼロ埋めされます。例: tochi_01.csv)。
#!/usr/bin/perl
#
# Download published land-price listings from tochi.mlit.go.jp for one
# prefecture and store them as CSV rows in tochi_NN.csv (NN is the
# zero-padded prefecture code).
#
# Usage: perl tochi.pl KENCODE        (KENCODE is 1-47)
#
use strict;
use warnings;
use LWP::UserAgent;
use Jcode;

# Inclusive range of survey years requested by the driver loop below.
my $FIRST_YEAR = 1997;
my $LAST_YEAR  = 2003;

# Amount by which the "st" query parameter advances after every page.
# Presumably the number of rows the site returns per page -- TODO confirm.
my $PAGE_STEP = 20;

# sprintf format for one result page.
# Placeholders, in order: st offset, year, prefecture code, prefecture code.
# The spaces and quotes are left unescaped, as the script has always sent
# them; the request layer is relied upon to escape them.
my $URL_FORMAT =
      q{http://tochi.mlit.go.jp/ba/owa/kekka30?st=%s}
    . q{&sqlstr=shuyouchiten!='A' AND taishounen=%s}
    . q{ AND ((shicode>=%s000 AND shicode<=%s999))}
    . q{ ORDER BY taishounen DESC&shuyouchiten=1&name=dummy};

# Tags whose opening/closing markup is deleted (contents are kept).
# They are processed in this order, after <html>, <head> and <script>.
my @STRIPPED_TAGS = qw(body form hr tt center input br img font a p);

# Build the output file name for a prefecture code.
sub _output_file_name {
    my ($ken) = @_;
    return sprintf("tochi_%02d.csv", $ken);
}

# Turn one result page into a list of CSV-ish lines, one per table row.
#
#   $rstr - page text (already converted to the working encoding)
#
# Returns the list of lines. Every cell is wrapped in double quotes and
# followed by a comma, so each line ends with a trailing comma.
# The substitutions are order-dependent; do not reorder them.
sub _html_to_csv_lines {
    my ($rstr) = @_;

    # Drop page furniture so that only the table markup remains.
    $rstr =~ s/<(\/)*html(.*?)>//isg;
    $rstr =~ s/<head>(.)*<\/head>//isg;
    $rstr =~ s/<script(.*?)>(.*?)<\/script>//isg;
    for my $tag (@STRIPPED_TAGS) {
        $rstr =~ s/<(\/)*${tag}(.*?)>//isg;
    }

    # Keep only what is inside the table.
    $rstr =~ s/(.*?)<table(.*?)>(.*?)<\/table>(.*)/$3/isg;

    # Rebuild line structure from the rows: one <tr> becomes one line.
    $rstr =~ s/\n//isg;
    $rstr =~ s/<td(.*?)>/"/isg;
    $rstr =~ s/<\/*td(.*?)>/",/isg;
    $rstr =~ s/<tr(.*?)>//isg;
    $rstr =~ s/<\/tr(.*?)>/\n/isg;

    # Remove thousands separators and surrounding blanks from cells that
    # contain nothing but a number (up to five comma-separated groups).
    $rstr =~ s/"(\s*?)([0-9]+?),([0-9]+?),([0-9]+?),([0-9]+?)(\s*?)"/"$2$3$4$5"/isg;
    $rstr =~ s/"(\s*?)([0-9]+?),([0-9]+?),([0-9]+?)(\s*?)"/"$2$3$4"/isg;
    $rstr =~ s/"(\s*?)([0-9]+?),([0-9]+?)(\s*?)"/"$2$3"/isg;
    $rstr =~ s/"(\s*?)([0-9]+?)(\s*?)"/"$2"/isg;

    return split("\n", $rstr);
}

# Fetch every result page for one year and prefecture and append the rows
# to the prefecture's CSV file.
#
#   $year - survey year placed in the query
#   $ken  - prefecture code; also written as the first column of each row
#
# Paging stops when a request fails (a warning is printed) or when a page
# yields too few lines. Dies if the output file cannot be opened or closed.
sub get_data {
    my ($year, $ken) = @_;

    my $file = _output_file_name($ken);
    open(my $out, '>>', $file)
        or die "cannot open $file for appending: $!\n";

    my $ua = LWP::UserAgent->new;
    $ua->agent("Mozilla/4.7");

    my $st = 0;
    PAGE:
    while (1) {
        my $url = sprintf $URL_FORMAT, $st, $year, $ken, $ken;
        my $req = HTTP::Request->new(GET => $url);
        $req->header('Accept' => 'text/html');

        my $res = $ua->request($req);
        unless ($res->is_success) {
            # Report the failure instead of silently ending this year.
            warn "request failed (year $year, st $st): ",
                $res->status_line, "\n";
            last PAGE;
        }

        # Convert the response from Shift_JIS to EUC-JP. The "z" option
        # presumably requests half-width to full-width kana conversion
        # -- confirm against the Jcode documentation.
        my $content = $res->content;
        my $text = Jcode::convert(\$content, "euc", "sjis", "z");

        my @lines = _html_to_csv_lines($text);

        # NOTE(review): this also stops on a page with exactly one line
        # after line 0, so such a row is never written -- confirm whether
        # an empty result page really produces two lines.
        last PAGE if $#lines <= 1;

        # Line 0 is skipped (presumably the table's header row).
        for my $i (1 .. $#lines) {
            my $row = $lines[$i];
            chop($row);    # remove the trailing comma left after the last cell
            print {$out} "\"$ken\",", $row, "\n";
        }

        $st += $PAGE_STEP;
    }

    close($out) or die "cannot close $file: $!\n";
    return;
}

# The code is interpolated into the query string, so accept only a plain
# number in the documented range.
if (   $#ARGV < 0
    || $ARGV[0] !~ /\A[0-9]+\z/
    || $ARGV[0] < 1
    || $ARGV[0] > 47)
{
    print "usage: $0 KENCODE(1-47)\n";
    exit 1;
}

my $ken_code = $ARGV[0];

# Start from an empty file; get_data() only ever appends.
my $csv_file = _output_file_name($ken_code);
open(my $fresh, '>', $csv_file)
    or die "cannot create $csv_file: $!\n";
close($fresh) or die "cannot close $csv_file: $!\n";

for my $y ($FIRST_YEAR .. $LAST_YEAR) {
    get_data($y, $ken_code);
}