COLOR(red){SIZE(25){データ収集公示地価}}

[[土地総合情報ライブラリー:http://tochi.mlit.go.jp/]]の公示地価をざっくり取ってくるスクリプトです。

使いかた~
 perl tochi.pl 都道府県コード
で、~
 tochi_都道府県コード.csv
という名前のCSVファイルが作成されます(ファイル名の都道府県コードは2桁にゼロ埋めされます。例: tochi_01.csv)。

 #!/usr/bin/perl
 use LWP::UserAgent;
 use Jcode;
 use strict;
 
 # get_data(YEAR, KEN)
 #
 # Downloads the land-price listing for one year and one prefecture code
 # and appends it as CSV rows to tochi_<KEN>.csv (KEN formatted with %02d).
 # Result pages are requested repeatedly, advancing the "st" query
 # parameter by 20 per request (presumably the row offset of a 20-row
 # page -- confirm against the site).
 #
 # Parameters:
 #   YEAR - value substituted into the "taishounen=" condition of the query
 #   KEN  - prefecture code; selects shicode KEN000 .. KEN999 and is also
 #          written as the first CSV column of every row
 #
 # Returns nothing.  Stops at the first failed HTTP request (a warning is
 # printed) or at the first page that yields two or fewer table rows.
 # Dies if the output file cannot be opened or closed.
 sub get_data($$)
   {
     my $year = shift;
     my $ken  = shift;
     # Derive the file name from the parameter rather than from @ARGV so
     # the sub does not depend on global state.
     my $file = sprintf("tochi_%02d.csv", $ken);
     open (my $fd, '>>', $file) or die "cannot open $file for append: $!";
     my $st = 0;
     # The literal keeps its surrounding newline/indentation exactly as
     # before; presumably the URI parsing behind HTTP::Request trims it
     # -- confirm before reformatting.
     my $urlfmt = 
       qq{
        http://tochi.mlit.go.jp/ba/owa/kekka30?st=%s&sqlstr=shuyouchiten!='A' AND taishounen=%s AND ((shicode>=%s000 AND shicode<=%s999)) ORDER BY taishounen DESC&shuyouchiten=1&name=dummy
         };
     my $ua = LWP::UserAgent->new;
     $ua->agent("Mozilla/4.7");
     while (1)
       {
         my $url = sprintf $urlfmt, $st, $year, $ken, $ken ;
         my $req = HTTP::Request->new(GET => $url);
         $req->header('Accept' => 'text/html'); 
         my $res = $ua->request($req);
         unless ($res->is_success) {
           # Report the failure instead of stopping silently, so that a
           # truncated CSV can be noticed.
           warn "get_data: request failed (year=$year ken=$ken st=$st): "
              . $res->status_line . "\n";
           last;
         }
         # Convert the response body from Shift_JIS to EUC-JP.
         my $rstr = Jcode::convert(\$res->content, "euc", "sjis", "z");
         my @lines = _table_to_csv_lines($rstr);
         # NOTE(review): a page holding the header row plus exactly one
         # data row also satisfies this test, so that row is not written
         # -- confirm whether this is intended.
         last if $#lines <= 1;
         # Row 0 is skipped (presumably the table header -- confirm).
         for my $i (1 .. $#lines) {
           # Drop the last character of the row, i.e. the comma that
           # follows the final cell.
           chop($lines[$i]);
           print {$fd} "\"$ken\",",$lines[$i],"\n";
         }
         $st = $st + 20;
       }
     # Reached on every exit path, so buffered write errors are reported.
     close($fd) or die "cannot close $file: $!";
     return;
   }
 # _table_to_csv_lines(HTML)
 #
 # Private helper for get_data: reduces one result page to the contents
 # of its table and returns one string per table row, with each cell
 # wrapped in double quotes and followed by a comma.
 sub _table_to_csv_lines
   {
     my ($rstr) = @_;
     # Strip page-level and inline markup that is not part of the table.
     $rstr =~ s/<(\/)*html(.*?)>//isg;
     $rstr =~ s/<head>(.)*<\/head>//isg;
     $rstr =~ s/<script(.*?)>(.*?)<\/script>//isg;
     $rstr =~ s/<(\/)*body(.*?)>//isg;
     $rstr =~ s/<(\/)*form(.*?)>//isg;
     $rstr =~ s/<(\/)*hr(.*?)>//isg;
     $rstr =~ s/<(\/)*tt(.*?)>//isg;
     $rstr =~ s/<(\/)*center(.*?)>//isg;
     $rstr =~ s/<(\/)*input(.*?)>//isg;
     $rstr =~ s/<(\/)*br(.*?)>//isg;
     $rstr =~ s/<(\/)*img(.*?)>//isg;
     $rstr =~ s/<(\/)*font(.*?)>//isg;
     $rstr =~ s/<(\/)*a(.*?)>//isg;
     $rstr =~ s/<(\/)*p(.*?)>//isg;
     # Keep only what lies between the first <table ...> and </table>.
     $rstr =~ s/(.*?)<table(.*?)>(.*?)<\/table>(.*)/$3/isg;
     # Turn cells into quoted, comma-terminated fields and rows into lines.
     $rstr =~ s/\n//isg;
     $rstr =~ s/<td(.*?)>/"/isg;
     $rstr =~ s/<\/*td(.*?)>/",/isg;
     $rstr =~ s/<tr(.*?)>//isg;
     $rstr =~ s/<\/tr(.*?)>/\n/isg;
     # Remove thousands separators (and surrounding blanks) from cells
     # that contain only a number; longest pattern first.
     $rstr =~ s/"(\s*?)([0-9]+?),([0-9]+?),([0-9]+?),([0-9]+?)(\s*?)"/"$2$3$4$5"/isg;
     $rstr =~ s/"(\s*?)([0-9]+?),([0-9]+?),([0-9]+?)(\s*?)"/"$2$3$4"/isg;
     $rstr =~ s/"(\s*?)([0-9]+?),([0-9]+?)(\s*?)"/"$2$3"/isg;
     $rstr =~ s/"(\s*?)([0-9]+?)(\s*?)"/"$2"/isg;
     return split ("\n",$rstr);
   }
 
 # Script entry point.
 #
 # Expects one argument, the prefecture code.  It must be a 1-47 integer
 # as stated in the usage text: the value is placed into both the query
 # URL and the output file name, so anything else is rejected up front.
 if ( $#ARGV < 0
      or $ARGV[0] !~ /\A[0-9]{1,2}\z/
      or $ARGV[0] < 1
      or $ARGV[0] > 47 ){
   print "usage: $0 KENCODE(1-47)\n" ;
   exit 1;
 }
 # Create (or truncate) the output file once; get_data appends to it.
 {
   my $file = sprintf("tochi_%02d.csv",$ARGV[0]);
   open (my $fd, '>', $file) or die "cannot create $file: $!";
   close($fd) or die "cannot close $file: $!";
 }
 # Fetch each year in turn.
 for my $y (1997 .. 2003)
   {
         get_data ($y,$ARGV[0]);
   }


トップ   編集 差分 バックアップ 添付 複製 名前変更 リロード   新規 一覧 検索 最終更新   ヘルプ   最終更新のRSS