データ収集公示地価

土地総合情報ライブラリーの公示地価をざっくり取ってくるスクリプトです。

使いかた

perl tochi.pl 都道府県コード

を実行すると、

tochi_都道府県コード.csv

というファイルが作成されます(ファイル名の都道府県コードは2桁にゼロ埋めされます。例: tochi_01.csv)。

#!/usr/bin/perl
use LWP::UserAgent;
use Jcode;
use strict;

# _table_html_to_csv_lines(HTML)
#
# Reduce one result page to CSV-like text lines.
#
#   HTML - page source as a string (the caller has already run it through
#          Jcode::convert).
#
# Returns the list of lines obtained by splitting the converted text on
# newlines; each </tr> in the first <table> yields one line.  Every <td>
# cell becomes  "value",  so a data line ends with a trailing comma, which
# the caller strips.
sub _table_html_to_csv_lines
  {
    my ($rstr) = @_;

    # Drop the document wrapper and all inline/presentation markup so that
    # only the table structure is left.
    $rstr =~ s/<(\/)*html(.*?)>//isg;
    $rstr =~ s/<head>(.)*<\/head>//isg;
    $rstr =~ s/<script(.*?)>(.*?)<\/script>//isg;
    $rstr =~ s/<(\/)*body(.*?)>//isg;
    $rstr =~ s/<(\/)*form(.*?)>//isg;
    $rstr =~ s/<(\/)*hr(.*?)>//isg;
    $rstr =~ s/<(\/)*tt(.*?)>//isg;
    $rstr =~ s/<(\/)*center(.*?)>//isg;
    $rstr =~ s/<(\/)*input(.*?)>//isg;
    $rstr =~ s/<(\/)*br(.*?)>//isg;
    $rstr =~ s/<(\/)*img(.*?)>//isg;
    $rstr =~ s/<(\/)*font(.*?)>//isg;
    $rstr =~ s/<(\/)*a(.*?)>//isg;
    $rstr =~ s/<(\/)*p(.*?)>//isg;

    # Keep only what is inside the first <table> ... </table> pair.
    $rstr =~ s/(.*?)<table(.*?)>(.*?)<\/table>(.*)/$3/isg;

    # One output line per table row: remove the original newlines, quote
    # each cell, and turn every </tr> into a line break.
    $rstr =~ s/\n//isg;
    $rstr =~ s/<td(.*?)>/"/isg;
    $rstr =~ s/<\/*td(.*?)>/",/isg;
    $rstr =~ s/<tr(.*?)>//isg;
    $rstr =~ s/<\/tr(.*?)>/\n/isg;

    # Cells holding only a number: remove thousands separators (up to four
    # comma-separated groups) and surrounding whitespace.
    $rstr =~ s/"(\s*?)([0-9]+?),([0-9]+?),([0-9]+?),([0-9]+?)(\s*?)"/"$2$3$4$5"/isg;
    $rstr =~ s/"(\s*?)([0-9]+?),([0-9]+?),([0-9]+?)(\s*?)"/"$2$3$4"/isg;
    $rstr =~ s/"(\s*?)([0-9]+?),([0-9]+?)(\s*?)"/"$2$3"/isg;
    $rstr =~ s/"(\s*?)([0-9]+?)(\s*?)"/"$2"/isg;

    return split("\n", $rstr);
  }

# get_data(YEAR, KEN)
#
# Fetch every result page for one year and one prefecture code and append
# the table rows to tochi_<KEN as two digits>.csv.
#
#   YEAR - value substituted for "taishounen" in the query.
#   KEN  - prefecture code; used for the shicode range KEN000..KEN999, for
#          the first CSV column, and for the output file name.
#
# Paging advances the "st" query parameter by 20 per request and stops when
# a request fails or a page yields two or fewer lines.
#
# Returns nothing.  Dies if the output file cannot be opened or closed.
sub get_data($$)
  {
    my $year = shift;
    my $ken  = shift;

    # The file name is derived from the KEN argument rather than from the
    # global @ARGV, so the sub does not depend on how the script was invoked
    # (the script itself passes $ARGV[0] as KEN, so the file is the same).
    my $file = sprintf("tochi_%02d.csv", $ken);
    open(my $fh, '>>', $file) or die "cannot open $file for append: $!";

    my $st = 0;

    # Same query as before, without the newline/indentation that used to
    # surround it inside the quoted template.
    my $urlfmt = q{http://tochi.mlit.go.jp/ba/owa/kekka30?st=%s&sqlstr=shuyouchiten!='A' AND taishounen=%s AND ((shicode>=%s000 AND shicode<=%s999)) ORDER BY taishounen DESC&shuyouchiten=1&name=dummy};

    my $ua = LWP::UserAgent->new;
    $ua->agent("Mozilla/4.7");

    PAGE: while (1)
      {
        my $url = sprintf $urlfmt, $st, $year, $ken, $ken;

        my $req = HTTP::Request->new(GET => $url);
        $req->header('Accept' => 'text/html');

        my $res = $ua->request($req);

        unless ($res->is_success)
          {
            # Report the failure instead of stopping silently, so an
            # incomplete CSV can be told apart from a complete one.
            warn "request failed (year=$year, ken=$ken, st=$st): ",
                 $res->status_line, "\n";
            last PAGE;
          }

        my $html  = Jcode::convert(\$res->content, "euc", "sjis", "z");
        my @lines = _table_html_to_csv_lines($html);

        # NOTE(review): this also stops on a page that has the header line
        # plus exactly one data line, so such a row is never written.
        # Presumably an empty result page still produces two lines; confirm
        # against the server before tightening this condition.
        last PAGE if $#lines <= 1;

        # Line 0 is skipped (presumably the table's header row).
        for my $i (1 .. $#lines)
          {
            my $row = $lines[$i];
            # Remove only the trailing cell separator; a blind chop would
            # eat a data character whenever the row does not end in ",".
            $row =~ s/,\s*\z//;
            print {$fh} "\"$ken\",", $row, "\n";
          }

        $st += 20;
      }

    # Closing here (reached from every exit of the loop) flushes the data
    # and surfaces buffered write errors.
    close($fh) or die "cannot close $file: $!";
    return;
  }

# Script entry point.
#
# Expects one argument, the prefecture code (1-47).  Creates/truncates
# tochi_<code as two digits>.csv and then calls get_data once per year from
# 1997 through 2003, which appends that year's rows to the file.

my $kencode = $ARGV[0];

# The code is interpolated into the remote query and into the output file
# name, so reject anything that is not a one- or two-digit number in the
# range announced by the usage message.
if ( !defined $kencode
     || $kencode !~ /\A[0-9]{1,2}\z/
     || $kencode < 1
     || $kencode > 47 ){
  print "usage: $0 KENCODE(1-47)\n" ;
  exit 1;
}

# Start from an empty file: get_data only ever appends.
my $outfile = sprintf("tochi_%02d.csv", $kencode);
open(my $out, '>', $outfile) or die "cannot create $outfile: $!";
close($out) or die "cannot close $outfile: $!";

for my $y (1997 .. 2003)
  {
        get_data($y, $kencode);
  }

トップ   編集 凍結 差分 バックアップ 添付 複製 名前変更 リロード   新規 一覧 単語検索 最終更新   ヘルプ   最終更新のRSS
Google
WWW を検索 OKADAJP.ORG を検索
Last-modified: 2015-03-01 (日) 01:15:59 (1719d)