1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
| use Mojo::UserAgent;
use Mojo::DOM;
use DBI;
# Article titles may contain non-ASCII characters; give the output handles a
# UTF-8 layer so they are printed correctly and without "Wide character"
# warnings.
binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

# Shared HTTP client used for every page fetch.
my $ua = Mojo::UserAgent->new;

# Database connection (SQLite file news.db in the current directory).
#   RaiseError     - DBI calls die on failure instead of returning undef.
#   AutoCommit     - every statement is committed immediately.
#   sqlite_unicode - text is exchanged with the database as character
#                    strings (UTF-8 on disk), matching the decoded text
#                    that the DOM parser hands back.
my $dbh = DBI->connect('dbi:SQLite:dbname=news.db', '', '', {
    RaiseError     => 1,
    AutoCommit     => 1,
    sqlite_unicode => 1,
});

# Create the table on first run.  The UNIQUE constraint on url is what
# prevents the same article from being stored twice.
$dbh->do(q{
CREATE TABLE IF NOT EXISTS articles (
id INTEGER PRIMARY KEY,
title TEXT,
url TEXT UNIQUE,
published_at TEXT,
scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
});
# Fetch a news listing page and store every article found on it.
#
# Parameters:
#   $url - address of the listing page to fetch.
#
# Every element matching 'article.post' is expected to contain an
# 'h2.title' heading, an 'a' link and a 'time' element with a datetime
# attribute; adjust the selectors to the target site's markup.  Articles
# without a title or link are skipped with a warning; a missing 'time'
# element stores NULL in published_at.
#
# Pages are fetched with the file-level $ua and rows are written through
# the file-level $dbh, which must have RaiseError enabled so that a failed
# INSERT throws.
#
# Returns the Mojo::Collection of matched article elements, or nothing when
# the page could not be fetched.
sub scrape_news {
    my ($url) = @_;

    my $tx = $ua->get($url);

    # A connection failure or 4xx/5xx reply carries no usable listing, so
    # report it instead of silently parsing an empty or error document.
    if (my $err = $tx->error) {
        my $status = defined $err->{code} ? "$err->{code} " : '';
        warn "Failed to fetch $url: $status$err->{message}\n";
        return;
    }

    my $articles = $tx->res->dom->find('article.post');

    $articles->each(sub {
        my $article = shift;

        # at() returns undef when nothing matches, so look each element up
        # before calling a method on it.
        my $title_el = $article->at('h2.title');
        my $link_el  = $article->at('a');
        my $time_el  = $article->at('time');

        my $title = $title_el ? $title_el->text             : undef;
        my $link  = $link_el  ? $link_el->attr('href')      : undef;
        my $date  = $time_el  ? $time_el->attr('datetime')  : undef;

        unless (defined $title && defined $link) {
            warn "Skipped an article without a title or link on $url\n";
            return;
        }

        # Save to the database.  The eval keeps one failing row from
        # stopping the remaining articles; the trailing 1 marks success
        # independently of the contents of $@.
        my $saved = eval {
            $dbh->do(
                'INSERT INTO articles (title, url, published_at) VALUES (?, ?, ?)',
                undef,
                $title,
                $link,
                $date,
            );
            1;
        };
        my $error = $@;

        if ($saved) {
            print "Saved: $title\n";
        }
        elsif ($error =~ /UNIQUE constraint failed|is not unique/) {
            # The url is already stored (UNIQUE constraint on articles.url).
            print "Skipped: $title (already exists)\n";
        }
        else {
            # Any other database error is a real failure; do not report it
            # as a duplicate.
            warn "Failed to save $title: $error";
        }

        return;
    });

    return $articles;
}
# Run the scraper against the news listing page.
my $news_url = 'https://example.com/news';
scrape_news($news_url);
|