今天遇到一位朋友的問題,順手幫他寫了個小工具。起因是泰國佛牌有一些驗證單位,像是薩瑪空(Samakon)、G-Pra、DD-Pra、塔帕占(Thaprachan)等等,
這些驗證單位的網站提供了佛牌的認證與詳細資訊,不過通常都需要手動輸入卡號來查詢。我發現 Samakon 的查詢系統是透過 HTTP POST 來發送請求,
因此決定使用 C# 來寫一個簡單的爬取工具。
1. 安裝 RestSharp,現在習慣用這個套件處理 HTTP 爬取的部分
2. 接下來就是程式碼的部分
我將這個工具分為兩個主要的函式:
GetSMKData - 這個函式會發送 HTTP POST 請求,並獲取 HTML 回應。
ParseSMKData - 使用 Regex 解析 HTML,提取關鍵資訊。
//call function
//GetSMKData("GH08-9E568");
/// <summary>
/// Looks up a certificate by POSTing the card number to
/// https://www.samakompra.com/searchmycer/SearchCertificate/ and passes the
/// returned HTML to <see cref="ParseSMKData"/>.
/// </summary>
/// <param name="cardNumber">Card number to search for, e.g. "GH08-9E568".</param>
private static void GetSMKData(string cardNumber)
{
    // Thai text is printed later, so the console must emit UTF-8.
    Console.OutputEncoding = Encoding.UTF8;

    if (string.IsNullOrWhiteSpace(cardNumber))
    {
        Console.Error.WriteLine("Card number is empty; nothing to look up.");
        return;
    }

    // The headers below mimic the request a browser sends from the search page.
    var client = new RestClient("https://www.samakompra.com/searchmycer/SearchCertificate/");
    var request = new RestRequest("", Method.Post);
    request.AddHeader("accept", "*/*");
    request.AddHeader("accept-language", "zh-TW,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6");
    request.AddHeader("content-type", "application/x-www-form-urlencoded; charset=UTF-8");
    // Ask for the English page so the field labels are easier to read and match.
    request.AddHeader("cookie", "languageUse=en;");
    request.AddHeader("origin", "https://www.samakompra.com");
    request.AddHeader("priority", "u=1, i");
    request.AddHeader("referer", "https://www.samakompra.com/searchmycer/");
    request.AddHeader("sec-ch-ua", "\"Microsoft Edge\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"");
    request.AddHeader("sec-ch-ua-mobile", "?0");
    request.AddHeader("sec-ch-ua-platform", "\"Windows\"");
    request.AddHeader("sec-fetch-dest", "empty");
    request.AddHeader("sec-fetch-mode", "cors");
    request.AddHeader("sec-fetch-site", "same-origin");
    request.AddHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0");
    request.AddHeader("x-requested-with", "XMLHttpRequest");

    // Form body: caseType=SearchCertificate, keyword_search=<card number>.
    // The card number is URL-encoded so characters such as '&', '=', '+' or spaces
    // cannot corrupt the form body or inject extra fields.
    string formBody = "caseType=SearchCertificate&keyword_search=" + Uri.EscapeDataString(cardNumber.Trim());
    request.AddParameter("application/x-www-form-urlencoded", formBody, ParameterType.RequestBody);

    // The method is synchronous by signature; GetAwaiter().GetResult() surfaces the
    // original exception instead of an AggregateException wrapper.
    var response = client.ExecuteAsync(request).GetAwaiter().GetResult();

    if (!response.IsSuccessful || string.IsNullOrEmpty(response.Content))
    {
        Console.Error.WriteLine(
            $"Certificate lookup failed for '{cardNumber}': HTTP {(int)response.StatusCode} {response.ErrorMessage}");
        return;
    }

    ParseSMKData(response.Content);
}
/// <summary>
/// Extracts the certificate fields from the HTML returned by the search POST
/// and prints them to the console.
/// </summary>
/// <param name="htmlCode">HTML returned by the certificate search; null is treated as empty.</param>
private static void ParseSMKData(string htmlCode)
{
    string html = htmlCode ?? string.Empty;

    // NOTE(review): the published patterns lost their HTML tags (the image pattern was
    // even left as an unterminated string), so the exact originals are unknown.
    // The patterns below are markup-tolerant reconstructions: a label, an optional colon,
    // any number of tags, then the first run of text. Verify against a real response.
    const string valueAfterLabel = @"\s*:?\s*(?:<[^>]*>\s*)*([^<]+)";

    // First <img> src in the response — TODO confirm it is the certificate image.
    string imgPattern = "<img[^>]*\\bsrc\\s*=\\s*[\"']([^\"']+)[\"']";
    string categoryPattern = @"\bCategory\b" + valueAfterLabel;
    string namePattern = @"\bName\b" + valueAfterLabel;
    string placePattern = @"\bPlace\b" + valueAfterLabel;
    string provincePattern = @"\bProvince\b" + valueAfterLabel;
    string issuedPattern = @"\bIssued\b" + valueAfterLabel;
    // NOTE(review): the original opening tag (presumably a <div> with a specific class)
    // was stripped; this matches the first <div> that contains only text.
    string cardNumber = @"<div[^>]*>\s*([^<]+?)\s*<\/div>";

    // Returns the first capture group, or an empty string when the pattern does not match.
    string Extract(string pattern) => Regex.Match(html, pattern).Groups[1].Value.Trim();

    // Thai text is printed below, so the console must emit UTF-8.
    Console.OutputEncoding = Encoding.UTF8;
    Console.WriteLine("卡號: " + Extract(cardNumber));
    Console.WriteLine("圖片網址: " + Extract(imgPattern));
    Console.WriteLine("分類: " + Extract(categoryPattern));
    Console.WriteLine("名稱: " + Extract(namePattern));
    Console.WriteLine("地點: " + Extract(placePattern));
    Console.WriteLine("省分: " + Extract(provincePattern));
    Console.WriteLine("發卡時間: " + Extract(issuedPattern));
}
// Result
/*
卡號: GH08-9E568
圖片網址: https://www.samakompra.com/myceronline/show/images/w4e274a4r5k5z3g434t5k5h253d2m2q295t2n5i4u564d55426s4e574p5e5n2l5o4v554x3o29484q4p4d474a4i5g5k474y33454v2x2v2w3r21423d5w284v2a5r2t4k3140324f4936433e4l2q2n2g4j4f4f2o2b2l4p3d4p2x2t2c4a4z2w2s2o2w266
分類: เหรียญพระพุทธ - พระคณาจารย์
名稱: เหรียญท่านท้าวมหาพรหมธาดา (เหรียญจักรเพชร) เนื้อทองฝาบาตร พ.ศ. 2508
地點: วัดบรมสถลศรีสุทธิโสภณรังสรรค์ (วัดดอน)
省分: กรุงเทพมหานคร
發卡時間: 27 August 2024
*/
Result:
卡號: GH08-9E568
圖片網址:
https://www.samakompra.com/myceronline/show/images/w4e274a4r5k5z3g434t5k5h253d2m2q295t2n5i4u564d55426s4e574p5e5n2l5o4v554x3o29484q4p4d474a4i5g5k474y33454v2x2v2w3r21423d5w284v2a5r2t4k3140324f4936433e4l2q2n2g4j4f4f2o2b2l4p3d4p2x2t2c4a4z2w2s2o2w266
分類: เหรียญพระพุทธ - พระคณาจารย์
名稱: เหรียญท่านท้าวมหาพรหมธาดา (เหรียญจักรเพชร) เนื้อทองฝาบาตร พ.ศ. 2508
地點: วัดบรมสถลศรีสุทธิโสภณรังสรรค์ (วัดดอน)
省分: กรุงเทพมหานคร
發卡時間: 27 August 2024
其他的就看你需要啥再自己取得,如果是 Console 因為有泰文的關係,記得 Console.OutputEncoding = Encoding.UTF8;