2018年4月11日 星期三

[C#]HttpWebRequest爬蟲


步驟一、Default.aspx頁面中建立lbtnSend按鈕
步驟二、建立AccountInfo.cs 檔案與SetHttpWebRequest.cs 檔案
步驟三、設定要抓取的網站網址
----------------------------------------------------------------------------------------------------------------
Default.aspx.cs 預設頁面的程式碼後置（lbtnSend 按鈕點擊事件）
----------------------------------------------------------------------------------------------------------------
        protected void lbtnSend_Click(object sender, EventArgs e)
        {
            AccountInfo accountInfo = new AccountInfo("123", "456");

            string url, postData, html;
            url = "http://rrfp.swcb.gov.tw/";
            postData = string.Format("hidLoginType=A&hidRadio=SSO&LoginType=SSO&LOGIN_TYPE=A&ID_NO=&USER_ID={0}&USER_PWD={1}", accountInfo.Account, accountInfo.Password);
            html = SetHttpWebRequest.SetLoginHttpWebRequest("POST", url, postData, accountInfo, 0);

            url = "http://rrfp.swcb.gov.tw/Home/Index";
            html = SetHttpWebRequest.SetLoginHttpWebRequest("GET", url, null, accountInfo, 0);

            url = "http://rrfp.swcb.gov.tw/DES_Analys/Home_Analys_Index";
            html = SetHttpWebRequest.SetLoginHttpWebRequest("GET", url, null, accountInfo, 0);
        }

----------------------------------------------------------------------------------------------------------------
AccountInfo.cs 帳號型態
----------------------------------------------------------------------------------------------------------------
/// <summary>
/// Account data type: the credentials plus the per-account state kept between requests.
/// </summary>
public class AccountInfo
{
    /// <summary>
    /// Constructor: stores the credentials and starts with an empty cookie collection.
    /// </summary>
    /// <param name="account">Account name.</param>
    /// <param name="password">Password.</param>
    public AccountInfo(string account, string password)
    {
        this.Account = account;
        this.Password = password;
        this.CookieCollection = new CookieCollection();
    }

    /// <summary>Account name.</summary>
    public string Account { get; set; }

    /// <summary>Password.</summary>
    public string Password { get; set; }

    /// <summary>Redirect URL.</summary>
    public string RedirectUrl { get; set; }

    /// <summary>Login status.</summary>
    public bool LoginedStatus { get; set; }

    /// <summary>Login key.</summary>
    public string Uid { get; set; }

    /// <summary>Cookies received from the source site.</summary>
    public CookieCollection CookieCollection { get; set; }

    /// <summary>Whether the login failed.</summary>
    public bool IsFailure { get; set; }

    /// <summary>Count of consecutive server errors.</summary>
    public int WebExCount { get; set; }
}


----------------------------------------------------------------------------------------------------------------
SetHttpWebRequest.cs POST/GET方法
----------------------------------------------------------------------------------------------------------------
 public class SetHttpWebRequest
    {
        //設定登入要求器
        public static string SetLoginHttpWebRequest(string method, string url, string postData, AccountInfo accountInfo, int proxyIndex)
        {
            //協定
            HttpWebRequest request;
            request = WebRequest.CreateHttp(url);
            request.Method = method;
            request.UserAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36";
            request.Accept = "*/*";
            request.ContentType = "application/x-www-form-urlencoded; charset=UTF-8";
            request.AllowAutoRedirect = false;
            request.KeepAlive = true;
            request.Referer = "http://rrfp.swcb.gov.tw/";
            request.Timeout = 10000;
            request.ReadWriteTimeout = 10000;
            request.CookieContainer = new CookieContainer();
            request.CookieContainer.Add(accountInfo.CookieCollection);

            //傳送資料
            if (postData != null)
            {
                byte[] postByte = Encoding.UTF8.GetBytes(postData);
                request.ContentLength = postByte.Length;
                Stream stream = request.GetRequestStream();
                stream.Write(postByte, 0, postByte.Length);
                stream.Close();
            }

            //接收回應
            HttpWebResponse response = request.GetResponse() as HttpWebResponse;
            string html = string.Empty;

            if (response != null)
            {
                StreamReader reader;

                if (string.Equals("gzip", response.ContentEncoding, StringComparison.CurrentCultureIgnoreCase))
                {
                    reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress), Encoding.UTF8);
                }
                else
                {
                    reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8);
                }

                html = reader.ReadToEnd();
                reader.Close();

                foreach (Cookie cookie in response.Cookies)
                {
                    accountInfo.CookieCollection.Add(cookie);
                }
            }

            response.Close();
            request.Abort();

            return html;
        }
    }
----------------------------------------------------------------------------------------------------------------

沒有留言:

張貼留言

[jQuery][C#]使用jQuery實作form action post參數

網頁轉址一般分為 1.直接導向Url 2.夾帶Get參數在Url後面 如果要使用Post的方式要如何跳轉呢? Ans:使用Form Action 設定Method='Post' 範例前端程式 -----------------------------...