一、背景
在1688官网里面有很多信息是需要登录才能看得到的,比如商家的联系电话等等。那么我们在抓取它的网页的时候,肯定是需要维持登录状态才能得到对应的内容。这里面就会涉及到自动登录的问题。
登录地址:https://login.1688.com/member/signin.htm
二、自动登录方法
1、找到对应的元素,账号、密码框。
2、把账号、密码值带进去。
// Load the account pool from 1688Account.json. Each usable entry is a single
// "account+password" string exposed as Account1688Item.AccountPassword.
string file = "1688Account.json";
if (!File.Exists(file))
{
    throw new ArgumentException("1688Account not found");
}
string data = File.ReadAllText(file, Encoding.UTF8);
var account1688list = JsonConvert.DeserializeObject<List<Account1688Item>>(data);

// DeserializeObject yields null for an empty/"null" document; treat that, and
// blank entries, as "no accounts" instead of failing later with an opaque exception.
List<string> logininfolist = account1688list == null
    ? new List<string>()
    : account1688list
        .Where(o => o != null && !string.IsNullOrEmpty(o.AccountPassword))
        .Select(o => o.AccountPassword)
        .ToList();
if (logininfolist.Count == 0)
{
    throw new InvalidOperationException("1688Account contains no accounts");
}

// Pick one account at random.
Random rdinfo = new Random();
var modelinfo = logininfolist[rdinfo.Next(logininfolist.Count)];

// Split at the FIRST '+' only, so a password that itself contains '+' is kept whole,
// and reject entries that do not have both halves.
string[] credential = modelinfo.Split(new[] { '+' }, 2);
if (credential.Length != 2 || credential[0].Length == 0 || credential[1].Length == 0)
{
    throw new InvalidOperationException("1688Account entry must look like \"account+password\"");
}

driver.FindElement(By.Id("fm-login-id")).SendKeys(credential[0]);
driver.FindElement(By.Id("fm-login-password")).SendKeys(credential[1]);
3、模拟点击提交按钮操作。
// Click the submit button of the password-login form, then pause for a fixed
// 30 seconds before the next step.
var submitButton = driver.FindElement(By.ClassName("password-login"));
submitButton.Click();
Thread.Sleep(TimeSpan.FromSeconds(30));
4、进入控制台,验证是否登录成功,并记录Cookies。下次再来的时候,直接使用现成的Cookies,一般有效期可以维持一天多。
// Open the workbench page and give it five seconds to settle.
const string workbenchUrl = "https://work.1688.com/?tracelog=login_target_is_blank_1688";
driver.Navigate().GoToUrl(workbenchUrl);
Thread.Sleep(TimeSpan.FromSeconds(5));

// Ending up on the login host means the login did not succeed: discard any
// saved cookies and signal the failure to the caller.
bool bouncedToLogin = driver.Url.Contains("login.1688.com");
if (bouncedToLogin)
{
    Console.WriteLine("登录失败");
    CookieHelp.DeleteCookies();
    Console.WriteLine("2");
    throw new Exception("重新登录");
}

// Logged in: reload the page, then save the session cookies for later runs.
driver.Navigate().Refresh();
var sessionCookies = driver.Manage().Cookies.AllCookies;
CookieHelp.WriteCookies(sessionCookies);
5、判断是否有现成的登录cookies。
- driver.Navigate().GoToUrl("https://www.1688.com/");
- driver.Manage().Cookies.DeleteAllCookies();
- var listCookie = CookieHelp.GetCookie();
- if (listCookie != null)
- {
- logintry = 0;
- Console.WriteLine("有现成cookies" + DateTime.UtcNow);
- foreach (var item in listCookie)
- {
- driver.Manage().Cookies.AddCookie(new Cookie(item.Name, item.Value, item.Domain, item.Path, item.Expiry));
- }
- Thread.Sleep(2000);
-
- driver.Navigate().GoToUrl("https://work.1688.com/?tracelog=login_target_is_blank_1688");
-
- Thread.Sleep(1000 * 2);
三、完整代码
/// <summary>
/// Makes sure the browser session behind <paramref name="driver"/> is logged in to 1688.
/// Cookies saved by a previous run are replayed first; when none exist, the sign-in form
/// is filled with a randomly chosen account from <c>1688Account.json</c> and the cookies
/// of the new session are saved for later runs.
/// </summary>
/// <param name="_reptilesImageSearchService">Not used by this method.</param>
/// <param name="driver">Selenium driver whose session is to be logged in.</param>
/// <exception cref="ArgumentException"><c>1688Account.json</c> does not exist.</exception>
/// <exception cref="InvalidOperationException">
/// <c>1688Account.json</c> holds no usable "account+password" entry.
/// </exception>
/// <exception cref="Exception">
/// The saved cookies were rejected, the form login did not succeed, or the login
/// attempt counter already exceeds 4.
/// NOTE(review): presumably the caller catches this to retry — confirm.
/// </exception>
public void Implement(IReptilesImageSearchService _reptilesImageSearchService, IWebDriver driver)
{
    const string HomeUrl = "https://www.1688.com/";
    const string WorkbenchUrl = "https://work.1688.com/?tracelog=login_target_is_blank_1688";
    const string SignInUrl = "https://login.1688.com/member/signin.htm";
    const string LoginHost = "login.1688.com";

    // Load the site before touching cookies: Selenium only accepts cookies for the
    // domain of the page that is currently open.
    driver.Navigate().GoToUrl(HomeUrl);
    driver.Manage().Cookies.DeleteAllCookies();

    var listCookie = CookieHelp.GetCookie();
    if (listCookie != null)
    {
        // A saved session exists: reset the form-login attempt counter and replay it.
        logintry = 0;
        Console.WriteLine("有现成cookies" + DateTime.UtcNow);
        foreach (var item in listCookie)
        {
            driver.Manage().Cookies.AddCookie(new Cookie(item.Name, item.Value, item.Domain, item.Path, item.Expiry));
        }
        Thread.Sleep(2000);

        driver.Navigate().GoToUrl(WorkbenchUrl);
        Thread.Sleep(1000 * 2);

        // Landing on the login host means the saved cookies are no longer accepted:
        // drop them so the next call goes through the form login.
        if (driver.Url.Contains(LoginHost))
        {
            Console.WriteLine("cookies过期了");
            CookieHelp.DeleteCookies();
            Console.WriteLine("1");
            throw new Exception("重新登录");
        }
    }
    else
    {
        // No saved session: log in through the form, but give up once the counter exceeds 4.
        if (logintry > 4)
        {
            Console.WriteLine("登陆次数超出:" + logintry);
            throw new Exception("登陆次数超出,退出");
        }
        logintry++;
        Console.WriteLine("无现成cookies" + DateTime.UtcNow);

        driver.Navigate().GoToUrl(SignInUrl);

        // The account/password inputs are looked up inside the page's first frame.
        driver.SwitchTo().Frame(0);

        // Run stealth.min.js in the page; per the article notes it is meant to make the
        // automated browser behave like a real user and reduce captchas. The script's
        // return value is not needed, so it is neither cast nor stored.
        IJavaScriptExecutor js = (IJavaScriptExecutor)driver;
        js.ExecuteScript(File.ReadAllText("stealth.min.js", Encoding.UTF8));

        string account;
        string password;
        PickRandomAccount(out account, out password);

        driver.FindElement(By.Id("fm-login-id")).SendKeys(account);
        driver.FindElement(By.Id("fm-login-password")).SendKeys(password);
        driver.FindElement(By.ClassName("password-login")).Click();
        Thread.Sleep(1000 * 30); // fixed 30-second pause after submitting the form

        driver.Navigate().GoToUrl(WorkbenchUrl);
        Thread.Sleep(1000 * 5);

        // Still on the login host: the form login did not succeed.
        if (driver.Url.Contains(LoginHost))
        {
            Console.WriteLine("登录失败");
            CookieHelp.DeleteCookies();
            Console.WriteLine("2");
            throw new Exception("重新登录");
        }

        // Logged in: reload the page, then save the session cookies for later runs.
        driver.Navigate().Refresh();
        CookieHelp.WriteCookies(driver.Manage().Cookies.AllCookies);
    }
    Thread.Sleep(1000);
}

/// <summary>
/// Reads <c>1688Account.json</c>, picks one entry at random and splits its
/// "account+password" value at the first '+'.
/// </summary>
/// <param name="account">Receives the part before the first '+'.</param>
/// <param name="password">Receives everything after the first '+'.</param>
/// <exception cref="ArgumentException">The account file does not exist.</exception>
/// <exception cref="InvalidOperationException">
/// The file holds no non-empty entry, or the chosen entry lacks either half.
/// </exception>
private static void PickRandomAccount(out string account, out string password)
{
    const string AccountFile = "1688Account.json";
    if (!File.Exists(AccountFile))
    {
        throw new ArgumentException("1688Account not found");
    }

    string json = File.ReadAllText(AccountFile, Encoding.UTF8);
    var items = JsonConvert.DeserializeObject<List<Account1688Item>>(json);

    // DeserializeObject yields null for an empty/"null" document; treat that, and
    // blank entries, as "no accounts" instead of failing later with an opaque exception.
    List<string> entries = items == null
        ? new List<string>()
        : items
            .Where(o => o != null && !string.IsNullOrEmpty(o.AccountPassword))
            .Select(o => o.AccountPassword)
            .ToList();
    if (entries.Count == 0)
    {
        throw new InvalidOperationException("1688Account contains no accounts");
    }

    string entry = entries[new Random().Next(entries.Count)];

    // Split at the FIRST '+' only, so a password that itself contains '+' stays intact.
    string[] parts = entry.Split(new[] { '+' }, 2);
    if (parts.Length != 2 || parts[0].Length == 0 || parts[1].Length == 0)
    {
        throw new InvalidOperationException("1688Account entry must look like \"account+password\"");
    }

    account = parts[0];
    password = parts[1];
}
四、注意事项
1、登录的时候,有时候会出现滑块验证码,这时候一般是使用其他账号重试或者是在当前的机器手工登录一次,后面基本就会被信任。
2、使用这段代码本身就是模拟真实用户的行为,最大限度地减少验证码出现的几率。
// Run stealth.min.js in the current page through the driver's JavaScript executor.
// The script's return value is not needed, so it is neither cast nor stored
// (a (string) cast would throw if the script ever returned a non-string value).
IJavaScriptExecutor js = (IJavaScriptExecutor)driver;
string jsfile = File.ReadAllText("stealth.min.js", Encoding.UTF8);
js.ExecuteScript(jsfile);
|