一、背景
在1688官网里面有很多信息是需要登录才能看得到的,比如商家的联系电话等等。那么我们在抓取它的网页的时候,肯定是需要维持登录状态才能得到对应的内容。这里面就会涉及到自动登录的问题。
登录地址:https://login.1688.com/member/signin.htm

二、自动登录方法
1、找到对应的元素,账号、密码框。
2、把账号、密码值带进去。
List<string> logininfolist = new List<string>();
string file = "1688Account.json";
if (!File.Exists(file))
{
throw new ArgumentException("1688Account not found");
}
string data = File.ReadAllText(file, Encoding.UTF8);
var account1688list = JsonConvert.DeserializeObject<List<Account1688Item>>(data);
logininfolist.AddRange(account1688list.Select(o => o.AccountPassword).ToList());
Random rdinfo = new Random();
int indexinfo = rdinfo.Next(logininfolist.Count);
var modelinfo = logininfolist[indexinfo];
driver.FindElement(By.Id("fm-login-id")).SendKeys(modelinfo.Split('+')[0]);
driver.FindElement(By.Id("fm-login-password")).SendKeys(modelinfo.Split('+')[1]);
3、模拟点击提交按钮操作。
// Thread.Sleep(1000 * 30); //30时间操作
driver.FindElement(By.ClassName("password-login")).Click();
Thread.Sleep(1000 * 30);//30时间操作
4、进入控制台,验证是否登录成功,并记录Cookies。下次再来的时候,直接使用现成的Cookies,有效期一般可以维持一天多。
driver.Navigate().GoToUrl("https://work.1688.com/?tracelog=login_target_is_blank_1688");
Thread.Sleep(1000 * 5);
if (driver.Url.Contains("login.1688.com"))
{
Console.WriteLine("登录失败");
CookieHelp.DeleteCookies();
Console.WriteLine("2");
throw new Exception("重新登录");
}
driver.Navigate().Refresh();
CookieHelp.WriteCookies(driver.Manage().Cookies.AllCookies);
5、判断是否有现成的登录cookies。
driver.Navigate().GoToUrl("https://www.1688.com/");
driver.Manage().Cookies.DeleteAllCookies();
var listCookie = CookieHelp.GetCookie();
if (listCookie != null)
{
logintry = 0;
Console.WriteLine("有现成cookies" + DateTime.UtcNow);
foreach (var item in listCookie)
{
driver.Manage().Cookies.AddCookie(new Cookie(item.Name, item.Value, item.Domain, item.Path, item.Expiry));
}
Thread.Sleep(2000);
driver.Navigate().GoToUrl("https://work.1688.com/?tracelog=login_target_is_blank_1688");
Thread.Sleep(1000 * 2);
三、完整代码
/// <summary>
/// Puts the supplied browser session into a logged-in state on 1688.com.
/// If <c>CookieHelp.GetCookie()</c> returns cookies, they are injected into the
/// browser and the workbench page is opened to check that they are still accepted.
/// Otherwise the login form is filled with a randomly chosen account from
/// <c>1688Account.json</c>, submitted, and the resulting cookies are handed to
/// <c>CookieHelp.WriteCookies</c>.
/// </summary>
/// <param name="_reptilesImageSearchService">Not used by this method.</param>
/// <param name="driver">Browser session to log in; it is navigated and its cookies are replaced.</param>
/// <exception cref="ArgumentException">
/// The account file is missing, contains no accounts, or the chosen entry is not
/// in <c>account+password</c> form.
/// </exception>
/// <exception cref="Exception">
/// The browser ended up on the login host (stored cookies rejected or the form
/// login failed), or the no-cookie path has been entered more than the allowed
/// number of times.
/// </exception>
public void Implement(IReptilesImageSearchService _reptilesImageSearchService, IWebDriver driver)
{
    const string workbenchUrl = "https://work.1688.com/?tracelog=login_target_is_blank_1688";
    const string loginHost = "login.1688.com";

    // Start from a page on the target site with an empty cookie jar so that only
    // the cookies added below (or set by a fresh login) are present.
    driver.Navigate().GoToUrl("https://www.1688.com/");
    driver.Manage().Cookies.DeleteAllCookies();

    var listCookie = CookieHelp.GetCookie();
    if (listCookie != null)
    {
        // Stored cookies exist: reset the attempt counter (a member declared
        // outside this method) and replay the cookies into the browser.
        logintry = 0;
        Console.WriteLine("有现成cookies" + DateTime.UtcNow);
        foreach (var item in listCookie)
        {
            driver.Manage().Cookies.AddCookie(new Cookie(item.Name, item.Value, item.Domain, item.Path, item.Expiry));
        }
        Thread.Sleep(2000);

        // Being sent back to the login host means the cookies were not accepted.
        driver.Navigate().GoToUrl(workbenchUrl);
        Thread.Sleep(1000 * 2);
        if (driver.Url.Contains(loginHost))
        {
            Console.WriteLine("cookies过期了");
            CookieHelp.DeleteCookies();
            Console.WriteLine("1");
            throw new Exception("重新登录");
        }
    }
    else
    {
        // No stored cookies: give up once the counter exceeds 4, otherwise count
        // this attempt and perform a form login.
        if (logintry > 4)
        {
            Console.WriteLine("登陆次数超出:" + logintry);
            throw new Exception("登陆次数超出,退出");
        }
        logintry++;
        Console.WriteLine("无现成cookies" + DateTime.UtcNow);
        driver.Navigate().GoToUrl("https://login.1688.com/member/signin.htm");

        #region Login action
        // The form fields are looked up after switching into the first frame of the page.
        driver.SwitchTo().Frame(0);

        // Run the stealth script. Its return value is not needed, so it is not cast:
        // casting to string would throw if the script returned a non-string value.
        IJavaScriptExecutor js = (IJavaScriptExecutor)driver;
        string jsfile = File.ReadAllText("stealth.min.js", Encoding.UTF8);
        js.ExecuteScript(jsfile);

        string[] credential = PickRandomCredential("1688Account.json");
        driver.FindElement(By.Id("fm-login-id")).SendKeys(credential[0]);
        driver.FindElement(By.Id("fm-login-password")).SendKeys(credential[1]);
        driver.FindElement(By.ClassName("password-login")).Click();

        // Fixed 30-second pause after submitting the form.
        Thread.Sleep(1000 * 30);
        #endregion Login action

        driver.Navigate().GoToUrl(workbenchUrl);
        Thread.Sleep(1000 * 5);
        if (driver.Url.Contains(loginHost))
        {
            Console.WriteLine("登录失败");
            CookieHelp.DeleteCookies();
            Console.WriteLine("2");
            throw new Exception("重新登录");
        }

        // Login succeeded: refresh, then hand the session cookies to CookieHelp.
        driver.Navigate().Refresh();
        CookieHelp.WriteCookies(driver.Manage().Cookies.AllCookies);
    }
    Thread.Sleep(1000);
}

/// <summary>
/// Reads the account file and returns one randomly chosen credential as a
/// two-element array: <c>[0]</c> is the account name, <c>[1]</c> the password.
/// </summary>
/// <param name="accountFile">Path of the JSON file holding the account list.</param>
/// <exception cref="ArgumentException">
/// The file does not exist, holds no accounts, or the chosen entry is not in
/// <c>account+password</c> form.
/// </exception>
private static string[] PickRandomCredential(string accountFile)
{
    if (!File.Exists(accountFile))
    {
        throw new ArgumentException("1688Account not found");
    }

    string data = File.ReadAllText(accountFile, Encoding.UTF8);
    var accounts = JsonConvert.DeserializeObject<List<Account1688Item>>(data);
    if (accounts == null || accounts.Count == 0)
    {
        // Fail with a clear message instead of an out-of-range index further down.
        throw new ArgumentException("1688Account contains no accounts");
    }

    var chosen = accounts[new Random().Next(accounts.Count)];
    string entry = chosen == null ? null : chosen.AccountPassword;

    // Split on the first '+' only, so a password that itself contains '+' stays intact.
    string[] parts = string.IsNullOrEmpty(entry) ? new string[0] : entry.Split(new[] { '+' }, 2);
    if (parts.Length != 2)
    {
        // The entry itself is deliberately left out of the message: it holds a password.
        throw new ArgumentException("1688Account entry is not in 'account+password' format");
    }
    return parts;
}
四、注意事项
1、登录的时候,有时候会出现滑块验证码,这时候一般是使用其他账号重试或者是在当前的机器手工登录一次,后面基本就会被信任。
2、下面这段代码用于模拟真实用户的行为,最大限度地减少验证码出现的几率。
IJavaScriptExecutor js = (IJavaScriptExecutor)driver;
////// string returnjs = (string)js.ExecuteScript("Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});");
string jsfile = File.ReadAllText("stealth.min.js", Encoding.UTF8);
string returnjs = (string)js.ExecuteScript(jsfile);