spider.html 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
  1. <!DOCTYPE html>
  2. <html lang="zh-CN"> <!-- page content is Simplified Chinese; lang lets assistive tech and translators pick the right language -->
  3. <head>
  4. <meta charset="utf-8">
  5. <meta name="keywords" content="网络爬虫开发,Python爬虫,PHP爬虫,数据采集,铭讯科技">
  6. <meta name="description" content="铭讯科技是专业的软件定制开发服务商,提供全行业软件定制开发服务,是一支高交付的一流团队。">
  7. <meta name="author" content="铭讯科技">
  8. <title>Python爬虫-PHP爬虫-网络爬虫开发-数据采集-铭讯科技</title>
  9. <!-- Stylesheets -->
  10. <!-- Bootstrap, site, slick and responsive styles -->
  11. <link href="static/css/bootstrap.css" rel="stylesheet">
  12. <link href="static/css/style.css" rel="stylesheet">
  13. <link href="static/css/slick.css" rel="stylesheet">
  14. <link href="static/css/responsive.css" rel="stylesheet">
  15. <!--Color Switcher Mockup-->
  16. <link href="static/css/color-switcher-design.css" rel="stylesheet">
  17. <link href="static/css/30.css" rel="stylesheet">
  18. <!--Color Themes-->
  19. <link id="theme-color-file" href="static/css/official-blue-theme.css" rel="stylesheet">
  20. <link rel="icon" type="image/x-icon" href="./static/image/icon.jpg">
  21. <!-- Responsive -->
  22. <meta http-equiv="X-UA-Compatible" content="IE=edge">
  23. <meta name="viewport" content="width=device-width, initial-scale=1.0"> <!-- zoom is deliberately left enabled: maximum-scale / user-scalable=0 block pinch-zoom for low-vision users -->
  24. <!--[if lt IE 9]>
  25. <script src="/officialBlue/js/html5shiv.js"></script><![endif]-->
  26. <!--[if lt IE 9]>
  27. <script src="/officialBlue/js/respond.js"></script><![endif]-->
  28. <style>
  29. @media screen and (max-width:480px){ /* overrides for viewports up to 480px wide: hide the sidebar category list, re-centre the footer hotline */
  30. .sidebar .sidebar-blog-category{
  31. display: none;
  32. }
  33. .footer .footer-block .footer-tit{
  34. width: 100px;
  35. margin: 0 auto 5px auto;
  36. }
  37. .footer .footer-block .hot-line{
  38. width: 340px;
  39. margin: 0;
  40. margin-left: -120px;
  41. }
  42. .footer .footer-block p>span{
  43. display:none;
  44. }
  45. }
  46. </style>
  47. </head>
  48. <body>
  49. <div class="page-wrapper">
  50. <!-- Main Header-->
  51. <!-- Top menu -->
  52. <div class="header-perch">
  53. <div class="hx-header-fix">
  54. <div class="auto-container">
  55. <div class="hx-header mobile">
  56. <div class="hx-logo" style="width: 108px;height: 70px;">
  57. <a href="index1.html"> <!-- the logo image is the only content of this link, so its alt text is the link's accessible name -->
  58. <img style="width: 100%;" src="static/image/login.png" class="logo_blue" alt="铭讯科技首页">
  59. </a>
  60. </div>
  61. <div class="menu-operate">
  62. <div id="menu-operate-close"><img src="static/picture/71.png" alt="关闭菜单"></div>
  63. <div id="menu-operate-spread"><img src="static/picture/72.png" alt="展开菜单"></div>
  64. </div>
  65. <div class="hx-menu">
  66. <ul>
  67. <li><a href="index1.html">首页</a><span class="btm-line"><img src="static/picture/selected.png" alt=""></span></li>
  68. <li><a href="services.html">软件开发服务</a><span class="btm-line"><img src="static/picture/selected.png" alt=""></span></li>
  69. <li><a href="case.html">服务案例</a><span class="btm-line"><img src="static/picture/selected.png" alt=""></span></li>
  70. <li><a href="faq.html">新闻资讯</a><span class="btm-line"><img src="static/picture/selected.png" alt=""></span></li>
  71. <li><a href="contact.html">联系我们</a><span class="btm-line"><img src="static/picture/selected.png" alt=""></span></li>
  72. <li><a href="about.html">关于我们</a><span class="btm-line"><img src="static/picture/selected.png" alt=""></span></li>
  73. <li style="width: 180px;"><a href="tel:17347315117">咨询电话:17347315117 </a><span class="btm-line"><img src="static/picture/selected.png" alt=""></span></li> <!-- tel: link dials the number; an empty href would just reload the page -->
  74. </ul>
  75. </div>
  76. </div>
  77. </div>
  78. </div>
  79. </div>
  80. <!--End Main Header -->
  81. <!--Page Title-->
  82. <section class="page-title" style="background-image:url(static/picture/w-R.png);">
  83. <div class="auto-container">
  84. <h2>网络爬虫开发</h2>
  85. </div>
  86. </section>
  87. <!--Breadcrumb: home > services > current page (the last link points at this page and is marked aria-current)-->
  88. <div class="Dg" style="width: 1190px; margin: 0 auto; margin-top: 10px;"> <a style="margin-left: 15px;" href="index1.html">主页</a> &gt; <a href="services.html">软件开发服务</a> &gt; <a href="" aria-current="page">网络爬虫开发</a></div>
  89. <!--End Page Title-->
  90. <!--Sidebar Page Container-->
  91. <div class="sidebar-page-container">
  92. <div class="auto-container">
  93. <div class="row clearfix">
  94. <div class="content-side col-lg-8 col-md-12 col-sm-12">
  95. <div class="services-single">
  96. <div class="inner-service">
  97. <div class="lower-content">
  98. <h2 style="text-align: center">网络爬虫开发介绍</h2>
  99. <div class="text">
  100. <h1 style="box-sizing: border-box; outline: 0px; margin: 8px 0px 16px; padding: 0px; font-family: 'PingFang SC', 'Microsoft YaHei', SimHei, Arial, SimSun; font-size: 28px; color: #4f4f4f; line-height: 36px; overflow-wrap: break-word; font-variant-ligatures: common-ligatures; background-color: #ffffff;">概括性的介绍:</h1>
  101. <p style="box-sizing: border-box; outline: 0px; margin: 0px 0px 16px; padding: 0px; font-size: 16px; color: #4d4d4d; line-height: 26px; overflow-wrap: break-word; font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">网络爬虫,总结起来,就是高效地收集互联网上的指定信息。<br style="box-sizing: border-box; outline: 0px; overflow-wrap: break-word;">然而,不同的网站会有不同的保护措施,以及,不同的展示信息的方式。</p>
  102. <p style="box-sizing: border-box; outline: 0px; margin: 0px 0px 16px; padding: 0px; font-size: 16px; color: #4d4d4d; line-height: 26px; overflow-wrap: break-word; font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">这篇文章是第一篇,说的不多,是为了之后发文介绍如何开发一个自动化破解网站验证机制的程序(自动化生成爬虫代码文件)做铺垫。</p>
  103. <h3 style="box-sizing: border-box; outline: 0px; margin: 8px 0px 16px; padding: 0px; font-family: 'PingFang SC', 'Microsoft YaHei', SimHei, Arial, SimSun; font-size: 22px; color: #4f4f4f; line-height: 30px; overflow-wrap: break-word; font-variant-ligatures: common-ligatures; background-color: #ffffff;"><a style="box-sizing: border-box; outline: none; margin: 0px; padding: 0px; font-weight: normal; cursor: pointer; background-color: transparent; color: #4ea1db; overflow-wrap: break-word;" name="t1"></a><a id="_6" style="box-sizing: border-box; outline: none; margin: 0px; padding: 0px; font-weight: normal; cursor: pointer; background-color: transparent; color: #4ea1db; overflow-wrap: break-word;"></a>爬虫的开发</h3>
  104. <p style="box-sizing: border-box; outline: 0px; margin: 0px 0px 16px; padding: 0px; font-size: 16px; color: #4d4d4d; line-height: 26px; overflow-wrap: break-word; font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">主要就是针对三个点进行代码编写:</p>
  105. <ol style="box-sizing: border-box; outline: 0px; margin: 0px 0px 24px; padding: 0px; list-style: none; font-size: 18px; overflow-wrap: break-word; color: rgba(0, 0, 0, 0.75); font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">
  106. <li style="box-sizing: border-box; outline: 0px; margin: 8px 0px 0px 40px; padding: 0px; list-style: decimal; overflow-wrap: break-word;">如何<span style="box-sizing: border-box; outline: 0px; font-weight: bold; overflow-wrap: break-word;">高效地发送</span>可以拿到目标数据的<span style="box-sizing: border-box; outline: 0px; font-weight: bold; overflow-wrap: break-word;">请求</span></li>
  107. <li style="box-sizing: border-box; outline: 0px; margin: 8px 0px 0px 40px; padding: 0px; list-style: decimal; overflow-wrap: break-word;">如何<span style="box-sizing: border-box; outline: 0px; font-weight: bold; overflow-wrap: break-word;">模仿真人操作</span>可以堂而皇之地<span style="box-sizing: border-box; outline: 0px; font-weight: bold; overflow-wrap: break-word;">通过对方的验证机制</span>获得数据</li>
  108. <li style="box-sizing: border-box; outline: 0px; margin: 8px 0px 0px 40px; padding: 0px; list-style: decimal; overflow-wrap: break-word;">如何只截取<span style="box-sizing: border-box; outline: 0px; font-weight: bold; overflow-wrap: break-word;">目标信息</span>而不要其他信息</li>
  109. </ol>
  110. <h3 style="box-sizing: border-box; outline: 0px; margin: 8px 0px 16px; padding: 0px; font-family: 'PingFang SC', 'Microsoft YaHei', SimHei, Arial, SimSun; font-size: 22px; color: #4f4f4f; line-height: 30px; overflow-wrap: break-word; font-variant-ligatures: common-ligatures; background-color: #ffffff;"><a style="box-sizing: border-box; outline: none; margin: 0px; padding: 0px; font-weight: normal; cursor: pointer; background-color: transparent; color: #4ea1db; overflow-wrap: break-word;" name="t2"></a><a id="_14" style="box-sizing: border-box; outline: none; margin: 0px; padding: 0px; font-weight: normal; cursor: pointer; background-color: transparent; color: #4ea1db; overflow-wrap: break-word;"></a>开发的难度</h3>
  111. <h5 style="box-sizing: border-box; outline: 0px; margin: 8px 0px 16px; padding: 0px; font-family: 'PingFang SC', 'Microsoft YaHei', SimHei, Arial, SimSun; font-size: 18px; color: #4f4f4f; line-height: 26px; overflow-wrap: break-word; font-variant-ligatures: common-ligatures; background-color: #ffffff;"><a id="_16" style="box-sizing: border-box; outline: none; margin: 0px; padding: 0px; font-weight: normal; cursor: pointer; background-color: transparent; color: #4ea1db; overflow-wrap: break-word;"></a>高效发送请求</h5>
  112. <p style="box-sizing: border-box; outline: 0px; margin: 0px 0px 16px; padding: 0px; font-size: 16px; color: #4d4d4d; line-height: 26px; overflow-wrap: break-word; font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">实现这个功能,还需要根据业务的具体需求和数据量级来决定。<br style="box-sizing: border-box; outline: 0px; overflow-wrap: break-word;">如果业务多,量级大而且方便管理的,不用说,肯定是需要做分布式的。<br style="box-sizing: border-box; outline: 0px; overflow-wrap: break-word;">如果是需要管理的业务不多的情况下,可以直接写独立的异步爬虫程序。</p>
  113. <blockquote style="box-sizing: border-box; outline: 0px; padding: 16px; margin: 0px 0px 24px; color: rgba(0, 0, 0, 0.5); border-left-width: 8px; border-left-color: #dddfe4; background: #eef0f4; overflow: auto; overflow-wrap: break-word; font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-size: 16px; font-variant-ligatures: common-ligatures; word-break: break-word !important;">
  114. <p style="box-sizing: border-box; outline: 0px; margin: 0px; padding: 0px; font-size: 14px; color: #555666; line-height: 22px; overflow-wrap: break-word;"><span style="box-sizing: border-box; outline: 0px; font-weight: bold; overflow-wrap: break-word;">分布式爬虫</span> 市面上的设计方案有很多,只要是对各自的需求指定的分布式方案,对于这个的实现,可以考虑爬虫平台。<br style="box-sizing: border-box; outline: 0px; overflow-wrap: break-word;"><span style="box-sizing: border-box; outline: 0px; font-weight: bold; overflow-wrap: break-word;">独立的爬虫</span> 也就是一个业务对应一个爬虫程序。</p>
  115. </blockquote>
  116. <h5 style="box-sizing: border-box; outline: 0px; margin: 8px 0px 16px; padding: 0px; font-family: 'PingFang SC', 'Microsoft YaHei', SimHei, Arial, SimSun; font-size: 18px; color: #4f4f4f; line-height: 26px; overflow-wrap: break-word; font-variant-ligatures: common-ligatures; background-color: #ffffff;"><a id="_23" style="box-sizing: border-box; outline: none; margin: 0px; padding: 0px; font-weight: normal; cursor: pointer; background-color: transparent; color: #4ea1db; overflow-wrap: break-word;"></a>破解请求</h5>
  117. <p style="box-sizing: border-box; outline: 0px; margin: 0px 0px 16px; padding: 0px; font-size: 16px; color: #4d4d4d; line-height: 26px; overflow-wrap: break-word; font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">前提:找到目标数据在哪一个url中,然后以这个url的http数据包作为目标开始破解(这一点也可以归为高效发送请求)<br style="box-sizing: border-box; outline: 0px; overflow-wrap: break-word;">破解的核心就是如何构造一个身份,通过对方的验证,只要通过,对方就会返回有效数据<br style="box-sizing: border-box; outline: 0px; overflow-wrap: break-word;">这个环节大体可以分为三个步骤(循环反复):</p>
  118. <ol style="box-sizing: border-box; outline: 0px; margin: 0px 0px 24px; padding: 0px; list-style: none; font-size: 18px; overflow-wrap: break-word; color: rgba(0, 0, 0, 0.75); font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">
  119. <li style="box-sizing: border-box; outline: 0px; margin: 8px 0px 0px 40px; padding: 0px; list-style: decimal; overflow-wrap: break-word;">用这些 header cookie 以及 必要参数 发送请求</li>
  120. <li style="box-sizing: border-box; outline: 0px; margin: 8px 0px 0px 40px; padding: 0px; list-style: decimal; overflow-wrap: break-word;">观察响应中是否存在我们想要的数据</li>
  121. <li style="box-sizing: border-box; outline: 0px; margin: 8px 0px 0px 40px; padding: 0px; list-style: decimal; overflow-wrap: break-word;">去除 header cookie 以及 必要参数 中的部分键值对<br style="box-sizing: border-box; outline: 0px; overflow-wrap: break-word;">这个步骤其实就是对键值对做组合。<br style="box-sizing: border-box; outline: 0px; overflow-wrap: break-word;">到了这,对于一些没有太多防范措施的网站就能拿到数据了,然而现在越来越多的网站有了更复杂的验证机制(需要连续请求或者加密算法)。</li>
  122. </ol>
  123. <p style="box-sizing: border-box; outline: 0px; margin: 0px 0px 16px; padding: 0px; font-size: 16px; color: #4d4d4d; line-height: 26px; overflow-wrap: break-word; font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;"><span style="box-sizing: border-box; outline: 0px; font-weight: bold; overflow-wrap: break-word;">比如:</span></p>
  124. <ol style="box-sizing: border-box; outline: 0px; margin: 0px 0px 24px; padding: 0px; list-style: none; font-size: 18px; overflow-wrap: break-word; color: rgba(0, 0, 0, 0.75); font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">
  125. <li style="box-sizing: border-box; outline: 0px; margin: 8px 0px 0px 40px; padding: 0px; list-style: decimal; overflow-wrap: break-word;">链接对应一个唯一的 token,这需要分析从什么地方获得token(或者是其他相同功能的参数/键值对)。</li>
  126. <li style="box-sizing: border-box; outline: 0px; margin: 8px 0px 0px 40px; padding: 0px; list-style: decimal; overflow-wrap: break-word;">参数(身份)激活,需要对另一个url发送请求后才能通过目标url请求的验证。</li>
  127. <li style="box-sizing: border-box; outline: 0px; margin: 8px 0px 0px 40px; padding: 0px; list-style: decimal; overflow-wrap: break-word;">验证码</li>
  128. <li style="box-sizing: border-box; outline: 0px; margin: 8px 0px 0px 40px; padding: 0px; list-style: decimal; overflow-wrap: break-word;">js加密:内容加密/参数加密</li>
  129. </ol>
  130. <p style="box-sizing: border-box; outline: 0px; margin: 0px 0px 16px; padding: 0px; font-size: 16px; color: #4d4d4d; line-height: 26px; overflow-wrap: break-word; font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">等等<br style="box-sizing: border-box; outline: 0px; overflow-wrap: break-word;">。。。。。。</p>
  131. <p style="box-sizing: border-box; outline: 0px; margin: 0px 0px 16px; padding: 0px; font-size: 16px; color: #4d4d4d; line-height: 26px; overflow-wrap: break-word; font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">并不是只有这些验证机制,反爬手段千千万,千奇百怪的验证机制就是爬虫工程师花费精力最多的地方。</p>
  132. <h5 style="box-sizing: border-box; outline: 0px; margin: 8px 0px 16px; padding: 0px; font-family: 'PingFang SC', 'Microsoft YaHei', SimHei, Arial, SimSun; font-size: 18px; color: #4f4f4f; line-height: 26px; overflow-wrap: break-word; font-variant-ligatures: common-ligatures; background-color: #ffffff;"><a id="_47" style="box-sizing: border-box; outline: none; margin: 0px; padding: 0px; font-weight: normal; cursor: pointer; background-color: transparent; color: #4ea1db; overflow-wrap: break-word;"></a>截取数据(清洗规则)</h5>
  133. <p style="box-sizing: border-box; outline: 0px; margin: 0px 0px 16px; padding: 0px; font-size: 16px; color: #4d4d4d; line-height: 26px; overflow-wrap: break-word; font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">当完成破解了之后,发送请求就可以得到网站的响应信息,然而,我们需要的信息只需要一部分,如果把不必要的信息入库就是资源浪费。因此,对响应内容做清洗就是十分必要的了。</p>
  134. <p style="box-sizing: border-box; outline: 0px; margin: 0px 0px 16px; padding: 0px; font-size: 16px; color: #4d4d4d; line-height: 26px; overflow-wrap: break-word; font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">响应内容的数据类型就是字符串(图片视频文件的二进制流对于爬虫业务来说很少,但也可以通过爬虫去下载)</p>
  135. <p style="box-sizing: border-box; outline: 0px; margin: 0px 0px 16px; padding: 0px; font-size: 16px; color: #4d4d4d; line-height: 26px; overflow-wrap: break-word; font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">一般来说,有目标数据的响应内容只有两种类型:html 和 json</p>
  136. <p style="box-sizing: border-box; outline: 0px; margin: 0px 0px 16px; padding: 0px; font-size: 16px; color: #4d4d4d; line-height: 26px; overflow-wrap: break-word; font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">清洗的方式有三种:</p>
  137. <ol style="box-sizing: border-box; outline: 0px; margin: 0px 0px 24px; padding: 0px; list-style: none; font-size: 18px; overflow-wrap: break-word; color: rgba(0, 0, 0, 0.75); font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">
  138. <li style="box-sizing: border-box; outline: 0px; margin: 8px 0px 0px 40px; padding: 0px; list-style: decimal; overflow-wrap: break-word;">xpath</li>
  139. <li style="box-sizing: border-box; outline: 0px; margin: 8px 0px 0px 40px; padding: 0px; list-style: decimal; overflow-wrap: break-word;">json转hashmap,根据 key 找 value</li>
  140. <li style="box-sizing: border-box; outline: 0px; margin: 8px 0px 0px 40px; padding: 0px; list-style: decimal; overflow-wrap: break-word;">通过正则表达式切出来</li>
  141. </ol>
  142. <p style="box-sizing: border-box; outline: 0px; margin: 0px 0px 16px; padding: 0px; font-size: 16px; color: #4d4d4d; line-height: 26px; overflow-wrap: break-word; font-family: -apple-system, 'SF UI Text', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'WenQuanYi Micro Hei', sans-serif; font-variant-ligatures: common-ligatures; background-color: #ffffff;">截取数据完成后,就是入库了<br style="box-sizing: border-box; outline: 0px; overflow-wrap: break-word;">这些清洗规则,也都是通过人为定义</p>
  143. </div>
  144. <!--Faq Section-->
  145. </div>
  146. </div>
  147. </div>
  148. </div>
  149. <!--Sidebar Side-->
  150. <div class="sidebar-side sticky-container col-lg-4 col-md-12 col-sm-12">
  151. <aside class="sidebar">
  152. <div class="inner sticky-box">
  153. <!--Blog Category Widget: links to the sibling service pages; the entry for this page carries class "active" and aria-current-->
  154. <div class="sidebar-widget sidebar-blog-category">
  155. <ul class="blog-cat">
  156. <li><a href="app.html">APP</a></li>
  157. <li><a href="h5.html">H5</a></li>
  158. <li><a href="mini-program.html">小程序</a></li>
  159. <li><a href="web.html">WEB应用</a></li>
  160. <li class="active"><a href="" aria-current="page">网络爬虫</a></li>
  161. <li><a href="ai-bigdata.html">AI、大数据</a></li>
  162. <li><a href="crm.html">CRM</a></li>
  163. <li><a href="erp.html">ERP</a></li>
  164. <li><a href="oa.html">OA</a></li>
  165. <li><a href="official-website.html">企业官网</a></li>
  166. <li><a href="marketing-website.html">营销网站</a></li>
  167. <li><a href="other-website.html">其他网站</a></li>
  168. </ul>
  169. </div>
  170. <div class="sidebar-widget donate-help-widget">
  171. <div class="inner-box">
  172. <h2>如果需要我们的帮助</h2>
  173. <div class="text">
  174. 您可以通过电话或者在线聊天的方式找我们,我们将竭诚为您服务!
  175. </div>
  176. <a href="contact.html" class="theme-btn btn-style-three">联系我们</a>
  177. </div>
  178. </div>
  179. <!--Brochure-->
  180. <div class="sidebar-widget-two brochure-widget">
  181. <div class="brochure-box">
  182. <div class="inner">
  183. <span class="icon flaticon-phone-call"></span>
  184. <div class="text">173-4731-5117</div>
  185. </div>
  186. <a href="tel:17347315117" class="overlay-link" aria-label="拨打电话 173-4731-5117"></a> <!-- the link has no text content, so aria-label supplies its accessible name -->
  187. </div>
  188. </div>
  189. </div>
  190. </aside>
  191. </div>
  192. <!--Content Side-->
  193. </div>
  194. </div>
  195. </div>
  196. <!--Subscribe Style One: single-field phone form posted to /contact-->
  197. <section class="subscribe-style-one alternate">
  198. <div class="auto-container">
  199. <div class="row clearfix">
  200. <div class="col-lg-5 col-md-12 col-sm-12">
  201. <h2>现在开始沟通需求</h2>
  202. <div class="text">我们会对您的信息做加密处理,保证您的数据安全,我们不会将您的信息泄露给任何第三方!</div>
  203. </div>
  204. <div class="col-lg-7 col-md-12 col-sm-12">
  205. <form method="post" action="/contact" id="contact-form"> <!-- NOTE(review): the _token value below is hard-coded in this markup; if /contact validates a per-session CSRF token it will be stale. Confirm with the backend -->
  206. <input type="hidden" name="_token" value="NKHQZNojdEeHnwx3QBIorBFNqkUDMgJ1Kxn45kEy"> <div class="form-group clearfix">
  207. <input type="tel" name="phone" value="" placeholder="输入手机号" aria-label="手机号" autocomplete="tel" required> <!-- placeholder alone is not a label; aria-label names the field for screen readers -->
  208. <button type="submit" class="theme-btn btn-style-one">提交</button>
  209. </div>
  210. </form>
  211. </div>
  212. </div>
  213. </div>
  214. </section>
  215. <!--End Subscribe Style One-->
  216. <!--Main Footer-->
  217. <div class="footer">
  218. <div class="footer-top">
  219. <div class="wrap">
  220. <div class="footer-block w-29">
  221. <div class="logo">
  222. <img src="static/image/login-1.png" alt="铭讯科技">
  223. </div>
  224. <span class="partner" style="margin-left: -38px;">让APP开发变得更简单</span>
  225. </div>
  226. <div class="footer-block w-25 " style="width: 100%; margin: 0 auto;">
  227. <div class="footer-tit col-md-12 col-sm-12 col-xs-12" >服务热线</div>
  228. <div class="hot-line col-md-12 col-sm-12 col-xs-12">173-4731-5117</div>
  229. <p class="col-md-12 col-sm-12 col-xs-12"><span>售前咨询(09:00-24:00)</span></p>
  230. </div>
  231. <div class="footer-block w-34">
  232. <div class="footer-tit">联系我们</div>
  233. <p style="margin-bottom: 10px;">长沙市岳麓区枫林三路喜地大厦802室 </p>
  234. <p class="mb-10"> <!-- NOTE(review): the link's uin (2065525518) differs from the QQ number displayed (23933725); confirm which is correct -->
  235. <a href="tencent://message/?uin=2065525518&amp;Site=&amp;Menu=yes" target="_blank" rel="noopener noreferrer">
  236. 企业QQ:23933725
  237. <span class="foot-qq"></span>
  238. </a>
  239. </p>
  240. <p>企业邮箱:23933725@qq.com</p>
  241. <p>电话咨询:17347315117</p>
  242. </div>
  243. <div class="footer-block w-12">
  244. <div class="footer-tit">加官方微信</div>
  245. <div class="footer-ewm">
  246. <img src="static/image/erwei.jpg" alt="官方微信二维码">
  247. </div>
  248. <p class="ml">免费咨询报价及方案</p>
  249. </div>
  250. <div class="clear"></div>
  251. </div>
  252. </div>
  253. <div class="footer-bottom" style="height: auto;padding:10px 0;line-height: 20px;">
  254. <p>
  255. Copyright@2021-2022 湘ICP备2022000793号 湖南铭迅网络科技有限公司 版权所有 |
  256. </p>
  257. <ul class="ulbox">
  258. <li><a href="about.html">关于我们 |</a></li>
  259. <li><a href="faq.html">新闻资讯 |</a></li>
  260. <li><a href="contact.html">联系我们 |</a></li>
  261. <li><a href="map.html">网站地图 |</a></li>
  262. </ul>
  263. <ul class="ulboxs">
  264. <li>友情链接 :</li>
  265. <!-- <li><a>铺好租</a></li>
  266. <li><a>百万网址导航</a></li>
  267. <li><a>广州保洁公司</a></li>
  268. <li><a>九江公司</a></li>
  269. <li><a>长沙装修公司</a></li>
  270. <li><a>大集商铺网</a></li>
  271. <li><a>长沙门面出租</a></li> -->
  272. </ul>
  273. <div class="h40"></div>
  274. <!--End Footer Bottom-->
  275. <script>
  276. // var _hmt = _hmt || [];
  277. // (function() {
  278. // var hm = document.createElement("script");
  279. // hm.src = "https://hm.baidu.com/hm.js?7c8206c3cbaf6b515a6a72f50fbb4632";
  280. // var s = document.getElementsByTagName("script")[0];
  281. // s.parentNode.insertBefore(hm, s);
  282. // })();
  283. </script>
  284. </div> <!-- closes .footer-bottom; a stray footer end tag here had no opening tag and left .page-wrapper unclosed -->
  285. </div> <!-- closes .footer -->
  286. </div>
  287. <!--End pagewrapper-->
  288. <!--Scroll to top-->
  289. <div class="scroll-to-top scroll-to-target" data-target="html"><span class="fa fa-arrow-up"></span></div>
  290. <script src="static/js/jquery.js"></script> <!-- NOTE(review): scripts load synchronously in document order; the jquery.* plugin files below presumably depend on this one, so keep it first -->
  291. <script src="static/js/popper.min.js"></script>
  292. <script src="static/js/bootstrap.min.js"></script>
  293. <script src="static/js/jquery.mCustomScrollbar.concat.min.js"></script>
  294. <script src="static/js/jquery.fancybox.js"></script>
  295. <script src="static/js/appear.js"></script>
  296. <script src="static/js/owl.js"></script>
  297. <script src="static/js/sticky.js"></script>
  298. <script src="static/js/wow.js"></script>
  299. <script src="static/js/slick.js"></script>
  300. <script src="static/js/jquery.validate.min.js"></script>
  301. <script src="static/js/additional-methods.js"></script>
  302. <script src="static/js/messages_zh.min.js"></script>
  303. <script src="static/js/jquery-ui.js"></script>
  304. <script src="static/js/script.js"></script> <!-- NOTE(review): presumably the site's own init script; it is loaded last, after every library above (confirm) -->
  305. </body>
  306. </html>