asp.net(C#)自动下载站外图片

作者:翅膀的初衷 来源:本站原创 发布时间:2014-03-09 查看数:62007

一般情况下,大多数用户在转载网络上文章时,都会将文章内的图片下载到自己的服务器上!但是当图片数量很多的时候,这就变成了一个苦力活了!

本文正是针对这种情况,实现由程序自动下载站外图片!

在上代码前,先讲下具体实现原理:

第一步:通过正则表达式找出所有图片地址

第二步:通过图片地址的域名与当前访问地址的域名进行比较判断是否外链图片

第三步:使用System.Net下的WebClient下载图片到指定目录!

第四步:替换链接地址

```csharp
    // File extensions that are accepted for download. The HTML is untrusted
    // input, so anything else (e.g. ".aspx", ".exe") is never written to the
    // save directory, which is typically served by the web site itself.
    private static readonly string[] AllowedImageExtensions =
        { ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp", ".ico" };

    /// <summary>
    /// Downloads every off-site image referenced by an &lt;img src="..."&gt; tag in
    /// <paramref name="html"/> and rewrites the HTML to point at the local copies.
    /// </summary>
    /// <param name="html">HTML fragment to scan. Null or empty input is returned unchanged.</param>
    /// <param name="savePath">
    /// Prefix prepended (by plain concatenation) to the generated file name to form the
    /// local file path, so a directory must end with a directory separator.
    /// </param>
    /// <param name="rootUrl">
    /// Prefix prepended (by plain concatenation) to the generated file name to form the
    /// new image URL, so a folder URL must end with "/".
    /// </param>
    /// <returns>
    /// The HTML with each successfully downloaded image URL replaced. Images that are
    /// relative, hosted on the current request's host, not http/https, lacking an allowed
    /// image extension, or that fail to download are left untouched.
    /// </returns>
    protected string BatchDownloadPicture(string html, string savePath, string rootUrl)
    {
        // Regex.Matches(null) would throw; there is nothing to do for empty input anyway.
        if (string.IsNullOrEmpty(html))
        {
            return html;
        }

        string currentHost = Request.Url.Host;

        // Keyed by the src text exactly as it appears in the HTML, so that the later
        // Replace() targets the real text rather than Uri's canonicalised form.
        Dictionary<string, Uri> targets = new Dictionary<string, Uri>(StringComparer.Ordinal);

        System.Text.RegularExpressions.Regex objRegExp = new System.Text.RegularExpressions.Regex(
            @"(<img[^>]*?src\s*=\s*(['""]?))([^'""\s>]+)([^>]*>)",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);

        foreach (System.Text.RegularExpressions.Match match in objRegExp.Matches(html))
        {
            string src = match.Groups[3].Value;
            Uri uri;

            // Relative or malformed addresses are skipped instead of throwing
            // UriFormatException and aborting the whole method.
            if (!Uri.TryCreate(src, UriKind.Absolute, out uri))
            {
                continue;
            }

            // Only fetch over http/https; this also rules out file:// and UNC paths.
            if (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps)
            {
                continue;
            }

            // Images already hosted on the current site are not external links.
            if (currentHost.Equals(uri.Host, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            if (!targets.ContainsKey(src))
            {
                targets.Add(src, uri);
            }
        }

        using (System.Net.WebClient wc = new System.Net.WebClient())
        {
            foreach (KeyValuePair<string, Uri> target in targets)
            {
                try
                {
                    // Path.GetExtension only looks at the last path segment, so a dot in
                    // a directory name (e.g. "/v1.2/pic") is not mistaken for an extension.
                    string extension = System.IO.Path.GetExtension(target.Value.AbsolutePath);
                    if (Array.IndexOf(AllowedImageExtensions, extension.ToLowerInvariant()) < 0)
                    {
                        continue;
                    }

                    string name = string.Concat(Guid.NewGuid().ToString("N"), extension);

                    wc.DownloadFile(target.Value, string.Concat(savePath, name));

                    // Rewrite the link only after the download succeeded.
                    html = html.Replace(target.Key, string.Concat(rootUrl, name));
                }
                catch (Exception)
                {
                    // Best effort: a failed image keeps its original URL and the
                    // remaining images are still processed.
                    continue;
                }
            }
        }

        return html;
    }
```