asp.net(C#)自动下载站外图片
一般情况下,大多数用户在转载网络上文章时,都会将文章内的图片下载到自己的服务器上!但是当图片数量很多的时候,这就变成了一个苦力活了!
本文正是在该情况下,来实现程序自动下载站外图片!
在上代码前,先讲下具体实现原理:
第一步:通过正则表达式找出所有图片地址
第二步:通过图片地址的域名与当前访问地址的域名进行比较判断是否外链图片
第三步:使用System.Net下的WebClient下载图片到指定目录!
第四步:替换链接地址
/// <summary>
/// Finds every off-site image referenced by an <c>&lt;img src="..."&gt;</c> tag in
/// <paramref name="html"/>, downloads it into <paramref name="savePath"/> under a
/// freshly generated GUID file name, and rewrites the HTML so the image is served
/// from <paramref name="rootUrl"/> instead.
/// </summary>
/// <param name="html">HTML fragment to scan. Null or empty input is returned unchanged.</param>
/// <param name="savePath">
/// Directory prefix the file name is appended to by plain concatenation, so it must
/// already end with a directory separator.
/// </param>
/// <param name="rootUrl">
/// URL prefix the file name is appended to by plain concatenation, so it must
/// already end with a slash.
/// </param>
/// <returns>
/// The HTML with the address of every successfully downloaded image replaced.
/// Images that could not be downloaded keep their original address.
/// </returns>
protected string BatchDownloadPicture(string html, string savePath, string rootUrl)
{
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    // Group 3 captures the raw value of the src attribute.
    System.Text.RegularExpressions.Regex objRegExp = new System.Text.RegularExpressions.Regex(@"(<img[^>]*?src\s*=\s*(['""]?))([^'""\s>]+)([^>]*>)", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
    System.Text.RegularExpressions.MatchCollection mc = objRegExp.Matches(html);

    // Keyed by the src text exactly as it appears in the HTML, because that text
    // (not the canonical form produced by Uri.ToString()) is what must be replaced later.
    Dictionary<string, Uri> result = new Dictionary<string, Uri>(StringComparer.Ordinal);
    for (int i = 0; i < mc.Count; i++)
    {
        string src = mc[i].Groups[3].Value;
        if (result.ContainsKey(src))
        {
            continue;
        }

        // Relative addresses are local images by definition; TryCreate rejects them
        // instead of throwing UriFormatException the way the Uri constructor does.
        Uri uri;
        if (!Uri.TryCreate(src, UriKind.Absolute, out uri))
        {
            continue;
        }

        // Only fetch over HTTP(S): the HTML is untrusted, and WebClient would
        // otherwise happily copy file:// resources from the server itself.
        if (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps)
        {
            continue;
        }

        // Same host as the current request means the image is already local.
        if (Request.Url.Host.Equals(uri.Host, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        result.Add(src, uri);
    }

    using (System.Net.WebClient wc = new System.Net.WebClient())
    {
        foreach (KeyValuePair<string, Uri> node in result)
        {
            try
            {
                // Images without a file extension are skipped.
                string extension = System.IO.Path.GetExtension(node.Value.AbsolutePath);
                if (string.IsNullOrEmpty(extension))
                {
                    continue;
                }

                string name = string.Concat(Guid.NewGuid().ToString("N"), extension);
                wc.DownloadFile(node.Value, string.Concat(savePath, name));

                // Rewrite the link only after the download succeeded.
                html = html.Replace(node.Key, string.Concat(rootUrl, name));
            }
            catch (Exception)
            {
                // Best effort: an image that cannot be fetched or saved keeps its
                // original address and must not abort the remaining downloads.
                continue;
            }
        }
    }

    return html;
}