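// Extracts video addresses from web pages using regular expressions.
// Given a page URL, the page's HTML is downloaded and the video address is matched with a regular expression.
// See the comments in the code for details.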
/// <summary>
/// Web video helper: downloads a video page and extracts the address of the
/// embedded player file (.swf) with a site-specific regular expression.
/// </summary>
/// <remarks>
/// Requires the namespaces System, System.IO, System.IO.Compression, System.Net,
/// System.Text and System.Text.RegularExpressions.
/// </remarks>
public class WebVideo
{
    // NOTE(review): GetSite(string) and GetHtmlWithoutCompress(string) are called by
    // this class but are not defined in this part of the file. They must be supplied
    // by the rest of the class for it to compile.

    // Player-address patterns, one per supported site. They are built once and shared
    // instead of being constructed on every call.
    private static readonly Regex YouKuPlayerRegex =
        new Regex(@"http://player\.youku\.com/player\.php/sid/[\w]{13}/v\.swf");

    private static readonly Regex Ku6PlayerRegex =
        new Regex(@"http://player\.ku6\.com/refer/[\w]{16}/v\.swf");

    private static readonly Regex TuDouPlayerRegex =
        new Regex(@"http://js\.tudouui\.com/bin/player_online/[\w]+\.swf");

    /// <summary>
    /// Address of the video page (Youku, Ku6, Tudou, ...).
    /// </summary>
    private string _pageUrl;

    /// <summary>
    /// Whether the page is fetched through the compression-aware downloader.
    /// </summary>
    private bool _isCompressed;

    /// <summary>
    /// Site the page belongs to; selects the pattern used by <see cref="GetVideoFileUrl"/>.
    /// </summary>
    private VideoSite _site;

    /// <summary>
    /// Creates an instance without a page address. The fields keep their default
    /// values (null address, no compression, <see cref="VideoSite.YouKu"/>).
    /// </summary>
    public WebVideo()
    {
    }

    /// <summary>
    /// Creates an instance for the given video page.
    /// </summary>
    /// <param name="pageUrl">Address of the video page; surrounding whitespace is trimmed.</param>
    /// <param name="isCompressed">Whether to fetch the page with compression enabled.</param>
    /// <exception cref="ArgumentNullException"><paramref name="pageUrl"/> is null.</exception>
    public WebVideo(string pageUrl, bool isCompressed)
    {
        if (pageUrl == null)
        {
            throw new ArgumentNullException("pageUrl");
        }

        this._pageUrl = pageUrl.Trim();
        this._isCompressed = isCompressed;
        this._site = this.GetSite(_pageUrl);
    }

    /// <summary>
    /// Downloads the HTML source of a page, advertising gzip/deflate support and
    /// decompressing the response when the server used one of them.
    /// </summary>
    /// <param name="Url">Address of the page to download.</param>
    /// <returns>
    /// The page text, or an empty string when the download fails (the error message
    /// is written to the console).
    /// </returns>
    public string GetWebContent(string Url)
    {
        string strResult = "";
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
            request.Accept = "*/*";
            request.Headers.Set("Pragma", "no-cache");
            // Timeout in milliseconds.
            request.Timeout = 9000;
            request.UserAgent = "TaoCaiSpider1.0 Kevin-Gu's spider";
            request.Headers.Add("Accept-Encoding", "gzip,deflate");

            // Dispose the response so the underlying connection is released even
            // when reading or decoding fails.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // Invariant lower-casing: the encoding token is protocol data, not
                // user-facing text, so it must not depend on the current culture.
                string compressMode = (response.ContentEncoding ?? string.Empty).ToLowerInvariant();
                Console.WriteLine(compressMode);

                Stream decompressedStream = response.GetResponseStream();
                if (compressMode == "gzip")
                {
                    decompressedStream = new GZipStream(decompressedStream, CompressionMode.Decompress);
                }
                else if (compressMode == "deflate")
                {
                    decompressedStream = new DeflateStream(decompressedStream, CompressionMode.Decompress);
                }
                // Any other value: the body is read as-is. (Original author's note:
                // apparently only Youku enables page compression.)

                Encoding encode = ResolveEncoding(response.CharacterSet);
                using (StreamReader streamReader = new StreamReader(decompressedStream, encode))
                {
                    strResult = streamReader.ReadToEnd();
                }
            }
        }
        catch (Exception ex)
        {
            // Deliberate best effort: report the failure and return an empty string.
            Console.WriteLine("error occored:" + ex.Message);
        }
        return strResult;
    }

    /// <summary>
    /// Extracts the player file address from page HTML, using the pattern that
    /// belongs to this instance's site.
    /// </summary>
    /// <param name="htmlContent">HTML source of the video page.</param>
    /// <returns>
    /// The first matching address, or an empty string when the input is null or
    /// empty, nothing matches, or the site has no pattern.
    /// </returns>
    public string GetVideoFileUrl(string htmlContent)
    {
        if (string.IsNullOrEmpty(htmlContent))
        {
            return string.Empty;
        }

        // Pick the pattern for the current site.
        Regex rgx;
        switch (_site)
        {
            case VideoSite.YouKu:
                rgx = YouKuPlayerRegex;
                break;
            case VideoSite.TuDou:
                rgx = TuDouPlayerRegex;
                break;
            case VideoSite.Ku6:
                rgx = Ku6PlayerRegex;
                break;
            default:
                return string.Empty;
        }

        // Run the pattern once and inspect the result, rather than IsMatch + Match.
        Match match = rgx.Match(htmlContent);
        return match.Success ? match.Value : string.Empty;
    }

    /// <summary>
    /// Downloads the configured video page and returns the player file address
    /// found in it.
    /// </summary>
    /// <returns>The player file address, or an empty string when none is found.</returns>
    public string GetVideoUrl()
    {
        string html = _isCompressed
            ? this.GetWebContent(_pageUrl)
            : this.GetHtmlWithoutCompress(_pageUrl);
        return this.GetVideoFileUrl(html);
    }

    /// <summary>
    /// Maps the charset reported by the server to an <see cref="Encoding"/>,
    /// falling back to UTF-8 when the charset is missing or not recognized.
    /// </summary>
    private static Encoding ResolveEncoding(string charset)
    {
        if (charset != null)
        {
            // Some servers send the charset wrapped in quotes.
            charset = charset.Trim(' ', '"', '\'');
        }

        if (string.IsNullOrEmpty(charset))
        {
            return Encoding.UTF8;
        }

        try
        {
            return Encoding.GetEncoding(charset);
        }
        catch (ArgumentException)
        {
            // Unknown or unsupported charset name.
            return Encoding.UTF8;
        }
    }
}//end class


/// <summary>
/// Supported video sites.
/// </summary>
public enum VideoSite
{
    YouKu = 0,
    Ku6 = 1,
    TuDou = 2,
}