准备工作:
通过 NuGet 安装 Microsoft.Bcl.Compression;
使用命名空间
using System.IO.Compression ;
/// <summary>
/// Downloads <paramref name="url"/> with an HTTP GET request and returns the body as text.
/// </summary>
/// <remarks>
/// The request advertises gzip and deflate support, so the body is decompressed here
/// according to the response's Content-Encoding header before it is decoded to a string.
/// The text encoding is chosen by <c>GetEncoding</c>, which receives the charset name taken
/// from the Content-Type response header (or an empty string when none is declared).
/// </remarks>
/// <param name="url">Absolute URL of the resource to download.</param>
/// <returns>The decoded response body; an empty string when the response has no body stream.</returns>
public static async Task<string> Get(string url)
{
    // Create the WebRequest object.
    WebRequest request = WebRequest.CreateHttp(new Uri(url));
    // Use the GET method.
    request.Method = "GET";
    request.Headers[HttpRequestHeader.UserAgent] = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0";
    // Tell the server which content codings we are able to decode.
    request.Headers[HttpRequestHeader.AcceptEncoding] = "gzip, deflate";

    using (var response = await request.GetResponseAsync())
    {
        var raw = response.GetResponseStream();
        if (raw == null)
        {
            // No body at all: nothing to read or decode.
            return string.Empty;
        }

        // Content-Encoding is absent when the server answers uncompressed, so the
        // header value may be null and must not be dereferenced directly.
        var contentEncoding = response.Headers["Content-Encoding"];

        using (var body = OpenDecodedStream(raw, contentEncoding))
        using (var buffer = new MemoryStream())
        {
            await body.CopyToAsync(buffer);
            var bytes = buffer.ToArray();

            // Hand over only the charset name, not the whole "type/subtype; charset=x" value.
            var charset = ExtractCharset(response.Headers["Content-Type"]);
            return GetEncoding(bytes, charset).GetString(bytes, 0, bytes.Length);
        }
    }
}

/// <summary>Wraps <paramref name="raw"/> in the decompressor matching the Content-Encoding value.</summary>
private static Stream OpenDecodedStream(Stream raw, string contentEncoding)
{
    var coding = (contentEncoding ?? string.Empty).Trim();

    // Header tokens are protocol identifiers, so compare ordinally rather than by culture.
    if (coding.Equals("gzip", StringComparison.OrdinalIgnoreCase) ||
        coding.Equals("x-gzip", StringComparison.OrdinalIgnoreCase))
    {
        return new GZipStream(raw, CompressionMode.Decompress);
    }

    if (coding.Equals("deflate", StringComparison.OrdinalIgnoreCase))
    {
        // NOTE(review): DeflateStream reads raw DEFLATE data; a server that sends a
        // zlib-wrapped "deflate" body would fail to decode here — confirm against real servers.
        return new DeflateStream(raw, CompressionMode.Decompress);
    }

    return raw;
}

/// <summary>Returns the charset parameter of a Content-Type value, or an empty string when there is none.</summary>
private static string ExtractCharset(string contentType)
{
    if (string.IsNullOrEmpty(contentType))
    {
        return string.Empty;
    }

    const string key = "charset=";
    foreach (var part in contentType.Split(';'))
    {
        var parameter = part.Trim();
        if (parameter.StartsWith(key, StringComparison.OrdinalIgnoreCase))
        {
            return parameter.Substring(key.Length).Trim().Trim('"', '\'');
        }
    }

    return string.Empty;
}
获取编码:(暂不支持GBK,GB2312)
// Matches a charset declaration such as charset=utf-8, charset="utf-8" or charset = 'utf-8'.
// Built once and reused instead of being re-parsed on every call.
private static readonly Regex CharsetPattern =
    new Regex(@"charset\b\s*=\s*[""']?(?<charset>[\w.:-]+)", RegexOptions.IgnoreCase);

/// <summary>
/// Picks the text encoding for a downloaded document.
/// </summary>
/// <remarks>
/// Candidates are tried in this order: a charset declared inside the document itself
/// (for example an HTML meta tag), then the charset supplied by the caller, then UTF-8.
/// A candidate whose name the platform does not recognise is skipped instead of throwing.
/// </remarks>
/// <param name="bytes">Raw document bytes; may be null or empty.</param>
/// <param name="charSet">
/// Either a bare charset name ("utf-8") or a full Content-Type header value
/// ("text/html; charset=utf-8"); may be null or empty.
/// </param>
/// <returns>The first encoding that could be resolved; UTF-8 when none could.</returns>
public static Encoding GetEncoding(byte[] bytes, string charSet)
{
    Encoding resolved;

    if (bytes != null && bytes.Length > 0)
    {
        // Charset declarations are plain ASCII, so a provisional UTF-8 decode is
        // good enough to find one even if the document uses another encoding.
        var html = Encoding.UTF8.GetString(bytes, 0, bytes.Length);
        var declared = CharsetPattern.Match(html);
        if (declared.Success && TryResolveEncoding(declared.Groups["charset"].Value, out resolved))
        {
            return resolved;
        }
    }

    if (!string.IsNullOrWhiteSpace(charSet))
    {
        // Accept both a full Content-Type value and a bare charset name.
        var fromHeader = CharsetPattern.Match(charSet);
        var name = fromHeader.Success ? fromHeader.Groups["charset"].Value : charSet.Trim();
        if (TryResolveEncoding(name, out resolved))
        {
            return resolved;
        }
    }

    return Encoding.UTF8;
}

/// <summary>Looks up an encoding by name, reporting failure instead of throwing for unknown names.</summary>
private static bool TryResolveEncoding(string name, out Encoding encoding)
{
    try
    {
        encoding = Encoding.GetEncoding(name);
        return true;
    }
    catch (ArgumentException)
    {
        // Name is not a valid or supported code page name on this platform.
        encoding = null;
        return false;
    }
}
虽然使用 HttpClient 更简单
// Let the handler negotiate compression: when AutomaticDecompression is set it sends
// Accept-Encoding itself and decompresses the body, so the content read from the
// response is already plain. Adding "accept-encoding" by hand without this would
// deliver a compressed body that nothing decodes.
var handler = new HttpClientHandler();
if (handler.SupportsAutomaticDecompression)
{
    handler.AutomaticDecompression =
        System.Net.DecompressionMethods.GZip | System.Net.DecompressionMethods.Deflate;
}

// NOTE(review): HttpClient is meant to be reused; share one instance if this runs per request.
var http = new HttpClient(handler);
http.DefaultRequestHeaders.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0");

var response = await http.GetAsync(new Uri(url));
// Make sure the request succeeded (throws on a non-success status code).
response.EnsureSuccessStatusCode();
但是它的响应头(response.Headers)中并没有 "Content-Encoding":该头属于内容头,需要通过 response.Content.Headers.ContentEncoding 读取,所以无法直接从响应头判断需不需要 Gzip 解压。