WebView2 的强大能力只能在 Windows 系统上发挥,实在太可惜了。当用 Playwright 或者其它的 WebDriver 爬网页很麻烦甚至无解时,这种把 WebView2 封装为 API 的方式不失为一个兜底方案。
- 实现思路:在 API 中调用内含 WinForms 窗体的 WebContentExtractor.exe。
- VPS 太弱、装不上 Windows 时的解决思路:借助 Argo 的 CDN 能力,可以把你内网的服务暴露到外网(大善人 CF 的 Argo 和 Tunnel 网上资料很多,本站内也有)。
webview2的winform
mainform.cs
using System;
using System.Diagnostics;
using System.Threading.Tasks;
using System.Windows.Forms;
using Microsoft.Web.WebView2.Core;
namespace WebContentExtractor
{
    /// <summary>
    /// Form hosting a WebView2 control that navigates to a single URL, captures the
    /// rendered document's outer HTML into <see cref="HtmlContent"/>, and then exits
    /// the application.
    /// </summary>
    public partial class MainForm : Form
    {
        private readonly string targetUrl = string.Empty;
        private string _htmlContent;

        /// <summary>
        /// The captured HTML, or <c>null</c> if navigation or script execution failed.
        /// </summary>
        public string HtmlContent => _htmlContent;

        /// <summary>Creates the form for the given page address.</summary>
        /// <param name="url">Absolute URL of the page to load.</param>
        public MainForm(string url)
        {
            InitializeComponent();
            targetUrl = url;
        }

        /// <summary>
        /// Initializes WebView2 and starts navigating to the target URL.
        /// Any failure is reported on the console and ends the application.
        /// </summary>
        private async void MainForm_Load(object sender, EventArgs e)
        {
            if (string.IsNullOrWhiteSpace(targetUrl))
            {
                MessageBox.Show("No URL provided.");
                Application.Exit();
                return;
            }

            try
            {
                // Initialize WebView2.
                await webView21.EnsureCoreWebView2Async();

                // Subscribe BEFORE starting the navigation so the completion
                // notification can never be missed.
                webView21.NavigationCompleted += WebView_NavigationCompleted;

                // Start loading the page; throws for malformed or relative URLs.
                webView21.Source = new Uri(targetUrl);
            }
            catch (Exception ex)
            {
                // This is an async void handler: an escaping exception would crash
                // the process, so report it and shut down cleanly instead.
                Console.WriteLine($"Error: {ex.Message}");
                Application.Exit();
            }
        }

        /// <summary>
        /// Runs once navigation finishes: on success grabs the document's outer HTML,
        /// then (in every case) exits the application.
        /// </summary>
        private async void WebView_NavigationCompleted(object sender, CoreWebView2NavigationCompletedEventArgs args)
        {
            if (!args.IsSuccess)
            {
                Console.WriteLine($"Failed to load URL: {args.WebErrorStatus}");
                Application.Exit();
                return;
            }

            try
            {
                string htmlContent = await webView21.CoreWebView2.ExecuteScriptAsync("document.documentElement.outerHTML;");

                // NOTE(review): the script result arrives as a JSON-encoded string.
                // Only the surrounding quotes and the \n / \t escapes are decoded
                // here; other escapes (e.g. \" or \uXXXX) are left in place. The
                // web API that consumes this program's output applies a further
                // unescape step, so the two must be changed together.
                _htmlContent = htmlContent.Trim('"').Replace("\\n", "\n").Replace("\\t", "\t");
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Error: {ex.Message}");
            }
            finally
            {
                // Close the application after processing, successful or not.
                Application.Exit();
            }
        }
    }
}
program.cs
using System;
using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Windows.Forms;
using WebContentExtractor;
/// <summary>
/// Entry point of the extractor executable: loads the URL given as the first
/// command-line argument in <see cref="MainForm"/> and prints the captured HTML
/// to standard output. Failures are written to standard error and reported
/// through a non-zero process exit code.
/// </summary>
internal static class Program
{
    [DllImport("kernel32.dll")]
    private static extern bool AllocConsole();

    /// <summary>Runs the form's message loop and emits the result.</summary>
    /// <param name="args">Command-line arguments; <c>args[0]</c> is the URL to load.</param>
    [STAThread]
    static void Main(string[] args)
    {
        if (args.Length == 0)
        {
            // Report on stderr with a failing exit code so a parent process that
            // inspects the exit code does not mistake this message for page content.
            Console.Error.WriteLine("Please provide a URL.");
            Environment.ExitCode = 1;
            return;
        }

        string url = args[0];

        Application.EnableVisualStyles();
        Application.SetCompatibleTextRenderingDefault(false);

        // Create the form and run the message loop; this returns once the form
        // has called Application.Exit().
        var mainForm = new MainForm(url);
        Application.Run(mainForm);

        // Fetch the HTML captured by the form (null/empty when extraction failed).
        string htmlContent = mainForm.HtmlContent;

        if (!string.IsNullOrEmpty(htmlContent))
        {
            AllocConsole(); // Dynamically allocate a console.

            // MainForm.HtmlContent has already had its quotes trimmed and its
            // \n / \t escapes expanded, so it is written out unchanged.
            Console.WriteLine(htmlContent);
        }
        else
        {
            // Signal the failure through stderr and the exit code rather than
            // printing it on stdout where it looks like a successful result.
            Console.Error.WriteLine("Failed to retrieve HTML content.");
            Environment.ExitCode = 1;
        }
    }
}
webApi
using Microsoft.AspNetCore.Mvc;
using System.Diagnostics;
using System;
using Microsoft.Extensions.Configuration;
namespace WebBrowserAPI.Controllers
{
    /// <summary>
    /// Exposes the WebView2-based extractor executable as an HTTP endpoint: each
    /// request launches the executable configured under <c>WebExePath</c> with the
    /// requested URL and returns what it printed.
    /// </summary>
    [ApiController]
    [Route("[controller]")]
    public class MainController : ControllerBase
    {
        private readonly IConfiguration _configuration;
        private readonly ILogger<MainController> _logger;

        public MainController(ILogger<MainController> logger, IConfiguration configuration)
        {
            _logger = logger;
            _configuration = configuration;
        }

        /// <summary>
        /// Renders <paramref name="url"/> with the extractor executable and returns its HTML.
        /// </summary>
        /// <param name="url">Absolute http/https URL of the page to render.</param>
        /// <returns>
        /// 200 with <c>{ success = true, data }</c> when the executable exits with code 0;
        /// 400 when the URL is missing or invalid, the executable is not found, or it
        /// exits with a non-zero code; 500 when launching or reading the process throws.
        /// </returns>
        [HttpGet]
        public async Task<IActionResult> Get(string url = "https://www.cls.cn/subject/1556")
        {
            if (string.IsNullOrEmpty(url))
            {
                return BadRequest("URL is required.");
            }

            // The URL comes straight from the query string and is handed to a local
            // browser process: accept only absolute http(s) addresses so that
            // file:// and other schemes cannot be requested through this endpoint.
            if (!Uri.TryCreate(url, UriKind.Absolute, out var parsedUrl)
                || (parsedUrl.Scheme != Uri.UriSchemeHttp && parsedUrl.Scheme != Uri.UriSchemeHttps))
            {
                return BadRequest("URL must be an absolute http or https URL.");
            }

            try
            {
                var exeName = _configuration["WebExePath"];
                if (string.IsNullOrEmpty(exeName) || !System.IO.File.Exists(exeName))
                {
                    return BadRequest("exe not exists.");
                }

                // Build the process start information.
                var processStartInfo = new ProcessStartInfo
                {
                    FileName = exeName,
                    RedirectStandardOutput = true, // capture standard output
                    RedirectStandardError = true,  // capture standard error
                    UseShellExecute = false,
                    CreateNoWindow = true          // hide the window
                };

                // ArgumentList quotes/escapes the value itself, so a URL containing
                // quotes cannot break out and inject extra command-line arguments.
                processStartInfo.ArgumentList.Add(url);

                using var process = new Process { StartInfo = processStartInfo };
                process.Start();

                // Drain both pipes concurrently: reading one to the end before the
                // other can deadlock when the child fills the pipe nobody is reading.
                Task<string> stdoutTask = process.StandardOutput.ReadToEndAsync();
                Task<string> stderrTask = process.StandardError.ReadToEndAsync();
                await Task.WhenAll(stdoutTask, stderrTask);
                await process.WaitForExitAsync();

                string output = await stdoutTask;
                string error = await stderrTask;

                if (process.ExitCode != 0)
                {
                    _logger.LogWarning("Extractor exited with code {ExitCode} for {Url}", process.ExitCode, url);
                    return BadRequest(new
                    {
                        success = false,
                        error = error.Trim()
                    });
                }

                // Undo the remaining escape sequences in the extractor's output.
                // Done only on success so that an unescape failure cannot mask the
                // error reported by a failed run.
                string html = System.Text.RegularExpressions.Regex.Unescape(output)
                    .Trim('"')
                    .Replace("\\n", "\n")
                    .Replace("\\t", "\t");

                return Ok(new
                {
                    success = true,
                    data = html.Trim()
                });
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Failed to extract content for {Url}", url);
                return StatusCode(500, new
                {
                    success = false,
                    error = ex.Message
                });
            }
        }
    }
}