WebView2 的强大能力只能体现在 Windows 系统上,实在太可惜了。不过,当用 Playwright 或其它 WebDriver 爬取网页很麻烦甚至无解时,这种把 WebView2 封装为 API 的方式不失为一种兜底方案。

  1. 实现思路:在 API 中调用内含 WinForm 窗体的 WebContentExtractor.exe,并读取它的标准输出。
  2. VPS 太弱、装不上 Windows 时的解决思路:利用 Cloudflare Argo 的 CDN 能力,把内网的服务暴露到外网(大善人 CF 的 Argo 和 Tunnel 网上资料很多,本站内也有)。

webview2的winform

mainform.cs

using System;
using System.Diagnostics;
using System.Threading.Tasks;
using System.Windows.Forms;
using Microsoft.Web.WebView2.Core;

namespace WebContentExtractor
{
    /// <summary>
    /// Form hosting a WebView2 control that navigates to a single URL, captures the
    /// document's outer HTML once navigation completes, and then exits the application.
    /// </summary>
    public partial class MainForm : Form
    {
        private string targetUrl = string.Empty;
        private string _htmlContent;

        /// <summary>
        /// The captured markup. Stays <c>null</c> when the URL was missing or invalid,
        /// WebView2 could not be initialized, navigation failed, or the script call threw.
        /// </summary>
        public string HtmlContent => _htmlContent;

        /// <summary>Creates the form for the given target address.</summary>
        /// <param name="url">Address to load once the form has been shown.</param>
        public MainForm(string url)
        {
            InitializeComponent();
            targetUrl = url;
        }

        /// <summary>
        /// Validates the target URL, initializes WebView2 and starts the navigation.
        /// Every failure path ends the application so the process never lingers.
        /// </summary>
        private async void MainForm_Load(object sender, EventArgs e)
        {
            if (string.IsNullOrWhiteSpace(targetUrl))
            {
                MessageBox.Show("No URL provided.");
                Application.Exit();
                return;
            }

            // Reject malformed/relative input up front instead of letting `new Uri`
            // throw inside this async void handler, where nothing could catch it.
            if (!Uri.TryCreate(targetUrl, UriKind.Absolute, out Uri targetUri))
            {
                Console.WriteLine($"Invalid URL: {targetUrl}");
                Application.Exit();
                return;
            }

            try
            {
                // Initialize WebView2 (throws when the runtime is missing or broken).
                await webView21.EnsureCoreWebView2Async();

                // Subscribe BEFORE starting the navigation so the completion event
                // cannot be missed.
                webView21.NavigationCompleted += OnTargetNavigationCompleted;

                // Load the URL.
                webView21.Source = targetUri;
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Error: {ex.Message}");
                Application.Exit();
            }
        }

        /// <summary>
        /// Captures the page markup after the navigation finished and shuts the
        /// application down regardless of the outcome.
        /// </summary>
        private async void OnTargetNavigationCompleted(object sender, CoreWebView2NavigationCompletedEventArgs args)
        {
            // One capture per run: ignore any further navigations that complete
            // while the script call below is still awaiting.
            webView21.NavigationCompleted -= OnTargetNavigationCompleted;

            if (!args.IsSuccess)
            {
                Console.WriteLine($"Failed to load URL: {args.WebErrorStatus}");
                Application.Exit();
                return;
            }

            try
            {
                string htmlContent = await webView21.CoreWebView2.ExecuteScriptAsync("document.documentElement.outerHTML;");

                // ExecuteScriptAsync hands the result back JSON-encoded. Only the
                // surrounding quotes and the \n / \t escapes are undone here; other
                // escapes are left in place. This exact output format is kept
                // unchanged on purpose because downstream readers of the process
                // output post-process it further.
                _htmlContent = htmlContent.Trim('"').Replace("\\n", "\n").Replace("\\t", "\t");
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Error: {ex.Message}");
            }
            finally
            {
                Application.Exit(); // Close the application after processing
            }
        }
    }
}
 

program.cs

using System;
using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Windows.Forms;
using WebContentExtractor;

/// <summary>
/// Entry point of the extractor: loads the URL given as first argument in
/// <see cref="MainForm"/> and prints the captured HTML to standard output.
/// Failures are reported on standard error with a non-zero exit code so that a
/// parent process can tell them apart from real page content.
/// </summary>
internal static class Program
{
    [DllImport("kernel32.dll")]
    private static extern bool AllocConsole();

    [STAThread]
    static void Main(string[] args)
    {
        if (args.Length == 0)
        {
            Console.Error.WriteLine("Please provide a URL.");
            Environment.ExitCode = 1;
            return;
        }

        string url = args[0];

        Application.EnableVisualStyles();
        Application.SetCompatibleTextRenderingDefault(false);

        // Create the form that performs the navigation and capture.
        var mainForm = new MainForm(url);

        // Show the form and run the message loop; returns once the form calls Application.Exit().
        Application.Run(mainForm);

        // Fetch the captured HTML (null/empty when the capture did not succeed).
        string htmlContent = mainForm.HtmlContent;

        if (string.IsNullOrEmpty(htmlContent))
        {
            // Previously this went to stdout with exit code 0, which made a failed
            // run indistinguishable from a successful one for the caller.
            Console.Error.WriteLine("Failed to retrieve HTML content.");
            Environment.ExitCode = 1;
            return;
        }

        AllocConsole(); // Allocate a console on demand
        Console.WriteLine(htmlContent.Trim('"').Replace("\\n", "\n").Replace("\\t", "\t"));
    }
}

webApi

using Microsoft.AspNetCore.Mvc;
using System.Diagnostics;
using System;
using Microsoft.Extensions.Configuration;

namespace WebBrowserAPI.Controllers
{
    /// <summary>
    /// Exposes the WebView2-based extractor executable as an HTTP endpoint: each
    /// request launches the executable configured under <c>WebExePath</c> with the
    /// requested URL and returns what the process printed.
    /// </summary>
    [ApiController]
    [Route("[controller]")]
    public class MainController : ControllerBase
    {
        private readonly IConfiguration _configuration;
        private readonly ILogger<MainController> _logger;

        public MainController(ILogger<MainController> logger, IConfiguration configuration)
        {
            _logger = logger;
            _configuration = configuration;
        }

        /// <summary>
        /// Renders <paramref name="url"/> in the extractor process and returns its HTML.
        /// </summary>
        /// <param name="url">Absolute http/https address of the page to render.</param>
        /// <returns>
        /// 200 with <c>{ success, data }</c> when the process exits with code 0;
        /// 400 when the URL is missing/invalid, the executable is not found, or the
        /// process exits with a non-zero code; 500 when launching or reading fails.
        /// </returns>
        [HttpGet]
        public async Task<IActionResult> Get(string url= "https://www.cls.cn/subject/1556")
        {
            if (string.IsNullOrEmpty(url))
            {
                return BadRequest("URL is required.");
            }

            // The URL comes straight from the caller. Restricting it to http/https
            // keeps schemes such as file:// from exposing local files through the
            // browser. NOTE(review): internal http(s) hosts are still reachable;
            // add an allow-list if this endpoint is exposed publicly.
            if (!Uri.TryCreate(url, UriKind.Absolute, out var targetUri)
                || (targetUri.Scheme != Uri.UriSchemeHttp && targetUri.Scheme != Uri.UriSchemeHttps))
            {
                return BadRequest("URL must be an absolute http or https address.");
            }

            try
            {
                var exeName = _configuration["WebExePath"];
                if (!System.IO.File.Exists(exeName))
                {
                    return BadRequest("exe not exists.");
                }

                // Build the process start information.
                var processStartInfo = new ProcessStartInfo
                {
                    FileName = exeName,
                    RedirectStandardOutput = true, // capture standard output
                    RedirectStandardError = true,  // capture standard error
                    UseShellExecute = false,
                    CreateNoWindow = true // hide the window
                };

                // ArgumentList quotes/escapes the value itself, so a URL containing
                // quotes or spaces cannot inject additional command-line arguments.
                processStartInfo.ArgumentList.Add(url);

                var requestAborted = HttpContext?.RequestAborted ?? System.Threading.CancellationToken.None;

                using (var process = new Process { StartInfo = processStartInfo })
                {
                    process.Start();

                    // Stop the browser process (and its children) when the client
                    // goes away, instead of leaving it running unattended.
                    using var killOnAbort = requestAborted.Register(() => TryKill(process));

                    // Drain both pipes concurrently: reading them one after the other
                    // can deadlock once the child fills the pipe nobody is reading.
                    Task<string> stdoutTask = process.StandardOutput.ReadToEndAsync();
                    Task<string> stderrTask = process.StandardError.ReadToEndAsync();
                    await Task.WhenAll(stdoutTask, stderrTask);

                    await process.WaitForExitAsync();

                    if (process.ExitCode != 0)
                    {
                        string error = await stderrTask;
                        return BadRequest(new
                        {
                            success = false,
                            error = error.Trim()
                        });
                    }

                    // Undo the escaping left in the extractor's output. Done only on
                    // the success path so that malformed output of a failed run
                    // cannot mask the real error.
                    string output = System.Text.RegularExpressions.Regex.Unescape(await stdoutTask)
                        .Trim('"')
                        .Replace("\\n", "\n")
                        .Replace("\\t", "\t");

                    return Ok(new
                    {
                        success = true,
                        data = output.Trim()
                    });
                }
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Failed to extract HTML for {Url}", url);
                return StatusCode(500, new
                {
                    success = false,
                    error = ex.Message
                });
            }
        }

        /// <summary>Best-effort termination of the extractor and its child processes.</summary>
        private static void TryKill(Process process)
        {
            try
            {
                if (!process.HasExited)
                {
                    process.Kill(entireProcessTree: true);
                }
            }
            catch (InvalidOperationException)
            {
                // The process exited (or was disposed) in the meantime; nothing to do.
            }
            catch (System.ComponentModel.Win32Exception)
            {
                // The process is already terminating or cannot be terminated.
            }
        }
    }
}

输出

image-1732358259255