13.3. 去除HtmlNode中的子节点:removeSubHtmlNode


    /// <summary>
    /// Removes every node matched by <paramref name="subNodeToRemove"/> from the tree
    /// rooted at <paramref name="curHtmlNode"/>.
    /// </summary>
    /// <remarks>
    /// The removal happens in place: the returned node is the very same instance as
    /// <paramref name="curHtmlNode"/>, not a copy.
    /// </remarks>
    /// <param name="curHtmlNode">Node to search in; it is modified in place.</param>
    /// <param name="subNodeToRemove">
    /// XPath expression handed to <c>SelectNodes</c>, e.g. <c>"script"</c> to match
    /// <c>&lt;script type="text/javascript"&gt;</c> children.
    /// </param>
    /// <returns><paramref name="curHtmlNode"/> itself, after the matched nodes were detached.</returns>
    public HtmlNode removeSubHtmlNode(HtmlNode curHtmlNode, string subNodeToRemove)
    {
        // Work on the collection returned by SelectNodes rather than removing while
        // enumerating Descendants(): the latter fails with
        // "Collection was modified; enumeration operation may not execute."
        HtmlNodeCollection matchedNodes = curHtmlNode.SelectNodes(subNodeToRemove);
        if (matchedNodes == null)
        {
            // nothing matched, so there is nothing to remove
            return curHtmlNode;
        }

        foreach (HtmlNode matchedNode in matchedNodes)
        {
            // Detach from the node's actual parent instead of curHtmlNode, so that
            // XPath expressions matching deeper descendants (e.g. ".//script") work too.
            HtmlNode owner = matchedNode.ParentNode;
            if (owner != null)
            {
                owner.RemoveChild(matchedNode);
            }
        }

        return curHtmlNode;
    }

    

例 13.3. removeSubHtmlNode 的使用范例


// Pick the node to clean up.
HtmlNode curBulletNode = allBulletNodeList[idx];

// Strip <script> children first, then <style> children.
// removeSubHtmlNode edits the node in place and returns that same node,
// so noJsNode, noStyleNode and curBulletNode all refer to one instance.
HtmlNode noJsNode = crl.removeSubHtmlNode(curBulletNode, "script");
HtmlNode noStyleNode = crl.removeSubHtmlNode(noJsNode, "style");

// Text content that remains once scripts and styles are gone.
string bulletStr = noStyleNode.InnerText;