微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

在文本与html之间映射并搜索位置

如何解决在文本与html之间映射并搜索位置

我有一段纯文本,以及这段文本对应的html版本

很久以前,有一个非常有钱的人和他的三个女儿住在一起。两个大女儿嘲笑任何穿着不如她们讲究的人。如果她们两个不在家休息,就会出去购物,把拿得动的精美衣服和帽子都买回家。

<span>
    <div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0)">
        <p class="p1" style="margin: 0px; font: 17px; font-family: Helvetica Neue"><b>Once upon a time there was
                a
                very rich man who <span style="color: blue">lived</span> with his three daughters.<span class="Apple-converted-space">&nbsp;
                </span>The two older daughters laughed at anyone who di<span style="color: orange">d n</span>ot dress <span style="color: green">as</span> wel as they did.<span
                    class="Apple-converted-space">&nbsp; </span>If the two of them were not resting at home,they were out shopping for as many fine dresses and hats as they Could <span style="color: red">carry</span> home. <span
                    class="Apple-converted-space">&nbsp;</span></b></p><br>
    </div>
    <div style="font-family:Calibri,0)">
    </div>
</span>

需要一个通用的解决方案,用来在html中找到文本里某个单词/短语所对应的位置。问题在于单词/短语内部可能夹有一些样式标签,例如:

di<span style="color: orange">d n</span>ot

我尝试过使用Levenshtein距离并跟踪位置偏移,但这是一个非常“困难”的解决方案。

解决方法

// Demo driver: take the markup of the element with id "input" and log the
// object that searchPositions returns for the phrase below.
const word = 'did not';
const html = document.getElementById('input').innerHTML;
const positions = searchPositions(html, word);

console.log(positions);

/**
 * Locate a plain-text phrase inside an HTML string, even when the phrase is
 * interrupted by tags (e.g. `di<span>d n</span>ot` for "did not").
 *
 * Tags are stripped with a simple regular expression; character entities such
 * as `&nbsp;` are NOT decoded and count as literal text. Only the first
 * occurrence of the phrase is reported.
 *
 * @param {string} html - Markup to search in.
 * @param {string} issueText - Phrase to look for in the tag-free text.
 * @returns {{start: number, end: number}} Offsets (UTF-16 code units, `end`
 *   inclusive) into `html` of the first and last character of the phrase.
 *   Both are -1 when the phrase is empty or does not occur.
 */
function searchPositions(html, issueText) {
    const tagPattern = /<\/?[^>]+(>|$)/g;

    // textToHtml[i] is the offset in `html` of the i-th character of the
    // tag-free text. Built in one pass instead of filtering the whole
    // character array once per tag character.
    const textToHtml = [];
    const textParts = [];
    let cursor = 0;

    const collectText = (from, to) => {
        textParts.push(html.slice(from, to));
        for (let offset = from; offset < to; offset++) {
            textToHtml.push(offset);
        }
    };

    for (const tag of html.matchAll(tagPattern)) {
        collectText(cursor, tag.index);
        cursor = tag.index + tag[0].length;
    }
    collectText(cursor, html.length);

    const plainText = textParts.join('');

    // An empty phrase has no first/last character, so treat it as "not found"
    // rather than indexing the map at -1.
    const textStart = issueText.length > 0 ? plainText.indexOf(issueText) : -1;
    if (textStart === -1) {
        return { start: -1, end: -1 };
    }

    const textEnd = textStart + issueText.length - 1;

    return {
        start: textToHtml[textStart],
        end: textToHtml[textEnd],
    };
}
<!-- Sample markup for the script: the element with id "input" is looked up via
     getElementById and its innerHTML is searched. The phrase "did not" appears
     here split by a styled span (di / d n / ot). -->
<div id='input'>
        <span>
            <div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0)">
                <p class="p1" style="margin: 0px; font: 17px; font-family: Helvetica Neue"><b>Once upon a time there was
                        a
                        very rich man who <span style="color: blue">lived</span> with his three daughters.<span
                            class="Apple-converted-space">&nbsp;
                        </span>The two older daughters laughed at anyone who di<span style="color: orange">d n</span>ot
                        dress <span style="color: green">as</span> wel as they did.<span
                            class="Apple-converted-space">&nbsp; </span>If the two of them were not resting at home,they were out shopping for as many fine dresses and hats as they could <span
                            style="color: red">carry</span> home. <span class="Apple-converted-space">&nbsp;</span></b>
                </p><br>
            </div>
            <div style="font-family:Calibri,0)">
            </div>
        </span>
    </div>

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。