如何将纯文本中的单词/短语映射到 HTML 中的位置(位置搜索)
很久以前,有一个非常富有的人和他的三个女儿住在一起。两个大女儿嘲笑任何穿得不如她们好的人。只要她们两个不在家休息,就会出门购物,把拿得动的精美衣服和帽子统统买回家。
<span>
<div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0)">
<p class="p1" style="margin: 0px; font: 17px; font-family: Helvetica Neue"><b>Once upon a time there was
a
very rich man who <span style="color: blue">lived</span> with his three daughters.<span class="Apple-converted-space">
</span>The two older daughters laughed at anyone who di<span style="color: orange">d n</span>ot dress <span style="color: green">as</span> wel as they did.<span
class="Apple-converted-space"> </span>If the two of them were not resting at home,they were out shopping for as many fine dresses and hats as they Could <span style="color: red">carry</span> home. <span
class="Apple-converted-space"> </span></b></p><br>
</div>
<div style="font-family:Calibri,0)">
</div>
</span>
需要一个通用的解决方案,用于在 HTML 中查找纯文本里某个单词/短语对应的位置。难点在于,单词/短语内部可能夹杂着样式标签,例如:
di<span style="color: orange">d n</span>ot
曾尝试使用 Levenshtein 距离来跟踪位置偏移,但这是一个非常“笨重”的解决方案
解决方法
// Demo driver: take the markup inside the element with id "input" and
// log where the phrase sits inside that raw HTML string.
const html = document.getElementById('input').innerHTML;
const word = 'did not';

console.log(
  searchPositions(html, word)
);
/**
 * Locate a plain-text phrase inside an HTML string, ignoring any tags that
 * may be interleaved with the phrase (e.g. `di<span>d n</span>ot` for
 * "did not").
 *
 * Tags are stripped with a regular expression, the phrase is searched in the
 * remaining text, and the match is mapped back to offsets in the original
 * HTML string. Only the first occurrence is reported. Character entities
 * (such as `&amp;`) are not decoded; they are matched as literal text.
 *
 * @param {string} html - Raw HTML source to search in.
 * @param {string} issueText - Plain-text phrase to look for.
 * @returns {{start: number, end: number}} Offsets (UTF-16 code units,
 *   inclusive on both ends) of the first and last character of the phrase
 *   within `html`. Both are -1 when the phrase is empty or not present.
 */
function searchPositions(html, issueText) {
  const tagPattern = /<\/?[^>]+(>|$)/g;

  // textToHtml[i] is the offset in `html` of the i-th character of the
  // tag-stripped text. Everything is kept in UTF-16 code units so the
  // offsets agree with both the regex match indices and String#indexOf.
  const textToHtml = [];
  let strippedText = '';
  let cursor = 0;

  const appendTextSegment = (from, to) => {
    for (let offset = from; offset < to; offset++) {
      textToHtml.push(offset);
    }
    strippedText += html.slice(from, to);
  };

  // Walk the tags once; whatever lies between two tags is visible text.
  for (const tagMatch of html.matchAll(tagPattern)) {
    appendTextSegment(cursor, tagMatch.index);
    cursor = tagMatch.index + tagMatch[0].length;
  }
  appendTextSegment(cursor, html.length);

  // An empty phrase has no first/last character to report.
  if (issueText.length === 0) {
    return { start: -1, end: -1 };
  }

  const textStart = strippedText.indexOf(issueText);
  if (textStart === -1) {
    return { start: -1, end: -1 };
  }

  const textEnd = textStart + issueText.length - 1;
  return {
    start: textToHtml[textStart],
    end: textToHtml[textEnd],
  };
}
<div id='input'>
<span>
<div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0)">
<p class="p1" style="margin: 0px; font: 17px; font-family: Helvetica Neue"><b>Once upon a time there was
a
very rich man who <span style="color: blue">lived</span> with his three daughters.<span
class="Apple-converted-space">
</span>The two older daughters laughed at anyone who di<span style="color: orange">d n</span>ot
dress <span style="color: green">as</span> wel as they did.<span
class="Apple-converted-space"> </span>If the two of them were not resting at home,they were out shopping for as many fine dresses and hats as they could <span
style="color: red">carry</span> home. <span class="Apple-converted-space"> </span></b>
</p><br>
</div>
<div style="font-family:Calibri,0)">
</div>
</span>
</div>
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。