JS:如何根据文本偏移/文本位置获取对应的元素?

huangapple go评论68阅读模式
英文:

js, get element from text-offset/text-position?

问题

我正在尝试解析和重新格式化某个网页。

文本格式很好,但DOM结构不好(由所见即所得编辑器生成)。

因此,我想解析文本内容,然后找到每个文本部分对应的元素。

示例问题:

//example.html
<div id="a">
  ABC
  <span id="b">
    DEF
    <span id="c">
      GHI
    </span>
    <span id="d">
      JKR
    </span>
  </span>
</div>
//script.js

let a = document.getElementById('a');
let text_pos = a.textContent.indexOf('J');
// 从text_pos获取元素 #d 的好方法?

我知道一种方法是循环遍历#a的所有子元素,然后逐个减去每个文本的长度,直到达到0。

但是否有更好的方法?

英文:

I am trying to parse and reformat some web page.

The text is well formatted, but the DOM structure is not (it was generated by a WYSIWYG editor).

Thus I would like to parse the text content and then find the element(s) that correspond to each portion of the text.

example problem:

//example.html
<div id="a">
  ABC
  <span id="b">
    DEF
    <span id="c">
      GHI
    </span>
    <span id="d">
      JKR
    </span>
  </span>
</div>
//script.js

let a = document.getElementById('a');
let text_pos=a.textContent.indexOf('J');
// good way to get element #d from text_pos?

I know one way is to loop through all child elements of #a, then subtract each text length until 0.

But is there a better way?

答案1

得分: 1

根据我理解,您想要找到搜索文本的父元素。因此,我们将使用indexOf搜索项,然后回溯以获取第一个标记,然后向前搜索以获取闭合标记,最后返回在第一个标记和最后一个标记之间的字符串部分。

另一种方法是回溯查找第一个id属性,而不是第一个HTML标记,但我不确定您的所有元素是否都具有id属性。

// Sample markup for the demo. Declared with `var` at the top level because the
// helper functions may read `data` from the outer scope.
var data = "<div>Data<div id='d'><br/>AB</div></div>";
// Log the markup of the element that directly wraps the text "AB".
console.log(getparentElementOf("AB", data));

/**
 * Returns the markup of the element that directly encloses the first
 * occurrence of `searchTerm` inside the HTML string `data`.
 *
 * The markup is forwarded to both helpers as a trailing argument so that a
 * helper accepting it scans this exact string; a helper that does not declare
 * that parameter simply ignores the extra argument.
 *
 * @param {string} searchTerm - Text to look for.
 * @param {string} data - HTML markup to search in.
 * @returns {string} The enclosing element's markup, from its opening "<" to
 *   the ">" of its closing tag, or "" when the term or an enclosing
 *   element cannot be found.
 */
function getparentElementOf(searchTerm, data) {
  const indexOfTerm = data.indexOf(searchTerm);
  if (indexOfTerm === -1) {
    // Nothing to anchor the search on.
    return "";
  }

  const indexOfFirstTag = getStartIndexOfParentTag(indexOfTerm, data);
  const indexOfEndTag = getEndIndexOfParentTag(
    indexOfTerm + searchTerm.length,
    data.length,
    data
  );

  // Without both boundaries there is no element to cut out; slicing with -1
  // would silently return an unrelated prefix of the string.
  if (indexOfFirstTag === -1 || indexOfEndTag === -1) {
    return "";
  }

  return data.substring(indexOfFirstTag, indexOfEndTag + 1);
}

/**
 * Finds the index of the "<" that opens the element enclosing a position.
 *
 * Walks backwards through the markup one tag at a time. A closing tag met on
 * the way belongs to an already finished sibling element, so the opening tag
 * that matches it is skipped. Self-closing tags, void elements (e.g. <br>) and
 * comment/doctype declarations never enclose text and are ignored.
 *
 * Limitation: this is a lightweight scanner, not an HTML parser; a "<" or ">"
 * inside an attribute value or a comment will confuse it.
 *
 * @param {number} startFromIndex - Position to search backwards from.
 * @param {string} [html=data] - Markup to scan. Defaults to the outer-scope
 *   variable `data`, so existing one-argument calls keep working.
 * @returns {number} Index of the enclosing opening tag's "<", or -1 if none.
 */
function getStartIndexOfParentTag(startFromIndex, html = data) {
  // Elements that never have a closing tag and therefore cannot be a parent.
  const voidTags = new Set([
    "area", "base", "br", "col", "embed", "hr", "img",
    "input", "link", "meta", "source", "track", "wbr",
  ]);

  // Closing tags seen so far whose opening tag has not been passed yet.
  let unmatchedClosers = 0;
  let i = Math.min(startFromIndex, html.length - 1);

  while (i >= 0) {
    if (html[i] !== ">") {
      i--;
      continue;
    }

    const tagStart = html.lastIndexOf("<", i);
    if (tagStart === -1) {
      break;
    }
    const tag = html.slice(tagStart, i + 1);
    i = tagStart - 1;

    if (tag.startsWith("</")) {
      unmatchedClosers++;
      continue;
    }

    // No tag name means a comment, doctype, or a stray "<...>" in text.
    const nameMatch = /^<([A-Za-z][^\s/>]*)/.exec(tag);
    if (
      nameMatch === null ||
      tag.endsWith("/>") ||
      voidTags.has(nameMatch[1].toLowerCase())
    ) {
      continue;
    }

    if (unmatchedClosers === 0) {
      return tagStart;
    }
    unmatchedClosers--;
  }

  return -1;
}

/**
 * Finds the index of the ">" that ends the closing tag of the element
 * enclosing a position.
 *
 * Walks forwards through the markup one tag at a time. Every opening tag met
 * on the way starts a nested child element, so the closing tag that matches it
 * is skipped. Self-closing tags, void elements (e.g. <br>) and
 * comment/doctype declarations are ignored.
 *
 * Limitation: this is a lightweight scanner, not an HTML parser; a "<" or ">"
 * inside an attribute value or a comment will confuse it.
 *
 * @param {number} startFromIndex - Position to search forwards from.
 * @param {number} to - Exclusive upper bound of the search.
 * @param {string} [html=data] - Markup to scan. Defaults to the outer-scope
 *   variable `data`, so existing two-argument calls keep working.
 * @returns {number} Index of the ">" of the enclosing element's closing tag,
 *   or -1 if none is found before `to`.
 */
function getEndIndexOfParentTag(startFromIndex, to, html = data) {
  // Elements that never have a closing tag and therefore never nest.
  const voidTags = new Set([
    "area", "base", "br", "col", "embed", "hr", "img",
    "input", "link", "meta", "source", "track", "wbr",
  ]);

  // Child elements opened after the start position and not yet closed.
  let openChildren = 0;
  let i = Math.max(startFromIndex, 0);

  while (i < to) {
    const tagStart = html.indexOf("<", i);
    if (tagStart === -1 || tagStart >= to) {
      break;
    }
    const tagEnd = html.indexOf(">", tagStart + 1);
    if (tagEnd === -1 || tagEnd >= to) {
      break;
    }
    const tag = html.slice(tagStart, tagEnd + 1);
    i = tagEnd + 1;

    if (tag.startsWith("</")) {
      if (openChildren === 0) {
        return tagEnd;
      }
      openChildren--;
      continue;
    }

    // No tag name means a comment, doctype, or a stray "<...>" in text.
    const nameMatch = /^<([A-Za-z][^\s/>]*)/.exec(tag);
    if (
      nameMatch === null ||
      tag.endsWith("/>") ||
      voidTags.has(nameMatch[1].toLowerCase())
    ) {
      continue;
    }

    openChildren++;
  }

  return -1;
}

注意:上述代码中包含了您提供的JavaScript代码,并对其进行了注释。

英文:

From what I understand, you want to find the parent element of the text you search for. So instead of looping through all the text, we use indexOf to locate the search term, then backtrack to find the first opening tag, then search forward to find the closing tag, and return the part of the string between those two tags.

Another way is to backtrack to the first id= instead of the first HTML tag, but I'm not sure whether all your elements have an id attribute.

<!-- begin snippet: js hide: false console: true babel: false -->

<!-- language: lang-js -->

// Sample markup for the demo. Declared with `var` at the top level because the
// helper functions may read `data` from the outer scope.
var data = "<div>Data<div id='d'><br/>AB</div></div>";
// Log the markup of the element that directly wraps the text "AB".
console.log(getparentElementOf("AB", data));
/**
 * Returns the markup of the element that directly encloses the first
 * occurrence of `searchTerm` inside the HTML string `data`.
 *
 * The markup is forwarded to both helpers as a trailing argument so that a
 * helper accepting it scans this exact string; a helper that does not declare
 * that parameter simply ignores the extra argument.
 *
 * @param {string} searchTerm - Text to look for.
 * @param {string} data - HTML markup to search in.
 * @returns {string} The enclosing element's markup, from its opening "<" to
 *   the ">" of its closing tag, or "" when the term or an enclosing
 *   element cannot be found.
 */
function getparentElementOf(searchTerm, data) {
  const indexOfTerm = data.indexOf(searchTerm);
  if (indexOfTerm === -1) {
    // Nothing to anchor the search on.
    return "";
  }

  const indexOfFirstTag = getStartIndexOfParentTag(indexOfTerm, data);
  const indexOfEndTag = getEndIndexOfParentTag(
    indexOfTerm + searchTerm.length,
    data.length,
    data
  );

  // Without both boundaries there is no element to cut out; slicing with -1
  // would silently return an unrelated prefix of the string.
  if (indexOfFirstTag === -1 || indexOfEndTag === -1) {
    return "";
  }

  return data.substring(indexOfFirstTag, indexOfEndTag + 1);
}
/**
 * Finds the index of the "<" that opens the element enclosing a position.
 *
 * Walks backwards through the markup one tag at a time. A closing tag met on
 * the way belongs to an already finished sibling element, so the opening tag
 * that matches it is skipped. Self-closing tags, void elements (e.g. <br>) and
 * comment/doctype declarations never enclose text and are ignored.
 *
 * Limitation: this is a lightweight scanner, not an HTML parser; a "<" or ">"
 * inside an attribute value or a comment will confuse it.
 *
 * @param {number} startFromIndex - Position to search backwards from.
 * @param {string} [html=data] - Markup to scan. Defaults to the outer-scope
 *   variable `data`, so existing one-argument calls keep working.
 * @returns {number} Index of the enclosing opening tag's "<", or -1 if none.
 */
function getStartIndexOfParentTag(startFromIndex, html = data) {
  // Elements that never have a closing tag and therefore cannot be a parent.
  const voidTags = new Set([
    "area", "base", "br", "col", "embed", "hr", "img",
    "input", "link", "meta", "source", "track", "wbr",
  ]);

  // Closing tags seen so far whose opening tag has not been passed yet.
  let unmatchedClosers = 0;
  let i = Math.min(startFromIndex, html.length - 1);

  while (i >= 0) {
    if (html[i] !== ">") {
      i--;
      continue;
    }

    const tagStart = html.lastIndexOf("<", i);
    if (tagStart === -1) {
      break;
    }
    const tag = html.slice(tagStart, i + 1);
    i = tagStart - 1;

    if (tag.startsWith("</")) {
      unmatchedClosers++;
      continue;
    }

    // No tag name means a comment, doctype, or a stray "<...>" in text.
    const nameMatch = /^<([A-Za-z][^\s/>]*)/.exec(tag);
    if (
      nameMatch === null ||
      tag.endsWith("/>") ||
      voidTags.has(nameMatch[1].toLowerCase())
    ) {
      continue;
    }

    if (unmatchedClosers === 0) {
      return tagStart;
    }
    unmatchedClosers--;
  }

  return -1;
}
/**
 * Finds the index of the ">" that ends the closing tag of the element
 * enclosing a position.
 *
 * Walks forwards through the markup one tag at a time. Every opening tag met
 * on the way starts a nested child element, so the closing tag that matches it
 * is skipped. Self-closing tags, void elements (e.g. <br>) and
 * comment/doctype declarations are ignored.
 *
 * Limitation: this is a lightweight scanner, not an HTML parser; a "<" or ">"
 * inside an attribute value or a comment will confuse it.
 *
 * @param {number} startFromIndex - Position to search forwards from.
 * @param {number} to - Exclusive upper bound of the search.
 * @param {string} [html=data] - Markup to scan. Defaults to the outer-scope
 *   variable `data`, so existing two-argument calls keep working.
 * @returns {number} Index of the ">" of the enclosing element's closing tag,
 *   or -1 if none is found before `to`.
 */
function getEndIndexOfParentTag(startFromIndex, to, html = data) {
  // Elements that never have a closing tag and therefore never nest.
  const voidTags = new Set([
    "area", "base", "br", "col", "embed", "hr", "img",
    "input", "link", "meta", "source", "track", "wbr",
  ]);

  // Child elements opened after the start position and not yet closed.
  let openChildren = 0;
  let i = Math.max(startFromIndex, 0);

  while (i < to) {
    const tagStart = html.indexOf("<", i);
    if (tagStart === -1 || tagStart >= to) {
      break;
    }
    const tagEnd = html.indexOf(">", tagStart + 1);
    if (tagEnd === -1 || tagEnd >= to) {
      break;
    }
    const tag = html.slice(tagStart, tagEnd + 1);
    i = tagEnd + 1;

    if (tag.startsWith("</")) {
      if (openChildren === 0) {
        return tagEnd;
      }
      openChildren--;
      continue;
    }

    // No tag name means a comment, doctype, or a stray "<...>" in text.
    const nameMatch = /^<([A-Za-z][^\s/>]*)/.exec(tag);
    if (
      nameMatch === null ||
      tag.endsWith("/>") ||
      voidTags.has(nameMatch[1].toLowerCase())
    ) {
      continue;
    }

    openChildren++;
  }

  return -1;
}

<!-- end snippet -->

答案2

得分: 0

以下是翻译好的部分:

"Well, I think the question is a little confusing, but as I understood it, you want the text of the elements as they are nested, so you should loop over them, as you mention in the question text. I leave you a fragment of a loop with no length evaluation:

// Concatenates every line of #a's text content that contains at least one
// ASCII letter, with surrounding whitespace removed from each line.
var strResult = "";
let a = document.getElementById('a');
// Split once instead of re-splitting the text on every access.
const lines = a.textContent.trim().split("\n");
// for...of iterates the values directly; for...in would iterate string keys
// and the undeclared loop variable would leak as an implicit global.
for (const line of lines) {
  // [A-Za-z] matches letters only; the range A-z also covers [ \ ] ^ _ and `.
  if (/[A-Za-z]/.test(line)) {
    strResult += line.trim();
  }
}
console.log(strResult);

I hope this could help.
Regards"

英文:

Well, I think the question is a little confusing, but as I understood it, you want the text of the elements as they are nested, so you should loop over them, as you mention in the question text. I leave you a fragment of a loop with no length evaluation:

// Concatenates every line of #a's text content that contains at least one
// ASCII letter, with surrounding whitespace removed from each line.
var strResult = "";
let a = document.getElementById('a');
// Split once instead of re-splitting the text on every access.
const lines = a.textContent.trim().split("\n");
// for...of iterates the values directly; for...in would iterate string keys
// and the undeclared loop variable would leak as an implicit global.
for (const line of lines) {
  // [A-Za-z] matches letters only; the range A-z also covers [ \ ] ^ _ and `.
  if (/[A-Za-z]/.test(line)) {
    strResult += line.trim();
  }
}
console.log(strResult);

I hope this could help.
Regards

huangapple
  • 本文由 发表于 2020年1月6日 20:39:20
  • 转载请务必保留本文链接:https://go.coder-hub.com/59612293.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定