我有一個很長的連續字串,看起來像這樣:
let myString = "onetwothreefourfivesixseveneightnineteneleventwelvethirteenfourteen";
它沒有任何可以輕鬆定位的分隔符。
那么我如何遍歷它并拆分單詞,使其最終像:
splitString = ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen"];
最好使用 JavaScript。
uj5u.com熱心網友回復:
這里的問題是缺少您提到的分隔符 - 這使得軟體無法知道單詞的開始和結束位置。
鑒于你知道會出現的詞,我的技巧是這樣的:
注意:這沒有考慮到單詞重疊的可能性,并假設沒有一個單詞是其他單詞的可能子集......
- 迭代已知詞
- 在字串中搜索 (indexOf) 每個已知單詞,并記下它在字串中的位置
- 按索引值對值進行排序
- 使用找到的順序中包含的值生成一個陣列
/**
* This assumes that:
* - Input words are not subsets of other input words
*/
/**
 * Find every occurrence of `inputWord` inside `inputString`.
 *
 * Occurrences are reported left to right and do not overlap: after a hit,
 * the search resumes immediately past the end of that hit.
 *
 * @param {string} inputString - The text to search in.
 * @param {string} inputWord - The word to look for.
 * @returns {{index: number, word: string}[]} One record per occurrence,
 *   in ascending `index` order; empty when the word does not occur.
 */
function findAll(inputString, inputWord) {
  const indices = [];
  // An empty word "matches" at every position without ever advancing the
  // search cursor, so there is nothing meaningful to report for it.
  if (inputWord.length === 0) {
    return indices;
  }
  let index = inputString.indexOf(inputWord);
  while (index !== -1) {
    indices.push({ index, word: inputWord });
    // Advance relative to the hit just found; resetting the cursor to a
    // fixed offset would rediscover the same hit forever.
    index = inputString.indexOf(inputWord, index + inputWord.length);
  }
  return indices;
}
/**
 * Gather the position/word records of every known word found in the input.
 *
 * The records are grouped per word, in the order the words are supplied;
 * they are NOT sorted by position.
 *
 * @param {string} inputString - The text to search in.
 * @param {Iterable<string>} inputWords - The words to look for.
 * @returns {Array} Everything `findAll` returned for each word, concatenated.
 */
function splitWords(inputString, inputWords) {
  const matches = [];
  for (const word of inputWords) {
    // Append this word's occurrences behind those collected so far
    for (const hit of findAll(inputString, word)) {
      matches.push(hit);
    }
  }
  return matches;
}
/**
 * Order match records by their position and return only the words.
 *
 * The sort runs on a shallow copy, so the array passed in keeps its
 * original order.
 *
 * @param {{index: number, word: string}[]} inputArr - Records to order.
 * @returns {string[]} The words, ascending by `index`.
 */
const orderWords = (inputArr) =>
  [...inputArr].sort((a, b) => a.index - b.index).map(({ word }) => word);
/**
 * Example run: look up each known word in the sample string,
 * order the hits by position and print the resulting list.
 */
const myString = 'onetwothreefourfivesixseveneightnineteneleventwelvethirteenfourteen';
const inputWords = [
  "one", "two", "three", "four", "five", "six", "seven",
  "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
];
const result = splitWords(myString, inputWords);
const ordered = orderWords(result);
console.dir(ordered);
/**
* Result:
[
'one', 'two',
'three', 'four',
'five', 'six',
'seven', 'eight',
'nine', 'ten',
'eleven', 'twelve',
'thirteen', 'four',
'fourteen'
]
*/
uj5u.com熱心網友回復:
如果如您在評論中所說,您知道預期的單詞,則創建這些單詞的陣列并遍歷您的字串以查找這些單詞
請注意,下面的代碼考慮了匹配單詞的長度,以便您可以找到像 one hundred eighty five 這樣的單詞;否則回圈會在找到 one 時就停止。
您可以閱讀代碼中的注釋以更好地理解它
// Sample input: the words run together with no separators
var myString = "onetwothreefourfivesixseveneightnineteneleventwelvethirteenfourteentwentyfiveonehundredeightyfiveeightyfive";

// Words expected to occur in the input; multi-word entries are written with spaces
var possibleWords = [
  "one", "two", "three", "four", "five", "six", "seven",
  "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
  "twenty five",
  "one hundred eighty five",
  "eighty five",
];
/**
 * Split a run-together string into the known words it is made of.
 *
 * The string is scanned left to right. At each position the longest entry
 * of `possibleWords` that matches there (ignoring spaces inside the entry,
 * so "twenty five" matches "twentyfive") is taken, and scanning resumes
 * right after it. When several entries are equally long, the one listed
 * first wins. Characters that start no known word are skipped.
 *
 * @param {string} mergedString - The text without separators.
 * @param {string[]} possibleWords - The words expected in the text; entries
 *   may contain spaces.
 * @returns {string[]} The matched entries, exactly as written in
 *   `possibleWords`, in order of appearance.
 */
function separateString(mergedString, possibleWords) {
  // Strip the spaces once up front instead of on every comparison.
  // Entries that are empty after stripping can never match and are dropped.
  const candidates = possibleWords
    .map((word) => ({ word, compact: word.replace(/ /g, '') }))
    .filter(({ compact }) => compact.length > 0);

  const result = [];
  let stringIndex = 0;
  while (stringIndex < mergedString.length) {
    // Longest candidate that starts at the current position, if any
    let best = null;
    for (const candidate of candidates) {
      const isLonger = best === null || candidate.compact.length > best.compact.length;
      if (isLonger && mergedString.startsWith(candidate.compact, stringIndex)) {
        best = candidate;
      }
    }

    if (best === null) {
      // No known word starts here: skip this character
      stringIndex += 1;
    } else {
      result.push(best.word);
      // Continue right after the matched word; measure the space-stripped
      // form because that is what actually occupies the string
      stringIndex += best.compact.length;
    }
  }
  return result;
}
// Run the splitter on the sample string and print the array it returns
console.log(separateString(myString, possibleWords));
轉載請註明出處,本文鏈接:https://www.uj5u.com/houduan/370904.html
標籤:javascript 数组 字符串 拆分
上一篇:使用單引數函式(C++)將大寫字母轉換為小寫字母,反之亦然
下一篇:多個句子的首字母大寫,其他小寫
