Odia is a language with a lot of combining characters, so a simple string reversal such as str.split('').reverse().join('') does not work. With this naive approach, a string like ଜଳାର୍ଣ୍ଣବ is reversed to ବଣ୍ଣ୍ରାଳଜ, whereas the expected reversal is ବର୍ଣ୍ଣଳାଜ.
I did not succeed with Java's BreakIterator using the Odia locale either, and packages such as esrever also failed. Finally, I tried a basic decomposition of Unicode Odia syllables. A string like ଜଳାର୍ଣ୍ଣବ is composed of the characters [ 'ଜ', 'ଳ', 'ା', 'ର', '୍', 'ଣ', '୍', 'ଣ', 'ବ' ], so handling the matras and falas explicitly did the job. Here is the code in JavaScript:
/*
* Task: Find the length of an Odia word. Ex. length of ଦୁର୍ଯ୍ୟୋଧନ should be 4
* Author: Dr. Swarupananda Bissoyi (swarupananda@gmail.com)
* Original Version written on: October 7, 2022
* Bug fixes and final version on: October 12, 2025
*/
// Code points that drive Odia syllable segmentation.
const ODIA_VIRAMA = '\u0B4D'; // halant: joins consonants into a conjunct
const ODIA_NUKTA = '\u0B3C'; // dot below, e.g. NFC form of ଡ଼ is U+0B21 U+0B3C
const ZERO_WIDTH_NON_JOINER = '\u200C';
const ZERO_WIDTH_JOINER = '\u200D';

// Dependent signs that attach to the preceding base character and close the
// syllable: vowel signs (maatraas), length marks used by decomposed AI/AU,
// and the modifiers candrabindu, anusvara and visarga.
const ODIA_DEPENDENT_SIGNS = new Set([
  '\u0B3E', '\u0B3F', '\u0B40', '\u0B41', '\u0B42', '\u0B43', '\u0B44',
  '\u0B47', '\u0B48', '\u0B4B', '\u0B4C', '\u0B56', '\u0B57',
  '\u0B62', '\u0B63',
  '\u0B02', '\u0B01', '\u0B03',
]);

/**
 * Split Odia text into orthographic syllables (base character plus any
 * nukta, virama-joined consonants and trailing dependent signs).
 *
 * @param {string} text - Text to segment.
 * @returns {string[]} Syllables in their original order. A syllable that ends
 *   the text with a bare virama gets a ZWNJ appended so that, when syllables
 *   are reordered, the virama cannot fuse with whatever consonant follows it.
 */
function splitOdiaSyllables(text) {
  const syllables = [];
  let syllable = '';
  let pos = 0;

  while (pos < text.length) {
    // Base character of the syllable, or the next member of a conjunct when
    // the previous iteration ended on a virama.
    syllable += text[pos];
    pos += 1;

    // A nukta modifies the base itself and does not close the syllable.
    if (text[pos] === ODIA_NUKTA) {
      syllable += ODIA_NUKTA;
      pos += 1;
    }

    // Base + ZWJ + virama, e.g. the ra-ya cluster in ର୍ୟାକେଟ୍: the cluster
    // continues with the next character.
    if (text[pos] === ZERO_WIDTH_JOINER && text[pos + 1] === ODIA_VIRAMA) {
      syllable += ZERO_WIDTH_JOINER + ODIA_VIRAMA;
      pos += 2;
      continue;
    }

    if (text[pos] === ODIA_VIRAMA) {
      syllable += ODIA_VIRAMA;
      pos += 1;
      if (pos < text.length) {
        // Consonant conjunct: keep accumulating into the same syllable.
        continue;
      }
      // Virama is the last character (e.g. ସ୍ବରୁପ୍): pin it with a ZWNJ.
      syllable += ZERO_WIDTH_NON_JOINER;
    } else {
      // Consume every consecutive dependent sign, e.g. କିଁଆ or decomposed
      // two-part vowel signs followed by a modifier.
      while (pos < text.length && ODIA_DEPENDENT_SIGNS.has(text[pos])) {
        syllable += text[pos];
        pos += 1;
      }
    }

    syllables.push(syllable);
    syllable = '';
  }

  // Flush a cluster left open at end of input (text ending in ZWJ + virama).
  if (syllable !== '') {
    syllables.push(syllable);
  }
  return syllables;
}

/**
 * Count the orthographic syllables of an Odia string, e.g. ଦୁର୍ଯ୍ୟୋଧନ -> 4.
 *
 * Side effect: logs the syllable-wise reversal of `text` to the console.
 *
 * @param {string} text - Odia text to measure.
 * @returns {number} Number of syllables found in `text` (0 for an empty string).
 */
function strlen(text) {
  const syllables = splitOdiaSyllables(text);
  const reverseStr = [...syllables].reverse().join('');
  console.log(`Reverse of [${text}] is [${reverseStr}]`);
  return syllables.length;
}
// Demo driver: sample Odia words covering stacked signs, conjuncts,
// trailing viramas and mixed punctuation/whitespace.
const strs = [
"ଓଁ", "ଉହୁଁ", "କିଁଆ", "ସ୍ୱରୁପ", "ଅପ୍", "ବ୍ଲିଂ",
"ସ୍ବରୁପ୍",
"ଦୁଃଖ", "ର୍ୟାକେଟ୍", "ର୍ୟାକେଟ୍",
"ସଂସ୍କୃଃ", "ଲେନ୍ସ", "ଲେନ୍ସ୍",
"ପୁନର୍ଜ୍ଜୀବିତ",
"କ୍ଲୀଂହ୍ଲୀ", "ରୁକ୍ମିଣୀ", "ଯାଚ୍ଞ।", "କେଁ କଟର୍", "ହଠାତ୍", "ନ୍ଦୁନ୍ଦୁଭି",
"ଭର୍ତ୍ତୃହରି", "ପରାଙ୍ମୁଖ", "ସଂସ୍ଥାପକ", "ଦୁର୍ଯ୍ୟୋଧନ", "ଅର୍ଜ୍ଜୁନ",
"ଜଳାର୍ଣ୍ଣବ", "କଟକ", "ଅସତ୍କର୍ମ", "ନୃପସ୍ଥାୟକ",
"ଘୂର୍ଣ୍ଣିବାତ୍ୟା"
];
// Report the syllable count of each sample; strlen itself logs the reversal.
strs.forEach((word) => {
  const syllableCount = strlen(word);
  console.log(`Length of ${word} : ${syllableCount}`);
});
No comments:
Post a Comment