/*
The contents of this file are subject to the Mozilla Public License Version 1.1
(the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.mozilla.org/MPL/

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
the specific language governing rights and limitations under the License.

The Original Code is comprised of the ADF directory

The Initial Developer of the Original Code is
PaperThin, Inc. Copyright (c) 2009-2024.
All Rights Reserved.

By downloading, modifying, distributing, using and/or accessing any files
in this directory, you agree to the terms and conditions of the applicable
end user license agreement.
*/

/* *************************************************************** */
/*
Author:
	PaperThin, Inc.
Name:
	data_2_1.cfc
Summary:
	Data Utils component functions for the ADF Library
Version:
	2.1
History:
	2023-03-02 - GAC - Created
	2024-06-25 - GAC - Added calcStartEndRows(), symbolsToASCII(), and convertTagAttrsSingleToDoubleQuotes() methods
	2024-07-09 - GAC - Added listToEmptyStruct(), generateRandomString(), and removeDuplicatesFromArray() functions
*/
component displayname="data_2_1" extends="data_2_0" hint="Data Utils component functions for the ADF Library" output="no" {

	/* PROPERTIES */
	property name="version" type="string" default="1_0_0";
	property name="type" value="singleton";
	property name="wikiTitle" value="Data_2_1";

	/*
	Name:
		parseHTMLTag(tag)
	Summary:
		Splits an HTML tag string into a structure with the keys:
			NAME    - tag name, upper-cased ("" when no tag name could be matched)
			HTML    - the original tag string, trimmed
			ATTRIBS - struct of attribute name/value pairs
		Attribute values wrapped in double quotes have the quotes removed.
		Attributes without a value (ex. disabled, readonly) are stored with an empty string.
		Attribute names may contain word characters, "-" and "+" (so data-* attributes are captured).

		Limitations:
			- Only double-quoted or unquoted attribute values are recognised; single-quoted
			  values are treated as unquoted (see convertTagAttrsSingleToDoubleQuotes()).
			- Tag names are matched with \w+, so hyphenated (custom element) tag names are not matched.

		Based on article by Ben Nadel
		https://www.bennadel.com/blog/779-parsing-html-tag-data-into-a-coldfusion-structure.htm
	Arguments:
		string - tag
	Returns:
		struct
	History:
		2023-03-03 - GAC - Added
						 - Updated RegEx patterns to fix issues with tagName matching and not handling data-* attributes properly
	*/
	public struct function parseHTMLTag(required string tag) {
		var tagData = {};
		var patternClass = CreateObject("java", "java.util.regex.Pattern");
		// The tag name is the run of word characters right after "<" that is followed by
		// whitespace or the end of the tag. The previous pattern ( <(\w+)\s+\w+.*?> ) required
		// at least one attribute, so attribute-less tags such as <br> returned an empty name.
		var nameRegEx = "<(\w+)(?=\s|/?>)";
		// Attribute name, optionally followed by =value where the value is either
		// double-quoted or a run of non-whitespace / non-">" characters.
		// The name must be at least one character long; a "*" quantifier here lets the
		// whitespace before "/>" or ">" register as an attribute with an empty name.
		var attrRegEx = "\s+([\w+\-]+)(?:\s*=\s*(""[^""]*""|[^\s>]*))?";
		var nameMatcher = "";
		var attrMatcher = "";
		var attrName = "";
		var attrValue = "";

		// Same key assignments (dot notation) as the original so key casing is unchanged
		tagData.HTML = TRIM(arguments.tag);
		tagData.Name = "";
		tagData.Attribs = {};

		// There can only be ONE tag name, so a single find() is enough
		nameMatcher = patternClass.Compile(nameRegEx).Matcher(arguments.tag);
		if ( nameMatcher.Find() ) {
			tagData.Name = UCase(TRIM(nameMatcher.Group(JavaCast('int',1))));
		}

		// Keep looping over the attributes while we have more to match
		attrMatcher = patternClass.Compile(attrRegEx).Matcher(arguments.tag);
		while ( attrMatcher.Find() ) {
			attrName = attrMatcher.Group(JavaCast('int',1));

			// Default to the empty string; value-less attributes keep this default
			tagData.Attribs[ attrName ] = "";

			// Group 2 is NULL when the attribute has no "=value" part
			attrValue = attrMatcher.Group(JavaCast('int',2));
			if ( !isNull(attrValue) ) {
				// Strip one leading and one trailing double quote so quoted and
				// unquoted attribute values are stored consistently
				tagData.Attribs[ attrName ] = attrValue.ReplaceAll("^""|""$","");
			}
		}

		return tagData;
	}

	/*
	Name:
		calcStartEndRows(total,start,qty)
	Summary:
		Calculates the start and end row numbers for a page of records.
			- start is used when it is greater than 1 and not greater than total, otherwise 1 is used
			- when qty is numeric and less than total, the end row is start + qty - 1,
			  limited to total so the range never runs past the last row
			- when qty is not numeric (ex. "all") or is not less than total, the end row is total
			- the end row is never less than the start row
	Arguments:
		numeric - total
		numeric - start
		string - qty
	Returns:
		struct - with the keys 'start' and 'end'
	History:
		2023-06-22 - GAC - Added
	*/
	public struct function calcStartEndRows(required numeric total, numeric start=1, string qty="all") {
		var startRow = 1;
		var endRow = arguments.total;

		if ( arguments.start GT startRow AND arguments.start LTE arguments.total ) {
			startRow = arguments.start;
		}

		if ( IsNumeric(arguments.qty) AND arguments.qty LT arguments.total ) {
			// Limit to total: without this a page starting near the end overshoots
			// (ex. total=10, start=8, qty=5 produced an end row of 12)
			endRow = min(startRow + arguments.qty - 1, arguments.total);
		}

		// Covers zero/negative qty values and a total of less than 1
		if ( endRow < startRow ) {
			endRow = startRow;
		}

		return {
			'start': startRow,
			'end': endRow
		};
	}

	/*
	Name:
		symbolsToASCII(inputString)
	Summary:
		Map Symbols & Punctuation to ASCII (20200604)
		Converting Unicode punctuation and symbols to ASCII punctuation and symbols is
		imperative in Natural language processing (NLP) for preserving the original documents.

		Each replaceAll() call below is java.lang.String.replaceAll(), so the first argument
		is a Java regular expression (the \uXXXX escapes are interpreted by the Java regex
		engine) and the second argument is a Java replacement string, in which "\" and "$"
		are special characters.

		Based on mapping from Lexical Systems Group:
		https://lexsrv3.nlm.nih.gov/LexSysGroup/Projects/lvg/2013/docs/designDoc/UDF/unicode/NormOperations/mapSymbolToAscii.html
		Blog: https://dev.to/gamesover/convert-symbols-punctuation-to-ascii-using-coldfusion-java-3l6a
		TryCF: https://trycf.com/gist/6f35220d47caa7fdbf75eb884ff1cec7
	Arguments:
		string - inputString
	Returns:
		string
	History:
		2023-11-08 - GAC - Added
	*/
	string function symbolsToASCII(required string inputString) {
		var retStr = javacast("string", arguments.inputString);

		retStr = retStr.replaceAll("[\u00B4\u02B9\u02BC\u02C8\u0301\u2018\u2019\u201B\u2032\u2034\u2037]", chr(39)); // apostrophe (')
		retStr = retStr.replaceAll("[\u00AB\u00BB\u02BA\u030B\u030E\u201C\u201D\u201E\u201F\u2033\u2036\u3003\u301D\u301E]", chr(34)); // quotation mark (")
		retStr = retStr.replaceAll("[\u00AD\u2010\u2011\u2012\u2013\u2014\u2212\u2015]", chr(45)); // hyphen (-)
		retStr = retStr.replaceAll("[\u01C3\u2762]", chr(33)); // exclamation mark (!)
		retStr = retStr.replaceAll("[\u266F]", chr(35)); // music sharp sign
		retStr = retStr.replaceAll("[\u066A\u2052]", chr(37)); // percent sign (%)
		retStr = retStr.replaceAll("[\u066D\u204E\u2217\u2731\u00D7]", chr(42)); // asterisk (*)
		retStr = retStr.replaceAll("[\u201A\uFE51\uFF64\u3001]", chr(44)); // comma (,)
		retStr = retStr.replaceAll("[\u00F7\u0338\u2044\u2215]", chr(47)); // slash (/)
		retStr = retStr.replaceAll("[\u0589\u05C3\u2236]", chr(58)); // colon (:)
		retStr = retStr.replaceAll("[\u203D]", chr(63)); // question mark (?)
		retStr = retStr.replaceAll("[\u27E6]", chr(91)); // opening square bracket ([)
		// A single backslash is an incomplete escape sequence in a Java replacement string
		// and makes replaceAll() throw as soon as a match is found, so pass an escaped backslash.
		retStr = retStr.replaceAll("[\u20E5\u2216]", chr(92) & chr(92)); // backslash (\)
		retStr = retStr.replaceAll("[\u301B]", chr(93)); // closing square bracket (])
		retStr = retStr.replaceAll("[\u02C4\u02C6\u0302\u2038\u2303]", chr(94)); // caret (^)
		retStr = retStr.replaceAll("[\u02CD\u0331\u0332\u2017]", chr(95)); // underscore (_)
		retStr = retStr.replaceAll("[\u02CB\u0300\u2035]", chr(96)); // grave accent (`)
		retStr = retStr.replaceAll("[\u2983]", chr(123)); // opening curly bracket ({)
		retStr = retStr.replaceAll("[\u01C0\u05C0\u2223\u2758]", chr(124)); // vertical bar / pipe (|)
		retStr = retStr.replaceAll("[\u2016]", chr(124) & chr(124)); // double vertical bar / double pipe (||)
		retStr = retStr.replaceAll("[\u02DC\u0303\u2053\u223C\u301C]", chr(126)); // tilde (~)
		retStr = retStr.replaceAll("[\u2039\u2329\u27E8\u3008]", chr(60)); // less-than sign (<)
		retStr = retStr.replaceAll("[\u2264\u2266]", chr(60) & chr(61)); // less-than equal-to sign (<=)
		retStr = retStr.replaceAll("[\u203A\u232A\u27E9\u3009]", chr(62)); // greater-than sign (>)
		retStr = retStr.replaceAll("[\u2265\u2267]", chr(62) & chr(61)); // greater-than equal-to sign (>=)
		retStr = retStr.replaceAll("[\u200B\u2060\uFEFF]", chr(32)); // zero-width characters to a space ( )

		// vulgar fractions
		retStr = retStr.replaceAll("\u2153", "1/3");
		retStr = retStr.replaceAll("\u2154", "2/3");
		retStr = retStr.replaceAll("\u2155", "1/5");
		retStr = retStr.replaceAll("\u2156", "2/5");
		retStr = retStr.replaceAll("\u2157", "3/5");
		retStr = retStr.replaceAll("\u2158", "4/5");
		retStr = retStr.replaceAll("\u2159", "1/6");
		retStr = retStr.replaceAll("\u215A", "5/6");
		retStr = retStr.replaceAll("\u215B", "1/8");
		retStr = retStr.replaceAll("\u215C", "3/8");
		retStr = retStr.replaceAll("\u215D", "5/8");
		retStr = retStr.replaceAll("\u215E", "7/8");

		// horizontal ellipsis to three periods
		retStr = retStr.replaceAll("\u2026", "...");

		return retStr;
	}

	/*
	Name:
		convertTagAttrsSingleToDoubleQuotes(inputStr)
	Summary:
		Converts single-quoted attribute values (='value') to double-quoted values (="value").
		Double quotes found inside a converted value are changed to single quotes so the
		resulting attribute stays well formed.
		NOTE: the pattern matches every ='...' sequence in the string; it does not check
		that the sequence is located inside an HTML tag.
	Arguments:
		string - inputStr
	Returns:
		string
	History:
		2023-11-08 - GAC - Created
		2024-09-06 - GAC - Updated the function and arguments definitions
	*/
	string function convertTagAttrsSingleToDoubleQuotes(string inputStr="") {
		var pattern = "='([^']*?)'"; // regular expression pattern to match single quoted attribute values
		var matchArr = reMatch(pattern, arguments.inputStr);
		var srcStr = '';
		var innerStr = '';
		var destStr = '';

		for ( srcStr in matchArr ) {
			// Each match has the form ='value' - pull out the value between the quotes
			// (a match of ='' has no inner value at all)
			innerStr = '';
			if ( len(srcStr) GT 3 ) {
				innerStr = mid(srcStr, 3, len(srcStr) - 3);
			}

			// Inner double quotes become single quotes, then the value is wrapped in double quotes
			destStr = '="' & replace(innerStr, '"', "'", "all") & '"';

			// Matches are processed in document order, so the first remaining
			// occurrence is always the one that belongs to this match
			arguments.inputStr = replace(arguments.inputStr, srcStr, destStr, "one");
		}

		return arguments.inputStr;
	}

	/*
	Name:
		listToEmptyStruct(list)
	Summary:
		Converts a comma-delimited list of values into a structure that has one key
		per list item, each with an empty string as its value.
	Arguments:
		string - list
	Returns:
		struct
	History:
		2024-07-09 - GAC - Created
	*/
	struct function listToEmptyStruct(required string list) {
		var retStruct = {};
		var keyName = '';

		for ( keyName in ListToArray(arguments.list) ) {
			retStruct[keyName] = '';
		}

		return retStruct;
	}

	/*
	Name:
		generateRandomString(length,chars)
	Summary:
		Creates a random string of a desired length, using a pre-defined character list.
		Each character is picked with RandRange() using the SHA1PRNG algorithm.
		The 'chars' argument may be passed as a plain string or as a comma-delimited list
		(commas are removed before use, so a comma can not be one of the characters).

		Based on: https://lunaticthinker.me/index.php/cfml-generating-random-string/
	Arguments:
		length - integer
		chars - string
	Returns:
		string
	History:
		2024-07-29 - GAC - Added
	*/
	public string function generateRandomString(required numeric length, string chars="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") {
		var retStr = "";
		var i = 1;
		// remove commas if 'chars' is passed as a comma-delimited list
		var charPool = replace(arguments.chars, ",", "", "all");
		var poolSize = len(charPool);

		for ( i = 1; i <= arguments.length; i++ ) {
			retStr &= Mid(charPool, RandRange(1, poolSize, "SHA1PRNG"), 1);
		}

		return retStr;
	}

	/*
	Name:
		removeDuplicatesFromArray(input,sortArray,sortType,sortDirection)
	Summary:
		Removes duplicates from an array, keeping the first occurrence of each value in
		its original position (the values are passed through a java.util.LinkedHashSet).
		Values are compared by the Java set, so string comparison is case-sensitive.
		Optionally sorts the de-duplicated array with arraySort().

		Based on: https://adampresley.github.io/2009/10/22/removing-duplicate-array-items-in-coldfusion.html
	Arguments:
		input - array
		sortArray - boolean
		sortType - string
		sortDirection - string
	Returns:
		array - the declared return type was previously 'string', which fails the
				return type validation because the function returns an array
	History:
		2024-07-29 - GAC - Added
	*/
	public array function removeDuplicatesFromArray(required array input, boolean sortArray="false", string sortType="textnocase", string sortDirection="asc") {
		var list = arguments.input.subList(0, arguments.input.size());
		var uniqueSet = createObject("java", "java.util.LinkedHashSet").init(list);
		var result = [];

		result.addAll(uniqueSet);

		if ( arguments.sortArray ) {
			arraySort(result, arguments.sortType, arguments.sortDirection);
		}

		return result;
	}

}